[Kernel] Raise an exception in MoE kernel if the batch size is larger than 65k (#5939)
This commit is contained in:
parent
7c01f70641
commit
f7dac83d95
@@ -423,6 +423,11 @@ def fused_experts(hidden_states: torch.Tensor,
|
||||
M, _ = hidden_states.shape
|
||||
E, N, _ = w1.shape
|
||||
|
||||
if M > 65536:
|
||||
# https://github.com/vllm-project/vllm/issues/5938
|
||||
raise ValueError("MoE kernel does not support more than 65536 tokens, "
|
||||
f"but got {M}")
|
||||
|
||||
if override_config:
|
||||
config = override_config
|
||||
else:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user