[Kernel] Raise an exception in MoE kernel if the batch size is larger than 65k (#5939)
This commit is contained in:
parent
7c01f70641
commit
f7dac83d95
@ -423,6 +423,11 @@ def fused_experts(hidden_states: torch.Tensor,
|
|||||||
M, _ = hidden_states.shape
|
M, _ = hidden_states.shape
|
||||||
E, N, _ = w1.shape
|
E, N, _ = w1.shape
|
||||||
|
|
||||||
|
if M > 65536:
|
||||||
|
# https://github.com/vllm-project/vllm/issues/5938
|
||||||
|
raise ValueError("MoE kernel does not support more than 65536 tokens, "
|
||||||
|
f"but got {M}")
|
||||||
|
|
||||||
if override_config:
|
if override_config:
|
||||||
config = override_config
|
config = override_config
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user