[Bugfix] Fix illegal memory access in FP8 MoE kernel (#6382)
This commit is contained in:
parent
21b2dcedab
commit
75f64d8b94
@@ -492,12 +492,14 @@ def fused_experts(hidden_states: torch.Tensor,
|
|||||||
if tokens_in_chunk == 0:
|
if tokens_in_chunk == 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
if tokens_in_chunk < CHUNK_SIZE:
|
if tokens_in_chunk < CHUNK_SIZE and chunk > 0:
|
||||||
# will only happen in the last chunk
|
# Adjust the intermediate cache size and config for the last
|
||||||
|
# chunk. Note that in most cases we only have one chunk
|
||||||
|
# so the cache size and config are already set correctly and
|
||||||
|
# do not need to be adjusted.
|
||||||
intermediate_cache1 = intermediate_cache1[:tokens_in_chunk]
|
intermediate_cache1 = intermediate_cache1[:tokens_in_chunk]
|
||||||
intermediate_cache2 = intermediate_cache2[:tokens_in_chunk]
|
intermediate_cache2 = intermediate_cache2[:tokens_in_chunk]
|
||||||
intermediate_cache3 = intermediate_cache3[:tokens_in_chunk]
|
intermediate_cache3 = intermediate_cache3[:tokens_in_chunk]
|
||||||
# reload config to get better performance on the last chunk
|
|
||||||
config = get_config_func(tokens_in_chunk)
|
config = get_config_func(tokens_in_chunk)
|
||||||
|
|
||||||
curr_topk_ids = topk_ids[begin_chunk_idx:end_chunk_idx]
|
curr_topk_ids = topk_ids[begin_chunk_idx:end_chunk_idx]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user