diff --git a/vllm/attention/ops/prefix_prefill.py b/vllm/attention/ops/prefix_prefill.py index 4cd4976a..70b544b6 100644 --- a/vllm/attention/ops/prefix_prefill.py +++ b/vllm/attention/ops/prefix_prefill.py @@ -718,7 +718,7 @@ if triton.__version__ >= "2.1.0": b_ctx_len, alibi_slopes, v_cache.shape[3], - 8, + k_cache.shape[4], o, b_loc.stride(0), b_loc.stride(1), @@ -768,7 +768,7 @@ if triton.__version__ >= "2.1.0": b_seq_len, b_ctx_len, v_cache.shape[3], - 8, + k_cache.shape[4], o, b_loc.stride(0), b_loc.stride(1),