From e1684a766ad3f2f1531c273fa8a056fc14c4c71e Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Sat, 13 Jul 2024 03:30:54 +0200
Subject: [PATCH] [Bugfix] Fix hard-coded value of x in context_attention_fwd (#6373)

Signed-off-by: Thomas Parnell
---
 vllm/attention/ops/prefix_prefill.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/ops/prefix_prefill.py b/vllm/attention/ops/prefix_prefill.py
index 4cd4976a..70b544b6 100644
--- a/vllm/attention/ops/prefix_prefill.py
+++ b/vllm/attention/ops/prefix_prefill.py
@@ -718,7 +718,7 @@ if triton.__version__ >= "2.1.0":
                 b_ctx_len,
                 alibi_slopes,
                 v_cache.shape[3],
-                8,
+                k_cache.shape[4],
                 o,
                 b_loc.stride(0),
                 b_loc.stride(1),
@@ -768,7 +768,7 @@ if triton.__version__ >= "2.1.0":
             b_seq_len,
             b_ctx_len,
             v_cache.shape[3],
-            8,
+            k_cache.shape[4],
             o,
             b_loc.stride(0),
             b_loc.stride(1),