From 9e74d9d003d546c17dca472c3f4b48be651f1d7c Mon Sep 17 00:00:00 2001
From: Charles Riggins
Date: Tue, 18 Jun 2024 00:05:33 +0800
Subject: [PATCH] Correct alignment in the seq_len diagram. (#5592)

Co-authored-by: Liqian Chen
---
 vllm/attention/backends/flash_attn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
index 300bab72..1c48e2a0 100644
--- a/vllm/attention/backends/flash_attn.py
+++ b/vllm/attention/backends/flash_attn.py
@@ -83,7 +83,7 @@ class FlashAttentionMetadata(AttentionMetadata):
     # |---------------- N iteration ---------------------|
     # |- tokenA -|......................|-- newTokens ---|
     # |---------- context_len ----------|
-    # |-------------------- seq_len ----------------------|
+    # |-------------------- seq_len ---------------------|
     # |-- query_len ---|
 
     # Maximum query length in the batch. None for decoding.
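
Note (not part of the patch): the diagram being realigned encodes the invariant
seq_len = context_len + query_len, where context_len counts tokens already
computed in earlier iterations and query_len counts the new tokens processed
in this step. A minimal Python sketch of that relationship follows; the helper
name and function are hypothetical illustrations, not vLLM API.

    def split_sequence(seq_len: int, query_len: int) -> tuple[int, int]:
        """Return (context_len, query_len) for a sequence of seq_len tokens,
        of which the last query_len are new in the current iteration."""
        context_len = seq_len - query_len
        assert context_len >= 0, "query_len cannot exceed seq_len"
        return context_len, query_len

    # Example: 10 previously cached tokens plus 6 new tokens this step.
    assert split_sequence(seq_len=16, query_len=6) == (10, 6)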