Correct alignment in the seq_len diagram. (#5592)
Co-authored-by: Liqian Chen <liqian.chen@deeplang.ai>
parent 9333fb8eb9
commit 9e74d9d003
@@ -83,7 +83,7 @@ class FlashAttentionMetadata(AttentionMetadata):
     # |---------------- N iteration ---------------------|
     # |- tokenA -|......................|-- newTokens ---|
     # |---------- context_len ----------|
-    # |-------------------- seq_len ----------------------|
+    # |-------------------- seq_len ---------------------|
     #                                   |-- query_len ---|
 
     # Maximum query length in the batch. None for decoding.
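For readers skimming the diff: the diagram documents how the per-sequence lengths relate during chunked prefill. context_len counts tokens whose KV cache has already been computed (tokenA plus earlier chunks), query_len counts the new tokens processed in this iteration (newTokens), and seq_len spans both, so seq_len = context_len + query_len. A minimal sketch of that invariant (plain Python, not vLLM code; names are illustrative):

# Sketch of the length invariant illustrated by the diagram above.
def query_len(seq_len: int, context_len: int) -> int:
    """Number of new tokens ("newTokens") handled in the current iteration."""
    assert seq_len >= context_len
    return seq_len - context_len

# Example: 34 tokens of prior context ("tokenA" plus chunks already prefilled)
# and 16 new tokens this step give seq_len = 50.
assert query_len(seq_len=50, context_len=34) == 16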