From e9bd114a62921f2620c3290ab95e93fc579ea07d Mon Sep 17 00:00:00 2001
From: Liqian Chen
Date: Mon, 17 Jun 2024 14:53:02 +0800
Subject: [PATCH] Correct alignment in the seq_len diagram.

---
 vllm/attention/backends/flash_attn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
index 300bab72877b8..1c48e2a0bb33d 100644
--- a/vllm/attention/backends/flash_attn.py
+++ b/vllm/attention/backends/flash_attn.py
@@ -83,7 +83,7 @@ class FlashAttentionMetadata(AttentionMetadata):
     # |---------------- N iteration ---------------------|
     # |- tokenA -|......................|-- newTokens ---|
     # |---------- context_len ----------|
-    # |-------------------- seq_len ----------------------|
+    # |-------------------- seq_len ---------------------|
     #                                   |-- query_len ---|

     # Maximum query length in the batch. None for decoding.