diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py index ed04f0a373c51..c21606057fa02 100644 --- a/tests/v1/core/test_prefix_caching.py +++ b/tests/v1/core/test_prefix_caching.py @@ -469,9 +469,9 @@ def test_mm_prefix_caching(): # Completed block should have hashes with extra keys. assert not computed_blocks assert len(req0.kv_block_hashes) == 3 - assert req0.kv_block_hashes[0].extra_keys == (("aaa", 0), ) - assert req0.kv_block_hashes[1].extra_keys == (("aaa", 5), ("bbb", 0)) - assert req0.kv_block_hashes[2].extra_keys == (("bbb", 2), ) + assert req0.kv_block_hashes[0].extra_keys == ("aaa", ) + assert req0.kv_block_hashes[1].extra_keys == ("aaa", "bbb") + assert req0.kv_block_hashes[2].extra_keys == ("bbb", ) blocks = manager.allocate_slots(req0, 59, computed_blocks) assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4] @@ -485,7 +485,7 @@ def test_mm_prefix_caching(): # The just completed block should have hashes with extra keys. assert len(req0.kv_block_hashes) == 4 - assert req0.kv_block_hashes[3].extra_keys == (("ccc", 0), ) + assert req0.kv_block_hashes[3].extra_keys == ("ccc", ) # Cache hit. unique_token_ids = [-1] * 7 + [200] * 5 diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 9ddbff7c9a604..84ff48bf428a0 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -218,8 +218,8 @@ def generate_block_hash_extra_keys( continue # The block contains the current mm input. - mm_start = max(0, start_token_idx - offset) - extra_keys.append((mm_hashes[curr_mm_idx], mm_start)) + extra_keys.append(mm_hashes[curr_mm_idx]) + if end_token_idx >= offset + length: # If this block contains the end of the current mm input, # move to the next mm input as this block may also contain