[V1] Simplify vision block hash for prefix caching by removing offset from hash (#11646)

2026-07-28 12:34:27 +08:00 · 2024-12-31 16:56:01 +08:00 · 2024-12-31 16:56:01 +08:00 · 8c3230d8c1
commit 8c3230d8c1
parent 2c5718809b
2 changed files with 6 additions and 6 deletions
--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -469,9 +469,9 @@ def test_mm_prefix_caching():
    # Completed block should have hashes with extra keys.
    assert not computed_blocks
    assert len(req0.kv_block_hashes) == 3
-    assert req0.kv_block_hashes[0].extra_keys == (("aaa", 0), )
-    assert req0.kv_block_hashes[1].extra_keys == (("aaa", 5), ("bbb", 0))
-    assert req0.kv_block_hashes[2].extra_keys == (("bbb", 2), )
+    assert req0.kv_block_hashes[0].extra_keys == ("aaa", )
+    assert req0.kv_block_hashes[1].extra_keys == ("aaa", "bbb")
+    assert req0.kv_block_hashes[2].extra_keys == ("bbb", )

    blocks = manager.allocate_slots(req0, 59, computed_blocks)
    assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4]
@@ -485,7 +485,7 @@ def test_mm_prefix_caching():

    # The just completed block should have hashes with extra keys.
    assert len(req0.kv_block_hashes) == 4
-    assert req0.kv_block_hashes[3].extra_keys == (("ccc", 0), )
+    assert req0.kv_block_hashes[3].extra_keys == ("ccc", )

    # Cache hit.
    unique_token_ids = [-1] * 7 + [200] * 5
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -218,8 +218,8 @@ def generate_block_hash_extra_keys(
                continue

            # The block contains the current mm input.
-            mm_start = max(0, start_token_idx - offset)
-            extra_keys.append((mm_hashes[curr_mm_idx], mm_start))
+            extra_keys.append(mm_hashes[curr_mm_idx])
+
            if end_token_idx >= offset + length:
                # If this block contains the end of the current mm input,
                # move to the next mm input as this block may also contain