mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 20:35:26 +08:00
[V1] Simplify vision block hash for prefix caching by removing offset from hash (#11646)
This commit is contained in:
parent
2c5718809b
commit
8c3230d8c1
@@ -469,9 +469,9 @@ def test_mm_prefix_caching():
|
|||||||
# Completed block should have hashes with extra keys.
|
# Completed block should have hashes with extra keys.
|
||||||
assert not computed_blocks
|
assert not computed_blocks
|
||||||
assert len(req0.kv_block_hashes) == 3
|
assert len(req0.kv_block_hashes) == 3
|
||||||
assert req0.kv_block_hashes[0].extra_keys == (("aaa", 0), )
|
assert req0.kv_block_hashes[0].extra_keys == ("aaa", )
|
||||||
assert req0.kv_block_hashes[1].extra_keys == (("aaa", 5), ("bbb", 0))
|
assert req0.kv_block_hashes[1].extra_keys == ("aaa", "bbb")
|
||||||
assert req0.kv_block_hashes[2].extra_keys == (("bbb", 2), )
|
assert req0.kv_block_hashes[2].extra_keys == ("bbb", )
|
||||||
|
|
||||||
blocks = manager.allocate_slots(req0, 59, computed_blocks)
|
blocks = manager.allocate_slots(req0, 59, computed_blocks)
|
||||||
assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4]
|
assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4]
|
||||||
@@ -485,7 +485,7 @@ def test_mm_prefix_caching():
|
|||||||
|
|
||||||
# The just completed block should have hashes with extra keys.
|
# The just completed block should have hashes with extra keys.
|
||||||
assert len(req0.kv_block_hashes) == 4
|
assert len(req0.kv_block_hashes) == 4
|
||||||
assert req0.kv_block_hashes[3].extra_keys == (("ccc", 0), )
|
assert req0.kv_block_hashes[3].extra_keys == ("ccc", )
|
||||||
|
|
||||||
# Cache hit.
|
# Cache hit.
|
||||||
unique_token_ids = [-1] * 7 + [200] * 5
|
unique_token_ids = [-1] * 7 + [200] * 5
|
||||||
|
|||||||
@@ -218,8 +218,8 @@ def generate_block_hash_extra_keys(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# The block contains the current mm input.
|
# The block contains the current mm input.
|
||||||
mm_start = max(0, start_token_idx - offset)
|
extra_keys.append(mm_hashes[curr_mm_idx])
|
||||||
extra_keys.append((mm_hashes[curr_mm_idx], mm_start))
|
|
||||||
if end_token_idx >= offset + length:
|
if end_token_idx >= offset + length:
|
||||||
# If this block contains the end of the current mm input,
|
# If this block contains the end of the current mm input,
|
||||||
# move to the next mm input as this block may also contain
|
# move to the next mm input as this block may also contain
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user