mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-29 03:37:13 +08:00
[v1][Bugfix] Add extra_keys to block_hash for prefix caching (#12603)
This PR adds extra keys to the block hash so that two blocks with the
same token IDs but different extra_keys in their parent blocks get
different hash values. For example, it generates different hash values
for the second block of the following two requests:
```python
request1 = make_request(
request_id=0,
prompt_token_ids=[_ for _ in range(6)],
mm_positions=[{
"offset": 0,
"length": 3
}, {
"offset": 3,
"length": 3
}],
mm_hashes=["hash1", "hash2"],
)
request2 = make_request(
request_id=1,
prompt_token_ids=[_ for _ in range(6)],
mm_positions=[{
"offset": 0,
"length": 3
}, {
"offset": 3,
"length": 3
}],
mm_hashes=["hash3", "hash2"],
)
```
---------
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
parent
60bcef000e
commit
89003c4082
@ -192,7 +192,7 @@ def test_hash_block_tokens():
|
||||
extra_keys)
|
||||
assert isinstance(block_hash, BlockHashType)
|
||||
assert block_hash.hash_value == hash(
|
||||
(parent_block_hash, *curr_block_token_ids))
|
||||
(parent_block_hash, curr_block_token_ids, extra_keys))
|
||||
assert block_hash.token_ids == curr_block_token_ids
|
||||
assert block_hash.extra_keys == extra_keys
|
||||
|
||||
@ -227,6 +227,38 @@ def test_hash_request_tokens():
|
||||
assert block_hashes[1].extra_keys == ("hash2", )
|
||||
|
||||
|
||||
def test_hash_tokens_different_mm_input():
|
||||
request1 = make_request(
|
||||
request_id=0,
|
||||
prompt_token_ids=[_ for _ in range(6)],
|
||||
mm_positions=[{
|
||||
"offset": 0,
|
||||
"length": 3
|
||||
}, {
|
||||
"offset": 3,
|
||||
"length": 3
|
||||
}],
|
||||
mm_hashes=["hash1", "hash2"],
|
||||
)
|
||||
request2 = make_request(
|
||||
request_id=1,
|
||||
prompt_token_ids=[_ for _ in range(6)],
|
||||
mm_positions=[{
|
||||
"offset": 0,
|
||||
"length": 3
|
||||
}, {
|
||||
"offset": 3,
|
||||
"length": 3
|
||||
}],
|
||||
mm_hashes=["hash3", "hash2"],
|
||||
)
|
||||
block_size = 3
|
||||
block_hashes1 = hash_request_tokens(block_size, request1)
|
||||
block_hashes2 = hash_request_tokens(block_size, request2)
|
||||
assert block_hashes1[0] != block_hashes2[0]
|
||||
assert block_hashes1[1] != block_hashes2[1]
|
||||
|
||||
|
||||
def test_hash_request_tokens_no_mm_inputs():
|
||||
request = make_request(
|
||||
request_id=0,
|
||||
|
||||
@ -262,8 +262,10 @@ def hash_block_tokens(
|
||||
The hash value of the block and the token ids in the block.
|
||||
The entire tuple is used as the hash key of the block.
|
||||
"""
|
||||
return BlockHashType(hash((parent_block_hash, *curr_block_token_ids)),
|
||||
tuple(curr_block_token_ids), extra_keys)
|
||||
curr_block_token_ids_tuple = tuple(curr_block_token_ids)
|
||||
return BlockHashType(
|
||||
hash((parent_block_hash, curr_block_token_ids_tuple, extra_keys)),
|
||||
curr_block_token_ids_tuple, extra_keys)
|
||||
|
||||
|
||||
def hash_request_tokens(block_size: int,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user