[Core] Avoid repeated len(block_token_ids) check in hash_request_tokens (#21781)

Signed-off-by: linzebing <linzebing1995@gmail.com>
2026-07-17 08:27:10 +08:00 · 2025-08-01 03:23:29 -04:00 · 2025-08-01 03:23:29 -04:00 · e0f63e4a35
commit e0f63e4a35
parent b4e081cb15
1 changed files with 2 additions and 4 deletions
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -567,12 +567,10 @@ def hash_request_tokens(hash_function: Any, block_size: int,

    ret = []
    parent_block_hash_value = None
-    for start in range(0, len(token_ids), block_size):
+    # Only full blocks will be hashed
+    for start in range(0, len(token_ids) - block_size + 1, block_size):
        end = start + block_size
        block_token_ids = token_ids[start:end]
-        # Do not hash the block if it is not full.
-        if len(block_token_ids) < block_size:
-            break

        if req_need_extra_keys:
            # MM and LoRA requests need extra keys for block-hash computation.