From e0f63e4a3509a9323339eee67c96ac3c93d15923 Mon Sep 17 00:00:00 2001
From: Zebing Lin
Date: Fri, 1 Aug 2025 03:23:29 -0400
Subject: [PATCH] [Core] Avoid repeated len(block_token_ids) check in hash_request_tokens (#21781)

Signed-off-by: linzebing
---
 vllm/v1/core/kv_cache_utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 25520eb655111..eab1560b1a18c 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -567,12 +567,10 @@ def hash_request_tokens(hash_function: Any, block_size: int,
 
     ret = []
     parent_block_hash_value = None
-    for start in range(0, len(token_ids), block_size):
+    # Only full blocks will be hashed
+    for start in range(0, len(token_ids) - block_size + 1, block_size):
         end = start + block_size
         block_token_ids = token_ids[start:end]
-        # Do not hash the block if it is not full.
-        if len(block_token_ids) < block_size:
-            break
 
         if req_need_extra_keys:
             # MM and LoRA requests need extra keys for block-hash computation.