From e0f63e4a3509a9323339eee67c96ac3c93d15923 Mon Sep 17 00:00:00 2001
From: Zebing Lin <linzebing1995@gmail.com>
Date: Fri, 1 Aug 2025 03:23:29 -0400
Subject: [PATCH] [Core] Avoid repeated len(block_token_ids) check in
 hash_request_tokens (#21781)

Signed-off-by: linzebing <linzebing1995@gmail.com>
---
 vllm/v1/core/kv_cache_utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 25520eb655111..eab1560b1a18c 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -567,12 +567,10 @@ def hash_request_tokens(hash_function: Any, block_size: int,
 
     ret = []
     parent_block_hash_value = None
-    for start in range(0, len(token_ids), block_size):
+    # Hash only full blocks; the range bound excludes a trailing partial block.
+    for start in range(0, len(token_ids) - block_size + 1, block_size):
         end = start + block_size
         block_token_ids = token_ids[start:end]
-        # Do not hash the block if it is not full.
-        if len(block_token_ids) < block_size:
-            break
 
         if req_need_extra_keys:
             # MM and LoRA requests need extra keys for block-hash computation.