From 201c971e96c103273a42cb5606760b498f162f76 Mon Sep 17 00:00:00 2001
From: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Date: Sun, 5 Oct 2025 01:46:03 -0700
Subject: [PATCH] [Perf][Easy] Early stop in request_block_hasher (#26112)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
---
 vllm/v1/core/kv_cache_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 2ff1bb681d80..bbfd93413fbc 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -585,6 +585,10 @@ def get_request_block_hasher(
         start_token_idx = len(request.block_hashes) * block_size
         num_tokens = request.num_tokens
 
+        if start_token_idx + block_size > num_tokens:
+            # Early stop when there are no new full blocks created.
+            return []
+
         curr_mm_idx = 0
         if start_token_idx > 0:
             # Set curr_mm_idx = -1 to indicate the last mm input.