diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 2ff1bb681d80..bbfd93413fbc 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -585,6 +585,10 @@ def get_request_block_hasher( start_token_idx = len(request.block_hashes) * block_size num_tokens = request.num_tokens + if start_token_idx + block_size > num_tokens: + # Early stop when there are no new full blocks created. + return [] + curr_mm_idx = 0 if start_token_idx > 0: # Set curr_mm_idx = -1 to indicate the last mm input.