From f1981db101783a68c53ba0ea2be4bd0850f3e817 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 15 Sep 2025 19:53:58 +0000 Subject: [PATCH] minor Signed-off-by: Woosuk Kwon --- vllm/v1/worker/gpu/model_runner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/v1/worker/gpu/model_runner.py b/vllm/v1/worker/gpu/model_runner.py index 79fdd91ed6b5d..334c6f953ff2f 100644 --- a/vllm/v1/worker/gpu/model_runner.py +++ b/vllm/v1/worker/gpu/model_runner.py @@ -48,6 +48,7 @@ class GPUModelRunner: if self.cache_config.cache_dtype == "auto": self.kv_cache_dtype = self.dtype else: + # Quantized KV cache. self.kv_cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[ self.cache_config.cache_dtype] @@ -217,6 +218,8 @@ class GPUModelRunner: self.kv_cache_config.kv_cache_groups): block_table = block_tables[i] slot_mapping = slot_mappings[i] + num_common_prefix_blocks = 0 + return InputBatch( req_ids=req_ids,