minor

Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
2026-07-29 07:41:18 +08:00 · 2025-09-15 19:53:58 +00:00 · 2025-09-15 19:53:58 +00:00 · f1981db101
commit f1981db101
parent 69b17891a3
1 changed files with 3 additions and 0 deletions
--- a/vllm/v1/worker/gpu/model_runner.py
+++ b/vllm/v1/worker/gpu/model_runner.py
@@ -48,6 +48,7 @@ class GPUModelRunner:
        if self.cache_config.cache_dtype == "auto":
            self.kv_cache_dtype = self.dtype
        else:
+            # Quantized KV cache.
            self.kv_cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[
                self.cache_config.cache_dtype]

@@ -217,6 +218,8 @@ class GPUModelRunner:
                self.kv_cache_config.kv_cache_groups):
            block_table = block_tables[i]
            slot_mapping = slot_mappings[i]
+            num_common_prefix_blocks = 0
+

        return InputBatch(
            req_ids=req_ids,