From 7e8d97dd3f0aaf05265f947997310ca3827d3c06 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Fri, 13 Jun 2025 02:46:32 -0700
Subject: [PATCH] [BugFix] Honor `enable_caching` in connector-delayed kvcache
 load case (#19435)

Signed-off-by: Nick Hill
---
 vllm/v1/core/kv_cache_manager.py | 9 +++++----
 vllm/v1/core/sched/scheduler.py  | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index 2e09f4c0aacfc..99531e7d213dd 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -381,10 +381,11 @@ class KVCacheManager:
             self.coordinator.get_blocks(request_id)).get_block_ids()
 
     def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
-        """Cache the blocks for the request."""
-        block_hashes = self.req_to_block_hashes[request.request_id]
-        self.coordinator.cache_blocks(request, block_hashes,
-                                      num_computed_tokens)
+        """Cache the blocks for the request, if enabled."""
+        if self.enable_caching:
+            block_hashes = self.req_to_block_hashes[request.request_id]
+            self.coordinator.cache_blocks(request, block_hashes,
+                                          num_computed_tokens)
 
     def create_empty_block_list(self) -> KVCacheBlocks:
         """Creates a new KVCacheBlocks instance with no blocks."""
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 3d7bbe7e0e396..d4eb8b8d622f1 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -1015,6 +1015,7 @@ class Scheduler(SchedulerInterface):
             num_computed_tokens = min(num_computed_tokens, request.num_tokens)
             if num_computed_tokens == request.num_tokens:
                 num_computed_tokens -= 1
+            # This will cache the blocks iff caching is enabled.
             self.kv_cache_manager.cache_blocks(request, num_computed_tokens)
 
             # Update the request state for scheduling.
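
For context, below is a minimal sketch of the guarded behaviour this patch introduces: `cache_blocks` becomes a no-op when prefix caching is disabled, so the connector-delayed KV-cache load path in the scheduler no longer triggers block caching. This is not the vLLM implementation; `DummyCoordinator`, `KVCacheManagerSketch`, and the toy `Request` type are stand-ins invented for illustration.

# Sketch only: simplified stand-ins, not the actual vLLM types.
from dataclasses import dataclass, field


@dataclass
class Request:
    request_id: str


@dataclass
class DummyCoordinator:
    cached: dict = field(default_factory=dict)

    def cache_blocks(self, request, block_hashes, num_computed_tokens):
        # Record which blocks were cached for the request.
        self.cached[request.request_id] = (block_hashes, num_computed_tokens)


class KVCacheManagerSketch:
    def __init__(self, enable_caching: bool):
        self.enable_caching = enable_caching
        self.coordinator = DummyCoordinator()
        self.req_to_block_hashes: dict[str, list[int]] = {}

    def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
        """Cache the blocks for the request, if enabled."""
        # After the patch, this is a no-op when prefix caching is disabled;
        # previously the hash lookup and coordinator call ran unconditionally.
        if self.enable_caching:
            block_hashes = self.req_to_block_hashes[request.request_id]
            self.coordinator.cache_blocks(request, block_hashes,
                                          num_computed_tokens)


if __name__ == "__main__":
    req = Request("req-0")

    enabled = KVCacheManagerSketch(enable_caching=True)
    enabled.req_to_block_hashes["req-0"] = [0xAB, 0xCD]
    enabled.cache_blocks(req, num_computed_tokens=32)

    disabled = KVCacheManagerSketch(enable_caching=False)
    disabled.cache_blocks(req, num_computed_tokens=32)  # no-op

    print(enabled.coordinator.cached)   # {'req-0': ([171, 205], 32)}
    print(disabled.coordinator.cached)  # {}

Centralizing the `enable_caching` check inside `cache_blocks` means callers such as the scheduler's delayed-load path only need the explanatory comment added above, rather than repeating the flag check at every call site.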