Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-18 05:35:01 +08:00)
[Core] Fix abrupt request abort (#18485)
Signed-off-by: nicklucche <nlucches@redhat.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent ca27f0f9c1
commit b6a3a9f76d
@@ -164,7 +164,7 @@ class KVCacheCoordinator(ABC):
         Get the blocks for the request.
         """
         return [
-            manager.req_to_blocks[request_id]
+            manager.req_to_blocks.get(request_id) or []
             for manager in self.single_type_managers
         ]
 
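The one-line change above is the heart of the abort fix: if a request has already been released from a manager's bookkeeping, `manager.req_to_blocks[request_id]` raises a KeyError, while `.get(request_id) or []` degrades to an empty block list. A minimal sketch of the two behaviors, using a hypothetical stand-in class rather than the real single-type managers:

    # Hypothetical stand-in for a single-type manager's request -> blocks map.
    class FakeManager:
        def __init__(self) -> None:
            self.req_to_blocks: dict[str, list[int]] = {"req-0": [3, 7, 9]}

    manager = FakeManager()

    # Old lookup: a request that was never (or no longer) tracked raises a
    # KeyError, which is what made an abrupt abort blow up.
    try:
        manager.req_to_blocks["req-gone"]
    except KeyError:
        pass

    # New lookup: missing entries fall back to an empty block list.
    assert (manager.req_to_blocks.get("req-gone") or []) == []
    assert (manager.req_to_blocks.get("req-0") or []) == [3, 7, 9]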
@@ -76,6 +76,9 @@ class Scheduler(SchedulerInterface):
         # KV Connector pushes/pull of remote KVs for P/D and offloading.
         self.connector = None
         if self.vllm_config.kv_transfer_config is not None:
+            assert len(self.kv_cache_config.kv_cache_groups) == 1, (
+                "Multiple KV cache groups are not currently supported "
+                "with KV connectors")
             self.connector = KVConnectorFactory.create_connector_v1(
                 config=self.vllm_config, role=KVConnectorRole.SCHEDULER)
 
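The three added lines move the "single KV cache group" restriction to scheduler construction, so a misconfigured setup fails as soon as a KV connector is requested rather than on the first finished request. A rough sketch of that fail-fast behavior, with hypothetical stand-in config objects (not the real vLLM config classes):

    from dataclasses import dataclass, field

    @dataclass
    class FakeKVCacheConfig:
        # Stand-in: only the group count matters for this sketch.
        kv_cache_groups: list = field(default_factory=lambda: ["group0"])

    def build_connector(kv_cache_config, kv_transfer_config):
        if kv_transfer_config is None:
            return None
        assert len(kv_cache_config.kv_cache_groups) == 1, (
            "Multiple KV cache groups are not currently supported "
            "with KV connectors")
        return object()  # placeholder for the connector the factory would build

    build_connector(FakeKVCacheConfig(), kv_transfer_config={})  # OK: one group
    try:
        build_connector(FakeKVCacheConfig(["g0", "g1"]), kv_transfer_config={})
    except AssertionError:
        pass  # raised once, at startup, instead of deep inside request handling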
@@ -985,9 +988,8 @@ class Scheduler(SchedulerInterface):
         """
         if self.connector is None:
             return False, None
-        assert len(self.kv_cache_config.kv_cache_groups
-                   ) == 1, "KV connector only supports one KV cache group now"
-        block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0]
+        (block_ids, ) = self.kv_cache_manager.get_block_ids(request.request_id)
         return self.connector.request_finished(request, block_ids)
 
     def _update_waiting_for_remote_kv(self, request: Request) -> bool:
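The replacement line relies on single-element tuple unpacking: `get_block_ids()` is assumed here to return one block-ID list per KV cache group, so `(block_ids, )` both extracts that list and fails loudly if more than one group ever appears, which is why the explicit assert above it could be dropped. A small sketch of the idiom (the helper functions are illustrative, not vLLM APIs):

    # Illustrative helpers: one block-ID list per KV cache group.
    def get_block_ids_one_group(request_id: str) -> tuple[list[int], ...]:
        return ([1, 2, 3], )

    def get_block_ids_two_groups(request_id: str) -> tuple[list[int], ...]:
        return ([1, 2], [3, 4])

    # Exactly one group: unpacking yields the block list directly.
    (block_ids, ) = get_block_ids_one_group("req-0")
    assert block_ids == [1, 2, 3]

    # More than one group: unpacking raises ValueError, standing in for the old
    # "KV connector only supports one KV cache group now" assert.
    try:
        (block_ids, ) = get_block_ids_two_groups("req-0")
    except ValueError:
        pass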
@@ -1002,12 +1004,12 @@ class Scheduler(SchedulerInterface):
         and the request state will be moved back to WAITING from
         WAITING_FOR_REMOTE_KV.
         """
+        assert self.connector is not None
         if request.request_id not in self.finished_recving_kv_req_ids:
             return False
-        assert len(self.kv_cache_config.kv_cache_groups
-                   ) == 1, "KV connector only supports one KV cache group now"
+
         # Now that the blocks are ready, actually cache them.
-        block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0]
+        (block_ids, ) = self.kv_cache_manager.get_block_ids(request.request_id)
         num_computed_tokens = len(block_ids) * self.block_size
         # Handle the case where num request tokens less then one block.
         num_computed_tokens = min(num_computed_tokens, request.num_tokens)
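The last three context lines show how the computed-token count is derived once the remote KVs have arrived: whole received blocks times the block size, clamped so a short request never reports more computed tokens than it actually has. A worked example with assumed numbers (the block size and counts are illustrative only, not taken from this diff):

    # Assumed values for illustration.
    block_size = 16            # tokens per KV cache block
    block_ids = [4, 9, 11]     # three blocks received for this request
    num_request_tokens = 40    # the request is shorter than 3 full blocks

    num_computed_tokens = len(block_ids) * block_size              # 3 * 16 = 48
    num_computed_tokens = min(num_computed_tokens, num_request_tokens)
    assert num_computed_tokens == 40  # clamped to the request's own length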