From 699d62e6cf4d9a570e2441141c65f687b3b1eef2 Mon Sep 17 00:00:00 2001 From: "Chendi.Xue" Date: Fri, 24 Oct 2025 12:01:41 -0500 Subject: [PATCH] [NIXL][BUGFIX] delay done_recving queue cleanup to bottom of get_finished (#27297) Signed-off-by: Chendi Xue --- .../kv_connector/v1/nixl_connector.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index c6437610e3bab..fecf08c7e647c 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -1452,11 +1452,17 @@ class NixlConnectorWorker: len(done_recving), ) - # clean up metadata for completed requests + block_ids_to_permute = [] for req_id in done_recving: + # clean up metadata for completed requests meta = self._recving_metadata.pop(req_id, None) - if self.use_host_buffer and meta: + assert meta is not None, f"{req_id} not found in recving_metadata list" + if self.use_host_buffer: self.sync_recved_kv_to_device(req_id, meta) + if self.enable_permute_local_kv: + block_ids_to_permute += meta.local_block_ids + if len(block_ids_to_permute) > 0: + self.permute_device_kv(block_ids_to_permute) # Handle timeout to avoid stranding blocks on remote. now = time.perf_counter() @@ -1477,15 +1483,6 @@ class NixlConnectorWorker: del self._reqs_to_send[req_id] done_sending.add(req_id) - if self.enable_permute_local_kv and len(done_recving) > 0: - block_ids = [] - for req_id in done_recving: - meta = self._recving_metadata.pop(req_id) - assert meta, f"{req_id} not found in recving_metadata list" - block_ids += meta.local_block_ids - - self.permute_device_kv(block_ids) - return done_sending, done_recving def _get_new_notifs(self) -> set[str]: