diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/base.py b/vllm/distributed/kv_transfer/kv_connector/v1/base.py
index 9fdb5340f0e2c..ef4460a592bd6 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/base.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/base.py
@@ -183,7 +183,8 @@ class KVConnectorBase_V1(ABC):
         finished generating tokens.
 
         Returns:
-            ids of requests that have finished asynchronous (recving, sending).
+            ids of requests that have finished asynchronous transfer,
+            tuple of (sending/saving ids, recving/loading ids).
             The finished saves/sends req ids must belong to a set provided in a
             call to this method (this call or a prior one).
         """
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
index c44fc06d41999..cea454a0b5977 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
@@ -103,10 +103,10 @@ class MultiConnector(KVConnectorBase_V1):
     def get_finished(
         self, finished_req_ids: set[str]
     ) -> tuple[Optional[set[str]], Optional[set[str]]]:
-        finished_recving: set[str] = set()
         finished_sending: set[str] = set()
+        finished_recving: set[str] = set()
         for c in self._connectors:
-            recving, sending = c.get_finished(finished_req_ids)
+            sending, recving = c.get_finished(finished_req_ids)
             if not recving and not sending:
                 continue
             # Aggregate finished recving request ids.
@@ -125,7 +125,7 @@ class MultiConnector(KVConnectorBase_V1):
                 else:
                     self._extra_async_saves[req_id] = extra_pending - 1
 
-        return finished_recving or None, finished_sending or None
+        return finished_sending or None, finished_recving or None
 
     # ==============================
     # Scheduler-side methods