diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
index e1f4b86b81bc7..c6297eba6ae50 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
@@ -116,6 +116,15 @@ class DecodeBenchConnector(KVConnectorBase_V1):
         assert self.connector_scheduler is not None
         return self.connector_scheduler.build_connector_meta(scheduler_output)
 
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],  # accepted for the interface; not read here
+    ) -> tuple[bool, Optional[dict[str, Any]]]:
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.request_finished(request)  # drop per-request state
+        return False, None  # presumably: free blocks now, no KV params — confirm
+
 
 class DecodeBenchConnectorScheduler:
     """Scheduler-side implementation for DecodeBenchConnector."""
@@ -211,6 +220,12 @@ class DecodeBenchConnectorScheduler:
 
         return meta
 
+    def request_finished(self, request: "Request"):
+        """
+        Called when a request has finished; drops its id from _filled_requests.
+        """
+        self._filled_requests.discard(request.request_id)
+
 
 class DecodeBenchConnectorWorker:
     """Worker-side implementation for DecodeBenchConnector."""