diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py index 71274420c3426..0658b59a2e215 100644 --- a/vllm/compilation/collective_fusion.py +++ b/vllm/compilation/collective_fusion.py @@ -1183,7 +1183,7 @@ class AllReduceFusionPass(VllmInductorPass): self.end_and_log() def __del__(self): - if self.disabled: + if getattr(self, "disabled", True): return if flashinfer_comm is not None: flashinfer_comm.trtllm_destroy_ipc_workspace_for_all_reduce( diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index ff62f60e5a42c..d3a08af088c11 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -569,9 +569,10 @@ class NixlConnectorWorker: def __del__(self): """Cleanup background threads on destruction.""" - self._handshake_initiation_executor.shutdown(wait=False) - if self._nixl_handshake_listener_t: - self._nixl_handshake_listener_t.join(timeout=0) + if executor := getattr(self, "_handshake_initiation_executor", None): + executor.shutdown(wait=False) + if listener_t := getattr(self, "_nixl_handshake_listener_t", None): + listener_t.join(timeout=0) @staticmethod def _nixl_handshake_listener(metadata: NixlAgentMetadata, @@ -1379,4 +1380,4 @@ class NixlKVConnectorStats(KVConnectorStats): # TODO: reduce stats to a single value, calculate latency/throughput return { "num_successful_transfers": self.data["num_successful_transfers"] - } \ No newline at end of file + } diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index d18bef1256af5..42aa8d14a21eb 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -235,9 +235,6 @@ class ExecutorBase(ABC): """Shutdown the executor.""" self.collective_rpc("shutdown") - def __del__(self): - self.shutdown() - async def execute_model_async( self, execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]: diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 6855526583f04..8b1e1bb8f45ca 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -683,7 +683,8 @@ class Worker(WorkerBase): tensorizer_config=tensorizer_config, ) def shutdown(self) -> None: - self.model_runner.ensure_kv_transfer_shutdown() + if runner := getattr(self, "model_runner", None): + runner.ensure_kv_transfer_shutdown() def init_worker_distributed_environment(