From e2d8c27f68687476cb6c45723c44193b517308ea Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Wed, 10 Sep 2025 23:05:30 -0700
Subject: [PATCH] [BugFix] Fix pipeline parallel (#24621)

Signed-off-by: Nick Hill
---
Notes (ignored by `git am`; not part of the commit message):

* uniproc_executor.py: adds UniProcExecutor.shutdown(), which calls
  driver_worker.shutdown() when self.driver_worker is truthy.
* gpu_model_runner.py: removes the
  `assert isinstance(hidden_states, torch.Tensor)` at the top of the
  "Bookkeep" section (presumably hidden_states is not always a plain
  tensor under pipeline parallelism -- confirm against the PR).
* kv_connector_model_runner_mixin.py: ensure_kv_transfer_shutdown() now
  checks that the has_kv_transfer_group name is still truthy before
  calling it, because it can be None during interpreter shutdown.

NOTE(review): this patch was recovered from a copy whose line breaks had
been collapsed. Line structure and indentation were reconstructed from
the hunk headers and Python syntax; verify the context-line whitespace
against the upstream commit before applying.

 vllm/executor/uniproc_executor.py                 | 4 ++++
 vllm/v1/worker/gpu_model_runner.py                | 1 -
 vllm/v1/worker/kv_connector_model_runner_mixin.py | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/executor/uniproc_executor.py b/vllm/executor/uniproc_executor.py
index aabc9ed9b80a2..f45a94f3151b6 100644
--- a/vllm/executor/uniproc_executor.py
+++ b/vllm/executor/uniproc_executor.py
@@ -71,6 +71,10 @@ class UniProcExecutor(ExecutorBase):
             self.shutdown()
         return
 
+    def shutdown(self) -> None:
+        if worker := self.driver_worker:
+            worker.shutdown()
+
 
 UniProcExecutorAsync = UniProcExecutor
 
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index b75756fbdae85..ce53154896bae 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2070,7 +2070,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             sampler_output = self._sample(logits, spec_decode_metadata)
 
         with record_function_or_nullcontext("Bookkeep"):
-            assert isinstance(hidden_states, torch.Tensor)
             (
                 num_nans_in_logits,
                 logprobs_lists,
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
index 67bb967d2edfa..3eb9f26e9f5b6 100644
--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -45,7 +45,8 @@ class KVConnectorModelRunnerMixin:
 
     @staticmethod
     def ensure_kv_transfer_shutdown() -> None:
-        if has_kv_transfer_group():
+        # has_kv_transfer_group can be None during interpreter shutdown.
+        if has_kv_transfer_group and has_kv_transfer_group():
             ensure_kv_transfer_shutdown()
 
     @staticmethod