From 24b65eff0da0c8c4422f9cff6bf35f80a11c0274 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Thu, 18 Dec 2025 11:47:56 -0800
Subject: [PATCH] [BugFix] Spec decode with VLLM_ENABLE_V1_MULTIPROCESSING=0
 (#30319)

Signed-off-by: Chen Zhang
---
 vllm/v1/engine/core_client.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index c936646aa7993..807db8275fbf5 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -268,7 +268,8 @@ class InprocClient(EngineCoreClient):
         self.engine_core = EngineCore(*args, **kwargs)
 
     def get_output(self) -> EngineCoreOutputs:
-        outputs, _ = self.engine_core.step_fn()
+        outputs, model_executed = self.engine_core.step_fn()
+        self.engine_core.post_step(model_executed=model_executed)
         return outputs and outputs.get(0) or EngineCoreOutputs()
 
     def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
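
For context, a minimal sketch of how the patched path could be exercised end to end: setting VLLM_ENABLE_V1_MULTIPROCESSING=0 routes requests through the in-process InprocClient shown above, and enabling speculative decoding drives the step_fn/post_step sequence this fix adds. The model name and the speculative_config values are illustrative assumptions, not taken from the commit.

# Hypothetical reproduction sketch, not part of the patch. Assumes a recent
# vLLM where LLM accepts a speculative_config dict; adjust to your install.
import os

# Force the in-process engine client (InprocClient) instead of the
# multiprocessing-based client; this is the code path patched above.
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"

from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",        # assumed small demo model
    speculative_config={              # assumed ngram speculative-decoding setup
        "method": "ngram",
        "num_speculative_tokens": 3,
        "prompt_lookup_max": 3,
    },
)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)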