[BugFix] Fix spec decode with VLLM_ENABLE_V1_MULTIPROCESSING=0 (#30319)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2026-03-16 13:47:18 +08:00 · 2025-12-18 11:47:56 -08:00 · 2025-12-18 11:47:56 -08:00 · 24b65eff0d
commit 24b65eff0d
parent 41b6f9200f
1 changed files with 2 additions and 1 deletions
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -268,7 +268,8 @@ class InprocClient(EngineCoreClient):
        self.engine_core = EngineCore(*args, **kwargs)

    def get_output(self) -> EngineCoreOutputs:
-        outputs, _ = self.engine_core.step_fn()
+        outputs, model_executed = self.engine_core.step_fn()
+        self.engine_core.post_step(model_executed=model_executed)
        return outputs and outputs.get(0) or EngineCoreOutputs()

    def get_supported_tasks(self) -> tuple[SupportedTask, ...]: