From 24b65eff0da0c8c4422f9cff6bf35f80a11c0274 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Thu, 18 Dec 2025 11:47:56 -0800
Subject: [PATCH] [BugFix] Spec decode with VLLM_ENABLE_V1_MULTIPROCESSING=0
 (#30319)

Signed-off-by: Chen Zhang
---
 vllm/v1/engine/core_client.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index c936646aa7993..807db8275fbf5 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -268,7 +268,8 @@ class InprocClient(EngineCoreClient):
         self.engine_core = EngineCore(*args, **kwargs)
 
     def get_output(self) -> EngineCoreOutputs:
-        outputs, _ = self.engine_core.step_fn()
+        outputs, model_executed = self.engine_core.step_fn()
+        self.engine_core.post_step(model_executed=model_executed)
         return outputs and outputs.get(0) or EngineCoreOutputs()
 
     def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
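
For context, a minimal sketch of how the patched path could be exercised end to end: setting VLLM_ENABLE_V1_MULTIPROCESSING=0 routes requests through the in-process InprocClient shown above, and enabling speculative decoding drives the step_fn/post_step sequence this fix adds. The model name and the speculative_config values are illustrative assumptions, not taken from the commit.

# Hypothetical reproduction sketch, not part of the patch. Assumes a recent
# vLLM where LLM accepts a speculative_config dict; adjust to your install.
import os

# Force the in-process engine client (InprocClient) instead of the
# multiprocessing-based client; this is the code path patched above.
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"

from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",        # assumed small demo model
    speculative_config={              # assumed ngram speculative-decoding setup
        "method": "ngram",
        "num_speculative_tokens": 3,
        "prompt_lookup_max": 3,
    },
)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)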