From 73a99cc2a53ccabaa9fee8ebc583c2bcdc998b8e Mon Sep 17 00:00:00 2001 From: Aleksandr Samarin Date: Fri, 3 Oct 2025 15:43:41 +0200 Subject: [PATCH] [Model] Fixed stream generator for gpt-oss + spec-decoding (#26027) Signed-off-by: Aleksandr Samarin --- vllm/entrypoints/openai/serving_chat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 54eb60a8589de..a646b16da82c7 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -691,11 +691,13 @@ class OpenAIServingChat(OpenAIServing): if self.use_harmony: harmony_parser = harmony_parsers[i] prev_recipient = harmony_parser.current_recipient + delta_text = "" for token_id in output.token_ids: harmony_parser.process(token_id) + delta_text += (harmony_parser.last_content_delta + or "") cur_channel = harmony_parser.current_channel cur_recipient = harmony_parser.current_recipient - delta_text = harmony_parser.last_content_delta or "" else: delta_text = output.text