From bd6d5a7475aa26a85de5d63cfcae9a1151d7652f Mon Sep 17 00:00:00 2001 From: AlonKejzman Date: Mon, 22 Dec 2025 14:56:06 +0200 Subject: [PATCH] [gpt-oss] Fix harmony parser in streaming responses (#30205) Signed-off-by: AlonKejzman --- vllm/entrypoints/openai/serving_chat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 88d87a3334955..422a8c18e8e98 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -811,6 +811,11 @@ class OpenAIServingChat(OpenAIServing): delta_text += harmony_parser.last_content_delta or "" cur_channel = harmony_parser.current_channel cur_recipient = harmony_parser.current_recipient + # handle the case where several tokens were generated at once + # including the final token, leading to a delta in the text + # but the current channel to be empty (start state) + if not cur_channel and delta_text: + cur_channel = "final" else: delta_text = output.text