From 23a6c5280e93ca8796f12b18d0e1dba4f3d1331d Mon Sep 17 00:00:00 2001 From: Chauncey Date: Sat, 6 Sep 2025 01:26:00 +0800 Subject: [PATCH] [gpt-oss][Bugfix]Fix streamableparser for missing handling of certain token_ids (#24306) Signed-off-by: chaunceyjiang --- vllm/entrypoints/context.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index fb58cba3a40ff..e4f2e800f94a6 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -238,11 +238,11 @@ class StreamingHarmonyContext(HarmonyContext): # (finished=True), then the next token processed will mark the # beginning of a new message self.first_tok_of_message = output.finished - tok = output.outputs[0].token_ids[0] - self.parser.process(tok) + for tok in output.outputs[0].token_ids: + self.parser.process(tok) self._update_num_output_tokens(output.outputs[0].token_ids) # Check if the current token is part of reasoning content - self._update_num_reasoning_tokens([tok]) + self._update_num_reasoning_tokens(output.outputs[0].token_ids) self.last_tok = tok else: # Handle the case of tool output in direct message format