From 23a6c5280e93ca8796f12b18d0e1dba4f3d1331d Mon Sep 17 00:00:00 2001
From: Chauncey <chaunceyjiang@gmail.com>
Date: Sat, 6 Sep 2025 01:26:00 +0800
Subject: [PATCH] [gpt-oss][Bugfix]Fix streamableparser for missing handling of
 certain token_ids (#24306)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/context.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py
index fb58cba3a40ff..e4f2e800f94a6 100644
--- a/vllm/entrypoints/context.py
+++ b/vllm/entrypoints/context.py
@@ -238,11 +238,11 @@ class StreamingHarmonyContext(HarmonyContext):
             # (finished=True), then the next token processed will mark the
             # beginning of a new message
             self.first_tok_of_message = output.finished
-            tok = output.outputs[0].token_ids[0]
-            self.parser.process(tok)
+            for tok in output.outputs[0].token_ids:
+                self.parser.process(tok)
             self._update_num_output_tokens(output.outputs[0].token_ids)
             # Check if the current token is part of reasoning content
-            self._update_num_reasoning_tokens([tok])
+            self._update_num_reasoning_tokens(output.outputs[0].token_ids)
             self.last_tok = tok
         else:
             # Handle the case of tool output in direct message format