[Bugfix] Missing tokens in return_token_ids when a tool parser is enabled in streaming mode (#29074)

Signed-off-by: Peng-YM <1048217874pengym@gmail.com>
2026-06-08 18:35:41 +08:00 · 2025-12-05 03:09:39 +08:00 · 2025-12-05 03:09:39 +08:00 · 48a5fff66e
commit 48a5fff66e
parent 1119f6e47a
1 changed files with 8 additions and 3 deletions
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -1072,10 +1072,15 @@ class OpenAIServingChat(OpenAIServing):
                    # wasn't ready to send a token, then
                    #   get the next token without streaming a chunk
                    if delta_message is None:
-                        if output.finish_reason is None:
+                        # NOTE: If return_token_ids is enabled, we still need to
+                        # send a chunk with token_ids even if delta_message is None
+                        # to ensure all tokens are included in the response
+                        if (
+                            output.finish_reason is None
+                            and not request.return_token_ids
+                        ):
                            continue
-                        else:
+                        delta_message = DeltaMessage()
-                            delta_message = DeltaMessage()
                    # Log streaming delta if output logging is enabled
                    if self.enable_log_outputs and self.request_logger: