diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 8b50153f01152..7e0e627780970 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -887,7 +887,8 @@ class OpenAIServingChat(OpenAIServing): delta_message = DeltaMessage(content=delta_text) # update the previous values for the next iteration - if tool_choice_auto or self.reasoning_parser: + if ((tool_choice_auto or self.reasoning_parser) + and not self.use_harmony): assert previous_texts is not None assert all_previous_token_ids is not None previous_texts[i] = current_text