diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 79eac184a212..2a0d4cd74a28 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -873,7 +873,7 @@ class OpenAIServingChat(OpenAIServing):
                     total_tokens=num_prompt_tokens + completion_tokens,
                 )

-                data = chunk.model_dump_json(exclude_none=True)
+                data = chunk.model_dump_json(exclude_unset=True)
                 yield f"data: {data}\n\n"

         # once the final token is handled, if stream_options.include_usage
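
For context (not part of the diff): this one-line change swaps Pydantic's `exclude_none` for `exclude_unset` when serializing each streamed chunk. The practical difference, presumably the motivation here, is that `exclude_none` drops every `None`-valued field, while `exclude_unset` drops only fields that were never assigned, so a field explicitly set to `None` still appears as `null` in the SSE payload. Below is a minimal sketch of that behavior using a hypothetical `Chunk` model, not vLLM's actual `ChatCompletionStreamResponse`:

```python
# Sketch only: illustrates Pydantic v2's exclude_none vs exclude_unset.
from typing import Optional

from pydantic import BaseModel


class Chunk(BaseModel):
    id: str
    usage: Optional[dict] = None     # never assigned below -> "unset"
    logprobs: Optional[dict] = None  # explicitly assigned None below


chunk = Chunk(id="chatcmpl-1", logprobs=None)

# exclude_none removes every None field, even ones the caller set on purpose.
print(chunk.model_dump_json(exclude_none=True))
# -> {"id":"chatcmpl-1"}

# exclude_unset removes only fields that were never provided, so the
# explicitly-set logprobs survives as null while the unset usage is dropped.
print(chunk.model_dump_json(exclude_unset=True))
# -> {"id":"chatcmpl-1","logprobs":null}
```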