From f3237f3f6b1ce3ea3b1881a059811c2695ffe650 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Fri, 12 Dec 2025 16:28:54 +0000 Subject: [PATCH] [Frontend] Fixes anthropic streaming message_start usage nesting (#30266) Signed-off-by: bbartels --- tests/entrypoints/openai/test_messages.py | 9 ++++++--- vllm/entrypoints/anthropic/serving_messages.py | 12 ++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/entrypoints/openai/test_messages.py b/tests/entrypoints/openai/test_messages.py index b804a1a7a841a..8de6c4cb6c887 100644 --- a/tests/entrypoints/openai/test_messages.py +++ b/tests/entrypoints/openai/test_messages.py @@ -79,9 +79,12 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic): assert chunk_count > 0 assert first_chunk is not None, "message_start chunk was never observed" - assert first_chunk.usage is not None, "first chunk should include usage stats" - assert first_chunk.usage["output_tokens"] == 0 - assert first_chunk.usage["input_tokens"] > 5 + assert first_chunk.message is not None, "first chunk should include message" + assert first_chunk.message.usage is not None, ( + "first chunk should include usage stats" + ) + assert first_chunk.message.usage.output_tokens == 0 + assert first_chunk.message.usage.input_tokens > 5 @pytest.mark.asyncio diff --git a/vllm/entrypoints/anthropic/serving_messages.py b/vllm/entrypoints/anthropic/serving_messages.py index e7ea3bb59ca70..25c2d88a2c7a4 100644 --- a/vllm/entrypoints/anthropic/serving_messages.py +++ b/vllm/entrypoints/anthropic/serving_messages.py @@ -324,12 +324,12 @@ class AnthropicServingMessages(OpenAIServingChat): id=origin_chunk.id, content=[], model=origin_chunk.model, - ), - usage=AnthropicUsage( - input_tokens=origin_chunk.usage.prompt_tokens - if origin_chunk.usage - else 0, - output_tokens=0, + usage=AnthropicUsage( + input_tokens=origin_chunk.usage.prompt_tokens + if origin_chunk.usage + else 0, + output_tokens=0, + ), ), ) first_item = False