Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 08:04:58 +08:00)
[Frontend] Fixes anthropic /v1/messages streaming not containing input_tokens on first chunk (#29971)
Signed-off-by: bbartels <benjamin@bartels.dev>
commit fca3f46658
parent 28097d5638
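For context: before this change, the message_start event on the Anthropic-compatible /v1/messages streaming endpoint carried no input_tokens, because the underlying OpenAI-style stream only attached usage to its final chunk. The sketch below shows how a client would observe the fix; the base URL and model name are placeholders, not part of this commit.

import asyncio

import anthropic


async def main() -> None:
    # Placeholder endpoint and model for a locally running vLLM server.
    client = anthropic.AsyncAnthropic(
        base_url="http://localhost:8000", api_key="EMPTY"
    )
    resp = await client.messages.create(
        model="my-model",
        max_tokens=64,
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    )
    async for chunk in resp:
        if chunk.type == "message_start":
            # After this fix, the very first event already carries prompt-side
            # usage (vLLM attaches a `usage` field to the event itself).
            print(chunk.usage)  # e.g. {"input_tokens": 12, "output_tokens": 0}
            break


asyncio.run(main())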
@@ -69,9 +69,20 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
         stream=True,
     )
 
+    first_chunk = None
+    chunk_count = 0
     async for chunk in resp:
+        chunk_count += 1
+        if first_chunk is None and chunk.type == "message_start":
+            first_chunk = chunk
         print(chunk.model_dump_json())
+
+    assert chunk_count > 0
+    assert first_chunk is not None, "message_start chunk was never observed"
+    assert first_chunk.usage is not None, "first chunk should include usage stats"
+    assert first_chunk.usage["output_tokens"] == 0
+    assert first_chunk.usage["input_tokens"] > 5
 
 
 @pytest.mark.asyncio
 async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
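A note on why the test indexes first_chunk.usage like a dict: usage is not a declared field on the SDK's message_start event, so it survives only as an untyped extra attribute. A minimal sketch of that behavior, assuming the SDK's pydantic models tolerate extra fields (which appears to be the case for the generated Anthropic SDK):

from pydantic import BaseModel, ConfigDict


class RawEvent(BaseModel):
    # Mimics an SDK model that keeps unknown fields instead of dropping them.
    model_config = ConfigDict(extra="allow")
    type: str


event = RawEvent.model_validate(
    {"type": "message_start", "usage": {"input_tokens": 12, "output_tokens": 0}}
)
# The extra field surfaces as a plain dict, hence the ["..."] indexing above.
assert event.usage["input_tokens"] == 12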
@@ -183,7 +183,9 @@ class AnthropicServingMessages(OpenAIServingChat):
 
         if anthropic_request.stream:
             req.stream = anthropic_request.stream
-            req.stream_options = StreamOptions.validate({"include_usage": True})
+            req.stream_options = StreamOptions.validate(
+                {"include_usage": True, "continuous_usage_stats": True}
+            )
 
         if anthropic_request.tool_choice is None:
             req.tool_choice = None
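The second option is the substance of the fix: include_usage alone makes the OpenAI-compatible stream attach usage only to one final chunk, too late for the Anthropic adapter, which must emit input_tokens on its first message_start event. continuous_usage_stats asks for usage on every chunk instead. A sketch of the resulting stream_options payload (field names follow vLLM's OpenAI-compatible protocol):

stream_options = {
    "include_usage": True,           # report usage in the stream at all
    "continuous_usage_stats": True,  # attach usage to every chunk, not only the last
}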
@@ -323,6 +325,12 @@ class AnthropicServingMessages(OpenAIServingChat):
                     content=[],
                     model=origin_chunk.model,
                 ),
+                usage=AnthropicUsage(
+                    input_tokens=origin_chunk.usage.prompt_tokens
+                    if origin_chunk.usage
+                    else 0,
+                    output_tokens=0,
+                ),
             )
             first_item = False
             data = chunk.model_dump_json(exclude_unset=True)
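With usage now set on the first event, the serialized message_start chunk looks roughly like the sketch below (values are illustrative; exclude_unset trims fields the server never set):

first_event = {
    "type": "message_start",
    "message": {
        "role": "assistant",
        "content": [],
        "model": "my-model",  # placeholder model name
    },
    "usage": {
        "input_tokens": 12,  # prompt_tokens copied from the OpenAI chunk, else 0
        "output_tokens": 0,  # nothing generated yet at stream start
    },
}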