Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-15 01:45:02 +08:00)
[Frontend] Fixes anthropic /v1/messages streaming not containing input_tokens on first chunk (#29971)
Signed-off-by: bbartels <benjamin@bartels.dev>
parent 28097d5638
commit fca3f46658
```diff
@@ -69,9 +69,20 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
         stream=True,
     )
+
+    first_chunk = None
+    chunk_count = 0
     async for chunk in resp:
+        chunk_count += 1
+        if first_chunk is None and chunk.type == "message_start":
+            first_chunk = chunk
         print(chunk.model_dump_json())
 
+    assert chunk_count > 0
+    assert first_chunk is not None, "message_start chunk was never observed"
+    assert first_chunk.usage is not None, "first chunk should include usage stats"
+    assert first_chunk.usage["output_tokens"] == 0
+    assert first_chunk.usage["input_tokens"] > 5
 
 
 @pytest.mark.asyncio
 async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
```
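The new assertions encode the shape of the Anthropic streaming protocol's opening event: `message_start` arrives before any generated text, and the prompt is already tokenized at that point, so `input_tokens` can be populated while `output_tokens` is still zero. For orientation, a rough sketch of the event the test now expects; the structure follows the hunks below, and all values are illustrative:

```python
# Illustrative shape of the first streamed event after this fix.
# Values are made up; only the structure matters here.
message_start = {
    "type": "message_start",
    "message": {
        "role": "assistant",
        "content": [],                      # no text generated yet
        "model": "<served-model-name>",
    },
    "usage": {
        "input_tokens": 12,                 # prompt size, known up front
        "output_tokens": 0,                 # nothing generated yet
    },
}
```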
```diff
@@ -183,7 +183,9 @@ class AnthropicServingMessages(OpenAIServingChat):
 
         if anthropic_request.stream:
             req.stream = anthropic_request.stream
-            req.stream_options = StreamOptions.validate({"include_usage": True})
+            req.stream_options = StreamOptions.validate(
+                {"include_usage": True, "continuous_usage_stats": True}
+            )
 
         if anthropic_request.tool_choice is None:
             req.tool_choice = None
```
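This change is the core of the fix: with `include_usage` alone, the OpenAI-compatible layer attaches usage stats only to the final chunk of the stream, so the Anthropic adapter had nothing to report when it emitted `message_start`. `continuous_usage_stats` (a vLLM extension to OpenAI's stream options) puts a usage block on every chunk, including the first. A minimal sketch of the difference, assuming `StreamOptions` is importable from `vllm.entrypoints.openai.protocol`:

```python
# Sketch; assumes StreamOptions lives in vllm.entrypoints.openai.protocol.
from vllm.entrypoints.openai.protocol import StreamOptions

# Before: usage appears only on the final chunk of the stream.
before = StreamOptions.validate({"include_usage": True})

# After: every chunk carries usage, so prompt_tokens is readable on the
# very first chunk that the adapter turns into message_start.
after = StreamOptions.validate(
    {"include_usage": True, "continuous_usage_stats": True}
)
assert after.continuous_usage_stats
```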
```diff
@@ -323,6 +325,12 @@ class AnthropicServingMessages(OpenAIServingChat):
                     content=[],
                     model=origin_chunk.model,
                 ),
+                usage=AnthropicUsage(
+                    input_tokens=origin_chunk.usage.prompt_tokens
+                    if origin_chunk.usage
+                    else 0,
+                    output_tokens=0,
+                ),
             )
             first_item = False
         data = chunk.model_dump_json(exclude_unset=True)
```
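The second hunk consumes those per-chunk stats when building `message_start`: OpenAI counts the prompt as `prompt_tokens`, Anthropic calls the same quantity `input_tokens`, and the `if origin_chunk.usage` guard keeps the adapter safe if a chunk arrives without a usage block. A self-contained sketch of that mapping; the `Usage` class here is a hypothetical stand-in, not vLLM's:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Usage:
    """Hypothetical stand-in for an OpenAI-style usage block."""

    prompt_tokens: int
    completion_tokens: int


def message_start_usage(openai_usage: Optional[Usage]) -> dict:
    # Mirrors the hunk above: prompt_tokens maps to Anthropic's
    # input_tokens, falling back to 0 when usage is absent; output_tokens
    # is 0 because nothing has been generated at message_start time.
    return {
        "input_tokens": openai_usage.prompt_tokens if openai_usage else 0,
        "output_tokens": 0,
    }


assert message_start_usage(Usage(42, 0)) == {"input_tokens": 42, "output_tokens": 0}
assert message_start_usage(None) == {"input_tokens": 0, "output_tokens": 0}
```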