Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-24 13:47:00 +08:00)
[Frontend] Fixes anthropic streaming message_start usage nesting (#30266)
Signed-off-by: bbartels <benjamin@bartels.dev>
parent 9c0ee995a8
commit f3237f3f6b
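Context for the change: in Anthropic's Messages streaming protocol, the usage stats of a message_start event live inside the nested message object, not at the top level of the event; this commit moves vLLM's emitted usage to that inner position. A minimal sketch of the corrected event shape, with illustrative field values (the id and model name below are placeholders, not from this commit):

    # Corrected message_start shape: usage is nested under "message".
    # Values are illustrative only.
    message_start = {
        "type": "message_start",
        "message": {
            "id": "msg_abc123",        # placeholder id
            "type": "message",
            "role": "assistant",
            "content": [],
            "model": "my-model",       # placeholder model name
            "usage": {
                "input_tokens": 12,    # prompt tokens known at stream start
                "output_tokens": 0,    # no completion tokens generated yet
            },
        },
    }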
@@ -79,9 +79,12 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
 
     assert chunk_count > 0
     assert first_chunk is not None, "message_start chunk was never observed"
-    assert first_chunk.usage is not None, "first chunk should include usage stats"
-    assert first_chunk.usage["output_tokens"] == 0
-    assert first_chunk.usage["input_tokens"] > 5
+    assert first_chunk.message is not None, "first chunk should include message"
+    assert first_chunk.message.usage is not None, (
+        "first chunk should include usage stats"
+    )
+    assert first_chunk.message.usage.output_tokens == 0
+    assert first_chunk.message.usage.input_tokens > 5
 
 
 @pytest.mark.asyncio
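The updated assertions read usage through event.message.usage, which is where the official anthropic Python SDK exposes it. A hedged consumer sketch of that access path, assuming a server with an Anthropic-compatible endpoint (the model name is a placeholder):

    # Sketch: read usage from a raw message_start event via the anthropic SDK.
    import anthropic

    async def print_start_usage(client: anthropic.AsyncAnthropic) -> None:
        stream = await client.messages.create(
            model="my-model",  # placeholder model name
            max_tokens=16,
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
        )
        async for event in stream:
            if event.type == "message_start":
                # After this fix, usage is reached through event.message.usage,
                # matching the upstream Anthropic protocol.
                usage = event.message.usage
                print(usage.input_tokens, usage.output_tokens)
                break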
@@ -324,12 +324,12 @@ class AnthropicServingMessages(OpenAIServingChat):
                         id=origin_chunk.id,
                         content=[],
                         model=origin_chunk.model,
-                    ),
-                    usage=AnthropicUsage(
-                        input_tokens=origin_chunk.usage.prompt_tokens
-                        if origin_chunk.usage
-                        else 0,
-                        output_tokens=0,
+                        usage=AnthropicUsage(
+                            input_tokens=origin_chunk.usage.prompt_tokens
+                            if origin_chunk.usage
+                            else 0,
+                            output_tokens=0,
+                        ),
                     ),
                 )
                 first_item = False
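The hunk above moves the AnthropicUsage(...) keyword argument from the event constructor into the AnthropicMessage(...) constructor. A hedged sketch of protocol models consistent with that diff; the actual definitions in vLLM's Anthropic protocol module may carry more fields, and the event class name here is hypothetical:

    # Sketch of the nesting implied by the diff (field sets assumed).
    from pydantic import BaseModel

    class AnthropicUsage(BaseModel):
        input_tokens: int
        output_tokens: int

    class AnthropicMessage(BaseModel):
        id: str
        content: list = []
        model: str
        usage: AnthropicUsage | None = None  # usage belongs here

    class AnthropicMessageStartEvent(BaseModel):  # hypothetical name
        type: str = "message_start"
        message: AnthropicMessage
        # Before this fix, usage was emitted at this level instead.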