mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:45:01 +08:00
[Bugfix] fix missing last itl in openai completions benchmark (#5926)
This commit is contained in:
parent
7f83f40dee
commit
c4bca740e8
@ -265,6 +265,9 @@ async def async_request_openai_completions(
|
||||
else:
|
||||
data = json.loads(chunk)
|
||||
|
||||
# NOTE: Some completion API might have a last
|
||||
# usage summary response without a token so we
|
||||
# want to check a token was generated
|
||||
if data["choices"][0]["text"]:
|
||||
timestamp = time.perf_counter()
|
||||
# First token
|
||||
@ -273,12 +276,8 @@ async def async_request_openai_completions(
|
||||
output.ttft = ttft
|
||||
|
||||
# Decoding phase
|
||||
# NOTE: Some completion API might have a last
|
||||
# usage summary response without a token so we
|
||||
# do not want to include as inter-token-latency
|
||||
elif data.get("usage", None) is None:
|
||||
output.itl.append(timestamp -
|
||||
most_recent_timestamp)
|
||||
output.itl.append(timestamp -
|
||||
most_recent_timestamp)
|
||||
|
||||
most_recent_timestamp = timestamp
|
||||
generated_text += data["choices"][0]["text"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user