mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 14:56:16 +08:00
[Bugfix] fix missing last itl in openai completions benchmark (#5926)
This commit is contained in:
parent
7f83f40dee
commit
c4bca740e8
@ -265,6 +265,9 @@ async def async_request_openai_completions(
|
|||||||
else:
|
else:
|
||||||
data = json.loads(chunk)
|
data = json.loads(chunk)
|
||||||
|
|
||||||
|
# NOTE: Some completion API might have a last
|
||||||
|
# usage summary response without a token so we
|
||||||
|
# want to check a token was generated
|
||||||
if data["choices"][0]["text"]:
|
if data["choices"][0]["text"]:
|
||||||
timestamp = time.perf_counter()
|
timestamp = time.perf_counter()
|
||||||
# First token
|
# First token
|
||||||
@ -273,12 +276,8 @@ async def async_request_openai_completions(
|
|||||||
output.ttft = ttft
|
output.ttft = ttft
|
||||||
|
|
||||||
# Decoding phase
|
# Decoding phase
|
||||||
# NOTE: Some completion API might have a last
|
output.itl.append(timestamp -
|
||||||
# usage summary response without a token so we
|
most_recent_timestamp)
|
||||||
# do not want to include as inter-token-latency
|
|
||||||
elif data.get("usage", None) is None:
|
|
||||||
output.itl.append(timestamp -
|
|
||||||
most_recent_timestamp)
|
|
||||||
|
|
||||||
most_recent_timestamp = timestamp
|
most_recent_timestamp = timestamp
|
||||||
generated_text += data["choices"][0]["text"]
|
generated_text += data["choices"][0]["text"]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user