mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:04:53 +08:00
[Benchmark] Enable benchmark to run with encoding_format="bytes" (#27467)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
3567816932
commit
b7030d962b
@@ -498,10 +498,17 @@ async def _run_pooling_request(
|
|||||||
async with session.post(url=api_url, headers=headers, json=payload) as response:
|
async with session.post(url=api_url, headers=headers, json=payload) as response:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
output.ttft = output.latency = time.perf_counter() - st
|
output.ttft = output.latency = time.perf_counter() - st
|
||||||
data = await response.json()
|
|
||||||
|
if payload.get("encoding_format", "float") == "bytes":
|
||||||
|
metadata = json.loads(response.headers["metadata"])
|
||||||
|
usage = metadata.get("usage", {})
|
||||||
|
else:
|
||||||
|
data = await response.json()
|
||||||
|
usage = data.get("usage", {})
|
||||||
|
|
||||||
output.success = True
|
output.success = True
|
||||||
output.generated_text = ""
|
output.generated_text = ""
|
||||||
output.prompt_len = data.get("usage", {}).get("prompt_tokens", 0)
|
output.prompt_len = usage.get("prompt_tokens", 0)
|
||||||
else:
|
else:
|
||||||
output.success = False
|
output.success = False
|
||||||
output.error = response.reason or ""
|
output.error = response.reason or ""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user