diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index a42e70170ba2..313ba819c87c 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -256,6 +256,7 @@ async def async_request_openai_completions(
             async with session.post(url=api_url, json=payload,
                                     headers=headers) as response:
                 if response.status == 200:
+                    first_chunk_received = False
                     async for chunk_bytes in response.content:
                         chunk_bytes = chunk_bytes.strip()
                         if not chunk_bytes:
@@ -274,7 +275,8 @@ async def async_request_openai_completions(
                             if data["choices"][0]["text"]:
                                 timestamp = time.perf_counter()
                                 # First token
-                                if ttft == 0.0:
+                                if not first_chunk_received:
+                                    first_chunk_received = True
                                     ttft = time.perf_counter() - st
                                     output.ttft = ttft
 
@@ -285,9 +287,16 @@ async def async_request_openai_completions(
 
                                 most_recent_timestamp = timestamp
                                 generated_text += data["choices"][0]["text"]
-
+                    # No text-bearing chunk means no TTFT sample was taken,
+                    # so report the request as failed.
+                    if first_chunk_received:
+                        output.success = True
+                    else:
+                        output.success = False
+                        output.error = (
+                            "Never received a valid chunk to calculate TTFT. "
+                            "This response will be marked as failed!")
                     output.generated_text = generated_text
-                    output.success = True
                     output.latency = latency
                 else:
                     output.error = response.reason or ""