[Bugfix] fix missing last itl in openai completions benchmark (#5926)

2025-12-11 14:56:16 +08:00 · 2024-06-28 22:34:42 -04:00 · 2024-06-28 22:34:42 -04:00 · c4bca740e8
commit c4bca740e8
parent 7f83f40dee
1 changed file with 5 additions and 6 deletions
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -265,6 +265,9 @@ async def async_request_openai_completions(
                        else:
                            data = json.loads(chunk)
+                            # NOTE: Some completion API might have a last
+                            # usage summary response without a token so we
+                            # want to check a token was generated
                            if data["choices"][0]["text"]:
                                timestamp = time.perf_counter()
                                # First token
@@ -273,12 +276,8 @@ async def async_request_openai_completions(
                                    output.ttft = ttft
                                # Decoding phase
-                                # NOTE: Some completion API might have a last
+                                output.itl.append(timestamp -
-                                # usage summary response without a token so we
+                                                  most_recent_timestamp)
-                                # do not want to include as inter-token-latency
-                                elif data.get("usage", None) is None:
-                                    output.itl.append(timestamp -
-                                                      most_recent_timestamp)
                                most_recent_timestamp = timestamp
                                generated_text += data["choices"][0]["text"]