Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 00:34:58 +08:00)
fix "Total generated tokens:" is 0 if using --backend tgi and --endpo… (#14673)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
parent ffa443afed
commit 40828ce5fe
@@ -63,7 +63,7 @@ async def async_request_tgi(
             "temperature": 0.01,  # TGI does not accept 0.0 temperature.
             "top_p": 0.99,  # TGI does not accept 1.0 top_p.
             "truncate": request_func_input.prompt_len,
-            # TGI does not accept ignore_eos flag.
+            "ignore_eos_token": request_func_input.ignore_eos,
         }
         payload = {
             "inputs": request_func_input.prompt,
@@ -71,6 +71,10 @@ async def async_request_tgi(
         }
         output = RequestFuncOutput()
         output.prompt_len = request_func_input.prompt_len
+        if request_func_input.ignore_eos:
+            output.output_tokens = request_func_input.output_len
+        else:
+            output.output_tokens = None

         ttft = 0.0
         st = time.perf_counter()
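For context, below is a minimal sketch of how the benchmark's "Total generated tokens:" figure is assumed to be aggregated from RequestFuncOutput objects. The names outputs, tokenizer, and total_generated_tokens are illustrative, not the literal vLLM benchmark code; the point is that once output_tokens is pre-filled for ignore_eos requests (or left as None so the returned text is re-tokenized), the total no longer collapses to 0 for the TGI backend.

    # Illustrative sketch only (assumed aggregation logic, not the literal
    # vLLM benchmark code): sum generated tokens across all requests.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class RequestFuncOutput:          # trimmed to the fields used here
        generated_text: str = ""
        prompt_len: int = 0
        output_tokens: Optional[int] = None

    def total_generated_tokens(outputs, tokenizer) -> int:
        total = 0
        for out in outputs:
            if out.output_tokens:
                # Pre-filled by async_request_tgi when ignore_eos is requested,
                # since TGI is asked for exactly output_len new tokens.
                total += out.output_tokens
            else:
                # Fall back to re-tokenizing the returned text.
                total += len(tokenizer(out.generated_text).input_ids)
        return total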