Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 00:34:58 +08:00)
fix "Total generated tokens:" is 0 if using --backend tgi and --endpo… (#14673)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
parent ffa443afed
commit 40828ce5fe
@@ -63,7 +63,7 @@ async def async_request_tgi(
             "temperature": 0.01,  # TGI does not accept 0.0 temperature.
             "top_p": 0.99,  # TGI does not accept 1.0 top_p.
             "truncate": request_func_input.prompt_len,
-            # TGI does not accept ignore_eos flag.
+            "ignore_eos_token": request_func_input.ignore_eos,
         }
         payload = {
             "inputs": request_func_input.prompt,
@@ -71,6 +71,10 @@ async def async_request_tgi(
         }
         output = RequestFuncOutput()
         output.prompt_len = request_func_input.prompt_len
+        if request_func_input.ignore_eos:
+            output.output_tokens = request_func_input.output_len
+        else:
+            output.output_tokens = None

         ttft = 0.0
         st = time.perf_counter()
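For context, below is a minimal sketch of how the benchmark's "Total generated tokens:" figure is assumed to be aggregated from RequestFuncOutput objects. The names outputs, tokenizer, and total_generated_tokens are illustrative, not the literal vLLM benchmark code; the point is that once output_tokens is pre-filled for ignore_eos requests (or left as None so the returned text is re-tokenized), the total no longer collapses to 0 for the TGI backend.

    # Illustrative sketch only (assumed aggregation logic, not the literal
    # vLLM benchmark code): sum generated tokens across all requests.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class RequestFuncOutput:          # trimmed to the fields used here
        generated_text: str = ""
        prompt_len: int = 0
        output_tokens: Optional[int] = None

    def total_generated_tokens(outputs, tokenizer) -> int:
        total = 0
        for out in outputs:
            if out.output_tokens:
                # Pre-filled by async_request_tgi when ignore_eos is requested,
                # since TGI is asked for exactly output_len new tokens.
                total += out.output_tokens
            else:
                # Fall back to re-tokenizing the returned text.
                total += len(tokenizer(out.generated_text).input_ids)
        return total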