mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:06:06 +08:00
[Misc][Tools][Benchmark] Add benchmark_serving support for llama.cpp. (#18692)
Signed-off-by: Duyi-Wang <duyi.wang@intel.com>
This commit is contained in:
parent
f8977c233f
commit
b169d5f7b6
@ -324,7 +324,7 @@ async def async_request_openai_completions(
|
|||||||
|
|
||||||
most_recent_timestamp = timestamp
|
most_recent_timestamp = timestamp
|
||||||
generated_text += text or ""
|
generated_text += text or ""
|
||||||
elif usage := data.get("usage"):
|
if usage := data.get("usage"):
|
||||||
output.output_tokens = usage.get("completion_tokens")
|
output.output_tokens = usage.get("completion_tokens")
|
||||||
if first_chunk_received:
|
if first_chunk_received:
|
||||||
output.success = True
|
output.success = True
|
||||||
@ -611,6 +611,7 @@ ASYNC_REQUEST_FUNCS = {
|
|||||||
"tensorrt-llm": async_request_trt_llm,
|
"tensorrt-llm": async_request_trt_llm,
|
||||||
"scalellm": async_request_openai_completions,
|
"scalellm": async_request_openai_completions,
|
||||||
"sglang": async_request_openai_completions,
|
"sglang": async_request_openai_completions,
|
||||||
|
"llama.cpp": async_request_openai_completions,
|
||||||
}
|
}
|
||||||
|
|
||||||
OPENAI_COMPATIBLE_BACKENDS = [
|
OPENAI_COMPATIBLE_BACKENDS = [
|
||||||
|
|||||||
@ -762,6 +762,10 @@ def main(args: argparse.Namespace):
|
|||||||
if "temperature" not in sampling_params:
|
if "temperature" not in sampling_params:
|
||||||
sampling_params["temperature"] = 0.0 # Default to greedy decoding.
|
sampling_params["temperature"] = 0.0 # Default to greedy decoding.
|
||||||
|
|
||||||
|
if args.backend == "llama.cpp":
|
||||||
|
# Disable prompt caching in llama.cpp backend
|
||||||
|
sampling_params["cache_prompt"] = False
|
||||||
|
|
||||||
# Avoid GC processing "static" data - reduce pause times.
|
# Avoid GC processing "static" data - reduce pause times.
|
||||||
gc.collect()
|
gc.collect()
|
||||||
gc.freeze()
|
gc.freeze()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user