From 891b9d33de7ee7b3ee95b9bd7bb8a9cffae0e08c Mon Sep 17 00:00:00 2001
From: Brayden Zhong
Date: Mon, 12 May 2025 01:55:53 -0400
Subject: [PATCH] [Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)

Signed-off-by: Brayden Zhong
---
 benchmarks/benchmark_throughput.py | 7 ++++---
 vllm/benchmarks/throughput.py      | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 1f65277e1bfeb..cd6c76ad6096f 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -146,9 +146,10 @@ async def run_vllm_async(
 
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
                 "Please ensure that max_model_len is greater than the sum of"
                 " prompt_len and expected_output_len for all requests.")
@@ -599,7 +600,7 @@ if __name__ == "__main__":
         "--lora-path",
         type=str,
         default=None,
-        help="Path to the lora adapters to use. This can be an absolute path, "
+        help="Path to the LoRA adapters to use. This can be an absolute path, "
         "a relative path, or a Hugging Face model identifier.")
     parser.add_argument(
         "--prefix-len",
diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
index b3e24911cc982..13110a8b4db3f 100644
--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -148,9 +148,10 @@ async def run_vllm_async(
 
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
                 "Please ensure that max_model_len is greater than the sum of"
                 " prompt_len and expected_output_len for all requests.")
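
The sketch below, which is not part of the patch, illustrates the corrected pattern in isolation: the async EngineClient exposes get_model_config() as a coroutine rather than a model_config attribute, so the config must be awaited before its max_model_len can be checked. The model name and request lengths are hypothetical placeholders; the imports assume the same vLLM entry points the benchmark above uses.

import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)


async def check_max_model_len(prompt_len: int,
                              expected_output_len: int) -> None:
    # Hypothetical model choice, used only to make the sketch concrete.
    engine_args = AsyncEngineArgs(model="meta-llama/Llama-3.1-8B")
    async with build_async_engine_client_from_engine_args(engine_args) as llm:
        # EngineClient has no `model_config` attribute; fetch the config
        # through the async accessor instead, as the patch above does.
        model_config = await llm.get_model_config()
        assert model_config.max_model_len >= (prompt_len +
                                              expected_output_len), (
            "max_model_len must cover prompt_len + expected_output_len")


if __name__ == "__main__":
    # Placeholder lengths for illustration.
    asyncio.run(check_max_model_len(prompt_len=512, expected_output_len=128))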