[Fix] Benchmark "EngineClient" has no attribute "model_config" (#17976)

Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
Brayden Zhong 2025-05-12 01:55:53 -04:00 committed by GitHub
parent 430783018c
commit 891b9d33de
2 changed files with 7 additions and 5 deletions
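For context: the async benchmark path yields an EngineClient, which, unlike the synchronous engine, has no model_config attribute; the ModelConfig has to be fetched through the get_model_config() coroutine instead, which is exactly what the diffs below do. A minimal sketch of the corrected pattern follows, assuming the vLLM APIs named in the diffs; the SampleRequest stand-in, the validate_requests helper, and the model name are hypothetical, added only for illustration.

import asyncio
from dataclasses import dataclass

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)


@dataclass
class SampleRequest:
    # Hypothetical stand-in for the benchmark's request records.
    prompt_len: int
    expected_output_len: int


async def validate_requests(engine_args: AsyncEngineArgs,
                            requests: list[SampleRequest]) -> None:
    async with build_async_engine_client_from_engine_args(
            engine_args, disable_frontend_multiprocessing=True) as llm:
        # `llm` is an EngineClient: it exposes no `model_config`
        # attribute, so the config is fetched via the coroutine.
        model_config = await llm.get_model_config()
        assert all(
            model_config.max_model_len >= (request.prompt_len +
                                           request.expected_output_len)
            for request in requests), (
            "Please ensure that max_model_len is greater than the sum of"
            " prompt_len and expected_output_len for all requests.")


if __name__ == "__main__":
    # Model name and request sizes are illustrative only.
    asyncio.run(validate_requests(AsyncEngineArgs(model="facebook/opt-125m"),
                                  [SampleRequest(16, 16)]))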


@@ -146,9 +146,10 @@ async def run_vllm_async(
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
             "Please ensure that max_model_len is greater than the sum of"
             " prompt_len and expected_output_len for all requests.")
@@ -599,7 +600,7 @@ if __name__ == "__main__":
         "--lora-path",
         type=str,
         default=None,
-        help="Path to the lora adapters to use. This can be an absolute path, "
+        help="Path to the LoRA adapters to use. This can be an absolute path, "
         "a relative path, or a Hugging Face model identifier.")
     parser.add_argument(
         "--prefix-len",


@@ -148,9 +148,10 @@ async def run_vllm_async(
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
             "Please ensure that max_model_len is greater than the sum of"
             " prompt_len and expected_output_len for all requests.")