From 891b9d33de7ee7b3ee95b9bd7bb8a9cffae0e08c Mon Sep 17 00:00:00 2001
From: Brayden Zhong
Date: Mon, 12 May 2025 01:55:53 -0400
Subject: [PATCH] [Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)

Signed-off-by: Brayden Zhong
---
 benchmarks/benchmark_throughput.py | 7 ++++---
 vllm/benchmarks/throughput.py      | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 1f65277e1bfeb..cd6c76ad6096f 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -146,9 +146,10 @@ async def run_vllm_async(
 
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
                 "Please ensure that max_model_len is greater than the sum of"
                 " prompt_len and expected_output_len for all requests.")
@@ -599,7 +600,7 @@ if __name__ == "__main__":
         "--lora-path",
         type=str,
         default=None,
-        help="Path to the lora adapters to use. This can be an absolute path, "
+        help="Path to the LoRA adapters to use. This can be an absolute path, "
         "a relative path, or a Hugging Face model identifier.")
     parser.add_argument(
         "--prefix-len",
diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
index b3e24911cc982..13110a8b4db3f 100644
--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -148,9 +148,10 @@ async def run_vllm_async(
 
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
                 "Please ensure that max_model_len is greater than the sum of"
                 " prompt_len and expected_output_len for all requests.")
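
The sketch below, which is not part of the patch, illustrates the corrected pattern in isolation: the async EngineClient exposes get_model_config() as a coroutine rather than a model_config attribute, so the config must be awaited before its max_model_len can be checked. The model name and request lengths are hypothetical placeholders; the imports assume the same vLLM entry points the benchmark above uses.

import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)


async def check_max_model_len(prompt_len: int,
                              expected_output_len: int) -> None:
    # Hypothetical model choice, used only to make the sketch concrete.
    engine_args = AsyncEngineArgs(model="meta-llama/Llama-3.1-8B")
    async with build_async_engine_client_from_engine_args(engine_args) as llm:
        # EngineClient has no `model_config` attribute; fetch the config
        # through the async accessor instead, as the patch above does.
        model_config = await llm.get_model_config()
        assert model_config.max_model_len >= (prompt_len +
                                              expected_output_len), (
            "max_model_len must cover prompt_len + expected_output_len")


if __name__ == "__main__":
    # Placeholder lengths for illustration.
    asyncio.run(check_max_model_len(prompt_len=512, expected_output_len=128))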