[Fix] Benchmark "EngineClient" has no attribute "model_config" (#17976)

Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
Brayden Zhong 2025-05-12 01:55:53 -04:00 committed by GitHub
parent 430783018c
commit 891b9d33de
2 changed files with 7 additions and 5 deletions
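For context: the async benchmark path yields an EngineClient, which, unlike the synchronous engine, has no model_config attribute; the ModelConfig has to be fetched through the get_model_config() coroutine instead, which is exactly what the diffs below do. A minimal sketch of the corrected pattern follows, assuming the vLLM APIs named in the diffs; the SampleRequest stand-in, the validate_requests helper, and the model name are hypothetical, added only for illustration.

import asyncio
from dataclasses import dataclass

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)


@dataclass
class SampleRequest:
    # Hypothetical stand-in for the benchmark's request records.
    prompt_len: int
    expected_output_len: int


async def validate_requests(engine_args: AsyncEngineArgs,
                            requests: list[SampleRequest]) -> None:
    async with build_async_engine_client_from_engine_args(
            engine_args, disable_frontend_multiprocessing=True) as llm:
        # `llm` is an EngineClient: it exposes no `model_config`
        # attribute, so the config is fetched via the coroutine.
        model_config = await llm.get_model_config()
        assert all(
            model_config.max_model_len >= (request.prompt_len +
                                           request.expected_output_len)
            for request in requests), (
            "Please ensure that max_model_len is greater than the sum of"
            " prompt_len and expected_output_len for all requests.")


if __name__ == "__main__":
    # Model name and request sizes are illustrative only.
    asyncio.run(validate_requests(AsyncEngineArgs(model="facebook/opt-125m"),
                                  [SampleRequest(16, 16)]))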


@@ -146,9 +146,10 @@ async def run_vllm_async(
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
             "Please ensure that max_model_len is greater than the sum of"
             " prompt_len and expected_output_len for all requests.")
@@ -599,7 +600,7 @@ if __name__ == "__main__":
         "--lora-path",
         type=str,
         default=None,
-        help="Path to the lora adapters to use. This can be an absolute path, "
+        help="Path to the LoRA adapters to use. This can be an absolute path, "
         "a relative path, or a Hugging Face model identifier.")
     parser.add_argument(
         "--prefix-len",


@@ -148,9 +148,10 @@ async def run_vllm_async(
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
             "Please ensure that max_model_len is greater than the sum of"
             " prompt_len and expected_output_len for all requests.")