[CI] Only capture a single CUDA graph size in CI by default (#25951)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor 2025-10-01 10:03:44 +01:00 committed by GitHub
parent 1405f0c7ba
commit a332b84578
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -731,6 +731,9 @@ class VllmRunner:
init_ctx = (nullcontext() if default_torch_num_threads is None else
set_default_torch_num_threads(default_torch_num_threads))
if not kwargs.get("compilation_config", None):
kwargs["compilation_config"] = {"cudagraph_capture_sizes": [8]}
with init_ctx:
self.llm = LLM(
model=model_name,