[CI] Only capture a single CUDA graph size in CI by default (#25951)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-01-21 02:54:27 +08:00 · 2025-10-01 10:03:44 +01:00 · 2025-10-01 10:03:44 +01:00 · e8773e620f
commit e8773e620f
parent 63c56cbb25
1 changed files with 3 additions and 0 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -731,6 +731,9 @@ class VllmRunner:
        init_ctx = (nullcontext() if default_torch_num_threads is None else
                    set_default_torch_num_threads(default_torch_num_threads))

+        if not kwargs.get("compilation_config", None):
+            kwargs["compilation_config"] = {"cudagraph_capture_sizes": [8]}
+
        with init_ctx:
            self.llm = LLM(
                model=model_name,