From d3c84297c38c79e2da6cd0a4ba8be58d95d13d97 Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Mon, 6 Oct 2025 04:35:37 +0200
Subject: [PATCH] [CI] Add comment about the single cudagraph capture size that is used (#26252)

---
 tests/conftest.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index 3ff3e061cdf31..c03fd84ade1e6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -740,6 +740,10 @@ class VllmRunner:
         )
 
         if not kwargs.get("compilation_config", None):
+            # Note(@tdoublep): This is set to 4 because some tests (e.g., hybrid
+            # model tests) may set max_num_seqs=4. If min cudagraph_capture_size is
+            # set to larger than max_num_seqs, then it will lead to *no* graphs
+            # being captured which can trigger edge cases that we don't handle yet.
             kwargs["compilation_config"] = {"cudagraph_capture_sizes": [4]}
 
         with init_ctx: