From 20db99cc692afcb2f5397a9805acb259c2564d28 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Mon, 6 Oct 2025 13:50:11 -0400
Subject: [PATCH] [CI Bugfix] Make sure TRTLLM attention is available in test_blackwell_moe (#26188)

Signed-off-by: mgoin
Signed-off-by: Michael Goin
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 tests/quantization/test_blackwell_moe.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/quantization/test_blackwell_moe.py b/tests/quantization/test_blackwell_moe.py
index 218763bc627dc..4a0f701ae3cb3 100644
--- a/tests/quantization/test_blackwell_moe.py
+++ b/tests/quantization/test_blackwell_moe.py
@@ -15,7 +15,15 @@ if not current_platform.is_device_capability(100):
         "This test only runs on Blackwell GPUs (SM100).", allow_module_level=True
     )
 
-os.environ["FLASHINFER_NVCC_THREADS"] = "16"
+
+@pytest.fixture(scope="module", autouse=True)
+def set_test_environment():
+    """Sets environment variables required for this test module."""
+    # Make sure TRTLLM attention is available
+    os.environ["VLLM_HAS_FLASHINFER_CUBIN"] = "1"
+    # Set compilation threads to 16 to speed up startup
+    os.environ["FLASHINFER_NVCC_THREADS"] = "16"
+
 
 # dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4,
 #                       "text_config": {"num_layers": 4, "num_hidden_layers": 4}}
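For context, a minimal standalone sketch of the module-scoped autouse fixture pattern this patch introduces. Because the fixture is `autouse=True` with `scope="module"`, it runs once before any test in the module, so the environment variables are set before the server under test starts. The save/restore step around `yield` is an illustrative assumption, not part of the original change, which sets the variables for the lifetime of the process:

```python
# Sketch of a module-scoped autouse fixture that sets env vars for all
# tests in the module. The restore step after `yield` is an illustrative
# addition, not part of the original patch.
import os

import pytest

_ENV_VARS = {
    "VLLM_HAS_FLASHINFER_CUBIN": "1",
    "FLASHINFER_NVCC_THREADS": "16",
}


@pytest.fixture(scope="module", autouse=True)
def set_test_environment():
    """Set environment variables once per test module, then restore them."""
    # Remember prior values (None if the variable was unset).
    saved = {key: os.environ.get(key) for key in _ENV_VARS}
    os.environ.update(_ENV_VARS)
    yield
    # Restore the previous environment so later test modules are unaffected.
    for key, value in saved.items():
        if value is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = value
```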