From 20db99cc692afcb2f5397a9805acb259c2564d28 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Mon, 6 Oct 2025 13:50:11 -0400
Subject: [PATCH] [CI Bugfix] Make sure TRTLLM attention is available in test_blackwell_moe (#26188)

Signed-off-by: mgoin
Signed-off-by: Michael Goin
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 tests/quantization/test_blackwell_moe.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/quantization/test_blackwell_moe.py b/tests/quantization/test_blackwell_moe.py
index 218763bc627dc..4a0f701ae3cb3 100644
--- a/tests/quantization/test_blackwell_moe.py
+++ b/tests/quantization/test_blackwell_moe.py
@@ -15,7 +15,15 @@ if not current_platform.is_device_capability(100):
         "This test only runs on Blackwell GPUs (SM100).", allow_module_level=True
     )
 
-os.environ["FLASHINFER_NVCC_THREADS"] = "16"
+
+@pytest.fixture(scope="module", autouse=True)
+def set_test_environment():
+    """Sets environment variables required for this test module."""
+    # Make sure TRTLLM attention is available
+    os.environ["VLLM_HAS_FLASHINFER_CUBIN"] = "1"
+    # Set compilation threads to 16 to speed up startup
+    os.environ["FLASHINFER_NVCC_THREADS"] = "16"
+
 
 # dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4,
 #                       "text_config": {"num_layers": 4, "num_hidden_layers": 4}}
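For context, a minimal standalone sketch of the module-scoped autouse fixture pattern this patch introduces. Because the fixture is `autouse=True` with `scope="module"`, it runs once before any test in the module, so the environment variables are set before the server under test starts. The save/restore step around `yield` is an illustrative assumption, not part of the original change, which sets the variables for the lifetime of the process:

```python
# Sketch of a module-scoped autouse fixture that sets env vars for all
# tests in the module. The restore step after `yield` is an illustrative
# addition, not part of the original patch.
import os

import pytest

_ENV_VARS = {
    "VLLM_HAS_FLASHINFER_CUBIN": "1",
    "FLASHINFER_NVCC_THREADS": "16",
}


@pytest.fixture(scope="module", autouse=True)
def set_test_environment():
    """Set environment variables once per test module, then restore them."""
    # Remember prior values (None if the variable was unset).
    saved = {key: os.environ.get(key) for key in _ENV_VARS}
    os.environ.update(_ENV_VARS)
    yield
    # Restore the previous environment so later test modules are unaffected.
    for key, value in saved.items():
        if value is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = value
```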