[CI Bugfix] Make sure TRTLLM attention is available in test_blackwell_moe (#26188)

Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-12-13 20:44:27 +08:00 · 2025-10-06 13:50:11 -04:00 · 2025-10-06 13:50:11 -04:00 · 20db99cc69
commit 20db99cc69
parent 6431be808f
1 changed file with 9 additions and 1 deletion
--- a/tests/quantization/test_blackwell_moe.py
+++ b/tests/quantization/test_blackwell_moe.py
@@ -15,8 +15,16 @@ if not current_platform.is_device_capability(100):
        "This test only runs on Blackwell GPUs (SM100).", allow_module_level=True
    )
@pytest.fixture(scope="module", autouse=True)
def set_test_environment():
    """Set the environment variables required for this test module.

    The overrides are applied once before the first test in the module and
    undone after the last one, so they do not leak into test modules that
    run later in the same pytest process.

    Yields:
        None. The fixture exists only for its setup/teardown side effects.
    """
    overrides = {
        # Make sure TRTLLM attention is available
        "VLLM_HAS_FLASHINFER_CUBIN": "1",
        # Set compilation threads to 16 to speed up startup
        "FLASHINFER_NVCC_THREADS": "16",
    }
    # Snapshot the prior state; None marks a variable that was not set at all,
    # which must be removed (not set to "") on teardown.
    previous = {name: os.environ.get(name) for name in overrides}
    os.environ.update(overrides)

    yield

    for name, old_value in previous.items():
        if old_value is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = old_value
 # Config override values: both layer-count keys are pinned to 4.
 # NOTE(review): presumably used to shrink the models under test so they load
 # quickly; confirm at the use sites.
 # Alternative kept for reference; it repeats the same keys under "text_config":
 # dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4,
 # "text_config": {"num_layers": 4, "num_hidden_layers": 4}}
 dummy_hf_overrides = {
     "num_layers": 4,
     "num_hidden_layers": 4,
 }