mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 20:44:27 +08:00
[CI Bugfix] Make sure TRTLLM attention is available in test_blackwell_moe (#26188)
Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
6431be808f
commit
20db99cc69
@@ -15,8 +15,16 @@ if not current_platform.is_device_capability(100):
|
|||||||
"This test only runs on Blackwell GPUs (SM100).", allow_module_level=True
|
"This test only runs on Blackwell GPUs (SM100).", allow_module_level=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module", autouse=True)
|
||||||
|
def set_test_environment():
|
||||||
|
"""Sets environment variables required for this test module."""
|
||||||
|
# Make sure TRTLLM attention is available
|
||||||
|
os.environ["VLLM_HAS_FLASHINFER_CUBIN"] = "1"
|
||||||
|
# Set compilation threads to 16 to speed up startup
|
||||||
os.environ["FLASHINFER_NVCC_THREADS"] = "16"
|
os.environ["FLASHINFER_NVCC_THREADS"] = "16"
|
||||||
|
|
||||||
|
|
||||||
# dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4,
|
# dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4,
|
||||||
# "text_config": {"num_layers": 4, "num_hidden_layers": 4}}
|
# "text_config": {"num_layers": 4, "num_hidden_layers": 4}}
|
||||||
dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4}
|
dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user