Fix torch version check for SM100 mxfp4 (#22535)

Signed-off-by: Zifei Tong <zifeitong@gmail.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
2026-08-01 06:07:54 +08:00 · 2025-08-12 12:54:42 -07:00 · 2025-08-12 12:54:42 -07:00 · 6534d2fc97
commit 6534d2fc97
parent 422f22e012
1 changed file with 8 additions and 6 deletions
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -741,12 +741,14 @@ class FusedMoE(torch.nn.Module):
        # we padding globally so EP buffer allocation works
        if quant_config and quant_config.get_name() == "mxfp4":
-            if not is_torch_equal_or_newer("2.8.0"):
+            if not current_platform.is_device_capability(100):
-                raise RuntimeError("Mxfp4 on hopper requires torch >= 2.8.0")
+                if not is_torch_equal_or_newer("2.8.0"):
-            if current_platform.is_device_capability(
+                    raise RuntimeError(
-                    90) and not has_triton_kernels():
+                        "Mxfp4 on non-blackwell requires torch >= 2.8.0")
-                raise NotImplementedError(
+                if not has_triton_kernels():
-                    "Triton kernels must be installed for mxfp4 on hopper")
+                    raise NotImplementedError(
+                        "triton_kernels must be installed for "
+                        "mxfp4 on non-blackwell")
            if (current_platform.is_rocm()
                    or envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8
                    or envs.VLLM_USE_FLASHINFER_MOE_MXFP4_BF16):