From 0b99f5d3023a84f120e6af7df355824e0f39af93 Mon Sep 17 00:00:00 2001
From: XiaobingZhang
Date: Thu, 16 Oct 2025 03:06:47 +0800
Subject: [PATCH] support flashinfer_fp4 moe for 5090 gpu (#26669)

Signed-off-by: XiaobingSuper
Signed-off-by: Michael Goin
Co-authored-by: Michael Goin
---
 .../layers/quantization/utils/flashinfer_fp4_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
index ddb74a27dc122..5ce0188b60aed 100644
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
@@ -32,7 +32,7 @@ def is_flashinfer_fp4_cutlass_moe_available() -> bool:
         envs.VLLM_USE_FLASHINFER_MOE_FP4
         and has_flashinfer_cutlass_fused_moe()
         and current_platform.is_cuda()
-        and current_platform.is_device_capability(100)
+        and current_platform.has_device_capability(100)
     )