From 8f36850f732394648e36770bd6dfc325748d1104 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Tue, 7 Oct 2025 13:50:30 -0400 Subject: [PATCH] [Bug] Fix Shape Validation for Fallback while Enabling E8M0 for DeepGEMM (#26322) Signed-off-by: yewentao256 --- vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py index bb1c70dc3895b..9c35d7d2fe120 100644 --- a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py @@ -143,7 +143,7 @@ class TritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): apply_router_weight_on_input: bool, ): use_deep_gemm = self.allow_deep_gemm and ( - _valid_deep_gemm(hidden_states, w1, w2) or is_deep_gemm_e8m0_used() + is_deep_gemm_e8m0_used() or _valid_deep_gemm(hidden_states, w1, w2) ) experts = self.deep_gemm_expert if use_deep_gemm else self.triton_expert