diff --git a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py index bb1c70dc3895b..9c35d7d2fe120 100644 --- a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py @@ -143,7 +143,7 @@ class TritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): apply_router_weight_on_input: bool, ): use_deep_gemm = self.allow_deep_gemm and ( - _valid_deep_gemm(hidden_states, w1, w2) or is_deep_gemm_e8m0_used() + is_deep_gemm_e8m0_used() or _valid_deep_gemm(hidden_states, w1, w2) ) experts = self.deep_gemm_expert if use_deep_gemm else self.triton_expert