From 7557a676558d7f1f302acdce7e2a301787908239 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Wed, 29 Oct 2025 20:26:12 +0000 Subject: [PATCH] precommit Signed-off-by: Tyler Michael Smith --- .../batched_triton_or_deep_gemm_moe.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py index 3da8a55e7eb55..36ffda62fdf75 100644 --- a/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py @@ -45,14 +45,17 @@ class BatchedTritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): # If deep gemm was requested but is not available (either due to # unsupported configuration or missing dependencies), check if # we should allow fallback to batched triton kernel - if deep_gemm_requested and self.batched_deep_gemm_experts is None: - if not envs.VLLM_ALLOW_BATCHED_TRITON_FALLBACK: - raise RuntimeError( - "DeepGemm was requested but is not available. " - "The batched triton kernel fallback is disabled by default. " - "Set VLLM_ALLOW_BATCHED_TRITON_FALLBACK=1 to enable the fallback " - "for debugging purposes." - ) + if ( + deep_gemm_requested + and self.batched_deep_gemm_experts is None + and not envs.VLLM_ALLOW_BATCHED_TRITON_FALLBACK + ): + raise RuntimeError( + "DeepGemm was requested but is not available. " + "The batched triton kernel fallback is disabled by default. " + "Set VLLM_ALLOW_BATCHED_TRITON_FALLBACK=1 to enable the fallback " + "for debugging purposes." + ) self.batched_triton_experts = ( BatchedTritonExperts(