From de509ae8eb4467bd789234d909e5c725b8fba326 Mon Sep 17 00:00:00 2001
From: Kaixi Hou <kaixih@nvidia.com>
Date: Sat, 26 Jul 2025 07:10:36 -0700
Subject: [PATCH] [NVIDIA] Explicitly disable shuffled weights for flashinfer
 blockscale moe fp8 kernels (#21411)

Signed-off-by: kaixih <kaixih@nvidia.com>
---
 vllm/model_executor/layers/fused_moe/fused_moe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index c412f695ae766..1985e8612da35 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1127,6 +1127,7 @@ def flashinfer_fused_moe_blockscale_fp8(
         tile_tokens_dim=_get_tile_tokens_dim(x.shape[0], top_k,
                                              global_num_experts),
         routing_method_type=2,  # DeepSeek-styled routing method
+        use_shuffled_weight=False,
     )