From ca6f755d2416bb1bd1b924b40481a41b2b0eaf85 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Thu, 6 Nov 2025 17:53:30 -0500
Subject: [PATCH] [BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)

Signed-off-by: Varun Sundar Rabindranath
Co-authored-by: Varun Sundar Rabindranath
---
 vllm/lora/layers/fused_moe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/lora/layers/fused_moe.py b/vllm/lora/layers/fused_moe.py
index f5a766dd5e45..dadb9e25ba2f 100644
--- a/vllm/lora/layers/fused_moe.py
+++ b/vllm/lora/layers/fused_moe.py
@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
 
 
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
             self.base_layer, fused_experts.moe_sum
         )
 
-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
         )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
 
     def create_lora_weights(
         self,
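
For context, the sketch below (not part of the patch) illustrates the pattern the change moves to: instead of monkey-patching fused_experts onto the existing quant method and stashing the old callable in old_fused_experts, the quant method is replaced by a wrapper object constructed from the original quant method and the modular kernel callable. SimpleQuantMethod, modular_kernel_fn, and the stripped-down FusedMoEModularMethod here are illustrative stand-ins, not vLLM's actual classes; only the constructor shape FusedMoEModularMethod(quant_method, fused_experts_fn) is taken from the diff.

# Illustrative sketch only: simplified stand-ins for the vLLM classes touched
# by this patch; they are not the real implementations.


class SimpleQuantMethod:
    """Stand-in for a FusedMoE quant method with a default fused_experts path."""

    def fused_experts(self, hidden_states, router_logits):
        return f"default_experts({hidden_states}, {router_logits})"


class FusedMoEModularMethod:
    """Stand-in wrapper: keeps the original quant method and overrides
    fused_experts with a modular-kernel callable, instead of mutating the
    original object's attributes in place."""

    def __init__(self, quant_method, fused_experts_fn):
        self._wrapped = quant_method
        self._fused_experts_fn = fused_experts_fn

    def fused_experts(self, hidden_states, router_logits):
        # Delegate to the modular kernel supplied at construction time.
        return self._fused_experts_fn(hidden_states, router_logits)

    def __getattr__(self, name):
        # Everything else still comes from the original quant method.
        return getattr(self._wrapped, name)


def modular_kernel_fn(hidden_states, router_logits):
    return f"modular_experts({hidden_states}, {router_logits})"


# Before the patch (fragile): mutate the existing quant method in place.
legacy = SimpleQuantMethod()
legacy.old_fused_experts = legacy.fused_experts
legacy.fused_experts = modular_kernel_fn

# After the patch: replace quant_method with a wrapper, as the diff does with
# self.base_layer.quant_method = FusedMoEModularMethod(quant_method, m_fused_moe_fn).
wrapped = FusedMoEModularMethod(SimpleQuantMethod(), modular_kernel_fn)
print(wrapped.fused_experts("x", "logits"))  # -> modular_experts(x, logits)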