[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Author: Varun Sundar Rabindranath
Date:   2025-11-06 17:53:30 -05:00 (committed by GitHub)
parent ca90f50304
commit ca6f755d24


@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
 
 
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
             self.base_layer, fused_experts.moe_sum
         )
-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
-        )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
+        )
 
     def create_lora_weights(
         self,
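
For context: the old code monkey-patched quant_method.fused_experts in place (stashing the original in old_fused_experts), which the modular-kernel path does not expect. The fix instead wraps the existing quant method in a FusedMoEModularMethod so downstream code sees a modular method object rather than a patched attribute. Below is a minimal sketch of that wrapper/delegation pattern, using a hypothetical ModularMethodWrapper class; it is NOT vLLM's actual FusedMoEModularMethod implementation, only an illustration of why wrapping composes more cleanly than attribute patching.

# Minimal sketch of the wrapper pattern, assuming a hypothetical
# ModularMethodWrapper; vLLM's real FusedMoEModularMethod has its own
# interface and is not reproduced here.
from typing import Any, Callable


class ModularMethodWrapper:
    """Wrap an existing quant method and substitute a modular fused-MoE callable."""

    def __init__(self, wrapped_quant_method: Any, fused_experts: Callable) -> None:
        self._wrapped = wrapped_quant_method
        # The modular kernel entry point takes the place of the original
        # fused_experts attribute on the wrapped method.
        self.fused_experts = fused_experts

    def __getattr__(self, name: str) -> Any:
        # Delegate everything else (weight creation, post-load processing, ...)
        # to the wrapped quant method, so callers see a superset of its API.
        return getattr(self._wrapped, name)


# Usage analogous to the diff above (names hypothetical):
# base_layer.quant_method = ModularMethodWrapper(base_layer.quant_method, m_fused_moe_fn)

Wrapping keeps the original quant method intact, so nothing has to remember an old_fused_experts backup or undo the patch later.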