[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Authored by Varun Sundar Rabindranath on 2025-11-06 17:53:30 -05:00; committed by GitHub
parent ca90f50304
commit ca6f755d24


@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod

 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
             self.base_layer, fused_experts.moe_sum
         )
-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
-        )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
+        )

     def create_lora_weights(
         self,
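
For context, the diff replaces in-place monkey-patching of the quant method's fused_experts attribute (plus stashing the old callable in old_fused_experts) with a single wrapper object, FusedMoEModularMethod, that holds the original quant method and the modular kernel callable. The following is a minimal, self-contained sketch of that wrapping pattern only, not vLLM's actual implementation; SimpleQuantMethod, ModularMethodWrapper, and modular_kernel_fn are hypothetical stand-ins used for illustration.

# Sketch of the "wrap instead of monkey-patch" pattern (hypothetical classes).

class SimpleQuantMethod:
    def fused_experts(self, hidden_states):
        # Placeholder for the original fused-experts path.
        return hidden_states

    def create_weights(self):
        # Placeholder for the other quant-method responsibilities.
        return "weights"


class ModularMethodWrapper:
    """Wraps a quant method and routes fused-experts calls through a
    modular kernel, analogous in spirit to FusedMoEModularMethod."""

    def __init__(self, wrapped_quant_method, modular_fused_experts_fn):
        self.wrapped = wrapped_quant_method
        # Calls to fused_experts now hit the modular kernel callable.
        self.fused_experts = modular_fused_experts_fn

    def __getattr__(self, name):
        # Delegate everything else (e.g. create_weights) to the wrapped method.
        return getattr(self.wrapped, name)


def modular_kernel_fn(hidden_states):
    # Placeholder for the modular (Triton) fused-MoE kernel callable.
    return [h * 2 for h in hidden_states]


base = SimpleQuantMethod()
# Old approach (removed by this commit): mutate attributes in place.
#     base.old_fused_experts = base.fused_experts
#     base.fused_experts = modular_kernel_fn
# New approach: swap the whole quant method for a wrapper object.
quant_method = ModularMethodWrapper(base, modular_kernel_fn)
print(quant_method.fused_experts([1.0, 2.0]))  # routed to modular_kernel_fn
print(quant_method.create_weights())           # delegated to SimpleQuantMethod

Keeping the original quant method intact behind a wrapper avoids leaving stray attributes (old_fused_experts) on shared objects and makes the LoRA-plus-modular-kernel path easier to reason about and undo.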