[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Author: Varun Sundar Rabindranath
Date:   2025-11-06 17:53:30 -05:00 (committed by GitHub)
parent ca90f50304
commit ca6f755d24


@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
 
 
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
             self.base_layer, fused_experts.moe_sum
         )
-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
-        )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
+        )
 
     def create_lora_weights(
         self,
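
For context: the old code monkey-patched quant_method.fused_experts in place (stashing the original in old_fused_experts), which the modular-kernel path does not expect. The fix instead wraps the existing quant method in a FusedMoEModularMethod so downstream code sees a modular method object rather than a patched attribute. Below is a minimal sketch of that wrapper/delegation pattern, using a hypothetical ModularMethodWrapper class; it is NOT vLLM's actual FusedMoEModularMethod implementation, only an illustration of why wrapping composes more cleanly than attribute patching.

# Minimal sketch of the wrapper pattern, assuming a hypothetical
# ModularMethodWrapper; vLLM's real FusedMoEModularMethod has its own
# interface and is not reproduced here.
from typing import Any, Callable


class ModularMethodWrapper:
    """Wrap an existing quant method and substitute a modular fused-MoE callable."""

    def __init__(self, wrapped_quant_method: Any, fused_experts: Callable) -> None:
        self._wrapped = wrapped_quant_method
        # The modular kernel entry point takes the place of the original
        # fused_experts attribute on the wrapped method.
        self.fused_experts = fused_experts

    def __getattr__(self, name: str) -> Any:
        # Delegate everything else (weight creation, post-load processing, ...)
        # to the wrapped quant method, so callers see a superset of its API.
        return getattr(self._wrapped, name)


# Usage analogous to the diff above (names hypothetical):
# base_layer.quant_method = ModularMethodWrapper(base_layer.quant_method, m_fused_moe_fn)

Wrapping keeps the original quant method intact, so nothing has to remember an old_fused_experts backup or undo the patch later.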