[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
parent ca90f50304
commit ca6f755d24
@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
 
 
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
             self.base_layer, fused_experts.moe_sum
         )
 
-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
-        )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
+        )
 
     def create_lora_weights(
         self,
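
The change replaces attribute monkey-patching (stashing the old fused_experts on the quant method and overwriting it in place) with a wrapper object: self.base_layer.quant_method becomes a FusedMoEModularMethod constructed from the existing quant method and the modular kernel callable m_fused_moe_fn. Below is a minimal, hypothetical sketch of that delegation pattern. It is not vLLM's actual FusedMoEModularMethod; the names QuantMethod, ModularMethod, apply, and fused_experts are assumptions for illustration only.

    # Minimal sketch of the wrapper pattern this fix adopts; NOT vLLM's
    # actual FusedMoEModularMethod. All names here are illustrative
    # assumptions.


    class QuantMethod:
        """Stand-in for a quant method whose fused_experts hook gets swapped."""

        def fused_experts(self, x: int) -> int:
            return x * 2  # placeholder for the default fused-experts kernel

        def apply(self, x: int) -> int:
            # Dispatches through self, so an overriding wrapper wins.
            return self.fused_experts(x)


    class ModularMethod(QuantMethod):
        """Wraps an existing quant method instead of mutating its attributes.

        Mirrors the shape of FusedMoEModularMethod(quant_method, m_fused_moe_fn):
        fused_experts is routed through the modular kernel function, while any
        other attribute is delegated to the wrapped instance.
        """

        def __init__(self, wrapped: QuantMethod, modular_fn) -> None:
            self._wrapped = wrapped
            self._modular_fn = modular_fn

        def fused_experts(self, x: int) -> int:
            return self._modular_fn(x)

        def __getattr__(self, name: str):
            # Only reached for attributes not defined on the wrapper itself.
            return getattr(self._wrapped, name)


    if __name__ == "__main__":
        base = QuantMethod()
        modular = ModularMethod(base, modular_fn=lambda x: x * 3)
        assert base.apply(2) == 4      # original object is left untouched
        assert modular.apply(2) == 6   # wrapper takes the modular-kernel path

One plausible motivation for the wrapper approach, judging from the diff alone: swapping the whole quant_method object keeps the original method's state intact and avoids the leftover old_fused_experts bookkeeping that in-place patching required.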