[Bugfix] Fix qwen-moe packed_modules_mapping (#26634)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
commit f0a30a067b
parent 9d6cff3ede
@@ -325,7 +325,7 @@ class SupportsLoRA(Protocol):
     # are empty by default.
     embedding_modules: ClassVar[dict[str, str]] = {}
     embedding_padding_modules: ClassVar[list[str]] = []
-    packed_modules_mapping: ClassVar[dict[str, list[str]]] = {}
+    packed_modules_mapping: dict[str, list[str]] = {}
 
 
 # We can't use runtime_checkable with ClassVar for issubclass checks
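A minimal sketch of what the interface change above enables, using a toy class (none of the names below are vLLM code): with the ClassVar annotation removed, packed_modules_mapping is an ordinary attribute that a conforming model may override per instance, which is what the Qwen-MoE __init__ changes further down rely on.

from typing import Protocol


class SupportsPackedModules(Protocol):
    # Hypothetical stand-in for the SupportsLoRA attribute above.
    packed_modules_mapping: dict[str, list[str]]


class ToyMoEModel:
    # Class-level default shared by all instances.
    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}

    def __init__(self, has_dense_mlp: bool) -> None:
        if has_dense_mlp:
            # Shadow the class attribute with an instance-level copy so the
            # extra entry applies only to configurations that need it.
            self.packed_modules_mapping = {
                **type(self).packed_modules_mapping,
                "gate_up_proj": ["gate_proj", "up_proj"],
            }


dense = ToyMoEModel(has_dense_mlp=True)
sparse = ToyMoEModel(has_dense_mlp=False)
assert "gate_up_proj" in dense.packed_modules_mapping
assert "gate_up_proj" not in sparse.packed_modules_mapping

The commit itself assigns into self.packed_modules_mapping in place rather than rebuilding a copy; the copy above is only to keep the toy example self-contained.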
@@ -534,11 +534,7 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
             "q_proj",
             "k_proj",
             "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
+        ]
     }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
@@ -547,6 +543,18 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
+        # Only perform the following mapping when Qwen2MoeMLP exists
+        if (
+            getattr(config, "mlp_only_layers", [])
+            or config.shared_expert_intermediate_size > 0
+        ):
+            self.packed_modules_mapping["gate_up_proj"] = (
+                [
+                    "gate_proj",
+                    "up_proj",
+                ],
+            )
+
         self.model = Qwen2MoeModel(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )
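For reference, a small sketch (a hypothetical helper, not part of the commit) of the condition the Qwen2-MoE hunk adds: per the inline comment, the "gate_up_proj" packing entry is only meaningful when Qwen2MoeMLP exists, i.e. when some layers are dense (mlp_only_layers) or a shared expert is configured.

def qwen2_moe_needs_gate_up_packing(config) -> bool:
    # Mirrors the `if` condition added in Qwen2MoeForCausalLM.__init__ above.
    return bool(
        getattr(config, "mlp_only_layers", [])
        or config.shared_expert_intermediate_size > 0
    )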
@@ -634,11 +634,7 @@ class Qwen3MoeForCausalLM(
             "q_proj",
             "k_proj",
             "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
+        ]
     }
 
     fall_back_to_pt_during_load = False
@@ -649,6 +645,14 @@ class Qwen3MoeForCausalLM(
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
+        # Only perform the following mapping when Qwen3MoeMLP exists
+        if getattr(config, "mlp_only_layers", []):
+            self.packed_modules_mapping["gate_up_proj"] = (
+                [
+                    "gate_proj",
+                    "up_proj",
+                ],
+            )
         self.model = Qwen3MoeModel(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )
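Taken together, a rough summary of the resulting mapping for Qwen3-MoE (an illustrative helper, not code from the commit): the class-level mapping now covers only qkv_proj, and gate_up_proj is added at construction time only when the config lists dense layers in mlp_only_layers, the only case in which Qwen3MoeMLP (and thus gate_proj/up_proj) exists according to the hunk above.

def qwen3_moe_packed_modules(config) -> dict[str, list[str]]:
    # Class-level default from Qwen3MoeForCausalLM after this change.
    mapping: dict[str, list[str]] = {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    }
    # Conditional entry mirroring the __init__ hunk above.
    if getattr(config, "mlp_only_layers", []):
        mapping["gate_up_proj"] = ["gate_proj", "up_proj"]
    return mapping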