mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-02 10:44:30 +08:00
[Bugfix][ROCm] Fix load issue on deepseek quark quantization when shared expert enabled (#31261)
Signed-off-by: ganyi <ygan@amd.com>
This commit is contained in:
parent
ddfac7034e
commit
b41aeb3468
@ -1598,7 +1598,11 @@ class DeepseekV2ForCausalLM(
|
|||||||
# Determine split axis based on op type
|
# Determine split axis based on op type
|
||||||
# gate/up: ColumnParallel → split along dim 0
|
# gate/up: ColumnParallel → split along dim 0
|
||||||
# down: RowParallel → split along dim 1
|
# down: RowParallel → split along dim 1
|
||||||
split_dim = 1 if "down_proj.weight" in name else 0
|
split_dim = (
|
||||||
|
1
|
||||||
|
if ("down_proj.weight" in name and loaded_weight.ndim > 1)
|
||||||
|
else 0
|
||||||
|
)
|
||||||
total = loaded_weight.shape[split_dim]
|
total = loaded_weight.shape[split_dim]
|
||||||
assert total % num_chunks == 0, (
|
assert total % num_chunks == 0, (
|
||||||
f"Shared expert weight dim {total} "
|
f"Shared expert weight dim {total} "
|
||||||
@ -1611,14 +1615,13 @@ class DeepseekV2ForCausalLM(
|
|||||||
weight_to_load = loaded_weight
|
weight_to_load = loaded_weight
|
||||||
|
|
||||||
if is_fusion_moe_shared_experts_layer:
|
if is_fusion_moe_shared_experts_layer:
|
||||||
if split_dim == 0:
|
chunk_slice = slice(j * chunk_size, (j + 1) * chunk_size)
|
||||||
weight_to_load = loaded_weight[
|
if loaded_weight.ndim == 1:
|
||||||
j * chunk_size : (j + 1) * chunk_size, :
|
weight_to_load = loaded_weight[chunk_slice]
|
||||||
]
|
elif split_dim == 0:
|
||||||
|
weight_to_load = loaded_weight[chunk_slice, :]
|
||||||
else:
|
else:
|
||||||
weight_to_load = loaded_weight[
|
weight_to_load = loaded_weight[:, chunk_slice]
|
||||||
:, j * chunk_size : (j + 1) * chunk_size
|
|
||||||
]
|
|
||||||
# Synthesize an expert-style name so expert mapping
|
# Synthesize an expert-style name so expert mapping
|
||||||
# can route it
|
# can route it
|
||||||
chunk_name = name.replace(
|
chunk_name = name.replace(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user