[Bugfix][ROCm] Fix load issue on deepseek quark quantization when shared expert is enabled (#31261)

Signed-off-by: ganyi <ygan@amd.com>
This commit is contained in:
Pleaplusone 2025-12-24 16:47:44 +08:00 committed by GitHub
parent ddfac7034e
commit b41aeb3468
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1598,7 +1598,11 @@ class DeepseekV2ForCausalLM(
# Determine split axis based on op type
# gate/up: ColumnParallel → split along dim 0
# down: RowParallel → split along dim 1
split_dim = 1 if "down_proj.weight" in name else 0
split_dim = (
1
if ("down_proj.weight" in name and loaded_weight.ndim > 1)
else 0
)
total = loaded_weight.shape[split_dim]
assert total % num_chunks == 0, (
f"Shared expert weight dim {total} "
@@ -1611,14 +1615,13 @@ class DeepseekV2ForCausalLM(
weight_to_load = loaded_weight
if is_fusion_moe_shared_experts_layer:
if split_dim == 0:
weight_to_load = loaded_weight[
j * chunk_size : (j + 1) * chunk_size, :
]
chunk_slice = slice(j * chunk_size, (j + 1) * chunk_size)
if loaded_weight.ndim == 1:
weight_to_load = loaded_weight[chunk_slice]
elif split_dim == 0:
weight_to_load = loaded_weight[chunk_slice, :]
else:
weight_to_load = loaded_weight[
:, j * chunk_size : (j + 1) * chunk_size
]
weight_to_load = loaded_weight[:, chunk_slice]
# Synthesize an expert-style name so expert mapping
# can route it
chunk_name = name.replace(