[Bugfix][ROCm] Fix load issue on deepseek quark quantization when shared expert enabled (#31261)

Signed-off-by: ganyi <ygan@amd.com>
2026-03-16 08:27:07 +08:00 · 2025-12-24 16:47:44 +08:00 · 2025-12-24 16:47:44 +08:00 · b41aeb3468
commit b41aeb3468
parent ddfac7034e
1 changed files with 11 additions and 8 deletions
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@ -1598,7 +1598,11 @@ class DeepseekV2ForCausalLM(
                    # Determine split axis based on op type
                    # gate/up: ColumnParallel → split along dim 0
                    # down: RowParallel → split along dim 1
-                    split_dim = 1 if "down_proj.weight" in name else 0
+                    split_dim = (
+                        1
+                        if ("down_proj.weight" in name and loaded_weight.ndim > 1)
+                        else 0
+                    )
                    total = loaded_weight.shape[split_dim]
                    assert total % num_chunks == 0, (
                        f"Shared expert weight dim {total} "
@ -1611,14 +1615,13 @@ class DeepseekV2ForCausalLM(
                    weight_to_load = loaded_weight

                    if is_fusion_moe_shared_experts_layer:
-                        if split_dim == 0:
-                            weight_to_load = loaded_weight[
-                                j * chunk_size : (j + 1) * chunk_size, :
-                            ]
+                        chunk_slice = slice(j * chunk_size, (j + 1) * chunk_size)
+                        if loaded_weight.ndim == 1:
+                            weight_to_load = loaded_weight[chunk_slice]
+                        elif split_dim == 0:
+                            weight_to_load = loaded_weight[chunk_slice, :]
                        else:
-                            weight_to_load = loaded_weight[
-                                :, j * chunk_size : (j + 1) * chunk_size
-                            ]
+                            weight_to_load = loaded_weight[:, chunk_slice]
                        # Synthesize an expert-style name so expert mapping
                        # can route it
                        chunk_name = name.replace(