From b41aeb3468fecb33b7934fc7ca5406bdba84dace Mon Sep 17 00:00:00 2001 From: Pleaplusone Date: Wed, 24 Dec 2025 16:47:44 +0800 Subject: [PATCH] [Bugfix][ROCm] Fix load issue on deepseek quark quantization when shared expert enabled (#31261) Signed-off-by: ganyi --- vllm/model_executor/models/deepseek_v2.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 4899f5476f955..b22cdb6d6c80c 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -1598,7 +1598,11 @@ class DeepseekV2ForCausalLM( # Determine split axis based on op type # gate/up: ColumnParallel → split along dim 0 # down: RowParallel → split along dim 1 - split_dim = 1 if "down_proj.weight" in name else 0 + split_dim = ( + 1 + if ("down_proj.weight" in name and loaded_weight.ndim > 1) + else 0 + ) total = loaded_weight.shape[split_dim] assert total % num_chunks == 0, ( f"Shared expert weight dim {total} " @@ -1611,14 +1615,13 @@ class DeepseekV2ForCausalLM( weight_to_load = loaded_weight if is_fusion_moe_shared_experts_layer: - if split_dim == 0: - weight_to_load = loaded_weight[ - j * chunk_size : (j + 1) * chunk_size, : - ] + chunk_slice = slice(j * chunk_size, (j + 1) * chunk_size) + if loaded_weight.ndim == 1: + weight_to_load = loaded_weight[chunk_slice] + elif split_dim == 0: + weight_to_load = loaded_weight[chunk_slice, :] else: - weight_to_load = loaded_weight[ - :, j * chunk_size : (j + 1) * chunk_size - ] + weight_to_load = loaded_weight[:, chunk_slice] # Synthesize an expert-style name so expert mapping # can route it chunk_name = name.replace(