[Model] add Hunyuan V1 Dense Model support. (#21368)

Signed-off-by: Asher Zhang <asherszhang@tencent.com>
Asher committed via GitHub on 2025-07-23 18:54:08 +08:00
parent 2cc5016a19
commit 2671334d45
4 changed files with 57 additions and 19 deletions
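
With this change the dense checkpoint is served through the same entry points as the MoE one. A minimal offline sketch, not part of the diff (the prompt and sampling values are arbitrary; `trust_remote_code=True` is needed because the Hunyuan modeling code ships in the model repo, as the test registry below also assumes):

# Minimal sketch, outside the diff: load the newly supported dense
# checkpoint with vLLM's offline API. Sampling settings are arbitrary.
from vllm import LLM, SamplingParams

llm = LLM(model="tencent/Hunyuan-7B-Instruct-0124", trust_remote_code=True)
params = SamplingParams(temperature=0.7, max_tokens=64)
out = llm.generate(["What does a dense decoder layer compute?"], params)
print(out[0].outputs[0].text)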


@@ -363,6 +363,7 @@ th {
 | `GraniteMoeSharedForCausalLM` | Granite MoE Shared | `ibm-research/moe-7b-1b-active-shared-experts` (test model) | ✅︎ | ✅︎ | ✅︎ |
 | `GritLM` | GritLM | `parasail-ai/GritLM-7B-vllm`. | ✅︎ | ✅︎ | |
 | `Grok1ModelForCausalLM` | Grok1 | `hpcai-tech/grok-1`. | ✅︎ | ✅︎ | ✅︎ |
+| `HunYuanDenseV1ForCausalLM` | Hunyuan-7B-Instruct-0124 | `tencent/Hunyuan-7B-Instruct-0124` | ✅︎ | | ✅︎ |
 | `HunYuanMoEV1ForCausalLM` | Hunyuan-80B-A13B | `tencent/Hunyuan-A13B-Instruct`, `tencent/Hunyuan-A13B-Pretrain`, `tencent/Hunyuan-A13B-Instruct-FP8`, etc. | ✅︎ | | ✅︎ |
 | `InternLMForCausalLM` | InternLM | `internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `InternLM2ForCausalLM` | InternLM2 | `internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc. | ✅︎ | ✅︎ | ✅︎ |


@@ -199,6 +199,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                            trust_remote_code=True),
     "HunYuanMoEV1ForCausalLM": _HfExamplesInfo("tencent/Hunyuan-A13B-Instruct",
                                                trust_remote_code=True),
+    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo("tencent/Hunyuan-7B-Instruct-0124",
+                                                 trust_remote_code=True),
     "InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b",
                                            trust_remote_code=True),
     "InternLM2ForCausalLM": _HfExamplesInfo("internlm/internlm2-chat-7b",


@@ -61,6 +61,19 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter,
                      make_layers)
 
 
+def _is_moe(config: PretrainedConfig) -> bool:
+    num_experts = getattr(config, "num_experts", None)
+    if isinstance(num_experts, int):
+        return num_experts > 1
+    if isinstance(num_experts, list) and num_experts:
+        # Ensure all elements are integers before calling max.
+        if all(isinstance(e, int) for e in num_experts):
+            return max(num_experts) > 1
+        else:
+            return False
+    return False
+
+
 def _get_cla_factor(config: PretrainedConfig) -> int:
     if not getattr(config, "use_cla", False):
         return 1
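
The new `_is_moe` helper is the single switch the rest of the file keys on: an integer `num_experts > 1` or a non-empty all-integer list marks a MoE checkpoint, and everything else (missing, 1, or a malformed list) falls back to the dense path. A standalone sketch of that decision table, with `SimpleNamespace` standing in for `PretrainedConfig`:

# Standalone sketch of _is_moe's decision table; SimpleNamespace stands
# in for transformers.PretrainedConfig, which is just an attribute bag here.
from types import SimpleNamespace

def _is_moe(config) -> bool:
    num_experts = getattr(config, "num_experts", None)
    if isinstance(num_experts, int):
        return num_experts > 1
    if isinstance(num_experts, list) and num_experts:
        # Ensure all elements are integers before calling max.
        if all(isinstance(e, int) for e in num_experts):
            return max(num_experts) > 1
        return False
    return False

assert _is_moe(SimpleNamespace(num_experts=64))           # MoE
assert _is_moe(SimpleNamespace(num_experts=[8, 8, 16]))   # per-layer MoE
assert not _is_moe(SimpleNamespace(num_experts=1))        # single expert -> dense
assert not _is_moe(SimpleNamespace())                     # dense config, no experts
assert not _is_moe(SimpleNamespace(num_experts=["8"]))    # malformed list -> dense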
@@ -140,8 +153,8 @@ class HunYuanAttention(nn.Module):
             # the KV heads across multiple tensor parallel GPUs.
             assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MistralConfig has an optional head_dim introduced by Mistral-Nemo
-        if hasattr(config, "head_dim"):
+        if hasattr(config, "head_dim") and config.head_dim:
             self.head_dim = config.head_dim
         elif hasattr(config, "attention_head_dim"):
             self.head_dim = config.attention_head_dim
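
The tightened check matters for configs that define `head_dim` but leave it `None` or 0: a bare `hasattr` would assign the falsy value and break the attention shapes, while the truthiness guard falls through to `attention_head_dim` and then to the conventional `hidden_size // num_attention_heads` default (that final branch sits outside this hunk, so treating it as the default here is an assumption). A standalone sketch:

# Sketch of the head_dim resolution order after the fix; the final
# default is assumed to be the usual hidden_size // num_heads rule.
from types import SimpleNamespace

def resolve_head_dim(config) -> int:
    if hasattr(config, "head_dim") and config.head_dim:
        return config.head_dim
    elif hasattr(config, "attention_head_dim"):
        return config.attention_head_dim
    return config.hidden_size // config.num_attention_heads

# head_dim=None no longer shadows the fallbacks:
cfg = SimpleNamespace(head_dim=None, hidden_size=4096, num_attention_heads=32)
assert resolve_head_dim(cfg) == 128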
@@ -490,12 +503,23 @@ class HunYuanDecoderLayer(nn.Module):
         else:
             raise RuntimeError(f"Unsupported attention type: {attention_type}")
-        self.mlp = HunYuanSparseMoeBlock(
-            config=config,
-            quant_config=quant_config,
-            layer_id=layer_id,
-            prefix=f"{prefix}.mlp",
-        )
+        if _is_moe(config):
+            self.mlp = HunYuanSparseMoeBlock(
+                config=config,
+                quant_config=quant_config,
+                layer_id=layer_id,
+                prefix=f"{prefix}.mlp",
+            )
+        else:
+            self.mlp = HunYuanMLP(
+                hidden_size=self.hidden_size,
+                intermediate_size=self.intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                bias=getattr(config, "mlp_bias", False),
+                prefix=f"{prefix}.mlp",
+            )
         self.input_layernorm = RMSNorm(config.hidden_size,
                                        eps=config.rms_norm_eps)
         self.post_attention_layernorm = RMSNorm(config.hidden_size,
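
Note that the dense/MoE choice is made once per layer at construction time, keyed entirely on `_is_moe(config)`: a dense checkpoint never instantiates the sparse block, and `mlp_bias` falls back to `False` when the config omits it.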
@@ -642,15 +666,17 @@ class HunYuanModel(nn.Module):
         return torch.concat((q, k, v))
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        # Params for weights, fp8 weight scales, fp8 activation scales
-        # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
-            ckpt_gate_proj_name="gate_proj",
-            ckpt_down_proj_name="down_proj",
-            ckpt_up_proj_name="up_proj",
-            num_experts=self.config.num_experts,
-        )
+        if _is_moe(self.config):
+            # Params for weights, fp8 weight scales, fp8 activation scales
+            # (param_name, weight_name, expert_id, shard_id)
+            return FusedMoE.make_expert_params_mapping(
+                ckpt_gate_proj_name="gate_proj",
+                ckpt_down_proj_name="down_proj",
+                ckpt_up_proj_name="up_proj",
+                num_experts=self.config.num_experts,
+            )
+        else:
+            return []
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
         cla_factor = _get_cla_factor(self.config)
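
On the dense path there are no expert weights to remap, so the empty mapping lets `load_weights` handle every tensor through the ordinary stacked-parameter path. For intuition, here is a toy mapping in the tuple shape the comment above names; the parameter and weight names are illustrative stand-ins, not `FusedMoE.make_expert_params_mapping`'s real output:

# Toy (param_name, weight_name, expert_id, shard_id) tuples for a
# hypothetical 2-expert layer; names are illustrative only.
def toy_expert_params_mapping(num_experts: int):
    mapping = []
    for expert_id in range(num_experts):
        for shard_id, ckpt_name in (("w1", "gate_proj"),
                                    ("w3", "up_proj"),
                                    ("w2", "down_proj")):
            param = ("experts.w13_weight" if shard_id in ("w1", "w3")
                     else "experts.w2_weight")
            mapping.append(
                (param, f"experts.{expert_id}.{ckpt_name}.", expert_id,
                 shard_id))
    return mapping

assert toy_expert_params_mapping(0) == []  # the dense case degenerates to []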
@@ -815,7 +841,7 @@ class HunYuanModel(nn.Module):
         return loaded_params
 
 
-class HunYuanMoEV1ForCausalLM(nn.Module, SupportsLoRA):
+class HunYuanV1Base(nn.Module, SupportsLoRA):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
@@ -901,3 +927,11 @@ class HunYuanMoEV1ForCausalLM(nn.Module, SupportsLoRA):
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         return self.model.get_expert_mapping()
+
+
+class HunYuanDenseV1ForCausalLM(HunYuanV1Base):
+    pass
+
+
+class HunYuanMoEV1ForCausalLM(HunYuanV1Base):
+    pass


@@ -79,7 +79,8 @@ _TEXT_GENERATION_MODELS = {
     "GraniteMoeSharedForCausalLM": ("granitemoeshared", "GraniteMoeSharedForCausalLM"),  # noqa: E501
     "GritLM": ("gritlm", "GritLM"),
     "Grok1ModelForCausalLM": ("grok1", "Grok1ForCausalLM"),
-    "HunYuanMoEV1ForCausalLM": ("hunyuan_v1_moe", "HunYuanMoEV1ForCausalLM"),
+    "HunYuanMoEV1ForCausalLM": ("hunyuan_v1", "HunYuanMoEV1ForCausalLM"),
+    "HunYuanDenseV1ForCausalLM": ("hunyuan_v1", "HunYuanDenseV1ForCausalLM"),
     "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
     "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
     "InternLM2VEForCausalLM": ("internlm2_ve", "InternLM2VEForCausalLM"),