mirror of
https://git.datalinker.icu/deepseek-ai/DeepSeek-V3.git
synced 2026-04-24 00:07:24 +08:00
* support scale_fmt=ue8m0
* keep improving
  Signed-off-by: youkaichao <youkaichao@gmail.com>
* keep improving
  Signed-off-by: youkaichao <youkaichao@gmail.com>
* add clamp min of 1e-4
  Signed-off-by: youkaichao <youkaichao@gmail.com>
* rename config
  Signed-off-by: youkaichao <youkaichao@gmail.com>
---------
Signed-off-by: youkaichao <youkaichao@gmail.com>
23 lines
529 B
JSON
23 lines
529 B
JSON
{
  "vocab_size": 129280,
  "dim": 7168,
  "inter_dim": 18432,
  "moe_inter_dim": 2048,
  "n_layers": 61,
  "n_dense_layers": 3,
  "n_heads": 128,
  "n_routed_experts": 256,
  "n_shared_experts": 1,
  "n_activated_experts": 8,
  "n_expert_groups": 8,
  "n_limited_groups": 4,
  "route_scale": 2.5,
  "score_func": "sigmoid",
  "q_lora_rank": 1536,
  "kv_lora_rank": 512,
  "qk_nope_head_dim": 128,
  "qk_rope_head_dim": 64,
  "v_head_dim": 128,
  "dtype": "fp8",
  "scale_fmt": "ue8m0"
}