mirror of
https://git.datalinker.icu/deepseek-ai/DeepSeek-V3.git
synced 2026-01-23 12:24:26 +08:00
fp32 gate bias
This commit is contained in:
parent
f6e34dd267
commit
4592be48c0
@@ -558,7 +558,7 @@ class Gate(nn.Module):
|
||||
self.score_func = args.score_func
|
||||
self.route_scale = args.route_scale
|
||||
self.weight = nn.Parameter(torch.empty(args.n_routed_experts, args.dim))
|
||||
-self.bias = nn.Parameter(torch.empty(args.n_routed_experts)) if self.dim == 7168 else None
|
||||
+self.bias = nn.Parameter(torch.empty(args.n_routed_experts, dtype=torch.float32)) if self.dim == 7168 else None
|
||||
|
||||
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user