mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-23 18:25:37 +08:00
[Bugfix] Allow shared_experts to skip quantization for DeepSeekV2/V3 (#14100)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
ae122b1cbd
commit
2b04c209ee
@@ -145,6 +145,7 @@ class DeepseekV2MoE(nn.Module):
                 hidden_act=config.hidden_act,
                 quant_config=quant_config,
                 reduce_results=False,
+                prefix=f"{prefix}.shared_experts",
             )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
Loading…
x
Reference in New Issue
Block a user