diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 0c6f07ce7b112..fd0e58fa1458d 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -581,7 +581,8 @@ class DeepseekV2Model(nn.Module): self.embed_tokens = VocabParallelEmbedding( config.vocab_size, config.hidden_size, - ) + quant_config=quant_config, + prefix=f"{prefix}.embed_tokens") else: self.embed_tokens = PPMissingLayer()