diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index ca77b8322e2e8..c25e8422da157 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -141,6 +141,7 @@ class DeepSeekMultiTokenPredictor(nn.Module): self.embed_tokens = VocabParallelEmbedding( config.vocab_size, config.hidden_size, + prefix=maybe_prefix(prefix, "embed_tokens"), ) self.logits_processor = LogitsProcessor(config.vocab_size)