mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-21 23:26:09 +08:00
[misc] refactor speculative config (#25657)
Signed-off-by: zxw <1020938856@qq.com> Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
bc37468b3c
commit
c761b84d5f
@ -209,12 +209,9 @@ class SpeculativeConfig:
|
|||||||
|
|
||||||
if self.model is None and self.num_speculative_tokens is not None:
|
if self.model is None and self.num_speculative_tokens is not None:
|
||||||
# TODO(Shangming): Refactor mtp configuration logic when supporting
|
# TODO(Shangming): Refactor mtp configuration logic when supporting
|
||||||
# mtp acceleration for more models besides deepseek_v3
|
if (self.target_model_config
|
||||||
if self.target_model_config and \
|
and self.target_model_config.hf_text_config.model_type
|
||||||
(self.target_model_config.hf_text_config.model_type \
|
in ("deepseek_v3", "mimo", "ernie4_5_moe", "qwen3_next")):
|
||||||
== "deepseek_v3" or
|
|
||||||
self.target_model_config.hf_text_config.model_type in
|
|
||||||
("mimo","ernie4_5_moe", "qwen3_next")):
|
|
||||||
# use the draft model from the same model:
|
# use the draft model from the same model:
|
||||||
self.model = self.target_model_config.model
|
self.model = self.target_model_config.model
|
||||||
# Align the quantization of draft model for cases such as
|
# Align the quantization of draft model for cases such as
|
||||||
@ -224,7 +221,8 @@ class SpeculativeConfig:
|
|||||||
elif self.method in ("ngram", "[ngram]"):
|
elif self.method in ("ngram", "[ngram]"):
|
||||||
self.model = "ngram"
|
self.model = "ngram"
|
||||||
else:
|
else:
|
||||||
raise ValueError("num_speculative_tokens was provided without "
|
raise ValueError(
|
||||||
|
"num_speculative_tokens was provided but without "
|
||||||
"speculative model.")
|
"speculative model.")
|
||||||
|
|
||||||
# Automatically configure the method for ngram when "model" is used
|
# Automatically configure the method for ngram when "model" is used
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user