[misc] refactor speculative config (#25657)

Signed-off-by: zxw <1020938856@qq.com> Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-12-21 23:26:09 +08:00 · 2025-09-26 16:22:06 +08:00 · 2025-09-26 16:22:06 +08:00 · c761b84d5f
commit c761b84d5f
parent bc37468b3c
1 changed files with 6 additions and 8 deletions
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -209,12 +209,9 @@ class SpeculativeConfig:
        if self.model is None and self.num_speculative_tokens is not None:
            # TODO(Shangming): Refactor mtp configuration logic when supporting
-            # mtp acceleration for more models besides deepseek_v3
+            if (self.target_model_config
-            if self.target_model_config and \
+                    and self.target_model_config.hf_text_config.model_type
-                (self.target_model_config.hf_text_config.model_type \
+                    in ("deepseek_v3", "mimo", "ernie4_5_moe", "qwen3_next")):
                        == "deepseek_v3" or
                    self.target_model_config.hf_text_config.model_type in
                        ("mimo","ernie4_5_moe", "qwen3_next")):
                # use the draft model from the same model:
                self.model = self.target_model_config.model
                # Align the quantization of draft model for cases such as
@@ -224,7 +221,8 @@ class SpeculativeConfig:
            elif self.method in ("ngram", "[ngram]"):
                self.model = "ngram"
            else:
-                raise ValueError("num_speculative_tokens was provided without "
+                raise ValueError(
                    "num_speculative_tokens was provided but without "
                    "speculative model.")
        # Automatically configure the method for ngram when "model" is used