[V1][Spec Decode] Make Eagle model arch config driven (#17323)

2026-06-09 16:22:19 +08:00 · 2025-04-28 22:22:02 -04:00 · 2025-04-28 22:22:02 -04:00 · e136000595
commit e136000595
parent 86d9fc29cb
3 changed files with 26 additions and 13 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2401,7 +2401,8 @@ class SpeculativeConfig:
                        pass
                    else:
                        eagle_config = EAGLEConfig(
-                            self.draft_model_config.hf_config)
+                            self.draft_model_config.hf_config,
+                            method=self.method)
                        self.draft_model_config.hf_config = eagle_config
                if (self.num_speculative_tokens is not None
--- a/vllm/transformers_utils/configs/eagle.py
+++ b/vllm/transformers_utils/configs/eagle.py
@@ -15,6 +15,7 @@ class EAGLEConfig(PretrainedConfig):
    def __init__(self,
                 model: Union[PretrainedConfig, dict, None] = None,
                 truncated_vocab_size: Optional[int] = None,
+                 method: Optional[str] = 'eagle',
                 **kwargs):
        model_config: Union[PretrainedConfig, DeepseekV2Config, None]
@@ -45,7 +46,23 @@ class EAGLEConfig(PretrainedConfig):
        if not envs.VLLM_USE_V1:
            kwargs["architectures"] = ["EAGLEModel"]
        else:
-            kwargs["architectures"] = ["EagleLlamaForCausalLM"]
+            # Eagle model name should follow naming convention of
+            # LlamaForCausalLM -> EagleLlamaForCausalLM
+            if method == "eagle":
+                assert self.model is not None, \
+                    "model should not be None when method is eagle"
+                kwargs["architectures"] = [
+                    f"Eagle{arch}" for arch in self.model.architectures
+                ]
+            elif method == "eagle3":
+                assert self.model is not None, \
+                    "model should not be None when method is eagle3"
+                kwargs["architectures"] = [
+                    f"Eagle3{arch}" for arch in self.model.architectures
+                ]
+            else:
+                raise ValueError(f"Invalid method {method}. \
+                    Supported methods are eagle and eagle3.")
        super().__init__(**kwargs)
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -9,8 +9,7 @@ from vllm.forward_context import set_forward_context
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader.loader import get_model_loader
 from vllm.model_executor.model_loader.utils import set_default_torch_dtype
-from vllm.model_executor.models.llama_eagle import EagleLlamaForCausalLM
+from vllm.model_executor.models import ModelRegistry
-from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
 from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
 from vllm.v1.sample.metadata import SamplingMetadata
@@ -225,15 +224,11 @@ class EagleProposer:
        with set_default_torch_dtype(
                draft_model_config.dtype), set_current_vllm_config(
                    self.vllm_config):
-            if self.vllm_config.speculative_config.method == "eagle":
+            draft_model_cls, arch = ModelRegistry.resolve_model_cls(
-                self.model = EagleLlamaForCausalLM(
+                draft_model_config.architectures)
-                    model_config=draft_model_config,
+            self.model = draft_model_cls(
-                    start_layer_id=target_layer_num).to(target_device)
+                model_config=draft_model_config,
-            else:
+                start_layer_id=target_layer_num).to(target_device)
-                assert self.vllm_config.speculative_config.method == "eagle3"
-                self.model = Eagle3LlamaForCausalLM(
-                    model_config=draft_model_config,
-                    start_layer_id=target_layer_num).to(target_device)
        loaded_weights = self.model.load_weights(
            loader.get_all_weights(