mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-09 16:22:19 +08:00
[V1][Spec Decode] Make Eagle model arch config driven (#17323)
This commit is contained in:
parent
86d9fc29cb
commit
e136000595
@@ -2401,7 +2401,8 @@ class SpeculativeConfig:
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
eagle_config = EAGLEConfig(
|
eagle_config = EAGLEConfig(
|
||||||
self.draft_model_config.hf_config)
|
self.draft_model_config.hf_config,
|
||||||
|
method=self.method)
|
||||||
self.draft_model_config.hf_config = eagle_config
|
self.draft_model_config.hf_config = eagle_config
|
||||||
|
|
||||||
if (self.num_speculative_tokens is not None
|
if (self.num_speculative_tokens is not None
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ class EAGLEConfig(PretrainedConfig):
|
|||||||
def __init__(self,
|
def __init__(self,
|
||||||
model: Union[PretrainedConfig, dict, None] = None,
|
model: Union[PretrainedConfig, dict, None] = None,
|
||||||
truncated_vocab_size: Optional[int] = None,
|
truncated_vocab_size: Optional[int] = None,
|
||||||
|
method: Optional[str] = 'eagle',
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
|
||||||
model_config: Union[PretrainedConfig, DeepseekV2Config, None]
|
model_config: Union[PretrainedConfig, DeepseekV2Config, None]
|
||||||
@@ -45,7 +46,23 @@ class EAGLEConfig(PretrainedConfig):
|
|||||||
if not envs.VLLM_USE_V1:
|
if not envs.VLLM_USE_V1:
|
||||||
kwargs["architectures"] = ["EAGLEModel"]
|
kwargs["architectures"] = ["EAGLEModel"]
|
||||||
else:
|
else:
|
||||||
kwargs["architectures"] = ["EagleLlamaForCausalLM"]
|
# Eagle model name should follow naming convention of
|
||||||
|
# LlamaForCausalLM -> EagleLlamaForCausalLM
|
||||||
|
if method == "eagle":
|
||||||
|
assert self.model is not None, \
|
||||||
|
"model should not be None when method is eagle"
|
||||||
|
kwargs["architectures"] = [
|
||||||
|
f"Eagle{arch}" for arch in self.model.architectures
|
||||||
|
]
|
||||||
|
elif method == "eagle3":
|
||||||
|
assert self.model is not None, \
|
||||||
|
"model should not be None when method is eagle3"
|
||||||
|
kwargs["architectures"] = [
|
||||||
|
f"Eagle3{arch}" for arch in self.model.architectures
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid method {method}. \
|
||||||
|
Supported methods are eagle and eagle3.")
|
||||||
|
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
|
|||||||
@@ -9,8 +9,7 @@ from vllm.forward_context import set_forward_context
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.model_loader.loader import get_model_loader
|
from vllm.model_executor.model_loader.loader import get_model_loader
|
||||||
from vllm.model_executor.model_loader.utils import set_default_torch_dtype
|
from vllm.model_executor.model_loader.utils import set_default_torch_dtype
|
||||||
from vllm.model_executor.models.llama_eagle import EagleLlamaForCausalLM
|
from vllm.model_executor.models import ModelRegistry
|
||||||
from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
|
|
||||||
from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
|
from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
|
||||||
from vllm.v1.sample.metadata import SamplingMetadata
|
from vllm.v1.sample.metadata import SamplingMetadata
|
||||||
|
|
||||||
@@ -225,15 +224,11 @@ class EagleProposer:
|
|||||||
with set_default_torch_dtype(
|
with set_default_torch_dtype(
|
||||||
draft_model_config.dtype), set_current_vllm_config(
|
draft_model_config.dtype), set_current_vllm_config(
|
||||||
self.vllm_config):
|
self.vllm_config):
|
||||||
if self.vllm_config.speculative_config.method == "eagle":
|
draft_model_cls, arch = ModelRegistry.resolve_model_cls(
|
||||||
self.model = EagleLlamaForCausalLM(
|
draft_model_config.architectures)
|
||||||
model_config=draft_model_config,
|
self.model = draft_model_cls(
|
||||||
start_layer_id=target_layer_num).to(target_device)
|
model_config=draft_model_config,
|
||||||
else:
|
start_layer_id=target_layer_num).to(target_device)
|
||||||
assert self.vllm_config.speculative_config.method == "eagle3"
|
|
||||||
self.model = Eagle3LlamaForCausalLM(
|
|
||||||
model_config=draft_model_config,
|
|
||||||
start_layer_id=target_layer_num).to(target_device)
|
|
||||||
|
|
||||||
loaded_weights = self.model.load_weights(
|
loaded_weights = self.model.load_weights(
|
||||||
loader.get_all_weights(
|
loader.get_all_weights(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user