mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:25:01 +08:00
[BugFix] [Spec Decode] Remove LlamaForCausalLMEagle3 to fix CI (#22611)
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
parent
c90fb03df5
commit
807d21b80d
@ -525,10 +525,11 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
|
|||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
|
speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
|
||||||
tokenizer="meta-llama/Llama-3.1-8B-Instruct"),
|
tokenizer="meta-llama/Llama-3.1-8B-Instruct"),
|
||||||
"LlamaForCausalLMEagle3": _HfExamplesInfo("AngelSlim/Qwen3-8B_eagle3", # noqa: E501
|
# TODO: Re-enable this once tests/models/test_initialization.py is fixed, see PR #22333 #22611 # noqa: E501
|
||||||
trust_remote_code=True,
|
# "LlamaForCausalLMEagle3": _HfExamplesInfo("AngelSlim/Qwen3-8B_eagle3", # noqa: E501
|
||||||
speculative_model="AngelSlim/Qwen3-8B_eagle3",
|
# trust_remote_code=True,
|
||||||
tokenizer="Qwen/Qwen3-8B"),
|
# speculative_model="AngelSlim/Qwen3-8B_eagle3", # noqa: E501
|
||||||
|
# tokenizer="Qwen/Qwen3-8B"),
|
||||||
"EagleLlama4ForCausalLM": _HfExamplesInfo(
|
"EagleLlama4ForCausalLM": _HfExamplesInfo(
|
||||||
"morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct",
|
"morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct",
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
|
|||||||
@ -125,27 +125,30 @@ def test_ngram_correctness(
|
|||||||
cleanup_dist_env_and_memory()
|
cleanup_dist_env_and_memory()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(["model_setup", "mm_enabled"], [
|
@pytest.mark.parametrize(
|
||||||
(("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False),
|
["model_setup", "mm_enabled"],
|
||||||
(("eagle", "meta-llama/Llama-3.1-8B-Instruct",
|
[
|
||||||
"yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False),
|
# TODO: Re-enable this once tests/models/test_initialization.py is fixed, see PR #22333 #22611 # noqa: E501
|
||||||
(("eagle3", "meta-llama/Llama-3.1-8B-Instruct",
|
# (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False),
|
||||||
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False),
|
(("eagle", "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
pytest.param(
|
"yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False),
|
||||||
("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
(("eagle3", "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
|
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False),
|
||||||
False,
|
pytest.param(
|
||||||
marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
|
("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
||||||
pytest.param(
|
"morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
|
||||||
("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
False,
|
||||||
"morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
|
marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
|
||||||
True,
|
pytest.param(
|
||||||
marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
|
("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
||||||
],
|
"morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
|
||||||
ids=[
|
True,
|
||||||
"qwen3_eagle3", "llama3_eagle", "llama3_eagle3",
|
marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
|
||||||
"llama4_eagle", "llama4_eagle_mm"
|
],
|
||||||
])
|
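ids=[
|
||||||
|
# "qwen3_eagle3" is disabled together with its commented-out parameter set; ids must match the active parameter sets one-to-one
"llama3_eagle", "llama3_eagle3", "llama4_eagle",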
|
||||||
|
"llama4_eagle_mm"
|
||||||
|
])
|
||||||
@pytest.mark.parametrize("attn_backend",
|
@pytest.mark.parametrize("attn_backend",
|
||||||
get_attn_backend_list_based_on_platform())
|
get_attn_backend_list_based_on_platform())
|
||||||
def test_eagle_correctness(
|
def test_eagle_correctness(
|
||||||
|
|||||||
@ -259,7 +259,8 @@ _SPECULATIVE_DECODING_MODELS = {
|
|||||||
"EagleLlama4ForCausalLM": ("llama4_eagle", "EagleLlama4ForCausalLM"),
|
"EagleLlama4ForCausalLM": ("llama4_eagle", "EagleLlama4ForCausalLM"),
|
||||||
"EagleMiniCPMForCausalLM": ("minicpm_eagle", "EagleMiniCPMForCausalLM"),
|
"EagleMiniCPMForCausalLM": ("minicpm_eagle", "EagleMiniCPMForCausalLM"),
|
||||||
"Eagle3LlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
|
"Eagle3LlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
|
||||||
"LlamaForCausalLMEagle3": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
|
# TODO: Re-enable this once tests/models/test_initialization.py is fixed, see PR #22333 #22611 # noqa: E501
|
||||||
|
# "LlamaForCausalLMEagle3": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
|
||||||
"DeepSeekMTPModel": ("deepseek_mtp", "DeepSeekMTP"),
|
"DeepSeekMTPModel": ("deepseek_mtp", "DeepSeekMTP"),
|
||||||
"Glm4MoeMTPModel": ("glm4_moe_mtp", "Glm4MoeMTP"),
|
"Glm4MoeMTPModel": ("glm4_moe_mtp", "Glm4MoeMTP"),
|
||||||
"MedusaModel": ("medusa", "Medusa"),
|
"MedusaModel": ("medusa", "Medusa"),
|
||||||
|
|||||||
@ -45,7 +45,7 @@ class EAGLEConfig(PretrainedConfig):
|
|||||||
|
|
||||||
# Eagle model name should follow naming convention of
|
# Eagle model name should follow naming convention of
|
||||||
# LlamaForCausalLM -> EagleLlamaForCausalLM
|
# LlamaForCausalLM -> EagleLlamaForCausalLM
|
||||||
# LlamaForCausalLM -> Eagle3LlamaForCausalLM / LlamaForCausalLMEagle3
|
# LlamaForCausalLM -> Eagle3LlamaForCausalLM
|
||||||
if method == "eagle":
|
if method == "eagle":
|
||||||
assert self.model is not None, \
|
assert self.model is not None, \
|
||||||
"model should not be None when method is eagle"
|
"model should not be None when method is eagle"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user