mirror of https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 22:05:44 +08:00
70 lines · 2.5 KiB · Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import pytest
|
|
import torch
|
|
|
|
from vllm.config import SpeculativeConfig
|
|
from vllm.model_executor.models.interfaces import supports_eagle3
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model_path",
    [
        pytest.param(
            "nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized",
            id="llama3-eagle3-speculator",
        ),
        pytest.param(
            "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized",
            id="qwen3-eagle3-speculator",
        ),
        pytest.param(
            "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized-w4a16",
            id="qwen3-eagle3-speculator-w4a16-verifier",
        ),
        pytest.param(
            "nm-testing/random-weights-llama3.1.8b-2layer-eagle3",
            # NOTE: fixed id typo "eagl3" -> "eagle3" so `-k eagle3`
            # selects this case consistently with the other params.
            id="llama3-eagle3-multiple-layers",
        ),
    ],
)
def test_eagle3_speculators_model(
    vllm_runner, example_prompts, model_path, monkeypatch
):
    """
    Test Eagle3 speculators models properly initialize speculative decoding.

    This test verifies:
    1. Eagle3 support is detected for the model
    2. Speculative config is automatically initialized from embedded config
    3. The draft model path is correctly set to the speculators model
    4. Speculative tokens count is valid
    5. Text generation works with speculative decoding enabled
    """
    # Set environment variable for V1 engine serialization
    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        # Verify Eagle3 support is detected
        eagle3_supported = vllm_model.apply_model(supports_eagle3)
        assert eagle3_supported, f"Eagle3 should be supported for {model_path}"

        vllm_config = vllm_model.llm.llm_engine.vllm_config

        # The speculators checkpoint embeds its own speculative-decoding
        # config; vLLM should pick it up without explicit CLI arguments.
        assert isinstance(vllm_config.speculative_config, SpeculativeConfig), (
            "Speculative config should be initialized for speculators model"
        )

        spec_config = vllm_config.speculative_config
        assert spec_config.num_speculative_tokens > 0, (
            f"Expected positive speculative tokens, "
            f"got {spec_config.num_speculative_tokens}"
        )

        # The draft model should resolve back to the speculators checkpoint
        # itself (it bundles both verifier and drafter weights).
        assert spec_config.model == model_path, (
            f"Draft model should be {model_path}, got {spec_config.model}"
        )

        vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20)
        assert vllm_outputs, f"No outputs generated for speculators model {model_path}"