mirror of https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 22:05:44 +08:00
70 lines · 2.5 KiB · Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import pytest
|
|
import torch
|
|
|
|
from vllm.config import SpeculativeConfig
|
|
from vllm.model_executor.models.interfaces import supports_eagle3
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model_path",
    [
        pytest.param(
            "nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized",
            id="llama3-eagle3-speculator",
        ),
        pytest.param(
            "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized",
            id="qwen3-eagle3-speculator",
        ),
        pytest.param(
            "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized-w4a16",
            id="qwen3-eagle3-speculator-w4a16-verifier",
        ),
        pytest.param(
            "nm-testing/random-weights-llama3.1.8b-2layer-eagle3",
            # NOTE: fixed id typo "eagl3" -> "eagle3" so `-k eagle3`
            # selects this case consistently with the other params.
            id="llama3-eagle3-multiple-layers",
        ),
    ],
)
def test_eagle3_speculators_model(
    vllm_runner, example_prompts, model_path, monkeypatch
):
    """
    Test Eagle3 speculators models properly initialize speculative decoding.

    This test verifies:
    1. Eagle3 support is detected for the model
    2. Speculative config is automatically initialized from embedded config
    3. The draft model path is correctly set to the speculators model
    4. Speculative tokens count is valid
    5. Text generation works with speculative decoding enabled
    """
    # Set environment variable for V1 engine serialization
    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        # Verify Eagle3 support is detected
        eagle3_supported = vllm_model.apply_model(supports_eagle3)
        assert eagle3_supported, f"Eagle3 should be supported for {model_path}"

        vllm_config = vllm_model.llm.llm_engine.vllm_config

        # The speculators checkpoint embeds its own speculative-decoding
        # config; vLLM should pick it up without explicit CLI arguments.
        assert isinstance(vllm_config.speculative_config, SpeculativeConfig), (
            "Speculative config should be initialized for speculators model"
        )

        spec_config = vllm_config.speculative_config
        assert spec_config.num_speculative_tokens > 0, (
            f"Expected positive speculative tokens, "
            f"got {spec_config.num_speculative_tokens}"
        )

        # The draft model should resolve back to the speculators checkpoint
        # itself (it bundles both verifier and drafter weights).
        assert spec_config.model == model_path, (
            f"Draft model should be {model_path}, got {spec_config.model}"
        )

        vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20)
        assert vllm_outputs, f"No outputs generated for speculators model {model_path}"