# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any

import pytest

from vllm import SamplingParams

from ...conftest import VllmRunner
from ...models.utils import check_outputs_equal

MODEL = "Qwen/Qwen3-0.6B"


def test_preempt_and_async_scheduling_e2e(monkeypatch: pytest.MonkeyPatch):
    """Test consistency of combos of async scheduling, preemption,
    uni/multiproc executor, and various sampling parameters."""

    first_prompt = (
        "The following numbers of the sequence "
        + ", ".join(str(i) for i in range(10))
        + " are:"
    )
    example_prompts = [first_prompt, "In one word, the capital of France is "] + [
        f"Tell me about the number {i}: " for i in range(32)
    ]

    sampling_param_tests: list[dict[str, Any]] = [
        dict(),
        # dict(min_tokens=20),
        dict(presence_penalty=-1.0),
        dict(bad_words=["the", " the"]),
    ]

    default_params = dict(
        temperature=0.0,  # greedy
        max_tokens=20,
    )

    with monkeypatch.context() as m:
        m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
        # m.setenv("VLLM_KERNEL_OVERRIDE_BATCH_INVARIANT", "1")

        outputs: list[tuple[str, list]] = []
        for test_preemption in [False, True]:
            for executor in ["mp", "uni"]:
                for async_scheduling in [False, True]:
                    cache_arg: dict[str, Any] = (
                        dict(num_gpu_blocks_override=32)
                        if test_preemption
                        else dict(gpu_memory_utilization=0.7)
                    )
                    test_config = (
                        f"executor={executor}, preemption={test_preemption},"
                        f" async_sched={async_scheduling}"
                    )
                    print("-" * 80)
                    print(f"---- TESTING: {test_config}")
                    print("-" * 80)
                    with VllmRunner(
                        MODEL,
                        max_model_len=512,
                        enforce_eager=True,
                        async_scheduling=async_scheduling,
                        distributed_executor_backend=executor,
                        dtype="float32",  # avoid precision errors
                        **cache_arg,
                    ) as vllm_model:
                        results = []
                        for override_params in sampling_param_tests:
                            print(f"----------- RUNNING PARAMS: {override_params}")
                            results.append(
                                vllm_model.generate(
                                    example_prompts,
                                    sampling_params=SamplingParams(
                                        **default_params, **override_params
                                    ),
                                )
                            )

                        if not outputs:
                            # First check that the different parameter configs
                            # actually result in different output.
                            for other_test, params in zip(
                                results[1:], sampling_param_tests[1:]
                            ):
                                with pytest.raises(AssertionError):
                                    check_outputs_equal(
                                        outputs_0_lst=results[0],
                                        outputs_1_lst=other_test,
                                        name_0=f"baseline params={params}",
                                        name_1=f"other params={params}",
                                    )

                        outputs.append((test_config, results))

        baseline_config, baseline_tests = outputs[0]

        for test_config, test_outputs in outputs[1:]:
            for base_outs, test_outs, params in zip(
                baseline_tests, test_outputs, sampling_param_tests
            ):
                check_outputs_equal(
                    outputs_0_lst=base_outs,
                    outputs_1_lst=test_outs,
                    name_0=f"baseline=[{baseline_config}], params={params}",
                    name_1=f"config=[{test_config}], params={params}",
                )
                print(f"PASSED: config=[{test_config}], params={params}")