# vllm/tests/v1/entrypoints/openai/serving_responses/conftest.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio

from tests.utils import RemoteOpenAIServer

# Small reasoning model that the server fixture launches to test the
# Responses API.
MODEL_NAME = "Qwen/Qwen3-1.7B"


@pytest.fixture(scope="module")
def default_server_args():
    """Provide the command-line arguments used to launch the test server.

    Returns:
        A flat list of argument strings; options that take a value are
        immediately followed by that value.
    """
    cli_args = ["--max-model-len", "8192"]
    # Eager mode is enforced for faster startup.
    cli_args.append("--enforce-eager")
    cli_args.append("--enable-auto-tool-choice")
    cli_args += ["--structured-outputs-config.backend", "xgrammar"]
    cli_args += ["--tool-call-parser", "hermes"]
    cli_args += ["--reasoning-parser", "qwen3"]
    return cli_args


@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``.

    The server is constructed with ``default_server_args`` and an
    ``env_dict`` that sets ``VLLM_ENABLE_RESPONSES_API_STORE`` and
    ``VLLM_SERVER_DEV_MODE`` to ``"1"``. It is used as a context manager
    around the ``yield``, so it is exited when the module-scoped fixture
    is torn down.
    """
    server_env = {
        "VLLM_ENABLE_RESPONSES_API_STORE": "1",
        "VLLM_SERVER_DEV_MODE": "1",
    }
    server_cm = RemoteOpenAIServer(
        MODEL_NAME,
        default_server_args,
        env_dict=server_env,
    )
    with server_cm as running_server:
        yield running_server


@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async client obtained from the ``server_with_store`` server.

    The client is used as an async context manager around the ``yield``,
    so it is exited once the requesting test is done with it.
    """
    client_cm = server_with_store.get_async_client()
    async with client_cm as api_client:
        yield api_client