diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py
index 1ca52599c519..72d468db08f6 100644
--- a/tests/entrypoints/openai/test_response_api_with_harmony.py
+++ b/tests/entrypoints/openai/test_response_api_with_harmony.py
@@ -11,18 +11,25 @@ from openai import BadRequestError, NotFoundError, OpenAI
 
 from ...utils import RemoteOpenAIServer
 
-pytest.skip(allow_module_level=True, reason="gpt-oss can't run on CI yet.")
-
 MODEL_NAME = "openai/gpt-oss-20b"
-DTYPE = "bfloat16"
 
 
 @pytest.fixture(scope="module")
-def server():
+def monkeypatch_module():
+    """Module-scoped MonkeyPatch; the built-in fixture is function-scoped."""
+    mpatch = pytest.MonkeyPatch()
+    yield mpatch
+    mpatch.undo()
+
+
+@pytest.fixture(scope="module")
+def server(monkeypatch_module: pytest.MonkeyPatch):
     args = ["--enforce-eager", "--tool-server", "demo"]
 
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
+    with monkeypatch_module.context() as m:
+        m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
+        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+            yield remote_server
 
 
 @pytest_asyncio.fixture
@@ -269,10 +276,11 @@ async def test_stateful_multi_turn(client: OpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_streaming(client: OpenAI, model_name: str):
+    # TODO: Add back when web search and code interpreter are available in CI
     prompts = [
         "tell me a story about a cat in 20 words",
-        "What is 13 * 24? Use python to calculate the result.",
-        "When did Jensen found NVIDIA? Search it and answer the year only.",
+        # "What is 13 * 24? Use python to calculate the result.",
+        # "When did Jensen found NVIDIA? Search it and answer the year only.",
     ]
 
     for prompt in prompts:
@@ -281,15 +289,15 @@ async def test_streaming(client: OpenAI, model_name: str):
             input=prompt,
             reasoning={"effort": "low"},
             tools=[
-                {
-                    "type": "web_search_preview"
-                },
-                {
-                    "type": "code_interpreter",
-                    "container": {
-                        "type": "auto"
-                    }
-                },
+                # {
+                #     "type": "web_search_preview"
+                # },
+                # {
+                #     "type": "code_interpreter",
+                #     "container": {
+                #         "type": "auto"
+                #     }
+                # },
             ],
             stream=True,
         )
@@ -317,6 +325,7 @@ async def test_streaming(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.skip(reason="Web search tool is not available in CI yet.")
 async def test_web_search(client: OpenAI, model_name: str):
     response = await client.responses.create(
         model=model_name,
@@ -331,6 +340,7 @@ async def test_web_search(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.")
 async def test_code_interpreter(client: OpenAI, model_name: str):
     response = await client.responses.create(
         model=model_name,
@@ -436,6 +446,7 @@ async def test_function_calling(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.flaky(reruns=5)
 async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
     tools = [
         {