From 91ac7f764d04e7a9103e3c839244ce241a43b45e Mon Sep 17 00:00:00 2001 From: wuhang Date: Mon, 6 Oct 2025 12:20:06 +0800 Subject: [PATCH] [CI][gpt-oss] Enable python tool tests in CI (#24315) Signed-off-by: wuhang --- requirements/common.txt | 1 + .../openai/test_response_api_with_harmony.py | 38 ++++++++----------- vllm/entrypoints/tool.py | 11 ++++-- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index a52745f698703..1530e5a09e757 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -49,3 +49,4 @@ pybase64 # fast base64 implementation cbor2 # Required for cross-language serialization of hashable objects setproctitle # Used to set process names for better debugging and monitoring openai-harmony >= 0.0.3 # Required for gpt-oss +gpt-oss >= 0.0.7 diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 3a564bef0d126..fb0035de67c26 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -15,22 +15,15 @@ MODEL_NAME = "openai/gpt-oss-20b" @pytest.fixture(scope="module") -def monkeypatch_module(): - from _pytest.monkeypatch import MonkeyPatch - - mpatch = MonkeyPatch() - yield mpatch - mpatch.undo() - - -@pytest.fixture(scope="module") -def server(monkeypatch_module: pytest.MonkeyPatch): +def server(): args = ["--enforce-eager", "--tool-server", "demo"] + env_dict = dict( + VLLM_ENABLE_RESPONSES_API_STORE="1", + PYTHON_EXECUTION_BACKEND="dangerously_use_uv", + ) - with monkeypatch_module.context() as m: - m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") - with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: - yield remote_server + with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as remote_server: + yield remote_server @pytest_asyncio.fixture @@ -316,7 +309,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): # TODO: Add back when web search and code interpreter are available in CI prompts = [ "tell me a story about a cat in 20 words", - # "What is 13 * 24? Use python to calculate the result.", + "What is 13 * 24? Use python to calculate the result.", # "When did Jensen found NVIDIA? Search it and answer the year only.", ] @@ -329,12 +322,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): # { # "type": "web_search_preview" # }, - # { - # "type": "code_interpreter", - # "container": { - # "type": "auto" - # } - # }, + {"type": "code_interpreter", "container": {"type": "auto"}}, ], stream=True, background=background, @@ -412,6 +400,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): async for event in stream: counter += 1 assert event == events[counter] + assert counter == len(events) - 1 @pytest.mark.asyncio @@ -429,7 +418,6 @@ async def test_web_search(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") async def test_code_interpreter(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, @@ -443,10 +431,16 @@ async def test_code_interpreter(client: OpenAI, model_name: str): "and you must print to see the output." ), tools=[{"type": "code_interpreter", "container": {"type": "auto"}}], + temperature=0.0, # More deterministic output in response ) assert response is not None assert response.status == "completed" assert response.usage.output_tokens_details.tool_output_tokens > 0 + for item in response.output: + if item.type == "message": + output_string = item.content[0].text + print("output_string: ", output_string, flush=True) + assert "5846" in output_string def get_weather(latitude, longitude): diff --git a/vllm/entrypoints/tool.py b/vllm/entrypoints/tool.py index 1bc6a85a9a25c..c74ce1ee16de1 100644 --- a/vllm/entrypoints/tool.py +++ b/vllm/entrypoints/tool.py @@ -14,10 +14,12 @@ if TYPE_CHECKING: logger = init_logger(__name__) +MIN_GPT_OSS_VERSION = "0.0.7" + def validate_gpt_oss_install(): """ - Check if the gpt-oss is installed and its version is at least 0.0.3. + Check if the gpt-oss is installed and its version is at least 0.0.7. If not, raise an ImportError. """ from importlib.metadata import PackageNotFoundError, version @@ -25,16 +27,17 @@ def validate_gpt_oss_install(): from packaging.version import InvalidVersion, Version try: - pkg_version_str = version("gpt_oss") # e.g., "0.0.5" + pkg_version_str = version("gpt_oss") pkg_version = Version(pkg_version_str) except PackageNotFoundError: raise ImportError("Package 'gpt_oss' is not installed.") from None except InvalidVersion as e: raise ImportError(f"Invalid version string for 'gpt_oss': {e}") from None - if pkg_version < Version("0.0.3"): + if pkg_version < Version(MIN_GPT_OSS_VERSION): raise ImportError( - f"gpt_oss >= 0.0.3 is required, but {pkg_version} is installed." + f"gpt_oss >= {MIN_GPT_OSS_VERSION} is required, " + f"but {pkg_version} is installed." ) from None