diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py
index 1ca52599c519..72d468db08f6 100644
--- a/tests/entrypoints/openai/test_response_api_with_harmony.py
+++ b/tests/entrypoints/openai/test_response_api_with_harmony.py
@@ -11,18 +11,25 @@ from openai import BadRequestError, NotFoundError, OpenAI
 
 from ...utils import RemoteOpenAIServer
 
-pytest.skip(allow_module_level=True, reason="gpt-oss can't run on CI yet.")
-
 MODEL_NAME = "openai/gpt-oss-20b"
-DTYPE = "bfloat16"
 
 
 @pytest.fixture(scope="module")
-def server():
+def monkeypatch_module():
+    from _pytest.monkeypatch import MonkeyPatch
+    mpatch = MonkeyPatch()
+    yield mpatch
+    mpatch.undo()
+
+
+@pytest.fixture(scope="module")
+def server(monkeypatch_module: pytest.MonkeyPatch):
     args = ["--enforce-eager", "--tool-server", "demo"]
 
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
+    with monkeypatch_module.context() as m:
+        m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
+        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+            yield remote_server
 
 
 @pytest_asyncio.fixture
@@ -269,10 +276,11 @@ async def test_stateful_multi_turn(client: OpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_streaming(client: OpenAI, model_name: str):
+    # TODO: Add back when web search and code interpreter are available in CI
     prompts = [
         "tell me a story about a cat in 20 words",
-        "What is 13 * 24? Use python to calculate the result.",
-        "When did Jensen found NVIDIA? Search it and answer the year only.",
+        # "What is 13 * 24? Use python to calculate the result.",
+        # "When did Jensen found NVIDIA? Search it and answer the year only.",
     ]
 
     for prompt in prompts:
@@ -281,15 +289,15 @@ async def test_streaming(client: OpenAI, model_name: str):
             input=prompt,
             reasoning={"effort": "low"},
             tools=[
-                {
-                    "type": "web_search_preview"
-                },
-                {
-                    "type": "code_interpreter",
-                    "container": {
-                        "type": "auto"
-                    }
-                },
+                # {
+                #     "type": "web_search_preview"
+                # },
+                # {
+                #     "type": "code_interpreter",
+                #     "container": {
+                #         "type": "auto"
+                #     }
+                # },
             ],
             stream=True,
         )
@@ -317,6 +325,7 @@ async def test_streaming(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.skip(reason="Web search tool is not available in CI yet.")
 async def test_web_search(client: OpenAI, model_name: str):
     response = await client.responses.create(
         model=model_name,
@@ -331,6 +340,7 @@ async def test_web_search(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.")
 async def test_code_interpreter(client: OpenAI, model_name: str):
     response = await client.responses.create(
         model=model_name,
@@ -436,6 +446,7 @@ async def test_function_calling(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.flaky(reruns=5)
 async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
     tools = [
         {