From 83004020fd2400f56df2686e8c55df3d9fc79b7b Mon Sep 17 00:00:00 2001
From: dongbo910220 <32610838+dongbo910220@users.noreply.github.com>
Date: Sat, 18 Oct 2025 17:59:05 +0800
Subject: [PATCH] [Test] Add test for /health endpoint on engine failure (#26074)

Signed-off-by: dongbo910220 <1275604947@qq.com>
---
 tests/entrypoints/openai/test_basic.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py
index 50ec87b4464f6..e63a6f10cbc7f 100644
--- a/tests/entrypoints/openai/test_basic.py
+++ b/tests/entrypoints/openai/test_basic.py
@@ -3,12 +3,15 @@
 
 import asyncio
 from http import HTTPStatus
+from unittest.mock import AsyncMock, Mock
 
 import openai
 import pytest
 import pytest_asyncio
 import requests
+from fastapi import Request
 
+from vllm.v1.engine.exceptions import EngineDeadError
 from vllm.version import __version__ as VLLM_VERSION
 
 from ...utils import RemoteOpenAIServer
@@ -224,3 +227,24 @@ async def test_server_load(server: RemoteOpenAIServer):
     response = requests.get(server.url_for("load"))
     assert response.status_code == HTTPStatus.OK
     assert response.json().get("server_load") == 0
+
+
+@pytest.mark.asyncio
+async def test_health_check_engine_dead_error():
+    # Import the health function directly to test it in isolation
+    from vllm.entrypoints.openai.api_server import health
+
+    # Create a mock request that simulates what FastAPI would provide
+    mock_request = Mock(spec=Request)
+    mock_app_state = Mock()
+    mock_engine_client = AsyncMock()
+    mock_engine_client.check_health.side_effect = EngineDeadError()
+    mock_app_state.engine_client = mock_engine_client
+    mock_request.app.state = mock_app_state
+
+    # Test the health function directly with our mocked request
+    # This simulates what would happen if the engine dies
+    response = await health(mock_request)
+
+    # Assert that it returns 503 Service Unavailable
+    assert response.status_code == 503