diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py
index 50ec87b4464f..e63a6f10cbc7 100644
--- a/tests/entrypoints/openai/test_basic.py
+++ b/tests/entrypoints/openai/test_basic.py
@@ -3,12 +3,15 @@
 
 import asyncio
 from http import HTTPStatus
+from unittest.mock import AsyncMock, Mock
 
 import openai
 import pytest
 import pytest_asyncio
 import requests
+from fastapi import Request
 
+from vllm.v1.engine.exceptions import EngineDeadError
 from vllm.version import __version__ as VLLM_VERSION
 
 from ...utils import RemoteOpenAIServer
@@ -224,3 +227,24 @@ async def test_server_load(server: RemoteOpenAIServer):
     response = requests.get(server.url_for("load"))
     assert response.status_code == HTTPStatus.OK
     assert response.json().get("server_load") == 0
+
+
+@pytest.mark.asyncio
+async def test_health_check_engine_dead_error():
+    # Import the health function directly to test it in isolation
+    from vllm.entrypoints.openai.api_server import health
+
+    # Create a mock request that simulates what FastAPI would provide
+    mock_request = Mock(spec=Request)
+    mock_app_state = Mock()
+    mock_engine_client = AsyncMock()
+    mock_engine_client.check_health.side_effect = EngineDeadError()
+    mock_app_state.engine_client = mock_engine_client
+    mock_request.app.state = mock_app_state
+
+    # Test the health function directly with our mocked request
+    # This simulates what would happen if the engine dies
+    response = await health(mock_request)
+
+    # Assert that it returns 503 Service Unavailable
+    assert response.status_code == 503
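
Note: the test above presumes that the health handler in vllm.entrypoints.openai.api_server
reads the engine client from app.state, awaits check_health(), and converts an
EngineDeadError into an HTTP 503 response. The sketch below is illustrative only and is
not vLLM's exact implementation; it exists to show the behavior the test asserts.

    # Illustrative sketch, not the actual vLLM handler.
    from fastapi import Request, Response

    from vllm.v1.engine.exceptions import EngineDeadError


    async def health(raw_request: Request) -> Response:
        """Return 200 while the engine is healthy, 503 once it has died."""
        try:
            # The server stores the engine client on app.state at startup,
            # which is why the test mocks request.app.state.engine_client.
            await raw_request.app.state.engine_client.check_health()
            return Response(status_code=200)
        except EngineDeadError:
            # A dead engine cannot serve requests, so report Service Unavailable.
            return Response(status_code=503)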