mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 14:45:44 +08:00
[Test] Add test for /health endpoint on engine failure (#26074)
Signed-off-by: dongbo910220 <1275604947@qq.com>
This commit is contained in:
parent
12e21701e7
commit
83004020fd
@ -3,12 +3,15 @@
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
|
from unittest.mock import AsyncMock, Mock
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
import requests
|
import requests
|
||||||
|
from fastapi import Request
|
||||||
|
|
||||||
|
from vllm.v1.engine.exceptions import EngineDeadError
|
||||||
from vllm.version import __version__ as VLLM_VERSION
|
from vllm.version import __version__ as VLLM_VERSION
|
||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
@ -224,3 +227,24 @@ async def test_server_load(server: RemoteOpenAIServer):
|
|||||||
response = requests.get(server.url_for("load"))
|
response = requests.get(server.url_for("load"))
|
||||||
assert response.status_code == HTTPStatus.OK
|
assert response.status_code == HTTPStatus.OK
|
||||||
assert response.json().get("server_load") == 0
|
assert response.json().get("server_load") == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_health_check_engine_dead_error():
    """Check that ``health`` responds with 503 when the engine is dead.

    The ``health`` handler is called directly with a mocked FastAPI request,
    so no server process is started. The mocked engine client is exposed as
    ``request.app.state.engine_client`` and its ``check_health`` coroutine
    raises ``EngineDeadError`` when awaited.
    """
    # Import the health function directly to test it in isolation
    from vllm.entrypoints.openai.api_server import health

    # Engine client whose health probe fails the way a dead engine would.
    mock_engine_client = AsyncMock()
    mock_engine_client.check_health.side_effect = EngineDeadError()

    # Create a mock request that simulates what FastAPI would provide
    mock_app_state = Mock()
    mock_app_state.engine_client = mock_engine_client
    mock_request = Mock(spec=Request)
    mock_request.app.state = mock_app_state

    # Test the health function directly with our mocked request
    # This simulates what would happen if the engine dies
    response = await health(mock_request)

    # Assert that it returns 503 Service Unavailable; use the HTTPStatus enum
    # to stay consistent with the other status checks in this module.
    assert response.status_code == HTTPStatus.SERVICE_UNAVAILABLE
    # Guard against a false positive: the 503 must come from the failing
    # health probe, so the probe has to have been awaited.
    mock_engine_client.check_health.assert_awaited_once()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user