Drop flaky test_healthcheck_response_time (#22539)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
Russell Bryant 2025-08-08 19:56:47 -04:00 committed by GitHub
parent e3edc0a7a8
commit 311d875614
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,15 +2,12 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio import asyncio
import contextlib
import random import random
import time
from typing import Callable from typing import Callable
import openai import openai
import pytest import pytest
import pytest_asyncio import pytest_asyncio
import requests
from tests.utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
@ -87,54 +84,3 @@ async def test_with_and_without_truncate(
responses = await asyncio.gather(*[get_status_code(**b) for b in bodies]) responses = await asyncio.gather(*[get_status_code(**b) for b in bodies])
assert 500 not in responses assert 500 not in responses
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ids=["single completion", "multiple completions", "chat"],
    argnames=["create_func_gen", "content_body"],
    argvalues=[
        (lambda x: x.completions.create, {
            "prompt": " ".join(['A'] * 300_000)
        }),
        (lambda x: x.completions.create, {
            "prompt": [" ".join(['A'] * 300_000)] * 2
        }),
        (lambda x: x.chat.completions.create, {
            "messages": [{
                "role": "user",
                "content": " ".join(['A'] * 300_000)
            }]
        }),
    ],
)
async def test_healthcheck_response_time(
    server: RemoteOpenAIServer,
    client: openai.AsyncOpenAI,
    create_func_gen: Callable,
    content_body: dict,
):
    """Check that the health endpoint stays responsive under request load.

    The URL returned by ``server.url_for("health")`` is timed once with no
    requests in flight and once while ``num_requests`` oversized requests
    (300,000 space-separated ``A`` tokens each) are running concurrently.

    Args:
        server: Running server fixture; supplies the health URL.
        client: Async OpenAI client used to issue the load requests.
        create_func_gen: Maps the client to the ``create`` coroutine
            function to exercise (completions or chat completions).
        content_body: Request fields specific to the parametrized case
            (``prompt`` or ``messages``).
    """
    num_requests = 50
    # Upper bound on a single health probe so an unresponsive server fails
    # the test instead of hanging it. Far above the 0.1s asserted below.
    health_timeout_s = 10
    create_func = create_func_gen(client)
    body = {"model": MODEL_NAME, **content_body, "max_tokens": 10}

    def get_response_time(url):
        """Return the wall-clock seconds taken by one successful GET."""
        start_time = time.monotonic()
        res = requests.get(url, timeout=health_timeout_s)
        end_time = time.monotonic()
        assert res.status_code == 200
        return end_time - start_time

    no_load_response_time = get_response_time(server.url_for("health"))

    tasks = [
        asyncio.create_task(create_func(**body)) for _ in range(num_requests)
    ]
    try:
        await asyncio.sleep(1)  # give the tasks a chance to start running
        load_response_time = get_response_time(server.url_for("health"))
    finally:
        # Always drain every task, even when the probe above failed, so no
        # task is left pending and no task exception goes unretrieved.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    # API status errors from the load requests are tolerated; anything else
    # is a genuine failure and is re-raised.
    for result in results:
        if (isinstance(result, BaseException)
                and not isinstance(result, openai.APIStatusError)):
            raise result

    assert load_response_time < 100 * no_load_response_time, (
        f"health check took {load_response_time:.4f}s under load vs "
        f"{no_load_response_time:.4f}s with no load")
    assert load_response_time < 0.1, (
        f"health check took {load_response_time:.4f}s under load")