mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 09:16:06 +08:00
[Frontend] error suppression cleanup (#7786)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
parent
a152246428
commit
b903e1ba7f
@ -75,11 +75,12 @@ async def test_client_aborts_use_timeouts(monkeypatch, dummy_server,
|
||||
m.setattr(dummy_server, "abort", lambda x: None)
|
||||
m.setattr(client, "_data_timeout", 10)
|
||||
|
||||
# Ensure the client doesn't hang
|
||||
# The client should suppress timeouts on `abort`s
|
||||
# and return normally, assuming the server will eventually
|
||||
# abort the request.
|
||||
client_task = asyncio.get_running_loop().create_task(
|
||||
client.abort("test request id"))
|
||||
with pytest.raises(TimeoutError, match="Server didn't reply within"):
|
||||
await asyncio.wait_for(client_task, timeout=0.05)
|
||||
await asyncio.wait_for(client_task, timeout=0.05)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -6,7 +6,7 @@ import os
|
||||
import re
|
||||
import tempfile
|
||||
from argparse import Namespace
|
||||
from contextlib import asynccontextmanager, suppress
|
||||
from contextlib import asynccontextmanager
|
||||
from http import HTTPStatus
|
||||
from typing import AsyncIterator, Optional, Set
|
||||
|
||||
@ -83,8 +83,7 @@ async def lifespan(app: FastAPI):
|
||||
async def _force_log():
|
||||
while True:
|
||||
await asyncio.sleep(10)
|
||||
with suppress(Exception):
|
||||
await async_engine_client.do_log_stats()
|
||||
await async_engine_client.do_log_stats()
|
||||
|
||||
if not engine_args.disable_log_stats:
|
||||
task = asyncio.create_task(_force_log())
|
||||
|
||||
@ -335,7 +335,18 @@ class AsyncEngineRPCClient:
|
||||
|
||||
async def abort(self, request_id: str):
|
||||
"""Send an ABORT_REQUEST signal to the RPC Server"""
|
||||
with suppress(RPCClientClosedError):
|
||||
|
||||
# Suppress timeouts as well.
|
||||
# In cases where the server is busy processing requests and a very
|
||||
# large volume of abort requests arrive, it is likely that the server
|
||||
# will not be able to ack all of them in time. We have seen this when
|
||||
# we abort 20k requests at once while another 2k are processing; many
|
||||
# of them time out, but we see the server successfully abort all of the
|
||||
# requests.
|
||||
# In this case we assume that the server has received or will receive
|
||||
# these abort requests, and ignore the timeout. This prevents a massive
|
||||
# wall of `TimeoutError` stack traces.
|
||||
with suppress(RPCClientClosedError, TimeoutError):
|
||||
await self._send_one_way_rpc_request(
|
||||
request=RPCAbortRequest(request_id),
|
||||
error_message=f"RPCAbortRequest {request_id} failed")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user