From 6e4cea1cc56da6d4a558a50196c5698c36385890 Mon Sep 17 00:00:00 2001 From: daniel-salib Date: Wed, 28 May 2025 07:15:12 -0700 Subject: [PATCH] decrement server_load on listen for disconnect (#18784) Signed-off-by: Daniel Salib --- vllm/entrypoints/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/entrypoints/utils.py b/vllm/entrypoints/utils.py index cc651a172b40..1b0ea69096cc 100644 --- a/vllm/entrypoints/utils.py +++ b/vllm/entrypoints/utils.py @@ -26,6 +26,11 @@ async def listen_for_disconnect(request: Request) -> None: while True: message = await request.receive() if message["type"] == "http.disconnect": + if request.app.state.enable_server_load_tracking: + # on timeout/cancellation the BackgroundTask in load_aware_call + # cannot decrement the server load metrics. + # Must be decremented by with_cancellation instead. + request.app.state.server_load_metrics -= 1 break