From 62be3670cb97e7196c30be26fe347d12d183429c Mon Sep 17 00:00:00 2001
From: Alec <35311602+alec-flowers@users.noreply.github.com>
Date: Thu, 18 Dec 2025 09:52:55 -0800
Subject: [PATCH] [BugFix] Add sleep to fix tight loop and release GIL (#29476)

Signed-off-by: alec-flowers
Signed-off-by: Alec <35311602+alec-flowers@users.noreply.github.com>
Co-authored-by: Nick Hill
---
 vllm/v1/engine/core.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 0045b8c1dd3e7..9e2571201a684 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -923,6 +923,13 @@ class EngineCoreProc(EngineCore):
         # Post-step hook.
         self.post_step(model_executed)
 
+        # If no model execution happened but there are waiting requests
+        # (e.g., WAITING_FOR_REMOTE_KVS), yield the GIL briefly to allow
+        # background threads (like NIXL handshake) to make progress.
+        # Without this, the tight polling loop can starve background threads.
+        if not model_executed and self.scheduler.has_unfinished_requests():
+            time.sleep(0.001)
+
         return model_executed
 
     def _handle_client_request(