[BugFix] Add sleep to fix tight loop and release GIL (#29476)

Signed-off-by: alec-flowers <aflowers@nvidia.com>
Signed-off-by: Alec <35311602+alec-flowers@users.noreply.github.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
2026-03-16 13:57:12 +08:00 · 2025-12-18 09:52:55 -08:00 · 2025-12-18 09:52:55 -08:00 · 62be3670cb
commit 62be3670cb
parent 500f26e6d3
1 changed file with 7 additions and 0 deletions
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -923,6 +923,13 @@ class EngineCoreProc(EngineCore):
        # Post-step hook.
        self.post_step(model_executed)

+        # If no model execution happened but there are waiting requests
+        # (e.g., WAITING_FOR_REMOTE_KVS), yield the GIL briefly to allow
+        # background threads (like NIXL handshake) to make progress.
+        # Without this, the tight polling loop can starve background threads.
+        if not model_executed and self.scheduler.has_unfinished_requests():
+            time.sleep(0.001)
+
        return model_executed

    def _handle_client_request(