[Core] Use os.sched_yield in ShmRingBuffer instead of time.sleep (#9994)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith 2024-11-04 20:08:21 -05:00 committed by GitHub
parent 8f0a9ca890
commit 04bbf38e05
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,3 +1,4 @@
+import os
import pickle
import time
from contextlib import contextmanager
@@ -18,12 +19,6 @@ from vllm.utils import get_ip, get_open_port, is_valid_ipv6_address
VLLM_RINGBUFFER_WARNING_INTERVAL = envs.VLLM_RINGBUFFER_WARNING_INTERVAL
-# time to wait if the queue is full or empty
-# if we sleep for too short, it will consume too much CPU
-# if we sleep for too long, it will slow down the writer/reader
-# 0.1 us is a good balance
-RINGBUFFER_SLEEP_INTERVAL = 1e-7
logger = init_logger(__name__)
@@ -333,8 +328,8 @@ class MessageQueue:
# if this block is not ready to write,
# we need to wait until it is read by all readers
-# wait for a while
-time.sleep(RINGBUFFER_SLEEP_INTERVAL)
+# Release the processor to other threads
+os.sched_yield()
# if we wait for a long time, we should warn the user
if (time.monotonic() - start_time >
@@ -387,8 +382,8 @@ class MessageQueue:
# if this block is not ready,
# we need to wait until it is written
-# wait for a while
-time.sleep(RINGBUFFER_SLEEP_INTERVAL)
+# Release the processor to other threads
+os.sched_yield()
# if we wait for a long time, we should warn the user
if (time.monotonic() - start_time >