mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 06:45:01 +08:00
[Misc] Don't log shm dequeue delay warning on worker side (#25720)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
9fe4c2bdb9
commit
8b77328ffe
@ -387,23 +387,21 @@ class MessageQueue:
|
|||||||
# Release the processor to other threads
|
# Release the processor to other threads
|
||||||
sched_yield()
|
sched_yield()
|
||||||
|
|
||||||
# if we wait for a long time, log a message
|
|
||||||
if (time.monotonic() - start_time
|
|
||||||
> VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning):
|
|
||||||
logger.info(
|
|
||||||
("No available shared memory broadcast block found"
|
|
||||||
" in %s seconds. This typically happens when some"
|
|
||||||
" processes are hanging, doing some time-consuming"
|
|
||||||
" work (e.g. compilation), or sitting idle."),
|
|
||||||
VLLM_RINGBUFFER_WARNING_INTERVAL,
|
|
||||||
)
|
|
||||||
n_warning += 1
|
|
||||||
|
|
||||||
# if we time out, raise an exception
|
# if we time out, raise an exception
|
||||||
if (timeout is not None
|
elapsed = time.monotonic() - start_time
|
||||||
and time.monotonic() - start_time > timeout):
|
if timeout is not None and elapsed > timeout:
|
||||||
raise TimeoutError
|
raise TimeoutError
|
||||||
|
|
||||||
|
# if we wait for a long time, log a message
|
||||||
|
if elapsed > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning:
|
||||||
|
logger.info(
|
||||||
|
"No available shared memory broadcast block found"
|
||||||
|
" in %s seconds. This typically happens when some"
|
||||||
|
" processes are hanging or doing some"
|
||||||
|
" time-consuming work (e.g. compilation)",
|
||||||
|
VLLM_RINGBUFFER_WARNING_INTERVAL)
|
||||||
|
n_warning += 1
|
||||||
|
|
||||||
continue
|
continue
|
||||||
# found a block that is either
|
# found a block that is either
|
||||||
# (1) not written
|
# (1) not written
|
||||||
@ -432,7 +430,8 @@ class MessageQueue:
|
|||||||
@contextmanager
|
@contextmanager
|
||||||
def acquire_read(self,
|
def acquire_read(self,
|
||||||
timeout: Optional[float] = None,
|
timeout: Optional[float] = None,
|
||||||
cancel: Optional[Event] = None):
|
cancel: Optional[Event] = None,
|
||||||
|
indefinite: bool = False):
|
||||||
assert self._is_local_reader, "Only readers can acquire read"
|
assert self._is_local_reader, "Only readers can acquire read"
|
||||||
start_time = time.monotonic()
|
start_time = time.monotonic()
|
||||||
n_warning = 1
|
n_warning = 1
|
||||||
@ -452,26 +451,26 @@ class MessageQueue:
|
|||||||
# Release the processor to other threads
|
# Release the processor to other threads
|
||||||
self._read_spin_timer.spin()
|
self._read_spin_timer.spin()
|
||||||
|
|
||||||
# if we wait for a long time, log a message
|
|
||||||
if (time.monotonic() - start_time
|
|
||||||
> VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning):
|
|
||||||
logger.info(
|
|
||||||
("No available shared memory broadcast block found"
|
|
||||||
" in %s seconds. This typically happens when some"
|
|
||||||
" processes are hanging, doing some time-consuming"
|
|
||||||
" work (e.g. compilation), or sitting idle."),
|
|
||||||
VLLM_RINGBUFFER_WARNING_INTERVAL,
|
|
||||||
)
|
|
||||||
n_warning += 1
|
|
||||||
|
|
||||||
if cancel is not None and cancel.is_set():
|
if cancel is not None and cancel.is_set():
|
||||||
raise RuntimeError("cancelled")
|
raise RuntimeError("cancelled")
|
||||||
|
|
||||||
# if we time out, raise an exception
|
# if we time out, raise an exception
|
||||||
if (timeout is not None
|
elapsed = time.monotonic() - start_time
|
||||||
and time.monotonic() - start_time > timeout):
|
if timeout is not None and elapsed > timeout:
|
||||||
raise TimeoutError
|
raise TimeoutError
|
||||||
|
|
||||||
|
# if we wait for a long time, log a message
|
||||||
|
if not indefinite and (elapsed
|
||||||
|
> VLLM_RINGBUFFER_WARNING_INTERVAL *
|
||||||
|
n_warning):
|
||||||
|
logger.info(
|
||||||
|
"No available shared memory broadcast block found"
|
||||||
|
" in %s seconds. This typically happens when some"
|
||||||
|
" processes are hanging or doing some"
|
||||||
|
" time-consuming work (e.g. compilation).",
|
||||||
|
VLLM_RINGBUFFER_WARNING_INTERVAL)
|
||||||
|
n_warning += 1
|
||||||
|
|
||||||
continue
|
continue
|
||||||
# found a block that is not read by this reader
|
# found a block that is not read by this reader
|
||||||
# let caller read from the buffer
|
# let caller read from the buffer
|
||||||
@ -505,10 +504,11 @@ class MessageQueue:
|
|||||||
|
|
||||||
def dequeue(self,
|
def dequeue(self,
|
||||||
timeout: Optional[float] = None,
|
timeout: Optional[float] = None,
|
||||||
cancel: Optional[Event] = None):
|
cancel: Optional[Event] = None,
|
||||||
|
indefinite: bool = False):
|
||||||
""" Read from message queue with optional timeout (in seconds) """
|
""" Read from message queue with optional timeout (in seconds) """
|
||||||
if self._is_local_reader:
|
if self._is_local_reader:
|
||||||
with self.acquire_read(timeout, cancel) as buf:
|
with self.acquire_read(timeout, cancel, indefinite) as buf:
|
||||||
overflow = buf[0] == 1
|
overflow = buf[0] == 1
|
||||||
if not overflow:
|
if not overflow:
|
||||||
# no need to know the size of serialized object
|
# no need to know the size of serialized object
|
||||||
|
|||||||
@ -653,7 +653,7 @@ class WorkerProc:
|
|||||||
"""Main busy loop for Multiprocessing Workers"""
|
"""Main busy loop for Multiprocessing Workers"""
|
||||||
while True:
|
while True:
|
||||||
method, args, kwargs, output_rank = self.rpc_broadcast_mq.dequeue(
|
method, args, kwargs, output_rank = self.rpc_broadcast_mq.dequeue(
|
||||||
cancel=cancel)
|
cancel=cancel, indefinite=True)
|
||||||
try:
|
try:
|
||||||
if isinstance(method, str):
|
if isinstance(method, str):
|
||||||
func = getattr(self.worker, method)
|
func = getattr(self.worker, method)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user