[Log] update shm wait time msg (#28255)

This commit is contained in:
Boyuan Feng 2025-11-07 06:43:30 -08:00 committed by GitHub
parent 4b1ff13221
commit 0f872b7977
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -49,6 +49,16 @@ def to_bytes_big(value: int, size: int) -> bytes:
logger = init_logger(__name__) logger = init_logger(__name__)
def long_wait_time_msg(threshold: int) -> str:
    """Build the log text for a long wait on a shared memory broadcast block.

    Args:
        threshold: Number of seconds reported in the message as the
            elapsed waiting interval.

    Returns:
        The fully formatted, single-line message. No further ``%``-style
        arguments are needed when handing it to a logger.
    """
    # Kept as one template so the wording lives in a single place; the
    # only variable part is the number of seconds.
    template = (
        "No available shared memory broadcast block found "
        "in {threshold} seconds. This typically happens "
        "when some processes are hanging or doing some "
        "time-consuming work (e.g. compilation, "
        "weight/kv cache quantization)."
    )
    message = template.format(threshold=threshold)
    return message
class SpinTimer: class SpinTimer:
def record_activity(self): def record_activity(self):
pass pass
@ -422,11 +432,7 @@ class MessageQueue:
# if we wait for a long time, log a message # if we wait for a long time, log a message
if elapsed > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning: if elapsed > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning:
logger.info( logger.info(
"No available shared memory broadcast block found" long_wait_time_msg(VLLM_RINGBUFFER_WARNING_INTERVAL)
" in %s seconds. This typically happens when some"
" processes are hanging or doing some"
" time-consuming work (e.g. compilation)",
VLLM_RINGBUFFER_WARNING_INTERVAL,
) )
n_warning += 1 n_warning += 1
@ -493,11 +499,7 @@ class MessageQueue:
elapsed > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning elapsed > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning
): ):
logger.info( logger.info(
"No available shared memory broadcast block found" long_wait_time_msg(VLLM_RINGBUFFER_WARNING_INTERVAL)
" in %s seconds. This typically happens when some"
" processes are hanging or doing some"
" time-consuming work (e.g. compilation).",
VLLM_RINGBUFFER_WARNING_INTERVAL,
) )
n_warning += 1 n_warning += 1