From 380ba6816d4646be99d9b6d207ba7bc7fce8290e Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Tue, 4 Nov 2025 04:35:36 +0000 Subject: [PATCH] [Metrics] Enable sleep state metric outside of dev mode (#27867) Signed-off-by: Mark McLoughlin --- vllm/v1/metrics/loggers.py | 50 ++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index 67b6ceaa847f6..e85f85bfb0aab 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -9,7 +9,6 @@ from typing import TypeAlias from prometheus_client import Counter, Gauge, Histogram -import vllm.envs as envs from vllm.config import SupportsMetricsInfo, VllmConfig from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( KVConnectorLogging, @@ -395,32 +394,32 @@ class PrometheusStatLogger(AggregateStatLoggerBase): self.gauge_scheduler_waiting = make_per_engine( gauge_scheduler_waiting, engine_indexes, model_name ) - if envs.VLLM_SERVER_DEV_MODE: - gauge_engine_sleep_state = self._gauge_cls( - name="vllm:engine_sleep_state", - documentation=( - "Engine sleep state; awake = 0 means engine is sleeping; " - "awake = 1 means engine is awake; " - "weights_offloaded = 1 means sleep level 1; " - "discard_all = 1 means sleep level 2." - ), - labelnames=labelnames + ["sleep_state"], - multiprocess_mode="mostrecent", - ) - self.gauge_engine_sleep_state = {} - sleep_state = ["awake", "weights_offloaded", "discard_all"] + gauge_engine_sleep_state = self._gauge_cls( + name="vllm:engine_sleep_state", + documentation=( + "Engine sleep state; awake = 0 means engine is sleeping; " + "awake = 1 means engine is awake; " + "weights_offloaded = 1 means sleep level 1; " + "discard_all = 1 means sleep level 2." + ), + labelnames=labelnames + ["sleep_state"], + multiprocess_mode="mostrecent", + ) - for s in sleep_state: - self.gauge_engine_sleep_state[s] = { - idx: gauge_engine_sleep_state.labels( - engine=idx, model_name=model_name, sleep_state=s - ) - for idx in engine_indexes - } + self.gauge_engine_sleep_state = {} + sleep_state = ["awake", "weights_offloaded", "discard_all"] - # Setting default values - self.record_sleep_state() + for s in sleep_state: + self.gauge_engine_sleep_state[s] = { + idx: gauge_engine_sleep_state.labels( + engine=idx, model_name=model_name, sleep_state=s + ) + for idx in engine_indexes + } + + # Setting default values + self.record_sleep_state() # GPU cache # @@ -1052,9 +1051,6 @@ class PrometheusStatLogger(AggregateStatLoggerBase): self.gauge_lora_info.labels(**lora_info_labels).set_to_current_time() def record_sleep_state(self, sleep: int = 0, level: int = 0): - if not envs.VLLM_SERVER_DEV_MODE: - return - awake = 1 discard_all = 0 weights_offloaded = 0