[Metrics] Enable sleep state metric outside of dev mode (#27867)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin 2025-11-04 04:35:36 +00:00 committed by GitHub
parent 14a125a06d
commit 380ba6816d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -9,7 +9,6 @@ from typing import TypeAlias
from prometheus_client import Counter, Gauge, Histogram from prometheus_client import Counter, Gauge, Histogram
import vllm.envs as envs
from vllm.config import SupportsMetricsInfo, VllmConfig from vllm.config import SupportsMetricsInfo, VllmConfig
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
KVConnectorLogging, KVConnectorLogging,
@@ -395,32 +394,32 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
self.gauge_scheduler_waiting = make_per_engine( self.gauge_scheduler_waiting = make_per_engine(
gauge_scheduler_waiting, engine_indexes, model_name gauge_scheduler_waiting, engine_indexes, model_name
) )
if envs.VLLM_SERVER_DEV_MODE:
gauge_engine_sleep_state = self._gauge_cls(
name="vllm:engine_sleep_state",
documentation=(
"Engine sleep state; awake = 0 means engine is sleeping; "
"awake = 1 means engine is awake; "
"weights_offloaded = 1 means sleep level 1; "
"discard_all = 1 means sleep level 2."
),
labelnames=labelnames + ["sleep_state"],
multiprocess_mode="mostrecent",
)
self.gauge_engine_sleep_state = {} gauge_engine_sleep_state = self._gauge_cls(
sleep_state = ["awake", "weights_offloaded", "discard_all"] name="vllm:engine_sleep_state",
documentation=(
"Engine sleep state; awake = 0 means engine is sleeping; "
"awake = 1 means engine is awake; "
"weights_offloaded = 1 means sleep level 1; "
"discard_all = 1 means sleep level 2."
),
labelnames=labelnames + ["sleep_state"],
multiprocess_mode="mostrecent",
)
for s in sleep_state: self.gauge_engine_sleep_state = {}
self.gauge_engine_sleep_state[s] = { sleep_state = ["awake", "weights_offloaded", "discard_all"]
idx: gauge_engine_sleep_state.labels(
engine=idx, model_name=model_name, sleep_state=s
)
for idx in engine_indexes
}
# Setting default values for s in sleep_state:
self.record_sleep_state() self.gauge_engine_sleep_state[s] = {
idx: gauge_engine_sleep_state.labels(
engine=idx, model_name=model_name, sleep_state=s
)
for idx in engine_indexes
}
# Setting default values
self.record_sleep_state()
# GPU cache # GPU cache
# #
@@ -1052,9 +1051,6 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
self.gauge_lora_info.labels(**lora_info_labels).set_to_current_time() self.gauge_lora_info.labels(**lora_info_labels).set_to_current_time()
def record_sleep_state(self, sleep: int = 0, level: int = 0): def record_sleep_state(self, sleep: int = 0, level: int = 0):
if not envs.VLLM_SERVER_DEV_MODE:
return
awake = 1 awake = 1
discard_all = 0 discard_all = 0
weights_offloaded = 0 weights_offloaded = 0