mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 23:55:42 +08:00
[Metrics] Enable sleep state metric outside of dev mode (#27867)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
parent
14a125a06d
commit
380ba6816d
@ -9,7 +9,6 @@ from typing import TypeAlias
|
|||||||
|
|
||||||
from prometheus_client import Counter, Gauge, Histogram
|
from prometheus_client import Counter, Gauge, Histogram
|
||||||
|
|
||||||
import vllm.envs as envs
|
|
||||||
from vllm.config import SupportsMetricsInfo, VllmConfig
|
from vllm.config import SupportsMetricsInfo, VllmConfig
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||||
KVConnectorLogging,
|
KVConnectorLogging,
|
||||||
@ -395,32 +394,32 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
|
|||||||
self.gauge_scheduler_waiting = make_per_engine(
|
self.gauge_scheduler_waiting = make_per_engine(
|
||||||
gauge_scheduler_waiting, engine_indexes, model_name
|
gauge_scheduler_waiting, engine_indexes, model_name
|
||||||
)
|
)
|
||||||
if envs.VLLM_SERVER_DEV_MODE:
|
|
||||||
gauge_engine_sleep_state = self._gauge_cls(
|
|
||||||
name="vllm:engine_sleep_state",
|
|
||||||
documentation=(
|
|
||||||
"Engine sleep state; awake = 0 means engine is sleeping; "
|
|
||||||
"awake = 1 means engine is awake; "
|
|
||||||
"weights_offloaded = 1 means sleep level 1; "
|
|
||||||
"discard_all = 1 means sleep level 2."
|
|
||||||
),
|
|
||||||
labelnames=labelnames + ["sleep_state"],
|
|
||||||
multiprocess_mode="mostrecent",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.gauge_engine_sleep_state = {}
|
gauge_engine_sleep_state = self._gauge_cls(
|
||||||
sleep_state = ["awake", "weights_offloaded", "discard_all"]
|
name="vllm:engine_sleep_state",
|
||||||
|
documentation=(
|
||||||
|
"Engine sleep state; awake = 0 means engine is sleeping; "
|
||||||
|
"awake = 1 means engine is awake; "
|
||||||
|
"weights_offloaded = 1 means sleep level 1; "
|
||||||
|
"discard_all = 1 means sleep level 2."
|
||||||
|
),
|
||||||
|
labelnames=labelnames + ["sleep_state"],
|
||||||
|
multiprocess_mode="mostrecent",
|
||||||
|
)
|
||||||
|
|
||||||
for s in sleep_state:
|
self.gauge_engine_sleep_state = {}
|
||||||
self.gauge_engine_sleep_state[s] = {
|
sleep_state = ["awake", "weights_offloaded", "discard_all"]
|
||||||
idx: gauge_engine_sleep_state.labels(
|
|
||||||
engine=idx, model_name=model_name, sleep_state=s
|
|
||||||
)
|
|
||||||
for idx in engine_indexes
|
|
||||||
}
|
|
||||||
|
|
||||||
# Setting default values
|
for s in sleep_state:
|
||||||
self.record_sleep_state()
|
self.gauge_engine_sleep_state[s] = {
|
||||||
|
idx: gauge_engine_sleep_state.labels(
|
||||||
|
engine=idx, model_name=model_name, sleep_state=s
|
||||||
|
)
|
||||||
|
for idx in engine_indexes
|
||||||
|
}
|
||||||
|
|
||||||
|
# Setting default values
|
||||||
|
self.record_sleep_state()
|
||||||
|
|
||||||
# GPU cache
|
# GPU cache
|
||||||
#
|
#
|
||||||
@ -1052,9 +1051,6 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
|
|||||||
self.gauge_lora_info.labels(**lora_info_labels).set_to_current_time()
|
self.gauge_lora_info.labels(**lora_info_labels).set_to_current_time()
|
||||||
|
|
||||||
def record_sleep_state(self, sleep: int = 0, level: int = 0):
|
def record_sleep_state(self, sleep: int = 0, level: int = 0):
|
||||||
if not envs.VLLM_SERVER_DEV_MODE:
|
|
||||||
return
|
|
||||||
|
|
||||||
awake = 1
|
awake = 1
|
||||||
discard_all = 0
|
discard_all = 0
|
||||||
weights_offloaded = 0
|
weights_offloaded = 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user