mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 05:15:42 +08:00
[KVConnector] Add metrics to Prometheus-Grafana dashboard (#26811)
Signed-off-by: NickLucche <nlucches@redhat.com> Signed-off-by: Mark McLoughlin <markmc@redhat.com> Co-authored-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
parent
5b0448104f
commit
accb8fab07
@ -50,7 +50,12 @@ if TYPE_CHECKING:
|
|||||||
from vllm.attention.backends.abstract import AttentionMetadata
|
from vllm.attention.backends.abstract import AttentionMetadata
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.distributed.kv_events import KVCacheEvent
|
from vllm.distributed.kv_events import KVCacheEvent
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||||
|
KVConnectorPromMetrics,
|
||||||
|
KVConnectorStats,
|
||||||
|
PromMetric,
|
||||||
|
PromMetricT,
|
||||||
|
)
|
||||||
from vllm.forward_context import ForwardContext
|
from vllm.forward_context import ForwardContext
|
||||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||||
from vllm.v1.request import Request
|
from vllm.v1.request import Request
|
||||||
@ -471,3 +476,18 @@ class KVConnectorBase_V1(ABC):
|
|||||||
which can implement custom aggregation logic on the data dict.
|
which can implement custom aggregation logic on the data dict.
|
||||||
"""
|
"""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build_prom_metrics(
|
||||||
|
cls,
|
||||||
|
vllm_config: "VllmConfig",
|
||||||
|
metric_types: dict[type["PromMetric"], type["PromMetricT"]],
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
) -> Optional["KVConnectorPromMetrics"]:
|
||||||
|
"""
|
||||||
|
Create a KVConnectorPromMetrics subclass which should register
|
||||||
|
per-connector Prometheus metrics and implement observe() to
|
||||||
|
expose connector transfer stats via Prometheus.
|
||||||
|
"""
|
||||||
|
return None
|
||||||
|
|||||||
@ -1,13 +1,18 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any, TypeAlias, TypeVar
|
||||||
|
|
||||||
from vllm.config.kv_transfer import KVTransferConfig
|
from prometheus_client import Counter, Gauge, Histogram
|
||||||
|
|
||||||
|
from vllm.config import KVTransferConfig, VllmConfig
|
||||||
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
|
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
|
||||||
from vllm.distributed.kv_transfer.kv_transfer_state import has_kv_transfer_group
|
from vllm.distributed.kv_transfer.kv_transfer_state import has_kv_transfer_group
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
|
PromMetric: TypeAlias = Gauge | Counter | Histogram
|
||||||
|
PromMetricT = TypeVar("PromMetricT", bound=PromMetric)
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -102,3 +107,83 @@ class KVConnectorLogging:
|
|||||||
|
|
||||||
# Reset metrics for next interval
|
# Reset metrics for next interval
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
|
|
||||||
|
class KVConnectorPromMetrics:
|
||||||
|
"""
|
||||||
|
A base class for per-connector Prometheus metric registration
|
||||||
|
and recording.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vllm_config: VllmConfig,
|
||||||
|
metric_types: dict[type[PromMetric], type[PromMetricT]],
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
):
|
||||||
|
self._kv_transfer_config = vllm_config.kv_transfer_config
|
||||||
|
self._gauge_cls = metric_types[Gauge]
|
||||||
|
self._counter_cls = metric_types[Counter]
|
||||||
|
self._histogram_cls = metric_types[Histogram]
|
||||||
|
self._labelnames = labelnames
|
||||||
|
self._per_engine_labelvalues = per_engine_labelvalues
|
||||||
|
|
||||||
|
def make_per_engine(self, metric: PromMetric) -> PromMetric:
|
||||||
|
"""
|
||||||
|
Create a per-engine child of a prometheus_client.Metric with
|
||||||
|
the appropriate labels set. The parent metric must be created
|
||||||
|
using the labelnames list.
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
idx: metric.labels(*labelvalues)
|
||||||
|
for idx, labelvalues in self._per_engine_labelvalues.items()
|
||||||
|
}
|
||||||
|
|
||||||
|
def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
|
||||||
|
"""
|
||||||
|
Record the supplied transfer statistics to Prometheus metrics. These
|
||||||
|
statistics are engine-specific, and should be recorded to a metric
|
||||||
|
with the appropriate 'engine' label. These metric instances can be
|
||||||
|
created using the make_per_engine() helper method.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class KVConnectorPrometheus:
|
||||||
|
"""
|
||||||
|
Support for registering per-connector Prometheus metrics, and
|
||||||
|
recording transfer statistics to those metrics. Uses
|
||||||
|
KVConnectorBase.build_prom_metrics().
|
||||||
|
"""
|
||||||
|
|
||||||
|
_gauge_cls = Gauge
|
||||||
|
_counter_cls = Counter
|
||||||
|
_histogram_cls = Histogram
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vllm_config: VllmConfig,
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
):
|
||||||
|
self.prom_metrics: KVConnectorPromMetrics | None = None
|
||||||
|
kv_transfer_config = vllm_config.kv_transfer_config
|
||||||
|
if kv_transfer_config and kv_transfer_config.kv_connector:
|
||||||
|
connector_cls = KVConnectorFactory.get_connector_class(kv_transfer_config)
|
||||||
|
metric_types = {
|
||||||
|
Gauge: self._gauge_cls,
|
||||||
|
Counter: self._counter_cls,
|
||||||
|
Histogram: self._histogram_cls,
|
||||||
|
}
|
||||||
|
self.prom_metrics = connector_cls.build_prom_metrics(
|
||||||
|
vllm_config,
|
||||||
|
metric_types,
|
||||||
|
labelnames,
|
||||||
|
per_engine_labelvalues,
|
||||||
|
)
|
||||||
|
|
||||||
|
def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
|
||||||
|
if self.prom_metrics is None:
|
||||||
|
return
|
||||||
|
self.prom_metrics.observe(transfer_stats_data, engine_idx)
|
||||||
|
|||||||
@ -9,13 +9,19 @@ import torch
|
|||||||
|
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.config.kv_transfer import KVTransferConfig
|
from vllm.config.kv_transfer import KVTransferConfig
|
||||||
|
from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBaseType
|
||||||
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
|
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||||
KVConnectorBase_V1,
|
KVConnectorBase_V1,
|
||||||
KVConnectorMetadata,
|
KVConnectorMetadata,
|
||||||
KVConnectorRole,
|
KVConnectorRole,
|
||||||
)
|
)
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||||
|
KVConnectorPromMetrics,
|
||||||
|
KVConnectorStats,
|
||||||
|
PromMetric,
|
||||||
|
PromMetricT,
|
||||||
|
)
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.v1.core.sched.output import SchedulerOutput
|
from vllm.v1.core.sched.output import SchedulerOutput
|
||||||
from vllm.v1.outputs import KVConnectorOutput
|
from vllm.v1.outputs import KVConnectorOutput
|
||||||
@ -72,6 +78,27 @@ class MultiKVConnectorStats(KVConnectorStats):
|
|||||||
self.data[connector_id] = stats
|
self.data[connector_id] = stats
|
||||||
|
|
||||||
|
|
||||||
|
class MultiKVConnectorPromMetrics(KVConnectorPromMetrics):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vllm_config: "VllmConfig",
|
||||||
|
metric_types: dict[type[PromMetric], type[PromMetricT]],
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
prom_metrics: dict[str, KVConnectorPromMetrics],
|
||||||
|
):
|
||||||
|
super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues)
|
||||||
|
self._prom_metrics = prom_metrics
|
||||||
|
|
||||||
|
def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
|
||||||
|
for connector_id, stats_data in transfer_stats_data.items():
|
||||||
|
assert connector_id in self._prom_metrics, (
|
||||||
|
f"{connector_id} is not contained in the list of registered connectors "
|
||||||
|
f"with Prometheus metrics support: {self._prom_metrics.keys()}"
|
||||||
|
)
|
||||||
|
self._prom_metrics[connector_id].observe(stats_data["data"], engine_idx)
|
||||||
|
|
||||||
|
|
||||||
class MultiConnector(KVConnectorBase_V1):
|
class MultiConnector(KVConnectorBase_V1):
|
||||||
"""
|
"""
|
||||||
A wrapper for using multiple KVConnectors at the same time.
|
A wrapper for using multiple KVConnectors at the same time.
|
||||||
@ -84,19 +111,13 @@ class MultiConnector(KVConnectorBase_V1):
|
|||||||
|
|
||||||
def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
|
def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
|
||||||
super().__init__(vllm_config=vllm_config, role=role)
|
super().__init__(vllm_config=vllm_config, role=role)
|
||||||
|
|
||||||
self._connectors: list[KVConnectorBase_V1] = []
|
self._connectors: list[KVConnectorBase_V1] = []
|
||||||
self._ktc_kv_transfer_config = []
|
self._ktc_kv_transfer_config = []
|
||||||
ktcs = self._kv_transfer_config.kv_connector_extra_config.get("connectors")
|
for connector_cls, temp_config in self._get_connector_classes_and_configs(
|
||||||
assert ktcs is not None
|
vllm_config
|
||||||
for ktc in ktcs:
|
):
|
||||||
temp_config = copy.copy(vllm_config)
|
self._connectors.append(connector_cls(temp_config, role))
|
||||||
engine_id = ktc.get("engine_id", self._kv_transfer_config.engine_id)
|
|
||||||
temp_config.kv_transfer_config = KVTransferConfig(
|
|
||||||
**ktc, engine_id=engine_id
|
|
||||||
)
|
|
||||||
self._connectors.append(
|
|
||||||
KVConnectorFactory.create_connector(temp_config, role)
|
|
||||||
)
|
|
||||||
self._ktc_kv_transfer_config.append(temp_config.kv_transfer_config)
|
self._ktc_kv_transfer_config.append(temp_config.kv_transfer_config)
|
||||||
|
|
||||||
# A mapping from request id to the index of the connector chosen to
|
# A mapping from request id to the index of the connector chosen to
|
||||||
@ -109,6 +130,32 @@ class MultiConnector(KVConnectorBase_V1):
|
|||||||
# Propagated from scheduler to worker side via the connector metadata.
|
# Propagated from scheduler to worker side via the connector metadata.
|
||||||
self._extra_async_saves: dict[str, int] = {}
|
self._extra_async_saves: dict[str, int] = {}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_connector_classes_and_configs(
|
||||||
|
cls, vllm_config: "VllmConfig"
|
||||||
|
) -> list[tuple[type[KVConnectorBaseType], "VllmConfig"]]:
|
||||||
|
assert vllm_config.kv_transfer_config is not None
|
||||||
|
ktcs = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
|
||||||
|
"connectors"
|
||||||
|
)
|
||||||
|
assert ktcs is not None
|
||||||
|
ret: list[tuple[type[KVConnectorBaseType], VllmConfig]] = []
|
||||||
|
for ktc in ktcs:
|
||||||
|
temp_config = copy.copy(vllm_config)
|
||||||
|
engine_id = ktc.get("engine_id", vllm_config.kv_transfer_config.engine_id)
|
||||||
|
temp_config.kv_transfer_config = KVTransferConfig(
|
||||||
|
**ktc, engine_id=engine_id
|
||||||
|
)
|
||||||
|
ret.append(
|
||||||
|
(
|
||||||
|
KVConnectorFactory.get_connector_class(
|
||||||
|
temp_config.kv_transfer_config
|
||||||
|
),
|
||||||
|
temp_config,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return ret
|
||||||
|
|
||||||
def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
|
def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
|
||||||
for c in self._connectors:
|
for c in self._connectors:
|
||||||
c.register_kv_caches(kv_caches)
|
c.register_kv_caches(kv_caches)
|
||||||
@ -295,18 +342,12 @@ class MultiConnector(KVConnectorBase_V1):
|
|||||||
None if the connector does not require a specific layout.
|
None if the connector does not require a specific layout.
|
||||||
"""
|
"""
|
||||||
assert vllm_config.kv_transfer_config is not None
|
assert vllm_config.kv_transfer_config is not None
|
||||||
ktcs = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
|
|
||||||
"connectors"
|
|
||||||
)
|
|
||||||
assert ktcs is not None
|
|
||||||
layouts: set[str] = set()
|
layouts: set[str] = set()
|
||||||
temp_vllm_config = copy.copy(vllm_config)
|
for connector_cls, temp_config in cls._get_connector_classes_and_configs(
|
||||||
for ktc in ktcs:
|
vllm_config
|
||||||
kv_transfer_config = KVTransferConfig(**ktc)
|
):
|
||||||
temp_vllm_config.kv_transfer_config = kv_transfer_config
|
|
||||||
connector_cls = KVConnectorFactory.get_connector_class(kv_transfer_config)
|
|
||||||
required_kvcache_layout = connector_cls.get_required_kvcache_layout(
|
required_kvcache_layout = connector_cls.get_required_kvcache_layout(
|
||||||
temp_vllm_config
|
temp_config
|
||||||
)
|
)
|
||||||
if required_kvcache_layout is not None:
|
if required_kvcache_layout is not None:
|
||||||
layouts.add(required_kvcache_layout)
|
layouts.add(required_kvcache_layout)
|
||||||
@ -372,3 +413,28 @@ class MultiConnector(KVConnectorBase_V1):
|
|||||||
stats_by_connector = MultiKVConnectorStats()
|
stats_by_connector = MultiKVConnectorStats()
|
||||||
stats_by_connector[c.__class__.__name__] = stats
|
stats_by_connector[c.__class__.__name__] = stats
|
||||||
return stats_by_connector
|
return stats_by_connector
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build_prom_metrics(
|
||||||
|
cls,
|
||||||
|
vllm_config: "VllmConfig",
|
||||||
|
metric_types: dict[type["PromMetric"], type["PromMetricT"]],
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
) -> KVConnectorPromMetrics:
|
||||||
|
prom_metrics: dict[str, KVConnectorPromMetrics] = {}
|
||||||
|
for connector_cls, temp_config in cls._get_connector_classes_and_configs(
|
||||||
|
vllm_config
|
||||||
|
):
|
||||||
|
connector_prom = connector_cls.build_prom_metrics(
|
||||||
|
temp_config, metric_types, labelnames, per_engine_labelvalues
|
||||||
|
)
|
||||||
|
if connector_prom is not None:
|
||||||
|
prom_metrics[connector_cls.__name__] = connector_prom
|
||||||
|
return MultiKVConnectorPromMetrics(
|
||||||
|
vllm_config,
|
||||||
|
metric_types,
|
||||||
|
labelnames,
|
||||||
|
per_engine_labelvalues,
|
||||||
|
prom_metrics,
|
||||||
|
)
|
||||||
|
|||||||
@ -30,7 +30,12 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
|||||||
KVConnectorMetadata,
|
KVConnectorMetadata,
|
||||||
KVConnectorRole,
|
KVConnectorRole,
|
||||||
)
|
)
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||||
|
KVConnectorPromMetrics,
|
||||||
|
KVConnectorStats,
|
||||||
|
PromMetric,
|
||||||
|
PromMetricT,
|
||||||
|
)
|
||||||
from vllm.distributed.parallel_state import (
|
from vllm.distributed.parallel_state import (
|
||||||
get_tensor_model_parallel_rank,
|
get_tensor_model_parallel_rank,
|
||||||
get_tensor_model_parallel_world_size,
|
get_tensor_model_parallel_world_size,
|
||||||
@ -254,6 +259,18 @@ class NixlConnector(KVConnectorBase_V1):
|
|||||||
else NixlKVConnectorStats()
|
else NixlKVConnectorStats()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build_prom_metrics(
|
||||||
|
cls,
|
||||||
|
vllm_config: VllmConfig,
|
||||||
|
metric_types: dict[type[PromMetric], type[PromMetricT]],
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
) -> KVConnectorPromMetrics:
|
||||||
|
return NixlPromMetrics(
|
||||||
|
vllm_config, metric_types, labelnames, per_engine_labelvalues
|
||||||
|
)
|
||||||
|
|
||||||
def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
|
def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
|
||||||
assert self.connector_worker is not None
|
assert self.connector_worker is not None
|
||||||
assert isinstance(self._connector_metadata, NixlConnectorMetadata)
|
assert isinstance(self._connector_metadata, NixlConnectorMetadata)
|
||||||
@ -1960,3 +1977,125 @@ class NixlKVConnectorStats(KVConnectorStats):
|
|||||||
@property
|
@property
|
||||||
def num_successful_transfers(self) -> int:
|
def num_successful_transfers(self) -> int:
|
||||||
return len(self.data["transfer_duration"])
|
return len(self.data["transfer_duration"])
|
||||||
|
|
||||||
|
|
||||||
|
class NixlPromMetrics(KVConnectorPromMetrics):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vllm_config: VllmConfig,
|
||||||
|
metric_types: dict[type[PromMetric], type[PromMetricT]],
|
||||||
|
labelnames: list[str],
|
||||||
|
per_engine_labelvalues: dict[int, list[str]],
|
||||||
|
):
|
||||||
|
super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues)
|
||||||
|
|
||||||
|
buckets = [
|
||||||
|
0.001,
|
||||||
|
0.005,
|
||||||
|
0.01,
|
||||||
|
0.025,
|
||||||
|
0.05,
|
||||||
|
0.075,
|
||||||
|
0.1,
|
||||||
|
0.2,
|
||||||
|
0.3,
|
||||||
|
0.5,
|
||||||
|
0.75,
|
||||||
|
1.0,
|
||||||
|
5.0,
|
||||||
|
]
|
||||||
|
nixl_histogram_xfer_time = self._histogram_cls(
|
||||||
|
name="vllm:nixl_xfer_time_seconds",
|
||||||
|
documentation="Histogram of transfer duration for NIXL KV Cache transfers.",
|
||||||
|
buckets=buckets[1:],
|
||||||
|
labelnames=labelnames,
|
||||||
|
)
|
||||||
|
self.nixl_histogram_xfer_time = self.make_per_engine(nixl_histogram_xfer_time)
|
||||||
|
nixl_histogram_post_time = self._histogram_cls(
|
||||||
|
name="vllm:nixl_post_time_seconds",
|
||||||
|
documentation="Histogram of transfer post time for NIXL KV"
|
||||||
|
" Cache transfers.",
|
||||||
|
buckets=buckets,
|
||||||
|
labelnames=labelnames,
|
||||||
|
)
|
||||||
|
self.nixl_histogram_post_time = self.make_per_engine(nixl_histogram_post_time)
|
||||||
|
# uniform 2kb to 16gb range
|
||||||
|
buckets = [2 ** (10 + i) for i in range(1, 25, 2)]
|
||||||
|
nixl_histogram_bytes_transferred = self._histogram_cls(
|
||||||
|
name="vllm:nixl_bytes_transferred",
|
||||||
|
documentation="Histogram of bytes transferred per NIXL KV Cache transfers.",
|
||||||
|
buckets=buckets,
|
||||||
|
labelnames=labelnames,
|
||||||
|
)
|
||||||
|
self.nixl_histogram_bytes_transferred = self.make_per_engine(
|
||||||
|
nixl_histogram_bytes_transferred
|
||||||
|
)
|
||||||
|
buckets = [
|
||||||
|
10,
|
||||||
|
20,
|
||||||
|
30,
|
||||||
|
50,
|
||||||
|
75,
|
||||||
|
100,
|
||||||
|
200,
|
||||||
|
400,
|
||||||
|
1000,
|
||||||
|
2000,
|
||||||
|
4000,
|
||||||
|
10000,
|
||||||
|
20000,
|
||||||
|
50000,
|
||||||
|
]
|
||||||
|
nixl_histogram_num_descriptors = self._histogram_cls(
|
||||||
|
name="vllm:nixl_num_descriptors",
|
||||||
|
documentation="Histogram of number of descriptors per NIXL"
|
||||||
|
" KV Cache transfers.",
|
||||||
|
buckets=buckets,
|
||||||
|
labelnames=labelnames,
|
||||||
|
)
|
||||||
|
self.nixl_histogram_num_descriptors = self.make_per_engine(
|
||||||
|
nixl_histogram_num_descriptors
|
||||||
|
)
|
||||||
|
counter_nixl_num_failed_transfers = self._counter_cls(
|
||||||
|
name="vllm:nixl_num_failed_transfers",
|
||||||
|
documentation="Number of failed NIXL KV Cache transfers.",
|
||||||
|
labelnames=labelnames,
|
||||||
|
)
|
||||||
|
self.counter_nixl_num_failed_transfers = self.make_per_engine(
|
||||||
|
counter_nixl_num_failed_transfers
|
||||||
|
)
|
||||||
|
counter_nixl_num_failed_notifications = self._counter_cls(
|
||||||
|
name="vllm:nixl_num_failed_notifications",
|
||||||
|
documentation="Number of failed NIXL KV Cache notifications.",
|
||||||
|
labelnames=labelnames,
|
||||||
|
)
|
||||||
|
self.counter_nixl_num_failed_notifications = self.make_per_engine(
|
||||||
|
counter_nixl_num_failed_notifications
|
||||||
|
)
|
||||||
|
|
||||||
|
def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
|
||||||
|
for prom_obj, list_item_key in zip(
|
||||||
|
[
|
||||||
|
self.nixl_histogram_xfer_time,
|
||||||
|
self.nixl_histogram_post_time,
|
||||||
|
self.nixl_histogram_bytes_transferred,
|
||||||
|
self.nixl_histogram_num_descriptors,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"transfer_duration",
|
||||||
|
"post_duration",
|
||||||
|
"bytes_transferred",
|
||||||
|
"num_descriptors",
|
||||||
|
],
|
||||||
|
):
|
||||||
|
for list_item in transfer_stats_data[list_item_key]:
|
||||||
|
prom_obj[engine_idx].observe(list_item)
|
||||||
|
for counter_obj, counter_item_key in zip(
|
||||||
|
[
|
||||||
|
self.counter_nixl_num_failed_transfers,
|
||||||
|
self.counter_nixl_num_failed_notifications,
|
||||||
|
],
|
||||||
|
["num_failed_transfers", "num_failed_notifications"],
|
||||||
|
):
|
||||||
|
for list_item in transfer_stats_data[counter_item_key]:
|
||||||
|
counter_obj[engine_idx].inc(list_item)
|
||||||
|
|||||||
@ -11,7 +11,10 @@ from prometheus_client import Counter, Gauge, Histogram
|
|||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import SupportsMetricsInfo, VllmConfig
|
from vllm.config import SupportsMetricsInfo, VllmConfig
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorLogging
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||||
|
KVConnectorLogging,
|
||||||
|
KVConnectorPrometheus,
|
||||||
|
)
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.plugins import load_plugins_by_group
|
from vllm.plugins import load_plugins_by_group
|
||||||
from vllm.v1.engine import FinishReason
|
from vllm.v1.engine import FinishReason
|
||||||
@ -339,6 +342,7 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
|
|||||||
_counter_cls = Counter
|
_counter_cls = Counter
|
||||||
_histogram_cls = Histogram
|
_histogram_cls = Histogram
|
||||||
_spec_decoding_cls = SpecDecodingProm
|
_spec_decoding_cls = SpecDecodingProm
|
||||||
|
_kv_connector_cls = KVConnectorPrometheus
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, vllm_config: VllmConfig, engine_indexes: list[int] | None = None
|
self, vllm_config: VllmConfig, engine_indexes: list[int] | None = None
|
||||||
@ -358,12 +362,15 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
|
|||||||
model_name = vllm_config.model_config.served_model_name
|
model_name = vllm_config.model_config.served_model_name
|
||||||
max_model_len = vllm_config.model_config.max_model_len
|
max_model_len = vllm_config.model_config.max_model_len
|
||||||
|
|
||||||
spec_decode_labelvalues: dict[int, list[str]] = {
|
per_engine_labelvalues: dict[int, list[str]] = {
|
||||||
idx: [model_name, str(idx)] for idx in engine_indexes
|
idx: [model_name, str(idx)] for idx in engine_indexes
|
||||||
}
|
}
|
||||||
|
|
||||||
self.spec_decoding_prom = self._spec_decoding_cls(
|
self.spec_decoding_prom = self._spec_decoding_cls(
|
||||||
vllm_config.speculative_config, labelnames, spec_decode_labelvalues
|
vllm_config.speculative_config, labelnames, per_engine_labelvalues
|
||||||
|
)
|
||||||
|
self.kv_connector_prom = self._kv_connector_cls(
|
||||||
|
vllm_config, labelnames, per_engine_labelvalues
|
||||||
)
|
)
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -962,6 +969,11 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
|
|||||||
scheduler_stats.spec_decoding_stats, engine_idx
|
scheduler_stats.spec_decoding_stats, engine_idx
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if scheduler_stats.kv_connector_stats is not None:
|
||||||
|
self.kv_connector_prom.observe(
|
||||||
|
scheduler_stats.kv_connector_stats, engine_idx
|
||||||
|
)
|
||||||
|
|
||||||
if mm_cache_stats is not None:
|
if mm_cache_stats is not None:
|
||||||
self.counter_mm_cache_queries[engine_idx].inc(mm_cache_stats.queries)
|
self.counter_mm_cache_queries[engine_idx].inc(mm_cache_stats.queries)
|
||||||
self.counter_mm_cache_hits[engine_idx].inc(mm_cache_stats.hits)
|
self.counter_mm_cache_hits[engine_idx].inc(mm_cache_stats.hits)
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorPrometheus
|
||||||
from vllm.v1.metrics.loggers import PrometheusStatLogger
|
from vllm.v1.metrics.loggers import PrometheusStatLogger
|
||||||
from vllm.v1.spec_decode.metrics import SpecDecodingProm
|
from vllm.v1.spec_decode.metrics import SpecDecodingProm
|
||||||
|
|
||||||
@ -141,6 +142,18 @@ class RaySpecDecodingProm(SpecDecodingProm):
|
|||||||
_counter_cls = RayCounterWrapper
|
_counter_cls = RayCounterWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class RayKVConnectorPrometheus(KVConnectorPrometheus):
|
||||||
|
"""
|
||||||
|
RayKVConnectorPrometheus is used by RayMetrics to log Ray
|
||||||
|
metrics. Provides the same metrics as KV connectors but
|
||||||
|
uses Ray's util.metrics library.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_gauge_cls = RayGaugeWrapper
|
||||||
|
_counter_cls = RayCounterWrapper
|
||||||
|
_histogram_cls = RayHistogramWrapper
|
||||||
|
|
||||||
|
|
||||||
class RayPrometheusStatLogger(PrometheusStatLogger):
|
class RayPrometheusStatLogger(PrometheusStatLogger):
|
||||||
"""RayPrometheusStatLogger uses Ray metrics instead."""
|
"""RayPrometheusStatLogger uses Ray metrics instead."""
|
||||||
|
|
||||||
@ -148,6 +161,7 @@ class RayPrometheusStatLogger(PrometheusStatLogger):
|
|||||||
_counter_cls = RayCounterWrapper
|
_counter_cls = RayCounterWrapper
|
||||||
_histogram_cls = RayHistogramWrapper
|
_histogram_cls = RayHistogramWrapper
|
||||||
_spec_decoding_cls = RaySpecDecodingProm
|
_spec_decoding_cls = RaySpecDecodingProm
|
||||||
|
_kv_connector_cls = RayKVConnectorPrometheus
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _unregister_vllm_metrics():
|
def _unregister_vllm_metrics():
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user