mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 20:24:34 +08:00
[ray][metrics] Replace ':' with '_' for OpenTelemetry compatibility in Ray (#25439)
Signed-off-by: Seiji Eicher <seiji@anyscale.com> Signed-off-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com> Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
This commit is contained in:
parent
984d18498a
commit
8d52f2b3a7
@ -8,7 +8,8 @@ import ray
|
||||
from vllm.config import ModelDType
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
||||
from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
|
||||
from vllm.v1.metrics.ray_wrappers import (RayPrometheusMetric,
|
||||
RayPrometheusStatLogger)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
@ -65,3 +66,39 @@ def test_engine_log_metrics_ray(
|
||||
# Create the actor and call the async method
|
||||
actor = EngineTestActor.remote() # type: ignore[attr-defined]
|
||||
ray.get(actor.run.remote())
|
||||
|
||||
|
||||
def test_sanitized_opentelemetry_name():
|
||||
"""Test the metric name sanitization logic for Ray."""
|
||||
|
||||
# Only a-z, A-Z, 0-9, _, test valid characters are preserved
|
||||
valid_name = "valid_metric_123_abcDEF"
|
||||
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||
valid_name) == valid_name
|
||||
|
||||
# Test dash, dot, are replaced
|
||||
name_with_dash_dot = "metric-name.test"
|
||||
expected = "metric_name_test"
|
||||
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||
name_with_dash_dot) == expected
|
||||
|
||||
# Test colon is replaced with underscore
|
||||
name_with_colon = "metric:name"
|
||||
expected = "metric_name"
|
||||
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||
name_with_colon) == expected
|
||||
|
||||
# Test multiple invalid characters are replaced
|
||||
name_with_invalid = "metric:name@with#special%chars"
|
||||
expected = "metric_name_with_special_chars"
|
||||
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||
name_with_invalid) == expected
|
||||
|
||||
# Test mixed valid and invalid characters
|
||||
complex_name = "vllm:engine_stats/time.latency_ms-99p"
|
||||
expected = "vllm_engine_stats_time_latency_ms_99p"
|
||||
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||
complex_name) == expected
|
||||
|
||||
# Test empty string
|
||||
assert RayPrometheusMetric._get_sanitized_opentelemetry_name("") == ""
|
||||
|
||||
@ -11,6 +11,7 @@ try:
|
||||
from ray.util.metrics import Metric
|
||||
except ImportError:
|
||||
ray_metrics = None
|
||||
import regex as re
|
||||
|
||||
|
||||
class RayPrometheusMetric:
|
||||
@ -42,6 +43,21 @@ class RayPrometheusMetric:
|
||||
|
||||
return self
|
||||
|
||||
@staticmethod
|
||||
def _get_sanitized_opentelemetry_name(name: str) -> str:
|
||||
"""
|
||||
For compatibility with Ray + OpenTelemetry, the metric name must be
|
||||
sanitized. In particular, this replaces disallowed character (e.g., ':')
|
||||
with '_' in the metric name.
|
||||
Allowed characters: a-z, A-Z, 0-9, _
|
||||
|
||||
# ruff: noqa: E501
|
||||
Ref: https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/metrics/instrument_metadata_validator.cc#L22-L23
|
||||
Ref: https://github.com/ray-project/ray/blob/master/src/ray/stats/metric.cc#L107
|
||||
"""
|
||||
|
||||
return re.sub(r"[^a-zA-Z0-9_]", "_", name)
|
||||
|
||||
|
||||
class RayGaugeWrapper(RayPrometheusMetric):
|
||||
"""Wraps around ray.util.metrics.Gauge to provide same API as
|
||||
@ -58,6 +74,7 @@ class RayGaugeWrapper(RayPrometheusMetric):
|
||||
# implemented at the observability layer (Prometheus/Grafana).
|
||||
del multiprocess_mode
|
||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||
name = self._get_sanitized_opentelemetry_name(name)
|
||||
self.metric = ray_metrics.Gauge(name=name,
|
||||
description=documentation,
|
||||
tag_keys=labelnames_tuple)
|
||||
@ -79,6 +96,7 @@ class RayCounterWrapper(RayPrometheusMetric):
|
||||
documentation: Optional[str] = "",
|
||||
labelnames: Optional[list[str]] = None):
|
||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||
name = self._get_sanitized_opentelemetry_name(name)
|
||||
self.metric = ray_metrics.Counter(name=name,
|
||||
description=documentation,
|
||||
tag_keys=labelnames_tuple)
|
||||
@ -99,6 +117,7 @@ class RayHistogramWrapper(RayPrometheusMetric):
|
||||
labelnames: Optional[list[str]] = None,
|
||||
buckets: Optional[list[float]] = None):
|
||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||
name = self._get_sanitized_opentelemetry_name(name)
|
||||
boundaries = buckets if buckets else []
|
||||
self.metric = ray_metrics.Histogram(name=name,
|
||||
description=documentation,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user