mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-04 04:57:54 +08:00
[ray][metrics] Replace ':' with '_' for OpenTelemetry compatibility in Ray (#25439)
Signed-off-by: Seiji Eicher <seiji@anyscale.com> Signed-off-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com> Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
This commit is contained in:
parent
984d18498a
commit
8d52f2b3a7
@ -8,7 +8,8 @@ import ray
|
|||||||
from vllm.config import ModelDType
|
from vllm.config import ModelDType
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
||||||
from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
|
from vllm.v1.metrics.ray_wrappers import (RayPrometheusMetric,
|
||||||
|
RayPrometheusStatLogger)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function", autouse=True)
|
@pytest.fixture(scope="function", autouse=True)
|
||||||
@ -65,3 +66,39 @@ def test_engine_log_metrics_ray(
|
|||||||
# Create the actor and call the async method
|
# Create the actor and call the async method
|
||||||
actor = EngineTestActor.remote() # type: ignore[attr-defined]
|
actor = EngineTestActor.remote() # type: ignore[attr-defined]
|
||||||
ray.get(actor.run.remote())
|
ray.get(actor.run.remote())
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitized_opentelemetry_name():
|
||||||
|
"""Test the metric name sanitization logic for Ray."""
|
||||||
|
|
||||||
|
# Only a-z, A-Z, 0-9, _, test valid characters are preserved
|
||||||
|
valid_name = "valid_metric_123_abcDEF"
|
||||||
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||||
|
valid_name) == valid_name
|
||||||
|
|
||||||
|
# Test dash, dot, are replaced
|
||||||
|
name_with_dash_dot = "metric-name.test"
|
||||||
|
expected = "metric_name_test"
|
||||||
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||||
|
name_with_dash_dot) == expected
|
||||||
|
|
||||||
|
# Test colon is replaced with underscore
|
||||||
|
name_with_colon = "metric:name"
|
||||||
|
expected = "metric_name"
|
||||||
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||||
|
name_with_colon) == expected
|
||||||
|
|
||||||
|
# Test multiple invalid characters are replaced
|
||||||
|
name_with_invalid = "metric:name@with#special%chars"
|
||||||
|
expected = "metric_name_with_special_chars"
|
||||||
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||||
|
name_with_invalid) == expected
|
||||||
|
|
||||||
|
# Test mixed valid and invalid characters
|
||||||
|
complex_name = "vllm:engine_stats/time.latency_ms-99p"
|
||||||
|
expected = "vllm_engine_stats_time_latency_ms_99p"
|
||||||
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
||||||
|
complex_name) == expected
|
||||||
|
|
||||||
|
# Test empty string
|
||||||
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name("") == ""
|
||||||
|
|||||||
@ -11,6 +11,7 @@ try:
|
|||||||
from ray.util.metrics import Metric
|
from ray.util.metrics import Metric
|
||||||
except ImportError:
|
except ImportError:
|
||||||
ray_metrics = None
|
ray_metrics = None
|
||||||
|
import regex as re
|
||||||
|
|
||||||
|
|
||||||
class RayPrometheusMetric:
|
class RayPrometheusMetric:
|
||||||
@ -42,6 +43,21 @@ class RayPrometheusMetric:
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_sanitized_opentelemetry_name(name: str) -> str:
|
||||||
|
"""
|
||||||
|
For compatibility with Ray + OpenTelemetry, the metric name must be
|
||||||
|
sanitized. In particular, this replaces disallowed character (e.g., ':')
|
||||||
|
with '_' in the metric name.
|
||||||
|
Allowed characters: a-z, A-Z, 0-9, _
|
||||||
|
|
||||||
|
# ruff: noqa: E501
|
||||||
|
Ref: https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/metrics/instrument_metadata_validator.cc#L22-L23
|
||||||
|
Ref: https://github.com/ray-project/ray/blob/master/src/ray/stats/metric.cc#L107
|
||||||
|
"""
|
||||||
|
|
||||||
|
return re.sub(r"[^a-zA-Z0-9_]", "_", name)
|
||||||
|
|
||||||
|
|
||||||
class RayGaugeWrapper(RayPrometheusMetric):
|
class RayGaugeWrapper(RayPrometheusMetric):
|
||||||
"""Wraps around ray.util.metrics.Gauge to provide same API as
|
"""Wraps around ray.util.metrics.Gauge to provide same API as
|
||||||
@ -58,6 +74,7 @@ class RayGaugeWrapper(RayPrometheusMetric):
|
|||||||
# implemented at the observability layer (Prometheus/Grafana).
|
# implemented at the observability layer (Prometheus/Grafana).
|
||||||
del multiprocess_mode
|
del multiprocess_mode
|
||||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||||
|
name = self._get_sanitized_opentelemetry_name(name)
|
||||||
self.metric = ray_metrics.Gauge(name=name,
|
self.metric = ray_metrics.Gauge(name=name,
|
||||||
description=documentation,
|
description=documentation,
|
||||||
tag_keys=labelnames_tuple)
|
tag_keys=labelnames_tuple)
|
||||||
@ -79,6 +96,7 @@ class RayCounterWrapper(RayPrometheusMetric):
|
|||||||
documentation: Optional[str] = "",
|
documentation: Optional[str] = "",
|
||||||
labelnames: Optional[list[str]] = None):
|
labelnames: Optional[list[str]] = None):
|
||||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||||
|
name = self._get_sanitized_opentelemetry_name(name)
|
||||||
self.metric = ray_metrics.Counter(name=name,
|
self.metric = ray_metrics.Counter(name=name,
|
||||||
description=documentation,
|
description=documentation,
|
||||||
tag_keys=labelnames_tuple)
|
tag_keys=labelnames_tuple)
|
||||||
@ -99,6 +117,7 @@ class RayHistogramWrapper(RayPrometheusMetric):
|
|||||||
labelnames: Optional[list[str]] = None,
|
labelnames: Optional[list[str]] = None,
|
||||||
buckets: Optional[list[float]] = None):
|
buckets: Optional[list[float]] = None):
|
||||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||||
|
name = self._get_sanitized_opentelemetry_name(name)
|
||||||
boundaries = buckets if buckets else []
|
boundaries = buckets if buckets else []
|
||||||
self.metric = ray_metrics.Histogram(name=name,
|
self.metric = ray_metrics.Histogram(name=name,
|
||||||
description=documentation,
|
description=documentation,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user