Enable v1 metrics tests (#20953)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
2026-03-19 23:27:32 +08:00 · 2025-07-19 20:22:02 -07:00 · 2025-07-19 20:22:02 -07:00 · d1fb65bde3
commit d1fb65bde3
parent 3a1d8940ae
3 changed files with 20 additions and 7 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -264,6 +264,7 @@ steps:
    - pytest -v -s v1/structured_output
    - pytest -v -s v1/spec_decode
    - pytest -v -s v1/kv_connector/unit
+    - pytest -v -s v1/metrics
    - pytest -v -s v1/test_serial_utils.py
    - pytest -v -s v1/test_utils.py
    - pytest -v -s v1/test_oracle.py
--- a/tests/v1/metrics/test_ray_metrics.py
+++ b/tests/v1/metrics/test_ray_metrics.py
@@ -1,8 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+
 import pytest
 import ray

+from vllm.config import ModelDType
 from vllm.sampling_params import SamplingParams
 from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
 from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
@@ -27,7 +30,7 @@ MODELS = [
 def test_engine_log_metrics_ray(
    example_prompts,
    model: str,
-    dtype: str,
+    dtype: ModelDType,
    max_tokens: int,
 ) -> None:
    """ Simple smoke test, verifying this can be used without exceptions.
@@ -37,11 +40,14 @@ def test_engine_log_metrics_ray(
    class EngineTestActor:

        async def run(self):
-            engine_args = AsyncEngineArgs(
-                model=model,
-                dtype=dtype,
-                disable_log_stats=False,
-            )
+            # Set environment variable inside the Ray actor since environment
+            # variables from pytest fixtures don't propagate to Ray actors
+            os.environ['VLLM_USE_V1'] = '1'
+
+            engine_args = AsyncEngineArgs(model=model,
+                                          dtype=dtype,
+                                          disable_log_stats=False,
+                                          enforce_eager=True)

            engine = AsyncLLM.from_engine_args(
                engine_args, stat_loggers=[RayPrometheusStatLogger])
--- a/vllm/v1/metrics/ray_wrappers.py
+++ b/vllm/v1/metrics/ray_wrappers.py
@@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric):
    def __init__(self,
                 name: str,
                 documentation: Optional[str] = "",
-                 labelnames: Optional[list[str]] = None):
+                 labelnames: Optional[list[str]] = None,
+                 multiprocess_mode: Optional[str] = ""):
+
+        # All Ray metrics are keyed by WorkerId, so multiprocess modes like
+        # "mostrecent", "all", "sum" do not apply. This logic can be manually
+        # implemented at the observability layer (Prometheus/Grafana).
+        del multiprocess_mode
        labelnames_tuple = tuple(labelnames) if labelnames else None
        self.metric = ray_metrics.Gauge(name=name,
                                        description=documentation,