Enable v1 metrics tests (#20953)

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
2026-07-23 07:37:09 +08:00 · 2025-07-19 20:22:02 -07:00 · 2025-07-19 20:22:02 -07:00 · d1fb65bde3
commit d1fb65bde3
parent 3a1d8940ae
3 changed files with 20 additions and 7 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -264,6 +264,7 @@ steps:
    - pytest -v -s v1/structured_output
    - pytest -v -s v1/spec_decode
    - pytest -v -s v1/kv_connector/unit
    - pytest -v -s v1/metrics
    - pytest -v -s v1/test_serial_utils.py
    - pytest -v -s v1/test_utils.py
    - pytest -v -s v1/test_oracle.py
--- a/tests/v1/metrics/test_ray_metrics.py
+++ b/tests/v1/metrics/test_ray_metrics.py
@@ -1,8 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import os
 import pytest
 import ray
 from vllm.config import ModelDType
 from vllm.sampling_params import SamplingParams
 from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
 from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
@@ -27,7 +30,7 @@ MODELS = [
 def test_engine_log_metrics_ray(
    example_prompts,
    model: str,
-    dtype: str,
+    dtype: ModelDType,
    max_tokens: int,
 ) -> None:
    """ Simple smoke test, verifying this can be used without exceptions.
@@ -37,11 +40,14 @@ def test_engine_log_metrics_ray(
    class EngineTestActor:
        async def run(self):
-            engine_args = AsyncEngineArgs(
+            # Set environment variable inside the Ray actor since environment
-                model=model,
+            # variables from pytest fixtures don't propagate to Ray actors
-                dtype=dtype,
+            os.environ['VLLM_USE_V1'] = '1'
-                disable_log_stats=False,
+
-            )
+            engine_args = AsyncEngineArgs(model=model,
                                          dtype=dtype,
                                          disable_log_stats=False,
                                          enforce_eager=True)
            engine = AsyncLLM.from_engine_args(
                engine_args, stat_loggers=[RayPrometheusStatLogger])
--- a/vllm/v1/metrics/ray_wrappers.py
+++ b/vllm/v1/metrics/ray_wrappers.py
@@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric):
    def __init__(self,
                 name: str,
                 documentation: Optional[str] = "",
-                 labelnames: Optional[list[str]] = None):
+                 labelnames: Optional[list[str]] = None,
                 multiprocess_mode: Optional[str] = ""):
        # All Ray metrics are keyed by WorkerId, so multiprocess modes like
        # "mostrecent", "all", "sum" do not apply. This logic can be manually
        # implemented at the observability layer (Prometheus/Grafana).
        del multiprocess_mode
        labelnames_tuple = tuple(labelnames) if labelnames else None
        self.metric = ray_metrics.Gauge(name=name,
                                        description=documentation,