mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-24 02:54:28 +08:00
Enable v1 metrics tests (#20953)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
This commit is contained in:
parent
3a1d8940ae
commit
d1fb65bde3
@@ -264,6 +264,7 @@ steps:
|
||||
- pytest -v -s v1/structured_output
|
||||
- pytest -v -s v1/spec_decode
|
||||
- pytest -v -s v1/kv_connector/unit
|
||||
- pytest -v -s v1/metrics
|
||||
- pytest -v -s v1/test_serial_utils.py
|
||||
- pytest -v -s v1/test_utils.py
|
||||
- pytest -v -s v1/test_oracle.py
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import os
|
||||
|
||||
import pytest
|
||||
import ray
|
||||
|
||||
from vllm.config import ModelDType
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
||||
from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
|
||||
@@ -27,7 +30,7 @@ MODELS = [
|
||||
def test_engine_log_metrics_ray(
|
||||
example_prompts,
|
||||
model: str,
|
||||
dtype: str,
|
||||
dtype: ModelDType,
|
||||
max_tokens: int,
|
||||
) -> None:
|
||||
""" Simple smoke test, verifying this can be used without exceptions.
|
||||
@@ -37,11 +40,14 @@ def test_engine_log_metrics_ray(
|
||||
class EngineTestActor:
|
||||
|
||||
async def run(self):
|
||||
engine_args = AsyncEngineArgs(
|
||||
model=model,
|
||||
dtype=dtype,
|
||||
disable_log_stats=False,
|
||||
)
|
||||
# Set environment variable inside the Ray actor since environment
|
||||
# variables from pytest fixtures don't propagate to Ray actors
|
||||
os.environ['VLLM_USE_V1'] = '1'
|
||||
|
||||
engine_args = AsyncEngineArgs(model=model,
|
||||
dtype=dtype,
|
||||
disable_log_stats=False,
|
||||
enforce_eager=True)
|
||||
|
||||
engine = AsyncLLM.from_engine_args(
|
||||
engine_args, stat_loggers=[RayPrometheusStatLogger])
|
||||
|
||||
@@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric):
|
||||
def __init__(self,
|
||||
name: str,
|
||||
documentation: Optional[str] = "",
|
||||
labelnames: Optional[list[str]] = None):
|
||||
labelnames: Optional[list[str]] = None,
|
||||
multiprocess_mode: Optional[str] = ""):
|
||||
|
||||
# All Ray metrics are keyed by WorkerId, so multiprocess modes like
|
||||
# "mostrecent", "all", "sum" do not apply. This logic can be manually
|
||||
# implemented at the observability layer (Prometheus/Grafana).
|
||||
del multiprocess_mode
|
||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||
self.metric = ray_metrics.Gauge(name=name,
|
||||
description=documentation,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user