mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-19 05:56:59 +08:00
Enable v1 metrics tests (#20953)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
This commit is contained in:
parent
3a1d8940ae
commit
d1fb65bde3
@ -264,6 +264,7 @@ steps:
|
|||||||
- pytest -v -s v1/structured_output
|
- pytest -v -s v1/structured_output
|
||||||
- pytest -v -s v1/spec_decode
|
- pytest -v -s v1/spec_decode
|
||||||
- pytest -v -s v1/kv_connector/unit
|
- pytest -v -s v1/kv_connector/unit
|
||||||
|
- pytest -v -s v1/metrics
|
||||||
- pytest -v -s v1/test_serial_utils.py
|
- pytest -v -s v1/test_serial_utils.py
|
||||||
- pytest -v -s v1/test_utils.py
|
- pytest -v -s v1/test_utils.py
|
||||||
- pytest -v -s v1/test_oracle.py
|
- pytest -v -s v1/test_oracle.py
|
||||||
|
|||||||
@ -1,8 +1,11 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import ray
|
import ray
|
||||||
|
|
||||||
|
from vllm.config import ModelDType
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
||||||
from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
|
from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
|
||||||
@ -27,7 +30,7 @@ MODELS = [
|
|||||||
def test_engine_log_metrics_ray(
|
def test_engine_log_metrics_ray(
|
||||||
example_prompts,
|
example_prompts,
|
||||||
model: str,
|
model: str,
|
||||||
dtype: str,
|
dtype: ModelDType,
|
||||||
max_tokens: int,
|
max_tokens: int,
|
||||||
) -> None:
|
) -> None:
|
||||||
""" Simple smoke test, verifying this can be used without exceptions.
|
""" Simple smoke test, verifying this can be used without exceptions.
|
||||||
@ -37,11 +40,14 @@ def test_engine_log_metrics_ray(
|
|||||||
class EngineTestActor:
|
class EngineTestActor:
|
||||||
|
|
||||||
async def run(self):
|
async def run(self):
|
||||||
engine_args = AsyncEngineArgs(
|
# Set environment variable inside the Ray actor since environment
|
||||||
model=model,
|
# variables from pytest fixtures don't propagate to Ray actors
|
||||||
dtype=dtype,
|
os.environ['VLLM_USE_V1'] = '1'
|
||||||
disable_log_stats=False,
|
|
||||||
)
|
engine_args = AsyncEngineArgs(model=model,
|
||||||
|
dtype=dtype,
|
||||||
|
disable_log_stats=False,
|
||||||
|
enforce_eager=True)
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(
|
engine = AsyncLLM.from_engine_args(
|
||||||
engine_args, stat_loggers=[RayPrometheusStatLogger])
|
engine_args, stat_loggers=[RayPrometheusStatLogger])
|
||||||
|
|||||||
@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric):
|
|||||||
def __init__(self,
|
def __init__(self,
|
||||||
name: str,
|
name: str,
|
||||||
documentation: Optional[str] = "",
|
documentation: Optional[str] = "",
|
||||||
labelnames: Optional[list[str]] = None):
|
labelnames: Optional[list[str]] = None,
|
||||||
|
multiprocess_mode: Optional[str] = ""):
|
||||||
|
|
||||||
|
# All Ray metrics are keyed by WorkerId, so multiprocess modes like
|
||||||
|
# "mostrecent", "all", "sum" do not apply. This logic can be manually
|
||||||
|
# implemented at the observability layer (Prometheus/Grafana).
|
||||||
|
del multiprocess_mode
|
||||||
labelnames_tuple = tuple(labelnames) if labelnames else None
|
labelnames_tuple = tuple(labelnames) if labelnames else None
|
||||||
self.metric = ray_metrics.Gauge(name=name,
|
self.metric = ray_metrics.Gauge(name=name,
|
||||||
description=documentation,
|
description=documentation,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user