mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 00:25:23 +08:00
Signed-off-by: Seiji Eicher <seiji@anyscale.com> Signed-off-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com> Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
105 lines
3.5 KiB
Python
105 lines
3.5 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import os
|
|
|
|
import pytest
|
|
import ray
|
|
|
|
from vllm.config import ModelDType
|
|
from vllm.sampling_params import SamplingParams
|
|
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
|
|
from vllm.v1.metrics.ray_wrappers import (RayPrometheusMetric,
|
|
RayPrometheusStatLogger)
|
|
|
|
|
|
@pytest.fixture(scope="function", autouse=True)
|
|
def use_v1_only(monkeypatch):
|
|
"""
|
|
The change relies on V1 APIs, so set VLLM_USE_V1=1.
|
|
"""
|
|
monkeypatch.setenv('VLLM_USE_V1', '1')
|
|
|
|
|
|
MODELS = [
|
|
"distilbert/distilgpt2",
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("model", MODELS)
|
|
@pytest.mark.parametrize("dtype", ["half"])
|
|
@pytest.mark.parametrize("max_tokens", [16])
|
|
def test_engine_log_metrics_ray(
|
|
example_prompts,
|
|
model: str,
|
|
dtype: ModelDType,
|
|
max_tokens: int,
|
|
) -> None:
|
|
""" Simple smoke test, verifying this can be used without exceptions.
|
|
Need to start a Ray cluster in order to verify outputs."""
|
|
|
|
@ray.remote(num_gpus=1)
|
|
class EngineTestActor:
|
|
|
|
async def run(self):
|
|
# Set environment variable inside the Ray actor since environment
|
|
# variables from pytest fixtures don't propagate to Ray actors
|
|
os.environ['VLLM_USE_V1'] = '1'
|
|
|
|
engine_args = AsyncEngineArgs(model=model,
|
|
dtype=dtype,
|
|
disable_log_stats=False,
|
|
enforce_eager=True)
|
|
|
|
engine = AsyncLLM.from_engine_args(
|
|
engine_args, stat_loggers=[RayPrometheusStatLogger])
|
|
|
|
for i, prompt in enumerate(example_prompts):
|
|
results = engine.generate(
|
|
request_id=f"request-id-{i}",
|
|
prompt=prompt,
|
|
sampling_params=SamplingParams(max_tokens=max_tokens),
|
|
)
|
|
|
|
async for _ in results:
|
|
pass
|
|
|
|
# Create the actor and call the async method
|
|
actor = EngineTestActor.remote() # type: ignore[attr-defined]
|
|
ray.get(actor.run.remote())
|
|
|
|
|
|
def test_sanitized_opentelemetry_name():
|
|
"""Test the metric name sanitization logic for Ray."""
|
|
|
|
# Only a-z, A-Z, 0-9, _, test valid characters are preserved
|
|
valid_name = "valid_metric_123_abcDEF"
|
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
|
valid_name) == valid_name
|
|
|
|
# Test dash, dot, are replaced
|
|
name_with_dash_dot = "metric-name.test"
|
|
expected = "metric_name_test"
|
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
|
name_with_dash_dot) == expected
|
|
|
|
# Test colon is replaced with underscore
|
|
name_with_colon = "metric:name"
|
|
expected = "metric_name"
|
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
|
name_with_colon) == expected
|
|
|
|
# Test multiple invalid characters are replaced
|
|
name_with_invalid = "metric:name@with#special%chars"
|
|
expected = "metric_name_with_special_chars"
|
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
|
name_with_invalid) == expected
|
|
|
|
# Test mixed valid and invalid characters
|
|
complex_name = "vllm:engine_stats/time.latency_ms-99p"
|
|
expected = "vllm_engine_stats_time_latency_ms_99p"
|
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name(
|
|
complex_name) == expected
|
|
|
|
# Test empty string
|
|
assert RayPrometheusMetric._get_sanitized_opentelemetry_name("") == ""
|