Improve configs - ObservabilityConfig (#17453)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor 2025-05-01 11:52:05 +01:00 committed by GitHub
parent 015069b017
commit a257d9bccc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 93 additions and 63 deletions

View File

@ -14,6 +14,7 @@ from collections import Counter
from contextlib import contextmanager
from dataclasses import (MISSING, dataclass, field, fields, is_dataclass,
replace)
from functools import cached_property
from importlib.util import find_spec
from pathlib import Path
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
@ -26,6 +27,7 @@ from transformers import PretrainedConfig
from typing_extensions import deprecated
import vllm.envs as envs
from vllm import version
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
@ -3285,20 +3287,55 @@ class DecodingConfig:
self.disable_additional_properties = True
DetailedTraceModules = Literal["model", "worker", "all"]
@config
@dataclass
class ObservabilityConfig:
"""Configuration for observability - metrics and tracing."""
show_hidden_metrics: bool = False
show_hidden_metrics_for_version: Optional[str] = None
"""Enable deprecated Prometheus metrics that have been hidden since the
specified version. For example, if a previously deprecated metric has been
hidden since the v0.7.0 release, you use
`--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while
you migrate to new metrics. The metric is likely to be removed completely
in an upcoming release."""
@cached_property
def show_hidden_metrics(self) -> bool:
    """Whether deprecated (hidden) metrics should be shown.

    Evaluates to `False` when `show_hidden_metrics_for_version` is unset.
    Otherwise the decision is delegated to
    `version._prev_minor_version_was` for the configured version string.
    """
    requested_version = self.show_hidden_metrics_for_version
    # Short-circuits to False without consulting `version` when unset.
    return (requested_version is not None
            and version._prev_minor_version_was(requested_version))
otlp_traces_endpoint: Optional[str] = None
"""Target URL to which OpenTelemetry traces will be sent."""
# Collecting detailed timing information for each request can be expensive.
collect_detailed_traces: Optional[list[DetailedTraceModules]] = None
"""It makes sense to set this only if `--otlp-traces-endpoint` is set. If
set, it will collect detailed traces for the specified modules. This
involves use of possibly costly and or blocking operations and hence might
have a performance impact.
# If set, collects the model forward time for the request.
collect_model_forward_time: bool = False
Note that collecting detailed timing information for each request can be
expensive."""
# If set, collects the model execute time for the request.
collect_model_execute_time: bool = False
@cached_property
def collect_model_forward_time(self) -> bool:
    """Whether to collect model forward time for the request.

    True when `collect_detailed_traces` is set and contains either
    `"model"` or `"all"`.
    """
    modules = self.collect_detailed_traces
    if modules is None:
        return False
    return any(name in modules for name in ("model", "all"))
@cached_property
def collect_model_execute_time(self) -> bool:
    """Whether to collect model execute time for the request.

    True when `collect_detailed_traces` is set and contains either
    `"worker"` or `"all"`.
    """
    modules = self.collect_detailed_traces
    if modules is None:
        return False
    return any(name in modules for name in ("worker", "all"))
def compute_hash(self) -> str:
"""
@ -3320,12 +3357,23 @@ class ObservabilityConfig:
return hash_str
def __post_init__(self):
    """Normalise `collect_detailed_traces` and validate the OTLP endpoint.

    Any entry of `collect_detailed_traces` that contains a comma is expanded
    into its individual module names, regardless of how many entries the
    list holds.

    Raises:
        ValueError: If `otlp_traces_endpoint` is set while
            `is_otel_available()` reports that OpenTelemetry is missing.
    """
    # Previously only a single-element list was split, so a value such as
    # ["model,worker", "all"] was left as-is and the membership checks for
    # "model"/"worker" silently failed.
    if (self.collect_detailed_traces is not None
            and any("," in entry for entry in self.collect_detailed_traces)):
        self._parse_collect_detailed_traces()

    if not is_otel_available() and self.otlp_traces_endpoint is not None:
        raise ValueError(
            "OpenTelemetry is not available. Unable to configure "
            "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
            f"installed. Original error:\n{otel_import_error_traceback}")

def _parse_collect_detailed_traces(self):
    """Flatten comma-separated entries of `collect_detailed_traces` in place.

    Each entry is split on `","` and the pieces are concatenated in order;
    entries without a comma are kept unchanged.
    """
    assert isinstance(self.collect_detailed_traces, list)
    # `cast` only informs the type checker; the split pieces are not
    # validated against `DetailedTraceModules` here.
    self.collect_detailed_traces = cast(
        list[DetailedTraceModules],
        [
            module for entry in self.collect_detailed_traces
            for module in entry.split(",")
        ])
class KVTransferConfig(BaseModel):
"""Configuration for distributed KV cache transfer."""

View File

@ -7,6 +7,7 @@ import json
import re
import threading
from dataclasses import MISSING, dataclass, fields
from itertools import permutations
from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
TypeVar, Union, cast, get_args, get_origin)
@ -14,14 +15,13 @@ import torch
from typing_extensions import TypeIs, deprecated
import vllm.envs as envs
from vllm import version
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
ConfigFormat, ConfigType, DecodingConfig, Device,
DeviceConfig, DistributedExecutorBackend,
GuidedDecodingBackend, GuidedDecodingBackendV1,
HfOverrides, KVEventsConfig, KVTransferConfig,
LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
ModelDType, ModelImpl, MultiModalConfig,
ConfigFormat, ConfigType, DecodingConfig,
DetailedTraceModules, Device, DeviceConfig,
DistributedExecutorBackend, GuidedDecodingBackend,
GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
ObservabilityConfig, ParallelConfig, PoolerConfig,
PrefixCachingHashAlgo, PromptAdapterConfig,
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
@ -41,8 +41,6 @@ from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor
logger = init_logger(__name__)
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
# object is used to allow for special typing forms
T = TypeVar("T")
TypeHint = Union[type[Any], object]
@ -337,9 +335,12 @@ class EngineArgs:
speculative_config: Optional[Dict[str, Any]] = None
qlora_adapter_name_or_path: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = None
otlp_traces_endpoint: Optional[str] = None
collect_detailed_traces: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = \
ObservabilityConfig.show_hidden_metrics_for_version
otlp_traces_endpoint: Optional[str] = \
ObservabilityConfig.otlp_traces_endpoint
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
ObservabilityConfig.collect_detailed_traces
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
@ -677,33 +678,29 @@ class EngineArgs:
default=None,
help='Name or path of the QLoRA adapter.')
parser.add_argument('--show-hidden-metrics-for-version',
type=str,
default=None,
help='Enable deprecated Prometheus metrics that '
'have been hidden since the specified version. '
'For example, if a previously deprecated metric '
'has been hidden since the v0.7.0 release, you '
'use --show-hidden-metrics-for-version=0.7 as a '
'temporary escape hatch while you migrate to new '
'metrics. The metric is likely to be removed '
'completely in an upcoming release.')
parser.add_argument(
'--otlp-traces-endpoint',
type=str,
default=None,
help='Target URL to which OpenTelemetry traces will be sent.')
parser.add_argument(
'--collect-detailed-traces',
type=str,
default=None,
help="Valid choices are " +
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact.")
# Observability arguments
observability_kwargs = get_kwargs(ObservabilityConfig)
observability_group = parser.add_argument_group(
title="ObservabilityConfig",
description=ObservabilityConfig.__doc__,
)
observability_group.add_argument(
"--show-hidden-metrics-for-version",
**observability_kwargs["show_hidden_metrics_for_version"])
observability_group.add_argument(
"--otlp-traces-endpoint",
**observability_kwargs["otlp_traces_endpoint"])
# TODO: generalise this special case
choices = observability_kwargs["collect_detailed_traces"]["choices"]
metavar = f"{{{','.join(choices)}}}"
observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
observability_kwargs["collect_detailed_traces"]["choices"] += [
",".join(p)
for p in permutations(get_args(DetailedTraceModules), r=2)
]
observability_group.add_argument(
"--collect-detailed-traces",
**observability_kwargs["collect_detailed_traces"])
# Scheduler arguments
scheduler_kwargs = get_kwargs(SchedulerConfig)
@ -1094,26 +1091,11 @@ class EngineArgs:
if self.enable_reasoning else None,
)
show_hidden_metrics = False
if self.show_hidden_metrics_for_version is not None:
show_hidden_metrics = version._prev_minor_version_was(
self.show_hidden_metrics_for_version)
detailed_trace_modules = []
if self.collect_detailed_traces is not None:
detailed_trace_modules = self.collect_detailed_traces.split(",")
for m in detailed_trace_modules:
if m not in ALLOWED_DETAILED_TRACE_MODULES:
raise ValueError(
f"Invalid module {m} in collect_detailed_traces. "
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
observability_config = ObservabilityConfig(
show_hidden_metrics=show_hidden_metrics,
show_hidden_metrics_for_version=self.
show_hidden_metrics_for_version,
otlp_traces_endpoint=self.otlp_traces_endpoint,
collect_model_forward_time="model" in detailed_trace_modules
or "all" in detailed_trace_modules,
collect_model_execute_time="worker" in detailed_trace_modules
or "all" in detailed_trace_modules,
collect_detailed_traces=self.collect_detailed_traces,
)
config = VllmConfig(