diff --git a/vllm/config.py b/vllm/config.py
index 43038da373025..37a9d078e9d06 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -14,6 +14,7 @@ from collections import Counter
 from contextlib import contextmanager
 from dataclasses import (MISSING, dataclass, field, fields, is_dataclass,
                          replace)
+from functools import cached_property
 from importlib.util import find_spec
 from pathlib import Path
 from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
@@ -26,6 +27,7 @@ from transformers import PretrainedConfig
 from typing_extensions import deprecated
 
 import vllm.envs as envs
+from vllm import version
 from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
@@ -3285,20 +3287,55 @@ class DecodingConfig:
             self.disable_additional_properties = True
 
 
+DetailedTraceModules = Literal["model", "worker", "all"]
+
+
+@config
 @dataclass
 class ObservabilityConfig:
     """Configuration for observability - metrics and tracing."""
-    show_hidden_metrics: bool = False
+
+    show_hidden_metrics_for_version: Optional[str] = None
+    """Enable deprecated Prometheus metrics that have been hidden since the
+    specified version. For example, if a previously deprecated metric has been
+    hidden since the v0.7.0 release, you use
+    `--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while
+    you migrate to new metrics. The metric is likely to be removed completely
+    in an upcoming release."""
+
+    @cached_property
+    def show_hidden_metrics(self) -> bool:
+        """Check if the hidden metrics should be shown."""
+        if self.show_hidden_metrics_for_version is None:
+            return False
+        return version._prev_minor_version_was(
+            self.show_hidden_metrics_for_version)
 
     otlp_traces_endpoint: Optional[str] = None
+    """Target URL to which OpenTelemetry traces will be sent."""
 
-    # Collecting detailed timing information for each request can be expensive.
+    collect_detailed_traces: Optional[list[DetailedTraceModules]] = None
+    """It makes sense to set this only if `--otlp-traces-endpoint` is set. If
+    set, it will collect detailed traces for the specified modules. This
+    involves use of possibly costly and/or blocking operations and hence might
+    have a performance impact.
 
-    # If set, collects the model forward time for the request.
-    collect_model_forward_time: bool = False
+    Note that collecting detailed timing information for each request can be
+    expensive."""
 
-    # If set, collects the model execute time for the request.
-    collect_model_execute_time: bool = False
+    @cached_property
+    def collect_model_forward_time(self) -> bool:
+        """Whether to collect model forward time for the request."""
+        return (self.collect_detailed_traces is not None
+                and ("model" in self.collect_detailed_traces
+                     or "all" in self.collect_detailed_traces))
+
+    @cached_property
+    def collect_model_execute_time(self) -> bool:
+        """Whether to collect model execute time for the request."""
+        return (self.collect_detailed_traces is not None
+                and ("worker" in self.collect_detailed_traces
+                     or "all" in self.collect_detailed_traces))
 
     def compute_hash(self) -> str:
         """
@@ -3320,12 +3357,40 @@ class ObservabilityConfig:
         return hash_str
 
     def __post_init__(self):
+        # Normalise/validate here so that directly constructed configs get the
+        # same checks as ones built from CLI arguments.
+        if self.collect_detailed_traces is not None:
+            self._parse_collect_detailed_traces()
+
         if not is_otel_available() and self.otlp_traces_endpoint is not None:
             raise ValueError(
                 "OpenTelemetry is not available. Unable to configure "
                 "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
                 f"installed. Original error:\n{otel_import_error_traceback}")
 
+    def _parse_collect_detailed_traces(self):
+        """Split comma-separated entries and validate the module names.
+
+        Raises:
+            ValueError: If an entry is not one of `DetailedTraceModules`.
+        """
+        from typing import get_args
+
+        # Narrow the Optional type for type checkers.
+        assert isinstance(self.collect_detailed_traces, list)
+        valid_modules = get_args(DetailedTraceModules)
+        modules = [
+            module for entry in self.collect_detailed_traces
+            for module in entry.split(",")
+        ]
+        for module in modules:
+            if module not in valid_modules:
+                raise ValueError(
+                    f"Invalid module {module} in collect_detailed_traces. "
+                    f"Valid modules are {list(valid_modules)}")
+        self.collect_detailed_traces = cast(list[DetailedTraceModules],
+                                            modules)
+
 
 class KVTransferConfig(BaseModel):
     """Configuration for distributed KV cache transfer."""
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index c7a580cf10512..d23463dedc63f 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -7,6 +7,7 @@ import json
 import re
 import threading
 from dataclasses import MISSING, dataclass, fields
+from itertools import permutations
 from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
                     TypeVar, Union, cast, get_args, get_origin)
 
@@ -14,14 +15,13 @@ import torch
 from typing_extensions import TypeIs, deprecated
 
 import vllm.envs as envs
-from vllm import version
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
-                         ConfigFormat, ConfigType, DecodingConfig, Device,
-                         DeviceConfig, DistributedExecutorBackend,
-                         GuidedDecodingBackend, GuidedDecodingBackendV1,
-                         HfOverrides, KVEventsConfig, KVTransferConfig,
-                         LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
-                         ModelDType, ModelImpl, MultiModalConfig,
+                         ConfigFormat, ConfigType, DecodingConfig,
+                         DetailedTraceModules, Device, DeviceConfig,
+                         DistributedExecutorBackend, GuidedDecodingBackend,
+                         GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
+                         KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
                          ObservabilityConfig, ParallelConfig, PoolerConfig,
                          PrefixCachingHashAlgo, PromptAdapterConfig,
                          SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
@@ -41,8 +41,6 @@ from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor
 
 logger = init_logger(__name__)
 
-ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
-
 # object is used to allow for special typing forms
 T = TypeVar("T")
 TypeHint = Union[type[Any], object]
@@ -337,9 +335,12 @@ class EngineArgs:
     speculative_config: Optional[Dict[str, Any]] = None
 
     qlora_adapter_name_or_path: Optional[str] = None
-    show_hidden_metrics_for_version: Optional[str] = None
-    otlp_traces_endpoint: Optional[str] = None
-    collect_detailed_traces: Optional[str] = None
+    show_hidden_metrics_for_version: Optional[str] = \
+        ObservabilityConfig.show_hidden_metrics_for_version
+    otlp_traces_endpoint: Optional[str] = \
+        ObservabilityConfig.otlp_traces_endpoint
+    collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
+        ObservabilityConfig.collect_detailed_traces
     disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
     scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
     scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
@@ -677,33 +678,33 @@ class EngineArgs:
             default=None,
             help='Name or path of the QLoRA adapter.')
 
-        parser.add_argument('--show-hidden-metrics-for-version',
-                            type=str,
-                            default=None,
-                            help='Enable deprecated Prometheus metrics that '
-                            'have been hidden since the specified version. '
-                            'For example, if a previously deprecated metric '
-                            'has been hidden since the v0.7.0 release, you '
-                            'use --show-hidden-metrics-for-version=0.7 as a '
-                            'temporary escape hatch while you migrate to new '
-                            'metrics. The metric is likely to be removed '
-                            'completely in an upcoming release.')
-
-        parser.add_argument(
-            '--otlp-traces-endpoint',
-            type=str,
-            default=None,
-            help='Target URL to which OpenTelemetry traces will be sent.')
-        parser.add_argument(
-            '--collect-detailed-traces',
-            type=str,
-            default=None,
-            help="Valid choices are " +
-            ",".join(ALLOWED_DETAILED_TRACE_MODULES) +
-            ". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
-            " set. If set, it will collect detailed traces for the specified "
-            "modules. This involves use of possibly costly and or blocking "
-            "operations and hence might have a performance impact.")
+        # Observability arguments
+        observability_kwargs = get_kwargs(ObservabilityConfig)
+        observability_group = parser.add_argument_group(
+            title="ObservabilityConfig",
+            description=ObservabilityConfig.__doc__,
+        )
+        observability_group.add_argument(
+            "--show-hidden-metrics-for-version",
+            **observability_kwargs["show_hidden_metrics_for_version"])
+        observability_group.add_argument(
+            "--otlp-traces-endpoint",
+            **observability_kwargs["otlp_traces_endpoint"])
+        # TODO: generalise this special case
+        choices = observability_kwargs["collect_detailed_traces"]["choices"]
+        metavar = f"{{{','.join(choices)}}}"
+        observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
+        # Accept comma-separated combinations of every length so that e.g.
+        # `model,worker,all`, which used to be valid, keeps working.
+        detailed_trace_modules = get_args(DetailedTraceModules)
+        observability_kwargs["collect_detailed_traces"]["choices"] += [
+            ",".join(p)
+            for r in range(2, len(detailed_trace_modules) + 1)
+            for p in permutations(detailed_trace_modules, r=r)
+        ]
+        observability_group.add_argument(
+            "--collect-detailed-traces",
+            **observability_kwargs["collect_detailed_traces"])
 
         # Scheduler arguments
         scheduler_kwargs = get_kwargs(SchedulerConfig)
@@ -1094,26 +1095,11 @@ class EngineArgs:
             if self.enable_reasoning else None,
         )
 
-        show_hidden_metrics = False
-        if self.show_hidden_metrics_for_version is not None:
-            show_hidden_metrics = version._prev_minor_version_was(
-                self.show_hidden_metrics_for_version)
-
-        detailed_trace_modules = []
-        if self.collect_detailed_traces is not None:
-            detailed_trace_modules = self.collect_detailed_traces.split(",")
-            for m in detailed_trace_modules:
-                if m not in ALLOWED_DETAILED_TRACE_MODULES:
-                    raise ValueError(
-                        f"Invalid module {m} in collect_detailed_traces. "
-                        f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
         observability_config = ObservabilityConfig(
-            show_hidden_metrics=show_hidden_metrics,
+            show_hidden_metrics_for_version=self.
+            show_hidden_metrics_for_version,
             otlp_traces_endpoint=self.otlp_traces_endpoint,
-            collect_model_forward_time="model" in detailed_trace_modules
-            or "all" in detailed_trace_modules,
-            collect_model_execute_time="worker" in detailed_trace_modules
-            or "all" in detailed_trace_modules,
+            collect_detailed_traces=self.collect_detailed_traces,
         )
 
         config = VllmConfig(