mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-30 07:58:43 +08:00
Improve configs - ObservabilityConfig (#17453)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
015069b017
commit
a257d9bccc
@ -14,6 +14,7 @@ from collections import Counter
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import (MISSING, dataclass, field, fields, is_dataclass,
|
||||
replace)
|
||||
from functools import cached_property
|
||||
from importlib.util import find_spec
|
||||
from pathlib import Path
|
||||
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
|
||||
@ -26,6 +27,7 @@ from transformers import PretrainedConfig
|
||||
from typing_extensions import deprecated
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm import version
|
||||
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
|
||||
@ -3285,20 +3287,55 @@ class DecodingConfig:
|
||||
self.disable_additional_properties = True
|
||||
|
||||
|
||||
# Names of the modules for which detailed traces can be requested; used as the
# element type of `ObservabilityConfig.collect_detailed_traces`.
DetailedTraceModules = Literal["model", "worker", "all"]
|
||||
|
||||
|
||||
@config
|
||||
@dataclass
|
||||
class ObservabilityConfig:
|
||||
"""Configuration for observability - metrics and tracing."""
|
||||
show_hidden_metrics: bool = False
|
||||
|
||||
show_hidden_metrics_for_version: Optional[str] = None
|
||||
"""Enable deprecated Prometheus metrics that have been hidden since the
|
||||
specified version. For example, if a previously deprecated metric has been
|
||||
hidden since the v0.7.0 release, you use
|
||||
`--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while
|
||||
you migrate to new metrics. The metric is likely to be removed completely
|
||||
in an upcoming release."""
|
||||
|
||||
@cached_property
def show_hidden_metrics(self) -> bool:
    """Report whether hidden (deprecated) metrics should be shown.

    Returns:
        False when `show_hidden_metrics_for_version` is unset; otherwise the
        result of `version._prev_minor_version_was` for the configured
        version string.
    """
    requested_version = self.show_hidden_metrics_for_version
    if requested_version is not None:
        return version._prev_minor_version_was(requested_version)
    return False
|
||||
|
||||
otlp_traces_endpoint: Optional[str] = None
|
||||
"""Target URL to which OpenTelemetry traces will be sent."""
|
||||
|
||||
# Collecting detailed timing information for each request can be expensive.
|
||||
collect_detailed_traces: Optional[list[DetailedTraceModules]] = None
|
||||
"""It makes sense to set this only if `--otlp-traces-endpoint` is set. If
|
||||
set, it will collect detailed traces for the specified modules. This
|
||||
involves use of possibly costly and or blocking operations and hence might
|
||||
have a performance impact.
|
||||
|
||||
# If set, collects the model forward time for the request.
|
||||
collect_model_forward_time: bool = False
|
||||
Note that collecting detailed timing information for each request can be
|
||||
expensive."""
|
||||
|
||||
# If set, collects the model execute time for the request.
|
||||
collect_model_execute_time: bool = False
|
||||
@cached_property
def collect_model_forward_time(self) -> bool:
    """Report whether model forward time should be collected per request.

    Returns:
        True when `collect_detailed_traces` is set and contains "model" or
        "all"; False otherwise (including when it is None).
    """
    traced_modules = self.collect_detailed_traces
    if traced_modules is None:
        return False
    return "model" in traced_modules or "all" in traced_modules
|
||||
|
||||
@cached_property
def collect_model_execute_time(self) -> bool:
    """Report whether model execute time should be collected per request.

    Returns:
        True when `collect_detailed_traces` is set and contains "worker" or
        "all"; False otherwise (including when it is None).
    """
    traced_modules = self.collect_detailed_traces
    if traced_modules is None:
        return False
    return "worker" in traced_modules or "all" in traced_modules
|
||||
|
||||
def compute_hash(self) -> str:
|
||||
"""
|
||||
@ -3320,12 +3357,23 @@ class ObservabilityConfig:
|
||||
return hash_str
|
||||
|
||||
def __post_init__(self):
    """Normalise `collect_detailed_traces` and validate the OTLP endpoint.

    A single comma-separated entry in `collect_detailed_traces` is split
    into separate entries via `_parse_collect_detailed_traces`.

    Raises:
        ValueError: If `otlp_traces_endpoint` is set while
            `is_otel_available()` reports that OpenTelemetry is missing.
    """
    traced_modules = self.collect_detailed_traces
    is_single_csv_entry = (traced_modules is not None
                           and len(traced_modules) == 1
                           and "," in traced_modules[0])
    if is_single_csv_entry:
        self._parse_collect_detailed_traces()

    # Availability is probed first, matching the original evaluation order.
    if is_otel_available() or self.otlp_traces_endpoint is None:
        return

    raise ValueError(
        "OpenTelemetry is not available. Unable to configure "
        "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
        f"installed. Original error:\n{otel_import_error_traceback}")
|
||||
|
||||
def _parse_collect_detailed_traces(self):
    """Split the first `collect_detailed_traces` entry on commas.

    Replaces `collect_detailed_traces` with the list of comma-separated
    pieces of its first element.
    """
    raw_entries = self.collect_detailed_traces
    assert isinstance(raw_entries, list)
    module_names = raw_entries[0].split(",")
    self.collect_detailed_traces = cast(list[DetailedTraceModules],
                                        module_names)
|
||||
|
||||
|
||||
class KVTransferConfig(BaseModel):
|
||||
"""Configuration for distributed KV cache transfer."""
|
||||
|
||||
@ -7,6 +7,7 @@ import json
|
||||
import re
|
||||
import threading
|
||||
from dataclasses import MISSING, dataclass, fields
|
||||
from itertools import permutations
|
||||
from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
|
||||
TypeVar, Union, cast, get_args, get_origin)
|
||||
|
||||
@ -14,14 +15,13 @@ import torch
|
||||
from typing_extensions import TypeIs, deprecated
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm import version
|
||||
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
||||
ConfigFormat, ConfigType, DecodingConfig, Device,
|
||||
DeviceConfig, DistributedExecutorBackend,
|
||||
GuidedDecodingBackend, GuidedDecodingBackendV1,
|
||||
HfOverrides, KVEventsConfig, KVTransferConfig,
|
||||
LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
|
||||
ModelDType, ModelImpl, MultiModalConfig,
|
||||
ConfigFormat, ConfigType, DecodingConfig,
|
||||
DetailedTraceModules, Device, DeviceConfig,
|
||||
DistributedExecutorBackend, GuidedDecodingBackend,
|
||||
GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
|
||||
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
|
||||
ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
|
||||
ObservabilityConfig, ParallelConfig, PoolerConfig,
|
||||
PrefixCachingHashAlgo, PromptAdapterConfig,
|
||||
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
|
||||
@ -41,8 +41,6 @@ from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
|
||||
|
||||
# object is used to allow for special typing forms
|
||||
T = TypeVar("T")
|
||||
TypeHint = Union[type[Any], object]
|
||||
@ -337,9 +335,12 @@ class EngineArgs:
|
||||
speculative_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
qlora_adapter_name_or_path: Optional[str] = None
|
||||
show_hidden_metrics_for_version: Optional[str] = None
|
||||
otlp_traces_endpoint: Optional[str] = None
|
||||
collect_detailed_traces: Optional[str] = None
|
||||
show_hidden_metrics_for_version: Optional[str] = \
|
||||
ObservabilityConfig.show_hidden_metrics_for_version
|
||||
otlp_traces_endpoint: Optional[str] = \
|
||||
ObservabilityConfig.otlp_traces_endpoint
|
||||
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
|
||||
ObservabilityConfig.collect_detailed_traces
|
||||
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
|
||||
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
|
||||
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
|
||||
@ -677,33 +678,29 @@ class EngineArgs:
|
||||
default=None,
|
||||
help='Name or path of the QLoRA adapter.')
|
||||
|
||||
parser.add_argument('--show-hidden-metrics-for-version',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Enable deprecated Prometheus metrics that '
|
||||
'have been hidden since the specified version. '
|
||||
'For example, if a previously deprecated metric '
|
||||
'has been hidden since the v0.7.0 release, you '
|
||||
'use --show-hidden-metrics-for-version=0.7 as a '
|
||||
'temporary escape hatch while you migrate to new '
|
||||
'metrics. The metric is likely to be removed '
|
||||
'completely in an upcoming release.')
|
||||
|
||||
parser.add_argument(
|
||||
'--otlp-traces-endpoint',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Target URL to which OpenTelemetry traces will be sent.')
|
||||
parser.add_argument(
|
||||
'--collect-detailed-traces',
|
||||
type=str,
|
||||
default=None,
|
||||
help="Valid choices are " +
|
||||
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
|
||||
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
|
||||
" set. If set, it will collect detailed traces for the specified "
|
||||
"modules. This involves use of possibly costly and or blocking "
|
||||
"operations and hence might have a performance impact.")
|
||||
# Observability arguments
|
||||
observability_kwargs = get_kwargs(ObservabilityConfig)
|
||||
observability_group = parser.add_argument_group(
|
||||
title="ObservabilityConfig",
|
||||
description=ObservabilityConfig.__doc__,
|
||||
)
|
||||
observability_group.add_argument(
|
||||
"--show-hidden-metrics-for-version",
|
||||
**observability_kwargs["show_hidden_metrics_for_version"])
|
||||
observability_group.add_argument(
|
||||
"--otlp-traces-endpoint",
|
||||
**observability_kwargs["otlp_traces_endpoint"])
|
||||
# TODO: generalise this special case
|
||||
choices = observability_kwargs["collect_detailed_traces"]["choices"]
|
||||
metavar = f"{{{','.join(choices)}}}"
|
||||
observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
|
||||
observability_kwargs["collect_detailed_traces"]["choices"] += [
|
||||
",".join(p)
|
||||
for p in permutations(get_args(DetailedTraceModules), r=2)
|
||||
]
|
||||
observability_group.add_argument(
|
||||
"--collect-detailed-traces",
|
||||
**observability_kwargs["collect_detailed_traces"])
|
||||
|
||||
# Scheduler arguments
|
||||
scheduler_kwargs = get_kwargs(SchedulerConfig)
|
||||
@ -1094,26 +1091,11 @@ class EngineArgs:
|
||||
if self.enable_reasoning else None,
|
||||
)
|
||||
|
||||
show_hidden_metrics = False
|
||||
if self.show_hidden_metrics_for_version is not None:
|
||||
show_hidden_metrics = version._prev_minor_version_was(
|
||||
self.show_hidden_metrics_for_version)
|
||||
|
||||
detailed_trace_modules = []
|
||||
if self.collect_detailed_traces is not None:
|
||||
detailed_trace_modules = self.collect_detailed_traces.split(",")
|
||||
for m in detailed_trace_modules:
|
||||
if m not in ALLOWED_DETAILED_TRACE_MODULES:
|
||||
raise ValueError(
|
||||
f"Invalid module {m} in collect_detailed_traces. "
|
||||
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
|
||||
observability_config = ObservabilityConfig(
|
||||
show_hidden_metrics=show_hidden_metrics,
|
||||
show_hidden_metrics_for_version=self.
|
||||
show_hidden_metrics_for_version,
|
||||
otlp_traces_endpoint=self.otlp_traces_endpoint,
|
||||
collect_model_forward_time="model" in detailed_trace_modules
|
||||
or "all" in detailed_trace_modules,
|
||||
collect_model_execute_time="worker" in detailed_trace_modules
|
||||
or "all" in detailed_trace_modules,
|
||||
collect_detailed_traces=self.collect_detailed_traces,
|
||||
)
|
||||
|
||||
config = VllmConfig(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user