diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py index e3dc40fd0ec7..47e01fc93b48 100755 --- a/tools/pre_commit/mypy.py +++ b/tools/pre_commit/mypy.py @@ -27,19 +27,24 @@ FILES = [ "vllm/*.py", "vllm/assets", "vllm/distributed", + "vllm/engine", "vllm/entrypoints", "vllm/executor", "vllm/inputs", "vllm/logging_utils", "vllm/multimodal", "vllm/platforms", + "vllm/plugins", "vllm/transformers_utils", "vllm/triton_utils", "vllm/usage", "vllm/utils", + "vllm/worker", "vllm/v1/core", "vllm/v1/engine", + "vllm/v1/metrics", "vllm/v1/pool", + "vllm/v1/sample", "vllm/v1/worker", ] @@ -50,24 +55,19 @@ SEPARATE_GROUPS = [ # v0 related "vllm/attention", "vllm/compilation", - "vllm/engine", - "vllm/inputs", "vllm/lora", "vllm/model_executor", - "vllm/plugins", - "vllm/worker", # v1 related "vllm/v1/attention", "vllm/v1/executor", "vllm/v1/kv_offload", - "vllm/v1/metrics", - "vllm/v1/sample", "vllm/v1/spec_decode", "vllm/v1/structured_output", ] # TODO(woosuk): Include the code from Megatron and HuggingFace. EXCLUDE = [ + "vllm/engine/arg_utils.py", "vllm/model_executor/parallel_utils", "vllm/model_executor/models", "vllm/model_executor/layers/fla/ops", diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/base.py b/vllm/distributed/kv_transfer/kv_connector/v1/base.py index cac45425bb7a..d37ec25675b7 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/base.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/base.py @@ -565,7 +565,7 @@ class KVConnectorBase_V1(ABC): vllm_config: "VllmConfig", metric_types: dict[type["PromMetric"], type["PromMetricT"]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ) -> Optional["KVConnectorPromMetrics"]: """ Create a KVConnectorPromMetrics subclass which should register diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py index a4bddf5e0316..78256a6552c2 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py @@ -806,7 +806,7 @@ class LMCacheMPConnector(KVConnectorBase_V1): vllm_config: "VllmConfig", metric_types: dict[type["PromMetric"], type["PromMetricT"]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ) -> Optional["KVConnectorPromMetrics"]: """ Create a KVConnectorPromMetrics subclass which should register diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py b/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py index d6ea4f1ab4cf..eb8342eb7129 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py @@ -52,13 +52,13 @@ class KVConnectorStats: class KVConnectorLogging: - def __init__(self, kv_tranfer_config: KVTransferConfig): + def __init__(self, kv_transfer_config: KVTransferConfig | None): # This should be called on frontend process. assert not has_kv_transfer_group() # Instantiate the connector's stats class. - if kv_tranfer_config and kv_tranfer_config.kv_connector: + if kv_transfer_config and kv_transfer_config.kv_connector: self.connector_cls = KVConnectorFactory.get_connector_class( - kv_tranfer_config + kv_transfer_config ) self.reset() @@ -120,7 +120,7 @@ class KVConnectorPromMetrics: vllm_config: VllmConfig, metric_types: dict[type[PromMetric], type[PromMetricT]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ): self._kv_transfer_config = vllm_config.kv_transfer_config self._gauge_cls = metric_types[Gauge] @@ -129,7 +129,7 @@ class KVConnectorPromMetrics: self._labelnames = labelnames self._per_engine_labelvalues = per_engine_labelvalues - def make_per_engine(self, metric: PromMetric) -> PromMetric: + def make_per_engine(self, metric: PromMetric) -> dict[int, PromMetric]: """ Create a per-engine child of a prometheus_client.Metric with the appropriate labels set. The parent metric must be created @@ -165,7 +165,7 @@ class KVConnectorPrometheus: self, vllm_config: VllmConfig, labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ): self.prom_metrics: KVConnectorPromMetrics | None = None kv_transfer_config = vllm_config.kv_transfer_config diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py index f47e8ca7e6c5..51d5df6c6ba1 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py @@ -85,7 +85,7 @@ class MultiKVConnectorPromMetrics(KVConnectorPromMetrics): vllm_config: "VllmConfig", metric_types: dict[type[PromMetric], type[PromMetricT]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], prom_metrics: dict[str, KVConnectorPromMetrics], ): super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues) @@ -434,7 +434,7 @@ class MultiConnector(KVConnectorBase_V1): vllm_config: "VllmConfig", metric_types: dict[type["PromMetric"], type["PromMetricT"]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ) -> KVConnectorPromMetrics: prom_metrics: dict[str, KVConnectorPromMetrics] = {} for connector_cls, temp_config in cls._get_connector_classes_and_configs( diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 24c8d32dafed..41e32bb73d40 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -288,7 +288,7 @@ class NixlConnector(KVConnectorBase_V1): vllm_config: VllmConfig, metric_types: dict[type[PromMetric], type[PromMetricT]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ) -> KVConnectorPromMetrics: return NixlPromMetrics( vllm_config, metric_types, labelnames, per_engine_labelvalues @@ -2345,9 +2345,9 @@ class NixlKVConnectorStats(KVConnectorStats): return { "Num successful transfers": n, "Avg xfer time (ms)": round(xfer_time.mean() * 1e3, 3), - "P90 xfer time (ms)": round(np.percentile(xfer_time, 90) * 1e3, 3), + "P90 xfer time (ms)": round(np.percentile(xfer_time, 90).item() * 1e3, 3), "Avg post time (ms)": round(post_time.mean() * 1e3, 3), - "P90 post time (ms)": round(np.percentile(post_time, 90) * 1e3, 3), + "P90 post time (ms)": round(np.percentile(post_time, 90).item() * 1e3, 3), "Avg MB per transfer": round(avg_mb, 3), "Throughput (MB/s)": round(throughput_mb_s, 3), "Avg number of descriptors": round(descs.mean(), 1), @@ -2364,7 +2364,7 @@ class NixlPromMetrics(KVConnectorPromMetrics): vllm_config: VllmConfig, metric_types: dict[type[PromMetric], type[PromMetricT]], labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ): super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ad5a34c56161..1126e9ce12db 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1954,7 +1954,9 @@ class EngineArgs: self.enable_prefix_caching = False def _set_default_max_num_seqs_and_batched_tokens_args( - self, usage_context: UsageContext, model_config: ModelConfig + self, + usage_context: UsageContext | None, + model_config: ModelConfig, ): world_size = self.pipeline_parallel_size * self.tensor_parallel_size ( diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 66680f410cb3..224542c824ef 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -614,12 +614,12 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig: def maybe_override_with_speculators( model: str, - tokenizer: str, + tokenizer: str | None, trust_remote_code: bool, revision: str | None = None, vllm_speculative_config: dict[str, Any] | None = None, **kwargs, -) -> tuple[str, str, dict[str, Any] | None]: +) -> tuple[str, str | None, dict[str, Any] | None]: """ Resolve model configuration when speculators are detected. diff --git a/vllm/triton_utils/__init__.py b/vllm/triton_utils/__init__.py index a475d0fa406b..ce459ca91d8e 100644 --- a/vllm/triton_utils/__init__.py +++ b/vllm/triton_utils/__init__.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import TYPE_CHECKING from vllm.triton_utils.importing import ( HAS_TRITON, @@ -7,7 +8,7 @@ from vllm.triton_utils.importing import ( TritonPlaceholder, ) -if HAS_TRITON: +if TYPE_CHECKING or HAS_TRITON: import triton import triton.language as tl import triton.language.extra.libdevice as tldevice diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index bd18a152ffc0..429cee3b5af1 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -104,8 +104,8 @@ class LoggingStatLogger(StatLoggerBase): self.mm_caching_metrics = CachingMetrics() self.spec_decoding_logging = SpecDecodingLogging() - kv_tranfer_config = self.vllm_config.kv_transfer_config - self.kv_connector_logging = KVConnectorLogging(kv_tranfer_config) + kv_transfer_config = self.vllm_config.kv_transfer_config + self.kv_connector_logging = KVConnectorLogging(kv_transfer_config) self.last_prompt_throughput: float = 0.0 self.last_generation_throughput: float = 0.0 self.engine_is_idle = False @@ -380,7 +380,7 @@ class PrometheusStatLogger(AggregateStatLoggerBase): model_name = vllm_config.model_config.served_model_name max_model_len = vllm_config.model_config.max_model_len - per_engine_labelvalues: dict[int, list[str]] = { + per_engine_labelvalues: dict[int, list[object]] = { idx: [model_name, str(idx)] for idx in engine_indexes } @@ -1052,7 +1052,7 @@ PromMetric: TypeAlias = Gauge | Counter | Histogram def make_per_engine( - metric: PromMetric, engine_idxs: list[int], model_name: str + metric: PromMetric, engine_idxs: list[int], model_name: object ) -> dict[int, PromMetric]: return {idx: metric.labels(model_name, str(idx)) for idx in engine_idxs} diff --git a/vllm/v1/sample/logits_processor/__init__.py b/vllm/v1/sample/logits_processor/__init__.py index 8b174af4c779..f7b70645fd18 100644 --- a/vllm/v1/sample/logits_processor/__init__.py +++ b/vllm/v1/sample/logits_processor/__init__.py @@ -313,7 +313,7 @@ class AdapterLogitsProcessor(LogitsProcessor): if (len(inspect.signature(req_lp).parameters) == 3) else [output_ids] ) - return partial(req_lp, *args) + return partial(req_lp, *args) # type: ignore[misc] return None def update_state(self, batch_update: BatchUpdate | None): diff --git a/vllm/v1/spec_decode/metrics.py b/vllm/v1/spec_decode/metrics.py index 79d856a143ba..6c16bc686d16 100644 --- a/vllm/v1/spec_decode/metrics.py +++ b/vllm/v1/spec_decode/metrics.py @@ -144,7 +144,7 @@ class SpecDecodingProm: self, speculative_config: SpeculativeConfig | None, labelnames: list[str], - per_engine_labelvalues: dict[int, list[str]], + per_engine_labelvalues: dict[int, list[object]], ): self.spec_decoding_enabled = speculative_config is not None if not self.spec_decoding_enabled: @@ -215,7 +215,8 @@ class SpecDecodingProm: def make_per_engine( - counter: prometheus_client.Counter, per_engine_labelvalues: dict[int, list[str]] + counter: prometheus_client.Counter, + per_engine_labelvalues: dict[int, list[object]], ): """Create a counter for each label value.""" return {