From 603a1bf9bcd35f0a329f2cbe45123e6e26bbe9a8 Mon Sep 17 00:00:00 2001 From: Wei-Yu Lin Date: Wed, 17 Dec 2025 01:26:42 +0000 Subject: [PATCH] Run pre-commit to format files Signed-off-by: Wei-Yu Lin --- .../model_loader/default_loader.py | 2 +- vllm/platforms/tpu.py | 30 ++----------------- vllm/v1/worker/tpu_worker.py | 27 +---------------- 3 files changed, 5 insertions(+), 54 deletions(-) diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py index 4d85f8e3b478c..deee2324960dd 100644 --- a/vllm/model_executor/model_loader/default_loader.py +++ b/vllm/model_executor/model_loader/default_loader.py @@ -242,7 +242,7 @@ class DefaultModelLoader(BaseModelLoader): ) if current_platform.is_tpu(): - from vllm.platforms.tpu import USE_TPU_INFERENCE + pass if self.counter_before_loading_weights == 0.0: self.counter_before_loading_weights = time.perf_counter() diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index f7a11d2c557c4..455aceb3269eb 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -1,34 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import contextlib -from typing import TYPE_CHECKING, Optional, cast - -import torch -from tpu_info import device - -from vllm.attention.backends.registry import AttentionBackendEnum -from vllm.inputs import ProcessorInputs, PromptType from vllm.logger import init_logger -from .interface import Platform, PlatformEnum - -if TYPE_CHECKING: - from typing import TypeAlias - - from vllm.attention.selector import AttentionSelectorConfig - from vllm.config import VllmConfig - from vllm.config.cache import BlockSize - from vllm.pooling_params import PoolingParams - from vllm.sampling_params import SamplingParams - - ParamsType: TypeAlias = SamplingParams | PoolingParams -else: - BlockSize = None - VllmConfig = None - PoolingParams = None - ParamsType = None - logger = init_logger(__name__) @@ -40,5 +14,7 @@ try: TpuPlatform = TpuInferencePlatform # type: ignore USE_TPU_INFERENCE = True except ImportError: - logger.error("tpu_inference not found, please install tpu_inference to run vllm on TPU") + logger.error( + "tpu_inference not found, please install tpu_inference to run vllm on TPU" + ) pass diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py index b50def0e17de4..085b119e12600 100644 --- a/vllm/v1/worker/tpu_worker.py +++ b/vllm/v1/worker/tpu_worker.py @@ -2,35 +2,10 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """A TPU worker class.""" -import os -from collections.abc import Callable -from typing import Any, TypeVar +from typing import TypeVar -import torch -import torch.nn as nn - -import vllm.envs as envs -from vllm.config import VllmConfig, set_current_vllm_config -from vllm.distributed import ( - ensure_model_parallel_initialized, - init_distributed_environment, -) -from vllm.distributed.kv_transfer import ( - ensure_kv_transfer_initialized, -) from vllm.logger import init_logger -from vllm.lora.request import LoRARequest -from vllm.model_executor import set_random_seed -from vllm.platforms import current_platform from vllm.platforms.tpu import USE_TPU_INFERENCE -from vllm.tasks import SupportedTask -from vllm.utils.math_utils import cdiv -from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE -from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput -from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, KVCacheSpec -from vllm.v1.outputs import ModelRunnerOutput -from vllm.v1.utils import report_usage_stats -from vllm.v1.worker.utils import bind_kv_cache logger = init_logger(__name__)