mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-23 10:44:25 +08:00
Run pre-commit to format files
Signed-off-by: Wei-Yu Lin <weiyulin@google.com>
This commit is contained in:
parent
6125a172f9
commit
603a1bf9bc
@ -242,7 +242,7 @@ class DefaultModelLoader(BaseModelLoader):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if current_platform.is_tpu():
|
if current_platform.is_tpu():
|
||||||
from vllm.platforms.tpu import USE_TPU_INFERENCE
|
pass
|
||||||
|
|
||||||
if self.counter_before_loading_weights == 0.0:
|
if self.counter_before_loading_weights == 0.0:
|
||||||
self.counter_before_loading_weights = time.perf_counter()
|
self.counter_before_loading_weights = time.perf_counter()
|
||||||
|
|||||||
@ -1,34 +1,8 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
import contextlib
|
|
||||||
from typing import TYPE_CHECKING, Optional, cast
|
|
||||||
|
|
||||||
import torch
|
|
||||||
from tpu_info import device
|
|
||||||
|
|
||||||
from vllm.attention.backends.registry import AttentionBackendEnum
|
|
||||||
from vllm.inputs import ProcessorInputs, PromptType
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
from .interface import Platform, PlatformEnum
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from typing import TypeAlias
|
|
||||||
|
|
||||||
from vllm.attention.selector import AttentionSelectorConfig
|
|
||||||
from vllm.config import VllmConfig
|
|
||||||
from vllm.config.cache import BlockSize
|
|
||||||
from vllm.pooling_params import PoolingParams
|
|
||||||
from vllm.sampling_params import SamplingParams
|
|
||||||
|
|
||||||
ParamsType: TypeAlias = SamplingParams | PoolingParams
|
|
||||||
else:
|
|
||||||
BlockSize = None
|
|
||||||
VllmConfig = None
|
|
||||||
PoolingParams = None
|
|
||||||
ParamsType = None
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -40,5 +14,7 @@ try:
|
|||||||
TpuPlatform = TpuInferencePlatform # type: ignore
|
TpuPlatform = TpuInferencePlatform # type: ignore
|
||||||
USE_TPU_INFERENCE = True
|
USE_TPU_INFERENCE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.error("tpu_inference not found, please install tpu_inference to run vllm on TPU")
|
logger.error(
|
||||||
|
"tpu_inference not found, please install tpu_inference to run vllm on TPU"
|
||||||
|
)
|
||||||
pass
|
pass
|
||||||
|
|||||||
@ -2,35 +2,10 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
"""A TPU worker class."""
|
"""A TPU worker class."""
|
||||||
|
|
||||||
import os
|
from typing import TypeVar
|
||||||
from collections.abc import Callable
|
|
||||||
from typing import Any, TypeVar
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
|
|
||||||
import vllm.envs as envs
|
|
||||||
from vllm.config import VllmConfig, set_current_vllm_config
|
|
||||||
from vllm.distributed import (
|
|
||||||
ensure_model_parallel_initialized,
|
|
||||||
init_distributed_environment,
|
|
||||||
)
|
|
||||||
from vllm.distributed.kv_transfer import (
|
|
||||||
ensure_kv_transfer_initialized,
|
|
||||||
)
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
|
||||||
from vllm.model_executor import set_random_seed
|
|
||||||
from vllm.platforms import current_platform
|
|
||||||
from vllm.platforms.tpu import USE_TPU_INFERENCE
|
from vllm.platforms.tpu import USE_TPU_INFERENCE
|
||||||
from vllm.tasks import SupportedTask
|
|
||||||
from vllm.utils.math_utils import cdiv
|
|
||||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
|
|
||||||
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
|
|
||||||
from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, KVCacheSpec
|
|
||||||
from vllm.v1.outputs import ModelRunnerOutput
|
|
||||||
from vllm.v1.utils import report_usage_stats
|
|
||||||
from vllm.v1.worker.utils import bind_kv_cache
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user