diff --git a/vllm/model_executor/layers/mamba/linear_attn.py b/vllm/model_executor/layers/mamba/linear_attn.py
index b5a37b2582e56..ce8f50bb27b82 100644
--- a/vllm/model_executor/layers/mamba/linear_attn.py
+++ b/vllm/model_executor/layers/mamba/linear_attn.py
@@ -10,7 +10,6 @@ if TYPE_CHECKING:
 from typing import TYPE_CHECKING
 
 import torch
-import torch.distributed
 import torch.nn.functional as F
 from einops import rearrange
 from torch import nn
@@ -41,9 +40,6 @@ from vllm.v1.attention.backends.linear_attn import LinearAttentionMetadata
 if TYPE_CHECKING:
     from vllm.attention.backends.abstract import AttentionBackend
 
-import torch
-import torch.distributed
-
 
 class MiniMaxText01RMSNormTP(CustomOp):
     name = "MiniMaxText01RMSNormTP"
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 1f4a76452f969..3b82f8a98bbd6 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -848,7 +848,6 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
         # Property to determine if AITER is used
         if self.rocm_aiter_moe_enabled:
             from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (  # noqa E501
-                rocm_aiter_fused_experts,
                 shuffle_weights,
             )
 
diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py
index 0e69fcfd8febd..2b727a538bf25 100644
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -58,7 +58,6 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsTransc
 from .utils import (
     AutoWeightsLoader,
     WeightsMapper,
-    flatten_bn,
     init_vllm_registered_model,
     maybe_prefix,
 )
diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py
index 82f7cd3aa8c22..e262012dcd526 100644
--- a/vllm/model_executor/models/minimax_text_01.py
+++ b/vllm/model_executor/models/minimax_text_01.py
@@ -11,7 +11,6 @@ if TYPE_CHECKING:
 
 import regex as re
 import torch
-import torch.distributed
 from torch import nn
 from transformers import MiniMaxConfig
 
diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py
index 5b228e6b3aeb3..e97bab250ed19 100644
--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -31,13 +31,11 @@ if TYPE_CHECKING:
     from .inputs import (
         BatchedTensorInputs,
         MultiModalKwargsItem,
-        MultiModalKwargsItems,
         MultiModalPlaceholderDict,
     )
 else:
     BatchedTensorInputs = Any
     MultiModalKwargsItem = Any
-    MultiModalKwargsItems = Any
     MultiModalPlaceholderDict = Any
 
 global_thread_pool = ThreadPoolExecutor(
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 54173c64a2075..a393568909d27 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -21,11 +21,9 @@ from vllm.transformers_utils.utils import check_gguf_file
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
-    from vllm.lora.request import LoRARequest
     from vllm.transformers_utils.tokenizer_base import TokenizerBase
 else:
     ModelConfig = Any
-    LoRARequest = Any
     TokenizerBase = Any
 
 logger = init_logger(__name__)
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index c6fc4134d28a7..a35dda2d77345 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -900,7 +900,6 @@ def _cuda_device_count_stateless(cuda_visible_devices: str | None = None) -> int
     # c1cd946818442aca8c7f812b16d187ce1586c3bc/
     # torch/cuda/__init__.py#L831C1-L831C17
     import torch.cuda
-    import torch.version
 
     from vllm.platforms import current_platform
 
diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py
index 9bce362120acf..9605ff6de9eb6 100644
--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -7,7 +7,6 @@ from collections.abc import Callable
 from typing import Any, TypeVar
 
 import torch
-import torch.distributed
 import torch.nn as nn
 
 import vllm.envs as envs