diff --git a/tests/distributed/test_context_parallel.py b/tests/distributed/test_context_parallel.py
index 53fc9957b910f..89c2c9f8badeb 100644
--- a/tests/distributed/test_context_parallel.py
+++ b/tests/distributed/test_context_parallel.py
@@ -15,7 +15,7 @@ from typing import Literal, NamedTuple, Optional
 
 import pytest
 
-from vllm.config import RunnerOption
+from vllm.config.model import RunnerOption
 from vllm.logger import init_logger
 
 from ..models.registry import HF_EXAMPLE_MODELS
diff --git a/tests/distributed/test_expert_parallel.py b/tests/distributed/test_expert_parallel.py
index 94f0ece4971ba..8a9ddcd58cfce 100644
--- a/tests/distributed/test_expert_parallel.py
+++ b/tests/distributed/test_expert_parallel.py
@@ -6,7 +6,7 @@ from typing import Literal, NamedTuple, Optional
 
 import pytest
 
-from vllm.config import RunnerOption
+from vllm.config.model import RunnerOption
 from vllm.logger import init_logger
 
 from ..utils import compare_two_settings, create_new_process_for_each_test
diff --git a/tests/distributed/test_sequence_parallel.py b/tests/distributed/test_sequence_parallel.py
index 1defd96902414..0847687cf2f9a 100644
--- a/tests/distributed/test_sequence_parallel.py
+++ b/tests/distributed/test_sequence_parallel.py
@@ -15,7 +15,7 @@ from typing import Literal, NamedTuple, Optional
 
 import pytest
 
-from vllm.config import RunnerOption
+from vllm.config.model import RunnerOption
 from vllm.logger import init_logger
 
 from ..models.registry import HF_EXAMPLE_MODELS
diff --git a/tests/models/multimodal/generation/vlm_utils/core.py b/tests/models/multimodal/generation/vlm_utils/core.py
index 0c11f5f9b0827..5748ccc14c294 100644
--- a/tests/models/multimodal/generation/vlm_utils/core.py
+++ b/tests/models/multimodal/generation/vlm_utils/core.py
@@ -7,7 +7,7 @@ from typing import Any, Callable, Optional
 import torch
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 
-from vllm.config import RunnerOption
+from vllm.config.model import RunnerOption
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
 from .....conftest import HfRunner, VllmRunner
diff --git a/tests/models/multimodal/generation/vlm_utils/types.py b/tests/models/multimodal/generation/vlm_utils/types.py
index bb34d1cc6dad2..6e82f7e3306ab 100644
--- a/tests/models/multimodal/generation/vlm_utils/types.py
+++ b/tests/models/multimodal/generation/vlm_utils/types.py
@@ -12,7 +12,7 @@ from pytest import MarkDecorator
 from transformers import AutoModelForCausalLM
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 
-from vllm.config import RunnerOption
+from vllm.config.model import RunnerOption
 from vllm.logprobs import SampleLogprobs
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 
diff --git a/tests/models/registry.py b/tests/models/registry.py
index e7affb41565c5..c389c9c2d81ed 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -10,7 +10,7 @@ import torch
 from packaging.version import Version
 from transformers import __version__ as TRANSFORMERS_VERSION
 
-from vllm.config import ModelDType, TokenizerMode
+from vllm.config.model import ModelDType, TokenizerMode
 
 
 @dataclass(frozen=True)
diff --git a/tests/models/utils.py b/tests/models/utils.py
index c20e50ff1bffc..84697ad68d441 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -10,7 +10,7 @@ import torch
 import torch.nn.functional as F
 from transformers import PretrainedConfig
 
-from vllm.config import ModelConfig, ModelDType, RunnerOption
+from vllm.config.model import ModelConfig, ModelDType, RunnerOption
 from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
 from vllm.multimodal.processing import InputProcessingContext
 from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py
index feed66d33b586..a22f32c9a31ca 100644
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@@ -15,11 +15,11 @@ from vllm.config import (
     DeviceConfig,
     LoadConfig,
     ModelConfig,
-    ModelDType,
     ParallelConfig,
     SchedulerConfig,
     VllmConfig,
 )
+from vllm.config.model import ModelDType
 from vllm.platforms import current_platform
 from vllm.utils import resolve_obj_by_qualname
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 16cdc19037ba7..b5d04679317e6 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -29,7 +29,7 @@ from vllm.sampling_params import (
 )
 
 if TYPE_CHECKING:
-    from vllm.config import TokenizerMode
+    from vllm.config.model import TokenizerMode
 
 NGRAM_SPEC_CONFIG = {
     "model": "[ngram]",
diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py
index 2cb5e6733b79f..f08d9f684921d 100644
--- a/tests/v1/metrics/test_ray_metrics.py
+++ b/tests/v1/metrics/test_ray_metrics.py
@@ -4,7 +4,7 @@
 import pytest
 import ray
 
-from vllm.config import ModelDType
+from vllm.config.model import ModelDType
 from vllm.sampling_params import SamplingParams
 from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
 from vllm.v1.metrics.ray_wrappers import RayPrometheusMetric, RayPrometheusStatLogger
diff --git a/tests/v1/sample/test_logprobs.py b/tests/v1/sample/test_logprobs.py
index bda430a080f61..ff3d568fbe624 100644
--- a/tests/v1/sample/test_logprobs.py
+++ b/tests/v1/sample/test_logprobs.py
@@ -16,7 +16,7 @@ from tests.v1.sample.utils import (
     get_test_batch,
 )
 from vllm import SamplingParams
-from vllm.config import LogprobsMode
+from vllm.config.model import LogprobsMode
 
 from ...conftest import HfRunner, VllmRunner
 
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 7c5052c822f8f..6a0197d044dcd 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -1,42 +1,28 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from vllm.config.cache import (
-    BlockSize,
-    CacheConfig,
-    CacheDType,
-    MambaDType,
-    PrefixCachingHashAlgo,
-)
+from vllm.config.cache import CacheConfig
 from vllm.config.compilation import (
     CompilationConfig,
     CompilationLevel,
     CUDAGraphMode,
     PassConfig,
 )
-from vllm.config.device import Device, DeviceConfig
+from vllm.config.device import DeviceConfig
 from vllm.config.kv_events import KVEventsConfig
 from vllm.config.kv_transfer import KVTransferConfig
 from vllm.config.load import LoadConfig
 from vllm.config.lora import LoRAConfig
 from vllm.config.model import (
-    ConvertOption,
-    HfOverrides,
-    LogprobsMode,
     ModelConfig,
-    ModelDType,
-    ModelImpl,
-    RunnerOption,
-    TaskOption,
-    TokenizerMode,
     iter_architecture_defaults,
     try_match_architecture_defaults,
 )
-from vllm.config.multimodal import MMCacheType, MMEncoderTPMode, MultiModalConfig
-from vllm.config.observability import DetailedTraceModules, ObservabilityConfig
-from vllm.config.parallel import DistributedExecutorBackend, EPLBConfig, ParallelConfig
+from vllm.config.multimodal import MultiModalConfig
+from vllm.config.observability import ObservabilityConfig
+from vllm.config.parallel import EPLBConfig, ParallelConfig
 from vllm.config.pooler import PoolerConfig
-from vllm.config.scheduler import RunnerType, SchedulerConfig, SchedulerPolicy
+from vllm.config.scheduler import SchedulerConfig
 from vllm.config.speculative import SpeculativeConfig
 from vllm.config.speech_to_text import SpeechToTextConfig
 from vllm.config.structured_outputs import StructuredOutputsConfig
@@ -56,20 +42,17 @@ from vllm.config.vllm import (
     set_current_vllm_config,
 )
 
+# __all__ should only contain classes and functions.
+# Types and globals should be imported from their respective modules.
 __all__ = [
     # From vllm.config.cache
-    "BlockSize",
     "CacheConfig",
-    "CacheDType",
-    "MambaDType",
-    "PrefixCachingHashAlgo",
     # From vllm.config.compilation
     "CompilationConfig",
     "CompilationLevel",
     "CUDAGraphMode",
     "PassConfig",
     # From vllm.config.device
-    "Device",
     "DeviceConfig",
     # From vllm.config.kv_events
     "KVEventsConfig",
@@ -80,34 +63,20 @@ __all__ = [
     # From vllm.config.lora
     "LoRAConfig",
     # From vllm.config.model
-    "ConvertOption",
-    "HfOverrides",
-    "LogprobsMode",
     "ModelConfig",
-    "ModelDType",
-    "ModelImpl",
-    "RunnerOption",
-    "TaskOption",
-    "TokenizerMode",
     "iter_architecture_defaults",
     "try_match_architecture_defaults",
     # From vllm.config.multimodal
-    "MMCacheType",
-    "MMEncoderTPMode",
     "MultiModalConfig",
     # From vllm.config.observability
-    "DetailedTraceModules",
     "ObservabilityConfig",
     # From vllm.config.parallel
-    "DistributedExecutorBackend",
     "EPLBConfig",
     "ParallelConfig",
     # From vllm.config.pooler
     "PoolerConfig",
     # From vllm.config.scheduler
-    "RunnerType",
     "SchedulerConfig",
-    "SchedulerPolicy",
     # From vllm.config.speculative
     "SpeculativeConfig",
     # From vllm.config.speech_to_text
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e01f2d32d914e..b757e94a7cbed 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -31,43 +31,41 @@ from typing_extensions import TypeIs, deprecated
 
 import vllm.envs as envs
 from vllm.config import (
-    BlockSize,
     CacheConfig,
-    CacheDType,
     CompilationConfig,
     ConfigType,
-    ConvertOption,
-    DetailedTraceModules,
-    Device,
     DeviceConfig,
-    DistributedExecutorBackend,
     EPLBConfig,
-    HfOverrides,
     KVEventsConfig,
     KVTransferConfig,
     LoadConfig,
-    LogprobsMode,
     LoRAConfig,
-    MambaDType,
-    MMEncoderTPMode,
     ModelConfig,
-    ModelDType,
+    MultiModalConfig,
     ObservabilityConfig,
     ParallelConfig,
     PoolerConfig,
-    PrefixCachingHashAlgo,
-    RunnerOption,
     SchedulerConfig,
-    SchedulerPolicy,
     SpeculativeConfig,
     StructuredOutputsConfig,
-    TaskOption,
-    TokenizerMode,
     VllmConfig,
     get_attr_docs,
 )
-from vllm.config.multimodal import MMCacheType, MultiModalConfig
-from vllm.config.parallel import ExpertPlacementStrategy
+from vllm.config.cache import BlockSize, CacheDType, MambaDType, PrefixCachingHashAlgo
+from vllm.config.device import Device
+from vllm.config.model import (
+    ConvertOption,
+    HfOverrides,
+    LogprobsMode,
+    ModelDType,
+    RunnerOption,
+    TaskOption,
+    TokenizerMode,
+)
+from vllm.config.multimodal import MMCacheType, MMEncoderTPMode
+from vllm.config.observability import DetailedTraceModules
+from vllm.config.parallel import DistributedExecutorBackend, ExpertPlacementStrategy
+from vllm.config.scheduler import SchedulerPolicy
 from vllm.config.utils import get_field
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index c797735f0c2d6..b94771bc9bbef 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -19,18 +19,18 @@ from vllm.beam_search import (
 )
 from vllm.config import (
     CompilationConfig,
-    ModelDType,
+    PoolerConfig,
     StructuredOutputsConfig,
-    TokenizerMode,
     is_init_field,
 )
-from vllm.engine.arg_utils import (
+from vllm.config.model import (
     ConvertOption,
-    EngineArgs,
     HfOverrides,
-    PoolerConfig,
+    ModelDType,
     RunnerOption,
+    TokenizerMode,
 )
+from vllm.engine.arg_utils import EngineArgs
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
     ChatTemplateContentFormatOption,
diff --git a/vllm/model_executor/layers/mamba/mamba_utils.py b/vllm/model_executor/layers/mamba/mamba_utils.py
index 21c36617a872d..0f160b2c924fb 100644
--- a/vllm/model_executor/layers/mamba/mamba_utils.py
+++ b/vllm/model_executor/layers/mamba/mamba_utils.py
@@ -4,7 +4,8 @@
 from typing import Union
 
 import torch
 
-from vllm.config import MambaDType, ModelDType
+from vllm.config.cache import MambaDType
+from vllm.config.model import ModelDType
 from vllm.distributed import divide
 from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, get_kv_cache_torch_dtype
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index c0888247f5939..8c23b1de44e4e 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -15,7 +15,8 @@ from .interface import Platform, PlatformEnum
 
 if TYPE_CHECKING:
     from vllm.attention.backends.registry import _Backend
-    from vllm.config import BlockSize, ModelConfig, VllmConfig
+    from vllm.config import ModelConfig, VllmConfig
+    from vllm.config.cache import BlockSize
     from vllm.pooling_params import PoolingParams
 else:
     BlockSize = None
diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py
index dbcdad07e4dee..5fa7a9ad44cd4 100644
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -8,7 +8,7 @@ import torch.nn as nn
 from packaging import version
 
 from vllm import envs
-from vllm.config import LogprobsMode
+from vllm.config.model import LogprobsMode
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 
diff --git a/vllm/v1/sample/sampler.py b/vllm/v1/sample/sampler.py
index 101d2ebed4b75..80cc866487f53 100644
--- a/vllm/v1/sample/sampler.py
+++ b/vllm/v1/sample/sampler.py
@@ -7,7 +7,7 @@ from typing import Optional
 import torch
 import torch.nn as nn
 
-from vllm.config import LogprobsMode
+from vllm.config.model import LogprobsMode
 from vllm.utils import is_pin_memory_available
 from vllm.v1.outputs import LogprobsTensors, SamplerOutput
 from vllm.v1.sample.metadata import SamplingMetadata
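
For reference, a minimal sketch of the import convention this patch establishes for downstream code, using only names that appear in the hunks above: config classes and functions stay importable from the `vllm.config` package root, while `Literal` type aliases move to the submodule that defines them.

```python
# Sketch of the post-patch import convention (names taken from the hunks above).

# Config *classes* and functions are still exported from the package root:
from vllm.config import CacheConfig, ModelConfig, VllmConfig

# Type aliases / Literal options are now imported from their defining submodule:
from vllm.config.cache import BlockSize, CacheDType
from vllm.config.model import ModelDType, RunnerOption, TokenizerMode
from vllm.config.scheduler import SchedulerPolicy
```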