[Deprecation] Remove TokenizerPoolConfig (#20968)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-12-13 23:05:02 +08:00 · 2025-07-15 15:00:50 +01:00 · 2025-07-15 15:00:50 +01:00 · 56fe4bedd6
commit 56fe4bedd6
parent d91278181d
4 changed files with 4 additions and 62 deletions
--- a/docs/api/README.md
+++ b/docs/api/README.md
@@ -8,7 +8,6 @@ API documentation for vLLM's configuration classes.
 - [vllm.config.ModelConfig][]
 - [vllm.config.CacheConfig][]
-- [vllm.config.TokenizerPoolConfig][]
 - [vllm.config.LoadConfig][]
 - [vllm.config.ParallelConfig][]
 - [vllm.config.SchedulerConfig][]
--- a/tests/async_engine/test_api_server.py
+++ b/tests/async_engine/test_api_server.py
@@ -29,7 +29,7 @@ def _query_server_long(prompt: str) -> dict:
@pytest.fixture
-def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
+def api_server(distributed_executor_backend: str):
    script_path = Path(__file__).parent.joinpath(
        "api_server_async_engine.py").absolute()
    commands = [
@@ -40,8 +40,6 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
        "facebook/opt-125m",
        "--host",
        "127.0.0.1",
-        "--tokenizer-pool-size",
-        str(tokenizer_pool_size),
        "--distributed-executor-backend",
        distributed_executor_backend,
    ]
@@ -54,10 +52,8 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
    uvicorn_process.terminate()
-@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
@pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"])
-def test_api_server(api_server, tokenizer_pool_size: int,
+def test_api_server(api_server, distributed_executor_backend: str):
-                    distributed_executor_backend: str):
    """
    Run the API server and test it.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1730,35 +1730,6 @@ class CacheConfig:
            logger.warning("Possibly too large swap space. %s", msg)
-@config
-@dataclass
-class TokenizerPoolConfig:
-    """This config is deprecated and will be removed in a future release.
-    Passing these parameters will have no effect. Please remove them from your
-    configurations.
-    """
-    pool_size: int = 0
-    """This parameter is deprecated and will be removed in a future release.
-    Passing this parameter will have no effect. Please remove it from your
-    configurations."""
-    pool_type: str = "ray"
-    """This parameter is deprecated and will be removed in a future release.
-    Passing this parameter will have no effect. Please remove it from your
-    configurations."""
-    extra_config: dict = field(default_factory=dict)
-    """This parameter is deprecated and will be removed in a future release.
-    Passing this parameter will have no effect. Please remove it from your
-    configurations."""
-    def __post_init__(self) -> None:
-        logger.warning_once(
-            "TokenizerPoolConfig is deprecated and will be removed in a "
-            "future release. Passing this parameter will have no effect. "
-            "Please remove it from your configurations.")
 class LoadFormat(str, enum.Enum):
    AUTO = "auto"
    PT = "pt"
@@ -1922,10 +1893,6 @@ class ParallelConfig:
    disable_custom_all_reduce: bool = False
    """Disable the custom all-reduce kernel and fall back to NCCL."""
-    tokenizer_pool_config: Optional[TokenizerPoolConfig] = None
-    """This parameter is deprecated and will be removed in a future release.
-    Please remove it from your configs"""
    ray_workers_use_nsight: bool = False
    """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -32,8 +32,8 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
                         ObservabilityConfig, ParallelConfig, PoolerConfig,
                         PrefixCachingHashAlgo, PromptAdapterConfig,
                         SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
-                         TaskOption, TokenizerMode, TokenizerPoolConfig,
+                         TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
-                         VllmConfig, get_attr_docs, get_field)
+                         get_field)
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
@@ -373,13 +373,6 @@ class EngineArgs:
    enforce_eager: bool = ModelConfig.enforce_eager
    max_seq_len_to_capture: int = ModelConfig.max_seq_len_to_capture
    disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
-    # The following three fields are deprecated and will be removed in a future
-    # release. Setting them will have no effect. Please remove them from your
-    # configurations.
-    tokenizer_pool_size: int = TokenizerPoolConfig.pool_size
-    tokenizer_pool_type: str = TokenizerPoolConfig.pool_type
-    tokenizer_pool_extra_config: dict = \
-        get_field(TokenizerPoolConfig, "extra_config")
    limit_mm_per_prompt: dict[str, int] = \
        get_field(MultiModalConfig, "limit_per_prompt")
    interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
@@ -751,19 +744,6 @@ class EngineArgs:
        cache_group.add_argument("--calculate-kv-scales",
                                 **cache_kwargs["calculate_kv_scales"])
-        # Tokenizer arguments
-        tokenizer_kwargs = get_kwargs(TokenizerPoolConfig)
-        tokenizer_group = parser.add_argument_group(
-            title="TokenizerPoolConfig",
-            description=TokenizerPoolConfig.__doc__,
-        )
-        tokenizer_group.add_argument("--tokenizer-pool-size",
-                                     **tokenizer_kwargs["pool_size"])
-        tokenizer_group.add_argument("--tokenizer-pool-type",
-                                     **tokenizer_kwargs["pool_type"])
-        tokenizer_group.add_argument("--tokenizer-pool-extra-config",
-                                     **tokenizer_kwargs["extra_config"])
        # Multimodal related configs
        multimodal_kwargs = get_kwargs(MultiModalConfig)
        multimodal_group = parser.add_argument_group(