[Deprecation] Remove TokenizerPoolConfig (#20968)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-07-15 15:00:50 +01:00 (committed by GitHub)
Parent: d91278181d
Commit: 56fe4bedd6
4 changed files with 4 additions and 62 deletions

File: docs (API reference for configuration classes)

@@ -8,7 +8,6 @@ API documentation for vLLM's configuration classes.
 
 - [vllm.config.ModelConfig][]
 - [vllm.config.CacheConfig][]
-- [vllm.config.TokenizerPoolConfig][]
 - [vllm.config.LoadConfig][]
 - [vllm.config.ParallelConfig][]
 - [vllm.config.SchedulerConfig][]

File: tests (API server test)

@@ -29,7 +29,7 @@ def _query_server_long(prompt: str) -> dict:
 
 @pytest.fixture
-def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
+def api_server(distributed_executor_backend: str):
     script_path = Path(__file__).parent.joinpath(
         "api_server_async_engine.py").absolute()
     commands = [
@@ -40,8 +40,6 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
         "facebook/opt-125m",
         "--host",
         "127.0.0.1",
-        "--tokenizer-pool-size",
-        str(tokenizer_pool_size),
         "--distributed-executor-backend",
         distributed_executor_backend,
     ]
@@ -54,10 +52,8 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
     uvicorn_process.terminate()
 
 
-@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
 @pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"])
-def test_api_server(api_server, tokenizer_pool_size: int,
-                    distributed_executor_backend: str):
+def test_api_server(api_server, distributed_executor_backend: str):
     """
     Run the API server and test it.

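For reference, after this change the fixture/test pair reduces to the shape below: a sketch reconstructed from the hunks above, with the imports, the process launch, and the --model flag filled in as assumptions (they sit outside the captured context) and the request logic elided.

import subprocess
import sys
from pathlib import Path

import pytest


@pytest.fixture
def api_server(distributed_executor_backend: str):
    # pytest injects the parametrized value by matching the argument name.
    script_path = Path(__file__).parent.joinpath(
        "api_server_async_engine.py").absolute()
    commands = [
        sys.executable, "-u", str(script_path),
        "--model", "facebook/opt-125m",
        "--host", "127.0.0.1",
        "--distributed-executor-backend", distributed_executor_backend,
    ]
    uvicorn_process = subprocess.Popen(commands)  # launch step assumed
    yield
    uvicorn_process.terminate()


@pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"])
def test_api_server(api_server, distributed_executor_backend: str):
    """Run the API server and test it."""
    ...  # request logic elided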
File: vllm/config.py

@@ -1730,35 +1730,6 @@ class CacheConfig:
         logger.warning("Possibly too large swap space. %s", msg)
 
 
-@config
-@dataclass
-class TokenizerPoolConfig:
-    """This config is deprecated and will be removed in a future release.
-    Passing these parameters will have no effect. Please remove them from your
-    configurations.
-    """
-    pool_size: int = 0
-    """This parameter is deprecated and will be removed in a future release.
-    Passing this parameter will have no effect. Please remove it from your
-    configurations."""
-
-    pool_type: str = "ray"
-    """This parameter is deprecated and will be removed in a future release.
-    Passing this parameter will have no effect. Please remove it from your
-    configurations."""
-
-    extra_config: dict = field(default_factory=dict)
-    """This parameter is deprecated and will be removed in a future release.
-    Passing this parameter will have no effect. Please remove it from your
-    configurations."""
-
-    def __post_init__(self) -> None:
-        logger.warning_once(
-            "TokenizerPoolConfig is deprecated and will be removed in a "
-            "future release. Passing this parameter will have no effect. "
-            "Please remove it from your configurations.")
-
-
 class LoadFormat(str, enum.Enum):
     AUTO = "auto"
     PT = "pt"
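The deleted class was the tail end of a two-phase deprecation: phase one replaces the config with a no-op shim whose only behavior is to warn, and phase two (this commit) deletes the shim outright. A minimal sketch of the phase-one pattern using only the standard library; the names here are illustrative, and vLLM itself warned via logger.warning_once rather than the warnings module.

import warnings
from dataclasses import dataclass, field


@dataclass
class DeprecatedPoolConfig:
    """No-op shim: accepted for one release so old configs keep loading."""
    pool_size: int = 0
    pool_type: str = "ray"
    extra_config: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Constructing the config still succeeds, but none of the fields
        # have any effect; the only behavior left is this warning.
        warnings.warn(
            "DeprecatedPoolConfig is deprecated and will be removed in a "
            "future release; remove it from your configuration.",
            DeprecationWarning,
            stacklevel=2,
        )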
@@ -1922,10 +1893,6 @@ class ParallelConfig:
     disable_custom_all_reduce: bool = False
     """Disable the custom all-reduce kernel and fall back to NCCL."""
 
-    tokenizer_pool_config: Optional[TokenizerPoolConfig] = None
-    """This parameter is deprecated and will be removed in a future release.
-    Please remove it from your configs"""
-
     ray_workers_use_nsight: bool = False
     """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""

File: vllm/engine/arg_utils.py

@@ -32,8 +32,8 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
                          ObservabilityConfig, ParallelConfig, PoolerConfig,
                          PrefixCachingHashAlgo, PromptAdapterConfig,
                          SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
-                         TaskOption, TokenizerMode, TokenizerPoolConfig,
-                         VllmConfig, get_attr_docs, get_field)
+                         TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
+                         get_field)
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
@@ -373,13 +373,6 @@ class EngineArgs:
     enforce_eager: bool = ModelConfig.enforce_eager
     max_seq_len_to_capture: int = ModelConfig.max_seq_len_to_capture
     disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
-    # The following three fields are deprecated and will be removed in a future
-    # release. Setting them will have no effect. Please remove them from your
-    # configurations.
-    tokenizer_pool_size: int = TokenizerPoolConfig.pool_size
-    tokenizer_pool_type: str = TokenizerPoolConfig.pool_type
-    tokenizer_pool_extra_config: dict = \
-        get_field(TokenizerPoolConfig, "extra_config")
     limit_mm_per_prompt: dict[str, int] = \
         get_field(MultiModalConfig, "limit_per_prompt")
     interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
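Note the asymmetry in the removed fields: the scalar defaults are plain class attributes, but the dict default goes through get_field. dataclasses rejects mutable defaults outright, so a helper that rebuilds the field with its default_factory is required. A minimal sketch of the pitfall, with a hypothetical stand-in for the helper (not vLLM's actual implementation):

from dataclasses import dataclass, field, fields


@dataclass
class Example:
    # Writing `extra_config: dict = {}` would raise at class creation:
    #   ValueError: mutable default <class 'dict'> for field extra_config
    #   is not allowed: use default_factory
    extra_config: dict = field(default_factory=dict)


def get_field_sketch(cls, name: str):
    """Return a fresh field() mirroring cls.<name>, keeping its factory."""
    f = next(f for f in fields(cls) if f.name == name)
    return field(default_factory=f.default_factory)


print(Example().extra_config)  # {} -- a new dict per instance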
@@ -751,19 +744,6 @@
         cache_group.add_argument("--calculate-kv-scales",
                                  **cache_kwargs["calculate_kv_scales"])
 
-        # Tokenizer arguments
-        tokenizer_kwargs = get_kwargs(TokenizerPoolConfig)
-        tokenizer_group = parser.add_argument_group(
-            title="TokenizerPoolConfig",
-            description=TokenizerPoolConfig.__doc__,
-        )
-        tokenizer_group.add_argument("--tokenizer-pool-size",
-                                     **tokenizer_kwargs["pool_size"])
-        tokenizer_group.add_argument("--tokenizer-pool-type",
-                                     **tokenizer_kwargs["pool_type"])
-        tokenizer_group.add_argument("--tokenizer-pool-extra-config",
-                                     **tokenizer_kwargs["extra_config"])
-
         # Multimodal related configs
         multimodal_kwargs = get_kwargs(MultiModalConfig)
         multimodal_group = parser.add_argument_group(
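The block removed above follows the registration pattern used throughout this file: get_kwargs(ConfigClass) derives add_argument keyword dicts from the config dataclass's fields, and each config class owns one argument group, so deleting TokenizerPoolConfig removes its entire CLI surface in a single block. A stripped-down sketch of that pattern under that assumption; vLLM's real helper is richer (it also pulls per-field help text via get_attr_docs).

import argparse
from dataclasses import MISSING, dataclass, fields


@dataclass
class ExampleConfig:
    """Example argument group built from a config dataclass."""
    pool_size: int = 0
    pool_type: str = "ray"


def get_kwargs(cls) -> dict[str, dict]:
    # Map each field to add_argument(**kwargs): type and default stay in
    # sync with the dataclass definition automatically.
    return {
        f.name: {
            "type": f.type if isinstance(f.type, type) else str,
            "default": None if f.default is MISSING else f.default,
        }
        for f in fields(cls)
    }


parser = argparse.ArgumentParser()
group = parser.add_argument_group(title="ExampleConfig",
                                  description=ExampleConfig.__doc__)
kwargs = get_kwargs(ExampleConfig)
group.add_argument("--pool-size", **kwargs["pool_size"])
group.add_argument("--pool-type", **kwargs["pool_type"])
print(parser.parse_args(["--pool-size", "2"]))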