mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:44:57 +08:00
[Deprecation] Remove TokenizerPoolConfig (#20968)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
d91278181d
commit
56fe4bedd6
@ -8,7 +8,6 @@ API documentation for vLLM's configuration classes.
|
||||
|
||||
- [vllm.config.ModelConfig][]
|
||||
- [vllm.config.CacheConfig][]
|
||||
- [vllm.config.TokenizerPoolConfig][]
|
||||
- [vllm.config.LoadConfig][]
|
||||
- [vllm.config.ParallelConfig][]
|
||||
- [vllm.config.SchedulerConfig][]
|
||||
|
||||
@ -29,7 +29,7 @@ def _query_server_long(prompt: str) -> dict:
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
|
||||
def api_server(distributed_executor_backend: str):
|
||||
script_path = Path(__file__).parent.joinpath(
|
||||
"api_server_async_engine.py").absolute()
|
||||
commands = [
|
||||
@ -40,8 +40,6 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
|
||||
"facebook/opt-125m",
|
||||
"--host",
|
||||
"127.0.0.1",
|
||||
"--tokenizer-pool-size",
|
||||
str(tokenizer_pool_size),
|
||||
"--distributed-executor-backend",
|
||||
distributed_executor_backend,
|
||||
]
|
||||
@ -54,10 +52,8 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
|
||||
uvicorn_process.terminate()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
|
||||
@pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"])
|
||||
def test_api_server(api_server, tokenizer_pool_size: int,
|
||||
distributed_executor_backend: str):
|
||||
def test_api_server(api_server, distributed_executor_backend: str):
|
||||
"""
|
||||
Run the API server and test it.
|
||||
|
||||
|
||||
@ -1730,35 +1730,6 @@ class CacheConfig:
|
||||
logger.warning("Possibly too large swap space. %s", msg)
|
||||
|
||||
|
||||
@config
|
||||
@dataclass
|
||||
class TokenizerPoolConfig:
|
||||
"""This config is deprecated and will be removed in a future release.
|
||||
|
||||
Passing these parameters will have no effect. Please remove them from your
|
||||
configurations.
|
||||
"""
|
||||
|
||||
pool_size: int = 0
|
||||
"""This parameter is deprecated and will be removed in a future release.
|
||||
Passing this parameter will have no effect. Please remove it from your
|
||||
configurations."""
|
||||
pool_type: str = "ray"
|
||||
"""This parameter is deprecated and will be removed in a future release.
|
||||
Passing this parameter will have no effect. Please remove it from your
|
||||
configurations."""
|
||||
extra_config: dict = field(default_factory=dict)
|
||||
"""This parameter is deprecated and will be removed in a future release.
|
||||
Passing this parameter will have no effect. Please remove it from your
|
||||
configurations."""
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
logger.warning_once(
|
||||
"TokenizerPoolConfig is deprecated and will be removed in a "
|
||||
"future release. Passing this parameter will have no effect. "
|
||||
"Please remove it from your configurations.")
|
||||
|
||||
|
||||
class LoadFormat(str, enum.Enum):
|
||||
AUTO = "auto"
|
||||
PT = "pt"
|
||||
@ -1922,10 +1893,6 @@ class ParallelConfig:
|
||||
disable_custom_all_reduce: bool = False
|
||||
"""Disable the custom all-reduce kernel and fall back to NCCL."""
|
||||
|
||||
tokenizer_pool_config: Optional[TokenizerPoolConfig] = None
|
||||
"""This parameter is deprecated and will be removed in a future release.
|
||||
Please remove it from your configs"""
|
||||
|
||||
ray_workers_use_nsight: bool = False
|
||||
"""Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""
|
||||
|
||||
|
||||
@ -32,8 +32,8 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
||||
ObservabilityConfig, ParallelConfig, PoolerConfig,
|
||||
PrefixCachingHashAlgo, PromptAdapterConfig,
|
||||
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
|
||||
TaskOption, TokenizerMode, TokenizerPoolConfig,
|
||||
VllmConfig, get_attr_docs, get_field)
|
||||
TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
|
||||
get_field)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
from vllm.plugins import load_general_plugins
|
||||
@ -373,13 +373,6 @@ class EngineArgs:
|
||||
enforce_eager: bool = ModelConfig.enforce_eager
|
||||
max_seq_len_to_capture: int = ModelConfig.max_seq_len_to_capture
|
||||
disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
|
||||
# The following three fields are deprecated and will be removed in a future
|
||||
# release. Setting them will have no effect. Please remove them from your
|
||||
# configurations.
|
||||
tokenizer_pool_size: int = TokenizerPoolConfig.pool_size
|
||||
tokenizer_pool_type: str = TokenizerPoolConfig.pool_type
|
||||
tokenizer_pool_extra_config: dict = \
|
||||
get_field(TokenizerPoolConfig, "extra_config")
|
||||
limit_mm_per_prompt: dict[str, int] = \
|
||||
get_field(MultiModalConfig, "limit_per_prompt")
|
||||
interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
|
||||
@ -751,19 +744,6 @@ class EngineArgs:
|
||||
cache_group.add_argument("--calculate-kv-scales",
|
||||
**cache_kwargs["calculate_kv_scales"])
|
||||
|
||||
# Tokenizer arguments
|
||||
tokenizer_kwargs = get_kwargs(TokenizerPoolConfig)
|
||||
tokenizer_group = parser.add_argument_group(
|
||||
title="TokenizerPoolConfig",
|
||||
description=TokenizerPoolConfig.__doc__,
|
||||
)
|
||||
tokenizer_group.add_argument("--tokenizer-pool-size",
|
||||
**tokenizer_kwargs["pool_size"])
|
||||
tokenizer_group.add_argument("--tokenizer-pool-type",
|
||||
**tokenizer_kwargs["pool_type"])
|
||||
tokenizer_group.add_argument("--tokenizer-pool-extra-config",
|
||||
**tokenizer_kwargs["extra_config"])
|
||||
|
||||
# Multimodal related configs
|
||||
multimodal_kwargs = get_kwargs(MultiModalConfig)
|
||||
multimodal_group = parser.add_argument_group(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user