mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 23:05:02 +08:00
[Deprecation] Remove TokenizerPoolConfig (#20968)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
d91278181d
commit
56fe4bedd6
@ -8,7 +8,6 @@ API documentation for vLLM's configuration classes.
|
|||||||
|
|
||||||
- [vllm.config.ModelConfig][]
|
- [vllm.config.ModelConfig][]
|
||||||
- [vllm.config.CacheConfig][]
|
- [vllm.config.CacheConfig][]
|
||||||
- [vllm.config.TokenizerPoolConfig][]
|
|
||||||
- [vllm.config.LoadConfig][]
|
- [vllm.config.LoadConfig][]
|
||||||
- [vllm.config.ParallelConfig][]
|
- [vllm.config.ParallelConfig][]
|
||||||
- [vllm.config.SchedulerConfig][]
|
- [vllm.config.SchedulerConfig][]
|
||||||
|
|||||||
@ -29,7 +29,7 @@ def _query_server_long(prompt: str) -> dict:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
|
def api_server(distributed_executor_backend: str):
|
||||||
script_path = Path(__file__).parent.joinpath(
|
script_path = Path(__file__).parent.joinpath(
|
||||||
"api_server_async_engine.py").absolute()
|
"api_server_async_engine.py").absolute()
|
||||||
commands = [
|
commands = [
|
||||||
@ -40,8 +40,6 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
|
|||||||
"facebook/opt-125m",
|
"facebook/opt-125m",
|
||||||
"--host",
|
"--host",
|
||||||
"127.0.0.1",
|
"127.0.0.1",
|
||||||
"--tokenizer-pool-size",
|
|
||||||
str(tokenizer_pool_size),
|
|
||||||
"--distributed-executor-backend",
|
"--distributed-executor-backend",
|
||||||
distributed_executor_backend,
|
distributed_executor_backend,
|
||||||
]
|
]
|
||||||
@ -54,10 +52,8 @@ def api_server(tokenizer_pool_size: int, distributed_executor_backend: str):
|
|||||||
uvicorn_process.terminate()
|
uvicorn_process.terminate()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
|
|
||||||
@pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"])
|
@pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"])
|
||||||
def test_api_server(api_server, tokenizer_pool_size: int,
|
def test_api_server(api_server, distributed_executor_backend: str):
|
||||||
distributed_executor_backend: str):
|
|
||||||
"""
|
"""
|
||||||
Run the API server and test it.
|
Run the API server and test it.
|
||||||
|
|
||||||
|
|||||||
@ -1730,35 +1730,6 @@ class CacheConfig:
|
|||||||
logger.warning("Possibly too large swap space. %s", msg)
|
logger.warning("Possibly too large swap space. %s", msg)
|
||||||
|
|
||||||
|
|
||||||
@config
|
|
||||||
@dataclass
|
|
||||||
class TokenizerPoolConfig:
|
|
||||||
"""This config is deprecated and will be removed in a future release.
|
|
||||||
|
|
||||||
Passing these parameters will have no effect. Please remove them from your
|
|
||||||
configurations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
pool_size: int = 0
|
|
||||||
"""This parameter is deprecated and will be removed in a future release.
|
|
||||||
Passing this parameter will have no effect. Please remove it from your
|
|
||||||
configurations."""
|
|
||||||
pool_type: str = "ray"
|
|
||||||
"""This parameter is deprecated and will be removed in a future release.
|
|
||||||
Passing this parameter will have no effect. Please remove it from your
|
|
||||||
configurations."""
|
|
||||||
extra_config: dict = field(default_factory=dict)
|
|
||||||
"""This parameter is deprecated and will be removed in a future release.
|
|
||||||
Passing this parameter will have no effect. Please remove it from your
|
|
||||||
configurations."""
|
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
|
||||||
logger.warning_once(
|
|
||||||
"TokenizerPoolConfig is deprecated and will be removed in a "
|
|
||||||
"future release. Passing this parameter will have no effect. "
|
|
||||||
"Please remove it from your configurations.")
|
|
||||||
|
|
||||||
|
|
||||||
class LoadFormat(str, enum.Enum):
|
class LoadFormat(str, enum.Enum):
|
||||||
AUTO = "auto"
|
AUTO = "auto"
|
||||||
PT = "pt"
|
PT = "pt"
|
||||||
@ -1922,10 +1893,6 @@ class ParallelConfig:
|
|||||||
disable_custom_all_reduce: bool = False
|
disable_custom_all_reduce: bool = False
|
||||||
"""Disable the custom all-reduce kernel and fall back to NCCL."""
|
"""Disable the custom all-reduce kernel and fall back to NCCL."""
|
||||||
|
|
||||||
tokenizer_pool_config: Optional[TokenizerPoolConfig] = None
|
|
||||||
"""This parameter is deprecated and will be removed in a future release.
|
|
||||||
Please remove it from your configs"""
|
|
||||||
|
|
||||||
ray_workers_use_nsight: bool = False
|
ray_workers_use_nsight: bool = False
|
||||||
"""Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""
|
"""Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""
|
||||||
|
|
||||||
|
|||||||
@ -32,8 +32,8 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
|||||||
ObservabilityConfig, ParallelConfig, PoolerConfig,
|
ObservabilityConfig, ParallelConfig, PoolerConfig,
|
||||||
PrefixCachingHashAlgo, PromptAdapterConfig,
|
PrefixCachingHashAlgo, PromptAdapterConfig,
|
||||||
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
|
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
|
||||||
TaskOption, TokenizerMode, TokenizerPoolConfig,
|
TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
|
||||||
VllmConfig, get_attr_docs, get_field)
|
get_field)
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.platforms import CpuArchEnum, current_platform
|
from vllm.platforms import CpuArchEnum, current_platform
|
||||||
from vllm.plugins import load_general_plugins
|
from vllm.plugins import load_general_plugins
|
||||||
@ -373,13 +373,6 @@ class EngineArgs:
|
|||||||
enforce_eager: bool = ModelConfig.enforce_eager
|
enforce_eager: bool = ModelConfig.enforce_eager
|
||||||
max_seq_len_to_capture: int = ModelConfig.max_seq_len_to_capture
|
max_seq_len_to_capture: int = ModelConfig.max_seq_len_to_capture
|
||||||
disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
|
disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
|
||||||
# The following three fields are deprecated and will be removed in a future
|
|
||||||
# release. Setting them will have no effect. Please remove them from your
|
|
||||||
# configurations.
|
|
||||||
tokenizer_pool_size: int = TokenizerPoolConfig.pool_size
|
|
||||||
tokenizer_pool_type: str = TokenizerPoolConfig.pool_type
|
|
||||||
tokenizer_pool_extra_config: dict = \
|
|
||||||
get_field(TokenizerPoolConfig, "extra_config")
|
|
||||||
limit_mm_per_prompt: dict[str, int] = \
|
limit_mm_per_prompt: dict[str, int] = \
|
||||||
get_field(MultiModalConfig, "limit_per_prompt")
|
get_field(MultiModalConfig, "limit_per_prompt")
|
||||||
interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
|
interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
|
||||||
@ -751,19 +744,6 @@ class EngineArgs:
|
|||||||
cache_group.add_argument("--calculate-kv-scales",
|
cache_group.add_argument("--calculate-kv-scales",
|
||||||
**cache_kwargs["calculate_kv_scales"])
|
**cache_kwargs["calculate_kv_scales"])
|
||||||
|
|
||||||
# Tokenizer arguments
|
|
||||||
tokenizer_kwargs = get_kwargs(TokenizerPoolConfig)
|
|
||||||
tokenizer_group = parser.add_argument_group(
|
|
||||||
title="TokenizerPoolConfig",
|
|
||||||
description=TokenizerPoolConfig.__doc__,
|
|
||||||
)
|
|
||||||
tokenizer_group.add_argument("--tokenizer-pool-size",
|
|
||||||
**tokenizer_kwargs["pool_size"])
|
|
||||||
tokenizer_group.add_argument("--tokenizer-pool-type",
|
|
||||||
**tokenizer_kwargs["pool_type"])
|
|
||||||
tokenizer_group.add_argument("--tokenizer-pool-extra-config",
|
|
||||||
**tokenizer_kwargs["extra_config"])
|
|
||||||
|
|
||||||
# Multimodal related configs
|
# Multimodal related configs
|
||||||
multimodal_kwargs = get_kwargs(MultiModalConfig)
|
multimodal_kwargs = get_kwargs(MultiModalConfig)
|
||||||
multimodal_group = parser.add_argument_group(
|
multimodal_group = parser.add_argument_group(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user