[Misc] Remove deprecated args in v0.10 (#21349)
Signed-off-by: Kebe <mail@kebe7jun.com>
parent 32142b3c62
commit bc8a8ce5ec
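This commit deletes the long-deprecated `use_v2_block_manager` engine argument (a no-op ever since block manager v1 was removed) and the deprecated `device` argument from the engine-args path, together with the example and test call sites shown below. As a minimal, hypothetical sketch of the caller-side migration (the model name is arbitrary and not taken from this diff):

from vllm import LLM

# Before this release the flag was accepted but ignored:
#   llm = LLM(model="facebook/opt-125m", use_v2_block_manager=True)
#
# After this commit, simply drop it; the v2 block-manager behaviour is the
# only code path, so nothing else needs to change.
llm = LLM(model="facebook/opt-125m")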
@@ -37,7 +37,6 @@ def initialize_llm():
         max_num_seqs=4,
         max_model_len=2048,
         block_size=2048,
-        use_v2_block_manager=True,
         device="neuron",
         tensor_parallel_size=32,
     )
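Read as a whole, the example's engine setup now looks roughly like the sketch below; only the arguments visible in this hunk's context are included, and the model name is a placeholder since it sits outside the shown lines:

# Hypothetical reconstruction of initialize_llm() after the edit.
from vllm import LLM

def initialize_llm():
    return LLM(
        model="<model-id>",       # not visible in the hunk; placeholder
        max_num_seqs=4,
        max_model_len=2048,
        block_size=2048,
        device="neuron",          # unchanged context line
        tensor_parallel_size=32,
    )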
@@ -9,7 +9,6 @@ def test_mistral():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=128,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True
@@ -14,7 +14,6 @@ def test_llama_single_lora():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=512,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True,
@@ -57,7 +56,6 @@ def test_llama_multiple_lora():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=512,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled":
             False,
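All three Neuron test hunks make the same edit: the no-op keyword is dropped from the `LLM(...)` call while `override_neuron_config` stays. A hypothetical companion test that would pin the new behaviour (not part of this commit; it assumes unknown engine-arg keywords surface as a TypeError):

import pytest

from vllm import LLM


def test_use_v2_block_manager_rejected():
    # Hypothetical: after v0.10 the keyword no longer exists anywhere in the
    # engine-args path, so it should fail fast instead of being ignored.
    with pytest.raises(TypeError):
        LLM(model="facebook/opt-125m", use_v2_block_manager=True)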
@@ -313,7 +313,6 @@ class EngineArgs:
         CacheConfig.prefix_caching_hash_algo
     disable_sliding_window: bool = ModelConfig.disable_sliding_window
     disable_cascade_attn: bool = ModelConfig.disable_cascade_attn
-    use_v2_block_manager: bool = True
     swap_space: float = CacheConfig.swap_space
     cpu_offload_gb: float = CacheConfig.cpu_offload_gb
     gpu_memory_utilization: float = CacheConfig.gpu_memory_utilization
@@ -364,7 +363,6 @@ class EngineArgs:
     max_prompt_adapter_token: int = \
         PromptAdapterConfig.max_prompt_adapter_token

-    device: Device = DeviceConfig.device
     num_scheduler_steps: int = SchedulerConfig.num_scheduler_steps
     multi_step_stream_outputs: bool = SchedulerConfig.multi_step_stream_outputs
     ray_workers_use_nsight: bool = ParallelConfig.ray_workers_use_nsight
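With `device` and `use_v2_block_manager` gone from the dataclass, constructing `EngineArgs` with either keyword should now raise a `TypeError` instead of being silently accepted. A hedged sketch (the import path matches vllm.engine.arg_utils as it exists upstream; the behaviour shown is an inference from the dataclass change, not something demonstrated in the diff):

from vllm.engine.arg_utils import EngineArgs

args = EngineArgs(model="facebook/opt-125m")      # still fine
assert not hasattr(args, "use_v2_block_manager")  # field no longer exists

try:
    EngineArgs(model="facebook/opt-125m", device="neuron")
except TypeError as exc:
    # A dataclass __init__ rejects unknown keyword arguments.
    print(f"rejected as expected: {exc}")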
@@ -745,16 +743,6 @@ class EngineArgs:
             "--max-prompt-adapter-token",
             **prompt_adapter_kwargs["max_prompt_adapter_token"])

-        # Device arguments
-        device_kwargs = get_kwargs(DeviceConfig)
-        device_group = parser.add_argument_group(
-            title="DeviceConfig",
-            description=DeviceConfig.__doc__,
-        )
-        device_group.add_argument("--device",
-                                  **device_kwargs["device"],
-                                  deprecated=True)
-
         # Speculative arguments
         speculative_group = parser.add_argument_group(
             title="SpeculativeConfig",
@@ -856,15 +844,6 @@ class EngineArgs:
             **vllm_kwargs["additional_config"])

         # Other arguments
-        parser.add_argument('--use-v2-block-manager',
-                            action='store_true',
-                            default=True,
-                            deprecated=True,
-                            help='[DEPRECATED] block manager v1 has been '
-                            'removed and SelfAttnBlockSpaceManager (i.e. '
-                            'block manager v2) is now the default. '
-                            'Setting this flag to True or False'
-                            ' has no effect on vLLM behavior.')
         parser.add_argument('--disable-log-stats',
                             action='store_true',
                             help='Disable logging statistics.')
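With these registrations deleted, the two flags are unknown to the parser altogether, so passing them on the command line should now fail parsing rather than print a deprecation notice. A self-contained sketch using the standard library's argparse as a stand-in for vLLM's FlexibleArgumentParser (hypothetical, reduced to one remaining argument for illustration):

import argparse

# Only --disable-log-stats remains registered, mirroring the state after
# this commit (minimal subset, not the real parser).
parser = argparse.ArgumentParser(prog="vllm")
parser.add_argument("--disable-log-stats", action="store_true",
                    help="Disable logging statistics.")

try:
    # Previously this parsed as a deprecated no-op; now argparse rejects it.
    parser.parse_args(["--use-v2-block-manager"])
except SystemExit:
    print("unrecognized argument, as expected after the removal")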