[V0 Deprecation] Remove args for multi-step scheduling (#22779)
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
parent d31f97cf57
commit c5830381af
@@ -161,7 +161,6 @@ def parser_with_config():
     parser.add_argument('--port', type=int)
     parser.add_argument('--tensor-parallel-size', type=int)
     parser.add_argument('--trust-remote-code', action='store_true')
-    parser.add_argument('--multi-step-stream-outputs', action=StoreBoolean)
     return parser
 
 
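For illustration only (not part of the diff): once the flag is unregistered, argparse rejects it outright instead of parsing it into a boolean. A minimal standalone sketch of that behavior, using a plain argparse.ArgumentParser as a stand-in for the fixture's parser:

import argparse

# Stand-in for the parser built by parser_with_config() after this change.
parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int)
parser.add_argument('--tensor-parallel-size', type=int)
parser.add_argument('--trust-remote-code', action='store_true')

# Flags that survive the change still parse as before.
args = parser.parse_args(['--port', '8000', '--trust-remote-code'])
assert args.port == 8000 and args.trust_remote_code

# The removed flag is now an unrecognized argument; argparse exits with an error.
try:
    parser.parse_args(['--multi-step-stream-outputs', 'true'])
except SystemExit:
    print('--multi-step-stream-outputs is no longer accepted')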
@@ -115,12 +115,6 @@ class SchedulerConfig:
     (e.g., beam search), recomputation is not currently supported. In
     such a case, we use swapping instead."""
 
-    num_scheduler_steps: int = 1
-    """Maximum number of forward steps per scheduler call."""
-
-    multi_step_stream_outputs: bool = True
-    """If False, then multi-step will stream outputs at the end of all steps"""
-
     send_delta_data: bool = False
     """Private API. If used, scheduler sends delta data to
     workers instead of an entire data. It should be enabled only
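For context, the two deleted attributes were ordinary dataclass fields, so any construction site that still passes them fails loudly at call time rather than being silently ignored. A toy sketch (hypothetical names, not the real SchedulerConfig) of that effect:

from dataclasses import dataclass

@dataclass
class ToySchedulerConfig:
    # Illustrative subset of the fields that survive the change.
    send_delta_data: bool = False
    # num_scheduler_steps and multi_step_stream_outputs are intentionally
    # absent, mirroring their removal from SchedulerConfig.

cfg = ToySchedulerConfig()                 # fine
try:
    ToySchedulerConfig(num_scheduler_steps=8)
except TypeError as exc:                   # unexpected keyword argument
    print(f'rejected: {exc}')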
@@ -193,16 +187,7 @@ class SchedulerConfig:
 
         if self.max_num_batched_tokens is None:
             if self.enable_chunked_prefill:
-                if self.num_scheduler_steps > 1:
-                    # Multi-step Chunked-Prefill doesn't allow prompt-chunking
-                    # for now. Have max_num_batched_tokens set to max_model_len
-                    # so we don't reject sequences on account of a short
-                    # max_num_batched_tokens.
-                    self.max_num_batched_tokens = max(
-                        self.max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS)
-                else:
-                    self.max_num_batched_tokens = (
-                        DEFAULT_MAX_NUM_BATCHED_TOKENS)
+                self.max_num_batched_tokens = DEFAULT_MAX_NUM_BATCHED_TOKENS
             else:
                 # If max_model_len is too short, use
                 # DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value
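The added line collapses the chunked-prefill branch to a single default. A small standalone sketch of the simplified selection, with a made-up helper name and placeholder constant (the real DEFAULT_MAX_NUM_BATCHED_TOKENS lives in vLLM); the non-chunked-prefill branch from the surrounding context is deliberately elided:

from typing import Optional

DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048  # placeholder value for illustration

def resolve_max_num_batched_tokens(current: Optional[int],
                                    enable_chunked_prefill: bool) -> Optional[int]:
    """Hypothetical helper mirroring only the post-change branch."""
    if current is None and enable_chunked_prefill:
        # Previously this branch also special-cased num_scheduler_steps > 1
        # (multi-step) and took max(max_model_len, DEFAULT_...); that path is gone.
        return DEFAULT_MAX_NUM_BATCHED_TOKENS
    return current  # explicit values and the non-chunked branch are untouched here

print(resolve_max_num_batched_tokens(None, enable_chunked_prefill=True))   # 2048
print(resolve_max_num_batched_tokens(4096, enable_chunked_prefill=True))   # 4096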
@@ -293,12 +278,6 @@ class SchedulerConfig:
                 f"({self.num_lookahead_slots}) must be greater than or "
                 "equal to 0.")
 
-        if self.num_scheduler_steps < 1:
-            raise ValueError(
-                "num_scheduler_steps "
-                f"({self.num_scheduler_steps}) must be greater than or "
-                "equal to 1.")
-
         if self.max_num_partial_prefills < 1:
             raise ValueError(
                 f"max_num_partial_prefills ({self.max_num_partial_prefills}) "
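With the num_scheduler_steps guard deleted, the remaining check on max_num_partial_prefills keeps the same shape. A toy post-init validator showing that surviving pattern (illustrative class and message text, not the real SchedulerConfig):

from dataclasses import dataclass

@dataclass
class ToyLimits:
    max_num_partial_prefills: int = 1

    def __post_init__(self) -> None:
        # Same style of range check as the one kept in SchedulerConfig.
        if self.max_num_partial_prefills < 1:
            raise ValueError(
                f"max_num_partial_prefills ({self.max_num_partial_prefills}) "
                "must be greater than or equal to 1.")

ToyLimits(max_num_partial_prefills=2)      # ok
try:
    ToyLimits(max_num_partial_prefills=0)
except ValueError as exc:
    print(exc)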
@@ -323,7 +302,3 @@ class SchedulerConfig:
                 f"max_num_partial_prefills ({self.max_num_partial_prefills}).")
 
         return self
-
-    @property
-    def is_multi_step(self) -> bool:
-        return self.num_scheduler_steps > 1
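Finally, since num_scheduler_steps no longer exists and was effectively fixed at 1, the is_multi_step property could only ever have evaluated to False, so it is deleted rather than kept as a stub. A hypothetical one-line shim for any stray call sites (name and placement are mine, not from the repo):

def is_multi_step() -> bool:
    # The removed property was num_scheduler_steps > 1; with the field gone,
    # the scheduler is always single-step, so this is a constant.
    return False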