[V0 Deprecation] Remove args for multi-step scheduling (#22779)

Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
Woosuk Kwon, 2025-08-12 20:38:18 -07:00, committed by GitHub
parent d31f97cf57
commit c5830381af
2 changed files with 1 addition and 27 deletions


@@ -161,7 +161,6 @@ def parser_with_config():
     parser.add_argument('--port', type=int)
     parser.add_argument('--tensor-parallel-size', type=int)
     parser.add_argument('--trust-remote-code', action='store_true')
-    parser.add_argument('--multi-step-stream-outputs', action=StoreBoolean)
     return parser
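
With the argument definition gone from the fixture, the flag is simply unknown to the parser. Below is a minimal sketch using a plain argparse.ArgumentParser as a stand-in for the test fixture (the StoreBoolean action and the real fixture are not reproduced); it only illustrates that the remaining flags still parse while --multi-step-stream-outputs is now rejected.

```python
import argparse

# Hypothetical stand-in for the trimmed-down test parser.
parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int)
parser.add_argument('--tensor-parallel-size', type=int)
parser.add_argument('--trust-remote-code', action='store_true')

# The surviving flags behave as before.
args = parser.parse_args(['--port', '8000', '--tensor-parallel-size', '2'])
print(args.port, args.tensor_parallel_size, args.trust_remote_code)

try:
    # With the argument removed, argparse reports an unrecognized flag
    # and exits, instead of silently accepting it.
    parser.parse_args(['--multi-step-stream-outputs', 'true'])
except SystemExit:
    print("--multi-step-stream-outputs is no longer accepted")
```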


@@ -115,12 +115,6 @@ class SchedulerConfig:
     (e.g., beam search), recomputation is not currently supported. In
     such a case, we use swapping instead."""

-    num_scheduler_steps: int = 1
-    """Maximum number of forward steps per scheduler call."""
-
-    multi_step_stream_outputs: bool = True
-    """If False, then multi-step will stream outputs at the end of all steps"""
-
     send_delta_data: bool = False
     """Private API. If used, scheduler sends delta data to
     workers instead of an entire data. It should be enabled only
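
Dropping the two fields also shrinks the constructor surface: keyword arguments that used to be accepted now fail fast. The snippet below is a hypothetical, heavily trimmed stand-in for SchedulerConfig (only two of its many fields are shown), used purely to illustrate that effect.

```python
from dataclasses import dataclass

# Hypothetical sketch of the trimmed config; not the real SchedulerConfig.
@dataclass
class SchedulerConfigSketch:
    max_model_len: int = 8192
    send_delta_data: bool = False

try:
    # Previously-valid keyword arguments are now unknown to the constructor.
    SchedulerConfigSketch(num_scheduler_steps=8, multi_step_stream_outputs=False)
except TypeError as exc:
    print(f"removed fields are rejected: {exc}")
```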
@@ -193,16 +187,7 @@ class SchedulerConfig:
         if self.max_num_batched_tokens is None:
             if self.enable_chunked_prefill:
-                if self.num_scheduler_steps > 1:
-                    # Multi-step Chunked-Prefill doesn't allow prompt-chunking
-                    # for now. Have max_num_batched_tokens set to max_model_len
-                    # so we don't reject sequences on account of a short
-                    # max_num_batched_tokens.
-                    self.max_num_batched_tokens = max(
-                        self.max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS)
-                else:
-                    self.max_num_batched_tokens = (
-                        DEFAULT_MAX_NUM_BATCHED_TOKENS)
+                self.max_num_batched_tokens = DEFAULT_MAX_NUM_BATCHED_TOKENS
             else:
                 # If max_model_len is too short, use
                 # DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value
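
With the multi-step branch gone, the chunked-prefill default for max_num_batched_tokens collapses to a single assignment. The helper below is an illustrative sketch, not the vLLM implementation: the value 2048 for DEFAULT_MAX_NUM_BATCHED_TOKENS is an assumption, and the non-chunked branch follows the comment in the diff about falling back when max_model_len is too short.

```python
# Assumed stand-in for the constant referenced in the diff.
DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048

def default_max_num_batched_tokens(enable_chunked_prefill: bool,
                                    max_model_len: int) -> int:
    """Sketch of the simplified defaulting logic after this change."""
    if enable_chunked_prefill:
        # No multi-step branch anymore: chunked prefill always starts
        # from the fixed default instead of being bumped to max_model_len.
        return DEFAULT_MAX_NUM_BATCHED_TOKENS
    # Without chunked prefill, keep room for a whole prompt when
    # max_model_len is short (per the comment retained in the diff).
    return max(max_model_len, DEFAULT_MAX_NUM_BATCHED_TOKENS)
```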
@@ -293,12 +278,6 @@ class SchedulerConfig:
                 f"({self.num_lookahead_slots}) must be greater than or "
                 "equal to 0.")

-        if self.num_scheduler_steps < 1:
-            raise ValueError(
-                "num_scheduler_steps "
-                f"({self.num_scheduler_steps}) must be greater than or "
-                "equal to 1.")
-
         if self.max_num_partial_prefills < 1:
             raise ValueError(
                 f"max_num_partial_prefills ({self.max_num_partial_prefills}) "
@@ -323,7 +302,3 @@ class SchedulerConfig:
             f"max_num_partial_prefills ({self.max_num_partial_prefills}).")

         return self
-
-    @property
-    def is_multi_step(self) -> bool:
-        return self.num_scheduler_steps > 1
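
Because the is_multi_step property no longer exists, downstream code that branched on it should drop the branch; multi-step scheduling is effectively always off. A hypothetical transition helper for code that still has to run against both old and new configs:

```python
# Hypothetical helper for code migrating off the removed property.
# On old configs it returns the real value; on the new SchedulerConfig it
# falls back to False, since multi-step scheduling has been removed.
def is_multi_step(scheduler_config) -> bool:
    return bool(getattr(scheduler_config, "is_multi_step", False))
```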