diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 23d21c0fd45ca..bb0ecf38a7468 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -21,11 +21,7 @@ logger = init_logger(__name__) RunnerType = Literal["generate", "pooling", "draft"] SchedulerPolicy = Literal["fcfs", "priority", "sjf"] -""" SJF Scheduling Policy: -It stands for shortest-job-first — requests are scheduled by total prompt + -output length (shorter first), with aging to prevent starvation. For more -information, please check: https://github.com/vllm-project/vllm/issues/29406 -""" + @config @dataclass @@ -109,7 +105,9 @@ class SchedulerConfig: - "fcfs" means first come first served, i.e. requests are handled in order of arrival.\n - "priority" means requests are handled based on given priority (lower - value means earlier handling) and time of arrival deciding any ties).""" + value means earlier handling, with time of arrival deciding any ties).\n + - "sjf" means shortest job first. Requests are scheduled by prompt length + (shortest first), with aging to prevent starvation.""" disable_chunked_mm_input: bool = False """If set to true and chunked prefill is enabled, we do not want to