Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 05:45:00 +08:00)
[Core] Make scheduling policy settable via EngineArgs (#8956)
This commit is contained in:
Parent: 2ae25f79cf
Commit: be76e5aabf
@ -2,8 +2,8 @@ import argparse
|
||||
import dataclasses
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple,
|
||||
Type, Union)
|
||||
from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
|
||||
Tuple, Type, Union)
|
||||
|
||||
import torch
|
||||
|
||||
@ -177,6 +177,7 @@ class EngineArgs:
|
||||
disable_async_output_proc: bool = False
|
||||
override_neuron_config: Optional[Dict[str, Any]] = None
|
||||
mm_processor_kwargs: Optional[Dict[str, Any]] = None
|
||||
scheduling_policy: Literal["fcfs", "priority"] = "fcfs"
|
||||
|
||||
def __post_init__(self):
|
||||
if self.tokenizer is None:
|
||||
@ -797,6 +798,16 @@ class EngineArgs:
|
||||
default=None,
|
||||
help="override or set neuron device configuration.")
|
||||
|
||||
parser.add_argument(
|
||||
'--scheduling-policy',
|
||||
choices=['fcfs', 'priority'],
|
||||
default="fcfs",
|
||||
help='The scheduling policy to use. "fcfs" (first come first served'
|
||||
', i.e. requests are handled in order of arrival; default) '
|
||||
'or "priority" (requests are handled based on given '
|
||||
'priority (lower value means earlier handling) and time of '
|
||||
'arrival deciding any ties).')
|
||||
|
||||
return parser
|
||||
|
||||
@classmethod
|
||||
@ -1011,6 +1022,7 @@ class EngineArgs:
|
||||
multi_step_stream_outputs=self.multi_step_stream_outputs,
|
||||
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
|
||||
and parallel_config.use_ray),
|
||||
policy=self.scheduling_policy,
|
||||
)
|
||||
lora_config = LoRAConfig(
|
||||
max_lora_rank=self.max_lora_rank,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user