mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 17:59:48 +08:00
[Core] Make scheduling policy settable via EngineArgs (#8956)
This commit is contained in:
parent
2ae25f79cf
commit
be76e5aabf
@@ -2,8 +2,8 @@ import argparse
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple,
|
from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
|
||||||
Type, Union)
|
Tuple, Type, Union)
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -177,6 +177,7 @@ class EngineArgs:
|
|||||||
disable_async_output_proc: bool = False
|
disable_async_output_proc: bool = False
|
||||||
override_neuron_config: Optional[Dict[str, Any]] = None
|
override_neuron_config: Optional[Dict[str, Any]] = None
|
||||||
mm_processor_kwargs: Optional[Dict[str, Any]] = None
|
mm_processor_kwargs: Optional[Dict[str, Any]] = None
|
||||||
|
scheduling_policy: Literal["fcfs", "priority"] = "fcfs"
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.tokenizer is None:
|
if self.tokenizer is None:
|
||||||
@@ -797,6 +798,16 @@ class EngineArgs:
|
|||||||
default=None,
|
default=None,
|
||||||
help="override or set neuron device configuration.")
|
help="override or set neuron device configuration.")
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--scheduling-policy',
|
||||||
|
choices=['fcfs', 'priority'],
|
||||||
|
default="fcfs",
|
||||||
|
help='The scheduling policy to use. "fcfs" (first come first served'
|
||||||
|
', i.e. requests are handled in order of arrival; default) '
|
||||||
|
'or "priority" (requests are handled based on given '
|
||||||
|
'priority (lower value means earlier handling) and time of '
|
||||||
|
'arrival deciding any ties).')
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -1011,6 +1022,7 @@ class EngineArgs:
|
|||||||
multi_step_stream_outputs=self.multi_step_stream_outputs,
|
multi_step_stream_outputs=self.multi_step_stream_outputs,
|
||||||
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
|
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
|
||||||
and parallel_config.use_ray),
|
and parallel_config.use_ray),
|
||||||
|
policy=self.scheduling_policy,
|
||||||
)
|
)
|
||||||
lora_config = LoRAConfig(
|
lora_config = LoRAConfig(
|
||||||
max_lora_rank=self.max_lora_rank,
|
max_lora_rank=self.max_lora_rank,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user