mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 18:47:01 +08:00
interface
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
8d46d5d11d
commit
06e22ba44c
69
vllm/v1/core/sched/interface.py
Normal file
69
vllm/v1/core/sched/interface.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from collections.abc import Iterable
|
||||||
|
from typing import TYPE_CHECKING, Optional, Union
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from vllm.v1.core.sched.output import SchedulerOutput
|
||||||
|
from vllm.v1.engine import EngineCoreOutputs
|
||||||
|
from vllm.v1.metrics.stats import SchedulerStats
|
||||||
|
from vllm.v1.outputs import ModelRunnerOutput
|
||||||
|
from vllm.v1.request import Request, RequestStatus
|
||||||
|
|
||||||
|
|
||||||
|
class SchedulerInterface(ABC):
    """Abstract base class declaring the methods a scheduler must provide.

    Every method except :meth:`has_requests` is abstract; a subclass has to
    implement all of them before it can be instantiated.
    """

    @abstractmethod
    def schedule(self) -> "SchedulerOutput":
        """Produce a scheduling decision.

        Returns:
            The ``SchedulerOutput`` built by the implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def update_from_output(
        self,
        scheduler_output: "SchedulerOutput",
        model_runner_output: "ModelRunnerOutput",
    ) -> "EngineCoreOutputs":
        """Combine a scheduler output with the matching model runner output.

        NOTE(review): presumably ``scheduler_output`` is a value previously
        returned by :meth:`schedule` -- confirm against the implementers.

        Args:
            scheduler_output: A ``SchedulerOutput`` instance.
            model_runner_output: A ``ModelRunnerOutput`` instance.

        Returns:
            The ``EngineCoreOutputs`` built by the implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def add_request(self, request: "Request") -> None:
        """Hand a ``Request`` to the scheduler.

        Args:
            request: The request to add.
        """
        raise NotImplementedError

    @abstractmethod
    def finish_requests(
        self,
        request_ids: Union[str, Iterable[str]],
        finished_status: "RequestStatus",
    ) -> None:
        """Finish one or more requests.

        Args:
            request_ids: Either a single string or an iterable of strings.
                Implementations must accept both forms.
            finished_status: The ``RequestStatus`` associated with the
                finished requests (presumably the final status to record --
                confirm against the implementers).
        """
        raise NotImplementedError

    @abstractmethod
    def get_num_unfinished_requests(self) -> int:
        """Return the number of unfinished requests."""
        raise NotImplementedError

    @abstractmethod
    def has_unfinished_requests(self) -> bool:
        """Return True if there are unfinished requests."""
        raise NotImplementedError

    @abstractmethod
    def has_finished_requests(self) -> bool:
        """Return True if there are finished requests.

        NOTE(review): judging by :meth:`has_requests`, these are finished
        requests not yet returned in SchedulerOutputs -- confirm against the
        implementers.
        """
        raise NotImplementedError

    def has_requests(self) -> bool:
        """Returns True if there are unfinished requests, or finished requests
        not yet returned in SchedulerOutputs.

        This default simply combines :meth:`has_unfinished_requests` and
        :meth:`has_finished_requests`, so the three predicates cannot drift
        apart. Subclasses may still override it.
        """
        return self.has_unfinished_requests() or self.has_finished_requests()

    @abstractmethod
    def get_num_unscheduled_requests(self) -> int:
        """Number of requests that are not being processed by the executor."""
        raise NotImplementedError

    @abstractmethod
    def reset_prefix_cache(self) -> bool:
        """Reset the prefix cache.

        Returns:
            A bool (presumably whether the reset succeeded -- confirm against
            the implementers).
        """
        raise NotImplementedError

    @abstractmethod
    def make_stats(self) -> Optional["SchedulerStats"]:
        """Build scheduler statistics.

        Returns:
            A ``SchedulerStats`` instance, or ``None``; when ``None`` is
            returned is up to the implementation.
        """
        raise NotImplementedError
|
||||||
@ -13,6 +13,7 @@ from vllm.logger import init_logger
|
|||||||
from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager,
|
from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager,
|
||||||
compute_encoder_budget)
|
compute_encoder_budget)
|
||||||
from vllm.v1.core.kv_cache_manager import KVCacheManager
|
from vllm.v1.core.kv_cache_manager import KVCacheManager
|
||||||
|
from vllm.v1.core.sched.interface import SchedulerInterface
|
||||||
from vllm.v1.core.sched.output import (CachedRequestData, NewRequestData,
|
from vllm.v1.core.sched.output import (CachedRequestData, NewRequestData,
|
||||||
SchedulerOutput)
|
SchedulerOutput)
|
||||||
from vllm.v1.core.sched.utils import check_stop
|
from vllm.v1.core.sched.utils import check_stop
|
||||||
@ -26,7 +27,7 @@ from vllm.v1.structured_output import StructuredOutputManager
|
|||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Scheduler:
|
class Scheduler(SchedulerInterface):
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user