mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 15:47:22 +08:00
reorder funcs
Signed-off-by: Alexander Matveev <amatveev@redhat.com>
This commit is contained in:
parent
c715fb19e5
commit
0bddb6b9a5
@@ -79,33 +79,6 @@ class ModelRunnerBase:
|
||||
assert self.model is not None
|
||||
return self.model
|
||||
|
||||
def execute_model(
|
||||
self,
|
||||
scheduler_output: "SchedulerOutput",
|
||||
) -> ModelRunnerOutput:
|
||||
raise NotImplementedError()
|
||||
|
||||
def load_model(self) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def dummy_run(
|
||||
self,
|
||||
kv_caches,
|
||||
num_tokens: int,
|
||||
seq_len: Optional[int] = None,
|
||||
exec_mode: Optional[ExecutionMode] = None,
|
||||
) -> torch.Tensor:
|
||||
raise NotImplementedError()
|
||||
|
||||
def profile_run(self) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def capture_model(self) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_kv_cache_spec(self) -> KVCacheSpec:
|
||||
"""
|
||||
Generates the KVCacheSpec by parsing the kv cache format from each
|
||||
@@ -140,3 +113,30 @@ class ModelRunnerBase:
|
||||
f"Unknown attention type: {attn_module.attn_type}")
|
||||
|
||||
return kv_cache_spec
|
||||
|
||||
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def execute_model(
|
||||
self,
|
||||
scheduler_output: "SchedulerOutput",
|
||||
) -> ModelRunnerOutput:
|
||||
raise NotImplementedError()
|
||||
|
||||
def load_model(self) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def dummy_run(
|
||||
self,
|
||||
kv_caches,
|
||||
num_tokens: int,
|
||||
seq_len: Optional[int] = None,
|
||||
exec_mode: Optional[ExecutionMode] = None,
|
||||
) -> torch.Tensor:
|
||||
raise NotImplementedError()
|
||||
|
||||
def profile_run(self) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def capture_model(self) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -80,16 +80,10 @@ class WorkerBase:
|
||||
# Initialized by the specific platform
|
||||
self.model_runner: Optional[ModelRunnerBase] = None
|
||||
|
||||
def init_device(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def load_model(self) -> None:
|
||||
assert self.model_runner is not None
|
||||
self.model_runner.load_model()
|
||||
|
||||
def determine_available_memory(self) -> int:
|
||||
raise NotImplementedError()
|
||||
|
||||
def compile_or_warm_up_model(self) -> None:
|
||||
assert self.model_runner is not None
|
||||
|
||||
@@ -113,12 +107,6 @@ class WorkerBase:
|
||||
assert self.model_runner is not None
|
||||
self.model_runner.initialize_kv_cache(kv_cache_config)
|
||||
|
||||
def execute_model(
|
||||
self,
|
||||
scheduler_output: "SchedulerOutput",
|
||||
) -> Optional[ModelRunnerOutput]:
|
||||
raise NotImplementedError()
|
||||
|
||||
def profile(self, is_start: bool = True):
|
||||
if self.profiler is None:
|
||||
raise RuntimeError("Profiler is not enabled.")
|
||||
@@ -131,6 +119,18 @@ class WorkerBase:
|
||||
# worker will always be healthy as long as it's running.
|
||||
return
|
||||
|
||||
def init_device(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def determine_available_memory(self) -> int:
|
||||
raise NotImplementedError()
|
||||
|
||||
def execute_model(
|
||||
self,
|
||||
scheduler_output: "SchedulerOutput",
|
||||
) -> Optional[ModelRunnerOutput]:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
|
||||
# Check if the GPU supports the dtype.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user