reorder funcs

Signed-off-by: Alexander Matveev <amatveev@redhat.com>
This commit is contained in:
Alexander Matveev 2025-01-27 19:45:59 +00:00
parent c715fb19e5
commit 0bddb6b9a5
2 changed files with 39 additions and 39 deletions

View File

@@ -79,33 +79,6 @@ class ModelRunnerBase:
assert self.model is not None
return self.model
def execute_model(
self,
scheduler_output: "SchedulerOutput",
) -> ModelRunnerOutput:
raise NotImplementedError()
def load_model(self) -> None:
raise NotImplementedError()
def dummy_run(
self,
kv_caches,
num_tokens: int,
seq_len: Optional[int] = None,
exec_mode: Optional[ExecutionMode] = None,
) -> torch.Tensor:
raise NotImplementedError()
def profile_run(self) -> None:
raise NotImplementedError()
def capture_model(self) -> None:
raise NotImplementedError()
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
raise NotImplementedError()
def get_kv_cache_spec(self) -> KVCacheSpec:
"""
Generates the KVCacheSpec by parsing the kv cache format from each
@@ -140,3 +113,30 @@ class ModelRunnerBase:
f"Unknown attention type: {attn_module.attn_type}")
return kv_cache_spec
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
raise NotImplementedError()
def execute_model(
self,
scheduler_output: "SchedulerOutput",
) -> ModelRunnerOutput:
raise NotImplementedError()
def load_model(self) -> None:
raise NotImplementedError()
def dummy_run(
self,
kv_caches,
num_tokens: int,
seq_len: Optional[int] = None,
exec_mode: Optional[ExecutionMode] = None,
) -> torch.Tensor:
raise NotImplementedError()
def profile_run(self) -> None:
raise NotImplementedError()
def capture_model(self) -> None:
raise NotImplementedError()

View File

@@ -80,16 +80,10 @@ class WorkerBase:
# Initialized by the specific platform
self.model_runner: Optional[ModelRunnerBase] = None
def init_device(self):
raise NotImplementedError()
def load_model(self) -> None:
assert self.model_runner is not None
self.model_runner.load_model()
def determine_available_memory(self) -> int:
raise NotImplementedError()
def compile_or_warm_up_model(self) -> None:
assert self.model_runner is not None
@@ -113,12 +107,6 @@ class WorkerBase:
assert self.model_runner is not None
self.model_runner.initialize_kv_cache(kv_cache_config)
def execute_model(
self,
scheduler_output: "SchedulerOutput",
) -> Optional[ModelRunnerOutput]:
raise NotImplementedError()
def profile(self, is_start: bool = True):
if self.profiler is None:
raise RuntimeError("Profiler is not enabled.")
@@ -131,6 +119,18 @@ class WorkerBase:
# worker will always be healthy as long as it's running.
return
def init_device(self):
raise NotImplementedError()
def determine_available_memory(self) -> int:
raise NotImplementedError()
def execute_model(
self,
scheduler_output: "SchedulerOutput",
) -> Optional[ModelRunnerOutput]:
raise NotImplementedError()
def check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
# Check if the GPU supports the dtype.