mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-15 12:37:05 +08:00
wip
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
parent
dfc84b11a9
commit
83d11373a4
@ -78,7 +78,7 @@ class GPUModelRunner:
|
||||
)
|
||||
self.sampler = Sampler()
|
||||
|
||||
def load_model(self, eep_scale_up: bool = False) -> None:
|
||||
def load_model(self, *args, **kwargs) -> None:
|
||||
time_before_load = time.perf_counter()
|
||||
with DeviceMemoryProfiler() as m:
|
||||
model_loader = get_model_loader(self.vllm_config.load_config)
|
||||
@ -131,6 +131,12 @@ class GPUModelRunner:
|
||||
self.kv_caches,
|
||||
)
|
||||
|
||||
def _dummy_run(self, num_tokens: int, *args, **kwargs) -> None:
|
||||
return None, None
|
||||
|
||||
def _dummy_sampler_run(self, hidden_states: torch.Tensor, *args, **kwargs) -> None:
|
||||
return None
|
||||
|
||||
def update_states(self, scheduler_output: SchedulerOutput) -> None:
|
||||
for req_id in scheduler_output.preempted_req_ids:
|
||||
self.req_states.remove_request(req_id)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user