Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
Woosuk Kwon 2025-09-16 00:58:12 +00:00
parent dfc84b11a9
commit 83d11373a4

View File

@@ -78,7 +78,7 @@ class GPUModelRunner:
)
self.sampler = Sampler()
def load_model(self, eep_scale_up: bool = False) -> None:
def load_model(self, *args, **kwargs) -> None:
time_before_load = time.perf_counter()
with DeviceMemoryProfiler() as m:
model_loader = get_model_loader(self.vllm_config.load_config)
@@ -131,6 +131,12 @@ class GPUModelRunner:
self.kv_caches,
)
def _dummy_run(self, num_tokens: int, *args, **kwargs) -> None:
return None, None
def _dummy_sampler_run(self, hidden_states: torch.Tensor, *args, **kwargs) -> None:
return None
def update_states(self, scheduler_output: SchedulerOutput) -> None:
for req_id in scheduler_output.preempted_req_ids:
self.req_states.remove_request(req_id)