From 83d11373a434d276ee206101beb24c36e7304646 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Tue, 16 Sep 2025 00:58:12 +0000
Subject: [PATCH] wip

Signed-off-by: Woosuk Kwon
---
 vllm/v1/worker/gpu/model_runner.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/worker/gpu/model_runner.py b/vllm/v1/worker/gpu/model_runner.py
index 4ba18fdfe1c62..5778a27da2a39 100644
--- a/vllm/v1/worker/gpu/model_runner.py
+++ b/vllm/v1/worker/gpu/model_runner.py
@@ -78,7 +78,7 @@ class GPUModelRunner:
         )
         self.sampler = Sampler()
 
-    def load_model(self, eep_scale_up: bool = False) -> None:
+    def load_model(self, *args, **kwargs) -> None:
         time_before_load = time.perf_counter()
         with DeviceMemoryProfiler() as m:
             model_loader = get_model_loader(self.vllm_config.load_config)
@@ -131,6 +131,14 @@ class GPUModelRunner:
             self.kv_caches,
         )
 
+    def _dummy_run(self, num_tokens: int, *args, **kwargs) -> tuple[None, None]:
+        # WIP stub: no dummy forward pass is executed yet.
+        return None, None
+
+    def _dummy_sampler_run(self, hidden_states: torch.Tensor, *args, **kwargs) -> None:
+        # WIP stub: no dummy sampler pass is executed yet.
+        return None
+
     def update_states(self, scheduler_output: SchedulerOutput) -> None:
         for req_id in scheduler_output.preempted_req_ids:
             self.req_states.remove_request(req_id)
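
Note (not part of the patch): the stubs above let the engine's warmup path, which calls _dummy_run and _dummy_sampler_run, proceed against this work-in-progress runner without doing real work. Below is a minimal sketch of that call pattern; FakeRunner is a hypothetical stand-in for GPUModelRunner, and the call sequence is illustrative rather than vLLM's exact warmup API.

    # Minimal sketch (assumption: FakeRunner mirrors the stubbed methods from
    # the patch; the warmup sequence below is illustrative, not vLLM's API).

    class FakeRunner:
        def _dummy_run(self, num_tokens: int, *args, **kwargs) -> tuple[None, None]:
            # Mirrors the patch: placeholder until the real dummy forward exists.
            return None, None

        def _dummy_sampler_run(self, hidden_states, *args, **kwargs) -> None:
            # Mirrors the patch: placeholder until the real dummy sampler run exists.
            return None

    # Warmup-style usage: a dummy forward pass followed by a dummy sampler pass.
    runner = FakeRunner()
    hidden_states, _ = runner._dummy_run(num_tokens=8)
    runner._dummy_sampler_run(hidden_states)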