diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index e02a22093b29b..9e7e44d068612 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -328,7 +328,6 @@ class Worker(WorkerBase):
                 sort_by="self_cuda_time_total"))
 
     def execute_dummy_batch(self) -> None:
-        # TODO: adding allow_microbatching will break non-gpu backends
         self.model_runner._dummy_run(1)
 
     def add_lora(self, lora_request: LoRARequest) -> bool: