diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 8c75e8914857b..c9a7d2009ebe2 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -488,7 +488,7 @@ class Worker(WorkerBase): sort_by="self_cuda_time_total")) def execute_dummy_batch(self) -> None: - self.model_runner._dummy_run(1, uniform_decode=True) + self.model_runner._dummy_run(16, uniform_decode=True) def add_lora(self, lora_request: LoRARequest) -> bool: return self.model_runner.add_lora(lora_request)