From 5323969fcfd5c88817e67cb8136af10ed5c70f6a Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 24 Apr 2024 08:56:58 +0000 Subject: [PATCH] Increase #blocks --- vllm/worker/tpu_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/worker/tpu_worker.py b/vllm/worker/tpu_worker.py index 0bd1b8437da24..d69acb4ac16bf 100644 --- a/vllm/worker/tpu_worker.py +++ b/vllm/worker/tpu_worker.py @@ -64,7 +64,7 @@ class TPUWorker(LoraNotSupportedWorkerBase): self.model_runner.load_model() def determine_num_available_blocks(self) -> Tuple[int, int]: - num_tpu_blocks = 100 + num_tpu_blocks = 1000 return num_tpu_blocks, 0 def initialize_cache(