diff --git a/vllm/worker/tpu_worker.py b/vllm/worker/tpu_worker.py
index 0bd1b8437da24..d69acb4ac16bf 100644
--- a/vllm/worker/tpu_worker.py
+++ b/vllm/worker/tpu_worker.py
@@ -64,7 +64,7 @@ class TPUWorker(LoraNotSupportedWorkerBase):
         self.model_runner.load_model()
 
     def determine_num_available_blocks(self) -> Tuple[int, int]:
-        num_tpu_blocks = 100
+        num_tpu_blocks = 1000
         return num_tpu_blocks, 0
 
     def initialize_cache(