From 7888e1d0a3eb83becda81bbad5e9848dbb598453 Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Date: Thu, 13 Mar 2025 23:40:05 -0400 Subject: [PATCH] [V1] TPU - Enable prefix caching by default (#14773) --- vllm/platforms/tpu.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index fc68e5d63a6e5..8e2c28d9327b5 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -108,12 +108,6 @@ class TpuPlatform(Platform): parallel_config.worker_cls = \ "vllm.worker.tpu_worker.TPUWorker" - # Adjust scheduler config for V1 - # TODO: Add support for these - if envs.VLLM_USE_V1 and vllm_config.cache_config.enable_prefix_caching: - logger.warning("[V1][TPU] Disable prefix caching") - vllm_config.cache_config.enable_prefix_caching = False - assert not vllm_config.speculative_config, ( "Speculative decoding is not yet supported for TPU backend")