mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 18:35:00 +08:00
[V1] TPU - Enable prefix caching by default (#14773)
This commit is contained in:
parent
60c872d4b6
commit
7888e1d0a3
@ -108,12 +108,6 @@ class TpuPlatform(Platform):
|
|||||||
parallel_config.worker_cls = \
|
parallel_config.worker_cls = \
|
||||||
"vllm.worker.tpu_worker.TPUWorker"
|
"vllm.worker.tpu_worker.TPUWorker"
|
||||||
|
|
||||||
# Adjust scheduler config for V1
|
|
||||||
# TODO: Add support for these
|
|
||||||
if envs.VLLM_USE_V1 and vllm_config.cache_config.enable_prefix_caching:
|
|
||||||
logger.warning("[V1][TPU] Disable prefix caching")
|
|
||||||
vllm_config.cache_config.enable_prefix_caching = False
|
|
||||||
|
|
||||||
assert not vllm_config.speculative_config, (
|
assert not vllm_config.speculative_config, (
|
||||||
"Speculative decoding is not yet supported for TPU backend")
|
"Speculative decoding is not yet supported for TPU backend")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user