[Bugfix][TPU] Set readonly=True for non-root devices (#6980)

This commit is contained in:
Woosuk Kwon 2024-07-31 00:19:28 -07:00 committed by GitHub
parent 9f0e69b653
commit 533d1932d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -104,7 +104,10 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
# Use persistent cache to avoid XLA recompilation.
# NOTE(woosuk): This does not completely eliminate the recompilation
# overhead because dynamo does not cache the compiled results.
xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH, readonly=False)
# NOTE(woosuk): Set readonly=False only for the rank 0 process to avoid
# race conditions.
xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH,
readonly=not self.is_driver_worker)
def load_model(self):
self.model_runner.load_model()