mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 22:15:29 +08:00
[Bugfix][TPU] Set readonly=True for non-root devices (#6980)
This commit is contained in:
parent
9f0e69b653
commit
533d1932d2
@ -104,7 +104,10 @@ class TPUWorker(LoraNotSupportedWorkerBase, LocalOrDistributedWorkerBase):
|
|||||||
# Use persistent cache to avoid XLA recompilation.
|
# Use persistent cache to avoid XLA recompilation.
|
||||||
# NOTE(woosuk): This does not completely eliminate the recompilation
|
# NOTE(woosuk): This does not completely eliminate the recompilation
|
||||||
# overhead because dynamo does not cache the compiled results.
|
# overhead because dynamo does not cache the compiled results.
|
||||||
xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH, readonly=False)
|
# NOTE(woosuk): Set readonly=False only for the rank 0 process to avoid
|
||||||
|
# race conditions.
|
||||||
|
xr.initialize_cache(envs.VLLM_XLA_CACHE_PATH,
|
||||||
|
readonly=not self.is_driver_worker)
|
||||||
|
|
||||||
def load_model(self):
|
def load_model(self):
|
||||||
self.model_runner.load_model()
|
self.model_runner.load_model()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user