From 53d2420b4447fbcab572dc23d2c3bb9224a8a561 Mon Sep 17 00:00:00 2001
From: Daniele <36171005+dtrifiro@users.noreply.github.com>
Date: Wed, 10 Dec 2025 13:58:35 +0100
Subject: [PATCH] [Bugfix] tpu_model_runner: set vllm config context when
 calling reset_dynamo_cache() (#30331)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniele Trifirò
---
 vllm/v1/worker/tpu_worker.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py
index 7a10ac198985e..5f6136b178b46 100644
--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -10,7 +10,7 @@
 import torch
 import torch.nn as nn
 import vllm.envs as envs
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.distributed import (
     ensure_model_parallel_initialized,
     init_distributed_environment,
@@ -207,7 +207,8 @@ class TPUWorker:
         # one compiled bytecode. Having one FX graph/cached bytecode per
         # compiled model is required for `support_torch_compile` decorator to
         # skip dynamo guard.
-        self.model_runner.reset_dynamo_cache()
+        with set_current_vllm_config(self.vllm_config):
+            self.model_runner.reset_dynamo_cache()
 
         # Get the maximum amount of memory used by the model weights and
         # intermediate activations.
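
Why the context manager matters: helpers reached while the dynamo cache is
being reset may look up the "current" VllmConfig rather than receive it as an
argument, and before this patch no config was installed at that point in the
TPU worker. The patch itself only shows that vllm.config exposes
set_current_vllm_config; the sketch below is an illustrative assumption of how
such a context-manager pattern typically behaves, not vLLM's actual
implementation, and the names _current_config and get_current_vllm_config are
used here purely for illustration.

    # Minimal sketch of the assumed "current config" pattern (not vLLM's code).
    from contextlib import contextmanager

    _current_config = None  # hypothetical module-level slot


    @contextmanager
    def set_current_vllm_config(config):
        # Install `config` for the duration of the block, restoring the
        # previous value afterwards.
        global _current_config
        prev = _current_config
        _current_config = config
        try:
            yield
        finally:
            _current_config = prev


    def get_current_vllm_config():
        # Hypothetical lookup used by code that cannot take the config as an
        # argument; fails if nothing has been installed.
        if _current_config is None:
            raise RuntimeError("no VllmConfig is set for the current context")
        return _current_config

Under this reading, wrapping reset_dynamo_cache() in
set_current_vllm_config(self.vllm_config) ensures that any such lookup made
during the cache reset resolves to the worker's config instead of finding no
config at all.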