From 53d2420b4447fbcab572dc23d2c3bb9224a8a561 Mon Sep 17 00:00:00 2001
From: Daniele <36171005+dtrifiro@users.noreply.github.com>
Date: Wed, 10 Dec 2025 13:58:35 +0100
Subject: [PATCH] [Bugfix] tpu_model_runner: set vllm config context when
 calling reset_dynamo_cache() (#30331)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniele Trifirò
---
 vllm/v1/worker/tpu_worker.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py
index 7a10ac198985e..5f6136b178b46 100644
--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -10,7 +10,7 @@
 import torch
 import torch.nn as nn
 import vllm.envs as envs
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.distributed import (
     ensure_model_parallel_initialized,
     init_distributed_environment,
@@ -207,7 +207,8 @@ class TPUWorker:
         # one compiled bytecode. Having one FX graph/cached bytecode per
         # compiled model is required for `support_torch_compile` decorator to
         # skip dynamo guard.
-        self.model_runner.reset_dynamo_cache()
+        with set_current_vllm_config(self.vllm_config):
+            self.model_runner.reset_dynamo_cache()
 
         # Get the maximum amount of memory used by the model weights and
         # intermediate activations.
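
Why the context manager matters: helpers reached while the dynamo cache is
being reset may look up the "current" VllmConfig rather than receive it as an
argument, and before this patch no config was installed at that point in the
TPU worker. The patch itself only shows that vllm.config exposes
set_current_vllm_config; the sketch below is an illustrative assumption of how
such a context-manager pattern typically behaves, not vLLM's actual
implementation, and the names _current_config and get_current_vllm_config are
used here purely for illustration.

    # Minimal sketch of the assumed "current config" pattern (not vLLM's code).
    from contextlib import contextmanager

    _current_config = None  # hypothetical module-level slot


    @contextmanager
    def set_current_vllm_config(config):
        # Install `config` for the duration of the block, restoring the
        # previous value afterwards.
        global _current_config
        prev = _current_config
        _current_config = config
        try:
            yield
        finally:
            _current_config = prev


    def get_current_vllm_config():
        # Hypothetical lookup used by code that cannot take the config as an
        # argument; fails if nothing has been installed.
        if _current_config is None:
            raise RuntimeError("no VllmConfig is set for the current context")
        return _current_config

Under this reading, wrapping reset_dynamo_cache() in
set_current_vllm_config(self.vllm_config) ensures that any such lookup made
during the cache reset resolves to the worker's config instead of finding no
config at all.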