[Bugfix] tpu_model_runner: set vllm config context when calling reset_dynamo_cache() (#30331)

Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
Daniele Trifirò, 2025-12-10 13:58:35 +01:00 (committed by GitHub)
parent 9db78f34dc
commit 53d2420b44


@@ -10,7 +10,7 @@
 import torch
 import torch.nn as nn
 import vllm.envs as envs
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.distributed import (
     ensure_model_parallel_initialized,
     init_distributed_environment,
@@ -207,7 +207,8 @@ class TPUWorker:
         # one compiled bytecode. Having one FX graph/cached bytecode per
         # compiled model is required for `support_torch_compile` decorator to
         # skip dynamo guard.
-        self.model_runner.reset_dynamo_cache()
+        with set_current_vllm_config(self.vllm_config):
+            self.model_runner.reset_dynamo_cache()
         # Get the maximum amount of memory used by the model weights and
         # intermediate activations.
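
For reference, a minimal sketch of the pattern this patch applies. The helper name and the `worker` parameter are illustrative only and not part of vLLM; the assumption is a TPUWorker-like object that exposes `vllm_config` and a `model_runner` with a `reset_dynamo_cache()` method, as in the hunk above.

    from vllm.config import set_current_vllm_config


    def reset_dynamo_cache_with_config(worker) -> None:
        # Hypothetical helper mirroring the patched TPUWorker code path:
        # make the worker's VllmConfig the "current" config before the Dynamo
        # cache is rebuilt, so any code reached during the reset (such as
        # modules wrapped by the `support_torch_compile` decorator) can look
        # up the active configuration instead of finding none set.
        with set_current_vllm_config(worker.vllm_config):
            worker.model_runner.reset_dynamo_cache()

Because `set_current_vllm_config` is used as a context manager, the previous config context is expected to be restored on exit, so the change only affects the duration of the cache reset.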