[Bugfix] tpu_model_runner: set vllm config context when calling reset_dynamo_cache() (#30331)
Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
commit 53d2420b44
parent 9db78f34dc
@@ -10,7 +10,7 @@ import torch
 import torch.nn as nn
 
 import vllm.envs as envs
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.distributed import (
     ensure_model_parallel_initialized,
     init_distributed_environment,
@@ -207,7 +207,8 @@ class TPUWorker:
         # one compiled bytecode. Having one FX graph/cached bytecode per
         # compiled model is required for `support_torch_compile` decorator to
         # skip dynamo guard.
-        self.model_runner.reset_dynamo_cache()
+        with set_current_vllm_config(self.vllm_config):
+            self.model_runner.reset_dynamo_cache()
 
         # Get the maximum amount of memory used by the model weights and
         # intermediate activations.
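
For context, a minimal sketch of the pattern this fix relies on. It assumes, as the diff suggests, that `set_current_vllm_config` in `vllm.config` is a context manager and that the active config can be read back via `get_current_vllm_config()`; the helper `reset_with_config` below is hypothetical and only illustrates why the reset is wrapped in the context.

# Minimal sketch, assuming vllm.config exposes set_current_vllm_config as a
# context manager and get_current_vllm_config to read the active config.
# reset_with_config is a hypothetical helper, not part of the commit.
from vllm.config import (VllmConfig, get_current_vllm_config,
                         set_current_vllm_config)


def reset_with_config(model_runner, vllm_config: VllmConfig) -> None:
    with set_current_vllm_config(vllm_config):
        # Inside the block, code triggered by the cache reset that looks up
        # the current vLLM config sees the worker's configuration instead of
        # an unset/stale one.
        assert get_current_vllm_config() is vllm_config
        model_runner.reset_dynamo_cache()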