mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-23 17:24:25 +08:00
[Bugfix] tpu_model_runner: set vllm config context when calling reset_dynamo_cache() (#30331)
Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com>
This commit is contained in:
parent
9db78f34dc
commit
53d2420b44
@@ -10,7 +10,7 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig, set_current_vllm_config
|
||||||
from vllm.distributed import (
|
from vllm.distributed import (
|
||||||
ensure_model_parallel_initialized,
|
ensure_model_parallel_initialized,
|
||||||
init_distributed_environment,
|
init_distributed_environment,
|
||||||
@@ -207,7 +207,8 @@ class TPUWorker:
|
|||||||
# one compiled bytecode. Having one FX graph/cached bytecode per
|
# one compiled bytecode. Having one FX graph/cached bytecode per
|
||||||
# compiled model is required for `support_torch_compile` decorator to
|
# compiled model is required for `support_torch_compile` decorator to
|
||||||
# skip dynamo guard.
|
# skip dynamo guard.
|
||||||
self.model_runner.reset_dynamo_cache()
|
with set_current_vllm_config(self.vllm_config):
|
||||||
|
self.model_runner.reset_dynamo_cache()
|
||||||
|
|
||||||
# Get the maximum amount of memory used by the model weights and
|
# Get the maximum amount of memory used by the model weights and
|
||||||
# intermediate activations.
|
# intermediate activations.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user