mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-25 10:36:32 +08:00
Disable remote caching when calling compile_fx (#16611)
Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
parent
0d7d05f4b6
commit
966c742ed2
@ -290,6 +290,19 @@ class InductorAdaptor(CompilerInterface):
|
||||
# Dynamo metrics context, see method for more details.
|
||||
stack.enter_context(self.metrics_context())
|
||||
|
||||
# Disable remote caching. When these are on, on remote cache-hit,
|
||||
# the monkey-patched functions never actually get called.
|
||||
# vLLM today assumes and requires the monkey-patched functions to
|
||||
# get hit.
|
||||
# TODO(zou3519): we're going to replace this all with
|
||||
# standalone_compile sometime.
|
||||
if is_torch_equal_or_newer("2.6"):
|
||||
stack.enter_context(
|
||||
torch._inductor.config.patch(fx_graph_remote_cache=False))
|
||||
stack.enter_context(
|
||||
torch._functorch.config.patch(
|
||||
enable_remote_autograd_cache=False))
|
||||
|
||||
compiled_graph = compile_fx(
|
||||
graph,
|
||||
example_inputs,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user