Disable remote caching when calling compile_fx (#16611)

Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
Richard Zou 2025-04-16 01:18:28 -04:00 committed by GitHub
parent 0d7d05f4b6
commit 966c742ed2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -290,6 +290,19 @@ class InductorAdaptor(CompilerInterface):
# Dynamo metrics context, see method for more details.
stack.enter_context(self.metrics_context())
# Disable remote caching. With remote caching enabled, a remote cache
# hit means the monkey-patched functions never actually get called.
# vLLM currently assumes and requires that the monkey-patched
# functions are called.
# TODO(zou3519): we're going to replace this all with
# standalone_compile sometime.
if is_torch_equal_or_newer("2.6"):
stack.enter_context(
torch._inductor.config.patch(fx_graph_remote_cache=False))
stack.enter_context(
torch._functorch.config.patch(
enable_remote_autograd_cache=False))
compiled_graph = compile_fx(
graph,
example_inputs,