From 53cd7f868b3632cbbe982cffaee8e16fb49dd694 Mon Sep 17 00:00:00 2001
From: Zhengxu Chen
Date: Wed, 17 Dec 2025 05:00:12 -0500
Subject: [PATCH] [compile] Recompile graph module during Dynamo cache loading. (#30743)

Signed-off-by: Zhengxu Chen
---
 vllm/compilation/caching.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py
index ce482572b401b..fc02a08f74265 100644
--- a/vllm/compilation/caching.py
+++ b/vllm/compilation/caching.py
@@ -104,6 +104,7 @@ class VllmSerializableFunction(SerializableCallable):
         state = pickle.loads(data)
         fake_mode = FakeTensorMode(shape_env=ShapeEnv())
         state["graph_module"] = GraphPickler.loads(state["graph_module"], fake_mode)
+        state["graph_module"].recompile()
         state["example_inputs"] = GraphPickler.loads(state["example_inputs"], fake_mode)
         vllm_backend = VllmBackend(get_current_vllm_config(), state["prefix"])