mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-10 13:02:30 +08:00
[compile] Recompile graph module during Dynamo cache loading. (#30743)
Signed-off-by: Zhengxu Chen <zhxchen17@fb.com>
This commit is contained in:
parent
7b966ae2ba
commit
53cd7f868b
@@ -104,6 +104,7 @@ class VllmSerializableFunction(SerializableCallable):
|
|||||||
state = pickle.loads(data)
|
state = pickle.loads(data)
|
||||||
fake_mode = FakeTensorMode(shape_env=ShapeEnv())
|
fake_mode = FakeTensorMode(shape_env=ShapeEnv())
|
||||||
state["graph_module"] = GraphPickler.loads(state["graph_module"], fake_mode)
|
state["graph_module"] = GraphPickler.loads(state["graph_module"], fake_mode)
|
||||||
|
state["graph_module"].recompile()
|
||||||
state["example_inputs"] = GraphPickler.loads(state["example_inputs"], fake_mode)
|
state["example_inputs"] = GraphPickler.loads(state["example_inputs"], fake_mode)
|
||||||
vllm_backend = VllmBackend(get_current_vllm_config(), state["prefix"])
|
vllm_backend = VllmBackend(get_current_vllm_config(), state["prefix"])
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user