add compile

2026-03-16 14:27:19 +08:00 · 2024-07-26 19:29:36 -07:00 · 2024-07-26 19:29:36 -07:00 · 617fb893d5
commit 617fb893d5
parent 55712941e5
1 changed files with 10 additions and 0 deletions
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -787,6 +787,16 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
                    "provided. Defaulting to scaling factors of 1.0. "
                    "This may lead to less accurate results!")

+        count = 0
+
+        def backend(gm, input):  # callable passed as backend= to torch.compile below
+            nonlocal count
+            count += 1  # bump the enclosing counter on each call to this backend
+            print(count)  # debug output: running number of backend calls
+            return gm.forward  # hand back gm's own forward as-is; nothing is rewritten here
+
+        self.model = torch.compile(self.model, backend=backend, fullgraph=True)
+
    def save_sharded_state(
        self,
        path: str,