mem

2026-06-02 13:37:53 +08:00 · 2025-10-23 00:19:05 +00:00 · 2025-10-23 00:19:05 +00:00 · f65da69c72
commit f65da69c72
parent a5281395e9
1 changed files with 8 additions and 0 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@ -509,6 +509,14 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
            pin_memory=self.pin_memory,
        )
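        # NOTE: disabled debugging aid. Uncomment to record CUDA memory
        # history and dump a snapshot to /tmp when an out-of-memory
        # observer callback fires.
        # device_id = self.device.index
        # def cb(_device, _alloc, _device_alloc, _device_free):
        #     torch.cuda.memory._dump_snapshot(f"/tmp/vllm_oom_{device_id}.pickle")
        # torch.cuda.memory._record_memory_history(max_entries=100_000)
        # torch._C._cuda_attach_out_of_memory_observer(cb)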
    def reset_mm_cache(self) -> None:
        """Reset the cache held by ``self.mm_budget``, if one is set.

        When ``self.mm_budget`` is falsy (for example ``None``) nothing is
        called; otherwise its ``reset_cache()`` method is invoked.
        """
        budget = self.mm_budget
        if budget:
            budget.reset_cache()