diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
index f64ff0014b2dd..c953fb9db3bc1 100644
--- a/vllm/distributed/device_communicators/all2all.py
+++ b/vllm/distributed/device_communicators/all2all.py
@@ -281,3 +281,21 @@ class DeepEPLLAll2AllManager(DeepEPAll2AllManagerBase):
         first_handle = self.handle_caches[0].get_or_create(buffer_kwargs, deep_ep.Buffer)
         second_handle = self.handle_caches[1].get_or_create(buffer_kwargs, deep_ep.Buffer)
         return [first_handle, second_handle]
+
+    def destroy(self):
+        """Destroy every cached handle, then empty each handle cache.
+
+        Each cache in ``self.handle_caches`` is processed while holding that
+        cache's lock: ``destroy()`` is called on every stored handle and the
+        entries are then dropped, so a destroyed handle is not left behind in
+        the cache and a repeated ``destroy()`` call does not destroy the same
+        handle twice.
+        """
+        for handle_cache in self.handle_caches:
+            # NOTE(review): relies on the cache's private ``_lock`` and
+            # ``_cache`` attributes; assumes ``_cache`` is a mutable mapping
+            # (supports ``values()`` and ``clear()``) -- confirm.
+            with handle_cache._lock:
+                for handle in handle_cache._cache.values():
+                    handle.destroy()
+                handle_cache._cache.clear()