diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py index ec4417290f611..ee0c1168f3cd0 100644 --- a/vllm/v1/utils.py +++ b/vllm/v1/utils.py @@ -117,7 +117,7 @@ class CpuGpuBuffer: dtype=dtype, device="cpu", pin_memory=pin_memory) - self.gpu = self.cpu.to(device) + self.gpu = torch.zeros_like(self.cpu, device=device) self.np: np.ndarray # To keep type hints simple (avoiding generics and subclasses), we # only conditionally create the numpy array attribute. This can cause