From c3c2903e72c6e85a81ff6de8b879f4c82e8ad364 Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Thu, 13 Jun 2024 03:58:53 +0800
Subject: [PATCH] [Bugfix] Add device assertion to TorchSDPA (#5402)

---
 vllm/attention/selector.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index 3f0e29c73e0cd..8b07fb2d768f5 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -58,6 +58,9 @@ def get_attn_backend(
             ROCmFlashAttentionBackend)
         return ROCmFlashAttentionBackend
     elif backend == _Backend.TORCH_SDPA:
+        # TODO: make XPU backend available here.
+        assert is_cpu(), RuntimeError(
+            "Torch SDPA backend is only used for the CPU device.")
         logger.info("Using Torch SDPA backend.")
         from vllm.attention.backends.torch_sdpa import TorchSDPABackend
         return TorchSDPABackend