From c3c2903e72c6e85a81ff6de8b879f4c82e8ad364 Mon Sep 17 00:00:00 2001
From: "Li, Jiang" <jiang1.li@intel.com>
Date: Thu, 13 Jun 2024 03:58:53 +0800
Subject: [PATCH] [Bugfix] Add device assertion to TorchSDPA (#5402)

---
 vllm/attention/selector.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index 3f0e29c73e0cd..8b07fb2d768f5 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -58,6 +58,9 @@ def get_attn_backend(
             ROCmFlashAttentionBackend)
         return ROCmFlashAttentionBackend
     elif backend == _Backend.TORCH_SDPA:
+        # TODO: make XPU backend available here.
+        assert is_cpu(), RuntimeError(
+            "Torch SDPA backend is only used for the CPU device.")
         logger.info("Using Torch SDPA backend.")
         from vllm.attention.backends.torch_sdpa import TorchSDPABackend
         return TorchSDPABackend