From 6c5f82e5aa87cd73ce03ce10fc44138f75ee1aea Mon Sep 17 00:00:00 2001
From: "Chendi.Xue"
Date: Fri, 19 Sep 2025 23:41:23 -0500
Subject: [PATCH] [BUG FIX][NON-CUDA] quick fix to avoid calling
 cudagraph_unsafe in attention (#25298)

Signed-off-by: Chendi Xue
---
 vllm/attention/layer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 3d1269c0ecea..544a72052442 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -29,6 +29,10 @@ from vllm.utils import GiB_bytes, direct_register_custom_op
 
 logger = init_logger(__name__)
 USE_XFORMERS_OPS = None
+try:
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+except AttributeError:
+    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
 
 
 def check_xformers_availability():
@@ -577,7 +581,7 @@ direct_register_custom_op(
     mutates_args=[],
     fake_impl=unified_attention_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
 
 
@@ -628,5 +632,5 @@ direct_register_custom_op(
     mutates_args=["output", "output_block_scale"],
     fake_impl=unified_attention_with_output_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
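
To see the guard in isolation: the patch computes the tags tuple once at import
time, so that op registration succeeds on PyTorch builds whose torch._C.Tag enum
has no cudagraph_unsafe member (e.g. some non-CUDA or older builds). Below is a
minimal standalone sketch of the same feature-detection pattern; it assumes only
a working torch install, and the commented getattr variant and the print line
are illustrative additions, not part of the patch.

    import torch

    # Probe for the optional tag; builds without it raise AttributeError.
    try:
        tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
    except AttributeError:
        # Fall back to "no tags" so custom-op registration proceeds unchanged.
        tag_cudagraph_unsafe = ()

    # An equivalent getattr-based form (hypothetical alternative, not what the
    # patch uses):
    # _tag = getattr(torch._C.Tag, "cudagraph_unsafe", None)
    # tag_cudagraph_unsafe = (_tag, ) if _tag is not None else ()

    print("tags passed to op registration:", tag_cudagraph_unsafe)

Doing the probe once at module scope, rather than at each direct_register_custom_op
call site, keeps both registrations in layer.py on the same code path.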