From 6c5f82e5aa87cd73ce03ce10fc44138f75ee1aea Mon Sep 17 00:00:00 2001
From: "Chendi.Xue"
Date: Fri, 19 Sep 2025 23:41:23 -0500
Subject: [PATCH] [BUG FIX][NON-CUDA] quick fix to avoid calling
 cudagraph_unsafe in attention (#25298)

Signed-off-by: Chendi Xue
---
 vllm/attention/layer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 3d1269c0ecea..544a72052442 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -29,6 +29,10 @@ from vllm.utils import GiB_bytes, direct_register_custom_op
 
 logger = init_logger(__name__)
 USE_XFORMERS_OPS = None
+try:
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+except AttributeError:
+    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
 
 
 def check_xformers_availability():
@@ -577,7 +581,7 @@ direct_register_custom_op(
     mutates_args=[],
     fake_impl=unified_attention_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
 
 
@@ -628,5 +632,5 @@ direct_register_custom_op(
     mutates_args=["output", "output_block_scale"],
     fake_impl=unified_attention_with_output_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
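
To see the guard in isolation: the patch computes the tags tuple once at import
time, so that op registration succeeds on PyTorch builds whose torch._C.Tag enum
has no cudagraph_unsafe member (e.g. some non-CUDA or older builds). Below is a
minimal standalone sketch of the same feature-detection pattern; it assumes only
a working torch install, and the commented getattr variant and the print line
are illustrative additions, not part of the patch.

    import torch

    # Probe for the optional tag; builds without it raise AttributeError.
    try:
        tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
    except AttributeError:
        # Fall back to "no tags" so custom-op registration proceeds unchanged.
        tag_cudagraph_unsafe = ()

    # An equivalent getattr-based form (hypothetical alternative, not what the
    # patch uses):
    # _tag = getattr(torch._C.Tag, "cudagraph_unsafe", None)
    # tag_cudagraph_unsafe = (_tag, ) if _tag is not None else ()

    print("tags passed to op registration:", tag_cudagraph_unsafe)

Doing the probe once at module scope, rather than at each direct_register_custom_op
call site, keeps both registrations in layer.py on the same code path.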