From bbdc0f2366997536207abc212fcdae7a1b688159 Mon Sep 17 00:00:00 2001
From: Rohan Potdar <66227218+Rohan138@users.noreply.github.com>
Date: Thu, 18 Sep 2025 12:46:47 -0500
Subject: [PATCH] [ROCm][AITER][Bugfix] Switch AITER to use PIECEWISE_AND_FULL
 compilation (#25104)

Signed-off-by: Rohan138 <rohanpotdar138@gmail.com>
---
 vllm/v1/attention/backends/rocm_aiter_fa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 8eb3505cf274d..afb2283c44d37 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -232,7 +232,7 @@ class AiterFlashAttentionMetadata:
 
 class AiterFlashAttentionMetadataBuilder(
         AttentionMetadataBuilder[AiterFlashAttentionMetadata]):
-    cudagraph_support = AttentionCGSupport.ALWAYS
+    cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
 
     def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                  vllm_config: VllmConfig, device: torch.device):