From a772948c9d02a59d22ac7a6ecf085a3596d27082 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Mon, 4 Aug 2025 12:56:13 -0700
Subject: [PATCH] Add Gemma 3 to full CUDA graph tests

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
---
 tests/compile/piecewise/test_full_cudagraph.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/compile/piecewise/test_full_cudagraph.py b/tests/compile/piecewise/test_full_cudagraph.py
index efe9c843f144c..d99ec5b8c6cb3 100644
--- a/tests/compile/piecewise/test_full_cudagraph.py
+++ b/tests/compile/piecewise/test_full_cudagraph.py
@@ -71,7 +71,8 @@ def llm_pair(request):
     [
         # Model names for the llm_pair fixture
         "deepseek-ai/DeepSeek-V2-Lite",
-        "Qwen/Qwen2-1.5B-Instruct"
+        "Qwen/Qwen2-1.5B-Instruct",
+        "google/gemma-3-1b-it",
     ],
     indirect=True)
 @pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
@@ -126,6 +127,8 @@ class TestFullCUDAGraph:
         ("Qwen/Qwen2-1.5B-Instruct", True),
         # MLA does not support capturing CUDA Graphs with size > max_num_seqs
         ("deepseek-ai/DeepSeek-V2-Lite", False),
+        # Full CUDA graph supports mixed full and sliding window attention.
+        ("google/gemma-3-1b-it", True),
     ])
 @pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
                     reason="Only Hopper GPUs support FA3 and FlashMLA")