From a772948c9d02a59d22ac7a6ecf085a3596d27082 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Mon, 4 Aug 2025 12:56:13 -0700
Subject: [PATCH] add gemma3 to test

Signed-off-by: Woosuk Kwon
---
 tests/compile/piecewise/test_full_cudagraph.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/compile/piecewise/test_full_cudagraph.py b/tests/compile/piecewise/test_full_cudagraph.py
index efe9c843f144c..d99ec5b8c6cb3 100644
--- a/tests/compile/piecewise/test_full_cudagraph.py
+++ b/tests/compile/piecewise/test_full_cudagraph.py
@@ -71,7 +71,8 @@ def llm_pair(request):
     [
         # Model names for the llm_pair fixture
         "deepseek-ai/DeepSeek-V2-Lite",
-        "Qwen/Qwen2-1.5B-Instruct"
+        "Qwen/Qwen2-1.5B-Instruct",
+        "google/gemma-3-1b-it",
     ],
     indirect=True)
 @pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
@@ -126,6 +127,8 @@ class TestFullCUDAGraph:
         ("Qwen/Qwen2-1.5B-Instruct", True),
         # MLA does not support capturing CUDA Graphs with size > max_num_seqs
         ("deepseek-ai/DeepSeek-V2-Lite", False),
+        # Full CUDA graph supports mixed full and sliding window attention.
+        ("google/gemma-3-1b-it", True),
     ])
 @pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
                     reason="Only Hopper GPUs support FA3 and FlashMLA")