Add Gemma 3 (google/gemma-3-1b-it) to the full CUDA graph tests

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon 2025-08-04 12:56:13 -07:00
parent 06fba5410c
commit a772948c9d

View File

@ -71,7 +71,8 @@ def llm_pair(request):
[ [
# Model names for the llm_pair fixture # Model names for the llm_pair fixture
"deepseek-ai/DeepSeek-V2-Lite", "deepseek-ai/DeepSeek-V2-Lite",
"Qwen/Qwen2-1.5B-Instruct" "Qwen/Qwen2-1.5B-Instruct",
"google/gemma-3-1b-it",
], ],
indirect=True) indirect=True)
@pytest.mark.skipif(current_platform.get_device_capability() != (9, 0), @pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
@ -126,6 +127,8 @@ class TestFullCUDAGraph:
("Qwen/Qwen2-1.5B-Instruct", True), ("Qwen/Qwen2-1.5B-Instruct", True),
# MLA does not support capturing CUDA Graphs with size > max_num_seqs # MLA does not support capturing CUDA Graphs with size > max_num_seqs
("deepseek-ai/DeepSeek-V2-Lite", False), ("deepseek-ai/DeepSeek-V2-Lite", False),
# Full CUDA graph supports mixed full and sliding window attention.
("google/gemma-3-1b-it", True),
]) ])
@pytest.mark.skipif(current_platform.get_device_capability() != (9, 0), @pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
reason="Only Hopper GPUs support FA3 and FlashMLA") reason="Only Hopper GPUs support FA3 and FlashMLA")