mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-12 05:27:12 +08:00
add gemma3 to test
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
06fba5410c
commit
a772948c9d
@ -71,7 +71,8 @@ def llm_pair(request):
|
|||||||
[
|
[
|
||||||
# Model names for the llm_pair fixture
|
# Model names for the llm_pair fixture
|
||||||
"deepseek-ai/DeepSeek-V2-Lite",
|
"deepseek-ai/DeepSeek-V2-Lite",
|
||||||
"Qwen/Qwen2-1.5B-Instruct"
|
"Qwen/Qwen2-1.5B-Instruct",
|
||||||
|
"google/gemma-3-1b-it",
|
||||||
],
|
],
|
||||||
indirect=True)
|
indirect=True)
|
||||||
@pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
|
@pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
|
||||||
@ -126,6 +127,8 @@ class TestFullCUDAGraph:
|
|||||||
("Qwen/Qwen2-1.5B-Instruct", True),
|
("Qwen/Qwen2-1.5B-Instruct", True),
|
||||||
# MLA does not support capturing CUDA Graphs with size > max_num_seqs
|
# MLA does not support capturing CUDA Graphs with size > max_num_seqs
|
||||||
("deepseek-ai/DeepSeek-V2-Lite", False),
|
("deepseek-ai/DeepSeek-V2-Lite", False),
|
||||||
|
# Full CUDA graph supports mixed full and sliding window attention.
|
||||||
|
("google/gemma-3-1b-it", True),
|
||||||
])
|
])
|
||||||
@pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
|
@pytest.mark.skipif(current_platform.get_device_capability() != (9, 0),
|
||||||
reason="Only Hopper GPUs support FA3 and FlashMLA")
|
reason="Only Hopper GPUs support FA3 and FlashMLA")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user