Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-05-24 05:51:19 +08:00)
[CI] Retry flaky fp8 cutlass mla tests (#24536)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Parent: 41f160b974
Commit: 7e7db04310
@@ -49,7 +49,13 @@ CUTLASS_MLA_UNSUPPORTED_REASON = \
|
|||||||
@pytest.mark.parametrize("block_size", [64])
|
@pytest.mark.parametrize("block_size", [64])
|
||||||
@pytest.mark.parametrize("causal", [True])
|
@pytest.mark.parametrize("causal", [True])
|
||||||
@pytest.mark.parametrize("varlen", [False, True])
|
@pytest.mark.parametrize("varlen", [False, True])
|
||||||
@pytest.mark.parametrize("torch_dtype", [torch.bfloat16, torch.float8_e4m3fn])
|
@pytest.mark.parametrize(
|
||||||
|
"torch_dtype",
|
||||||
|
[
|
||||||
|
torch.bfloat16,
|
||||||
|
# fp8 can have occasional precision-related failures.
|
||||||
|
pytest.param(torch.float8_e4m3fn, marks=pytest.mark.flaky(reruns=2))
|
||||||
|
])
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
def test_cutlass_mla_decode(b, s_q, mean_sk, h_q, h_kv, d, dv, block_size,
|
def test_cutlass_mla_decode(b, s_q, mean_sk, h_q, h_kv, d, dv, block_size,
|
||||||
causal, varlen, torch_dtype):
|
causal, varlen, torch_dtype):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user