Mark DBO test as flaky on b200 for Distributed B200 test (#29913)

Signed-off-by: dougbtv <dosmith@redhat.com>
This commit is contained in:
Doug Smith 2025-12-04 10:38:03 -05:00 committed by GitHub
parent cc050558f4
commit 5b4b42c0b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -9,10 +9,22 @@ correctly with the DeepSeek-V2-Lite model using GSM8K evaluation.
"""
import pytest
import torch
from tests.evals.gsm8k.gsm8k_eval import evaluate_gsm8k
from tests.utils import RemoteOpenAIServer
# Detect Blackwell / B200 (compute capability 10.x)
try:
if torch.cuda.is_available():
cap = torch.cuda.get_device_capability(0)
IS_BLACKWELL = cap[0] >= 10
else:
IS_BLACKWELL = False
except Exception:
# Be conservative: if we can't detect, don't xfail by default
IS_BLACKWELL = False
MODEL_NAME = "deepseek-ai/DeepSeek-V2-Lite-Chat"
DP_SIZE = 2
@ -33,6 +45,13 @@ DEEPEP_BACKENDS = [
@pytest.mark.parametrize("all2all_backend", DEEPEP_BACKENDS)
@pytest.mark.xfail(
IS_BLACKWELL,
reason=(
"Temporary: DBO accuracy unstable on Blackwell "
"(doesn't meet expectation of MIN_ACCURACY = 0.62)"
),
)
def test_dbo_dp_ep_gsm8k(all2all_backend: str, num_gpus_available):
"""
Test DBO with DP+EP using GSM8K evaluation.