diff --git a/tests/v1/distributed/test_dbo.py b/tests/v1/distributed/test_dbo.py index 16f154d196ba..f3a159762ea5 100644 --- a/tests/v1/distributed/test_dbo.py +++ b/tests/v1/distributed/test_dbo.py @@ -9,10 +9,22 @@ correctly with the DeepSeek-V2-Lite model using GSM8K evaluation. """ import pytest +import torch from tests.evals.gsm8k.gsm8k_eval import evaluate_gsm8k from tests.utils import RemoteOpenAIServer +# Detect Blackwell / B200 (compute capability 10.x) +try: + if torch.cuda.is_available(): + cap = torch.cuda.get_device_capability(0) + IS_BLACKWELL = cap[0] >= 10 + else: + IS_BLACKWELL = False +except Exception: + # Be conservative: if we can't detect, don't xfail by default + IS_BLACKWELL = False + MODEL_NAME = "deepseek-ai/DeepSeek-V2-Lite-Chat" DP_SIZE = 2 @@ -33,6 +45,13 @@ DEEPEP_BACKENDS = [ @pytest.mark.parametrize("all2all_backend", DEEPEP_BACKENDS) +@pytest.mark.xfail( + IS_BLACKWELL, + reason=( + "Temporary: DBO accuracy unstable on Blackwell " + "(doesn't meet expectation of MIN_ACCURACY = 0.62)" + ), +) def test_dbo_dp_ep_gsm8k(all2all_backend: str, num_gpus_available): """ Test DBO with DP+EP using GSM8K evaluation.