From 7920e9b1c5e168fe6218d2d147bdb9acf6bc993d Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sat, 9 Aug 2025 15:03:26 +0800 Subject: [PATCH] [Bugfix] Fix failing GPT-OSS initialization test (#22557) Signed-off-by: Isotr0py --- tests/models/registry.py | 2 +- tests/models/test_initialization.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 2bb06b7d190b..64eeed655519 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -200,7 +200,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { {"6b": "EleutherAI/gpt-j-6b"}), "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m", {"1b": "EleutherAI/pythia-1.4b"}), - "GptOssForCausalLM": _HfExamplesInfo("openai/gpt-oss-20b"), + "GptOssForCausalLM": _HfExamplesInfo("lmsys/gpt-oss-20b-bf16"), "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"), "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"), "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"), # noqa: E501 diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index f0aa91566b57..f06b34285eae 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -68,6 +68,11 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch, if model_arch == "Phi4FlashForCausalLM": # Phi4FlashForCausalLM only supports DIFFERENTIAL_FLASH_ATTN backend m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN") + if model_arch == "GptOssForCausalLM": + # FIXME: A hack to bypass the FA3 assertion because our CI's L4 GPU + # has cc==8.9, which doesn't support FA3 yet. Remove this hack when + # L4 supports FA3. + m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1") LLM( model_info.default, tokenizer=model_info.tokenizer,