mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 13:44:58 +08:00
[Bugfix] Fix failing GPT-OSS initialization test (#22557)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
b7c0942b65
commit
7920e9b1c5
@ -200,7 +200,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
|
|||||||
{"6b": "EleutherAI/gpt-j-6b"}),
|
{"6b": "EleutherAI/gpt-j-6b"}),
|
||||||
"GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
|
"GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
|
||||||
{"1b": "EleutherAI/pythia-1.4b"}),
|
{"1b": "EleutherAI/pythia-1.4b"}),
|
||||||
"GptOssForCausalLM": _HfExamplesInfo("openai/gpt-oss-20b"),
|
"GptOssForCausalLM": _HfExamplesInfo("lmsys/gpt-oss-20b-bf16"),
|
||||||
"GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
|
"GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
|
||||||
"GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
|
"GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
|
||||||
"GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"), # noqa: E501
|
"GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"), # noqa: E501
|
||||||
|
|||||||
@ -68,6 +68,11 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
|
|||||||
if model_arch == "Phi4FlashForCausalLM":
|
if model_arch == "Phi4FlashForCausalLM":
|
||||||
# Phi4FlashForCausalLM only supports DIFFERENTIAL_FLASH_ATTN backend
|
# Phi4FlashForCausalLM only supports DIFFERENTIAL_FLASH_ATTN backend
|
||||||
m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
|
m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
|
||||||
|
if model_arch == "GptOssForCausalLM":
|
||||||
|
# FIXME: A hack to bypass the FA3 assertion because our CI's L4 GPU
|
||||||
|
# has cc==8.9, which does not support FA3 yet. Remove this hack when
|
||||||
|
# L4 supports FA3.
|
||||||
|
m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
|
||||||
LLM(
|
LLM(
|
||||||
model_info.default,
|
model_info.default,
|
||||||
tokenizer=model_info.tokenizer,
|
tokenizer=model_info.tokenizer,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user