From c07a73317d202c2dad67f12893fcddb6d3664950 Mon Sep 17 00:00:00 2001 From: Jiangyun Zhu Date: Thu, 28 Aug 2025 14:51:24 +0800 Subject: [PATCH] [CI] enable idefics3 and fuyu-8b test in multimodal test (#23790) Signed-off-by: zjy0516 --- .../multimodal/generation/test_common.py | 36 ++++++++----------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 2b60faae8ec0b..d61b182761e44 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -189,23 +189,21 @@ VLM_TEST_SETTINGS = { }, marks=[pytest.mark.core_model], ), - # FIXME(Isotr0py): Enable this test after - # https://github.com/huggingface/transformers/pull/39470 released - # "idefics3-transformers": VLMTestInfo( - # models=["HuggingFaceTB/SmolVLM-256M-Instruct"], - # test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), - # prompt_formatter=lambda img_prompt:f"<|begin_of_text|>User:{img_prompt}\nAssistant:", # noqa: E501 - # img_idx_to_prompt=lambda idx: "", - # max_model_len=8192, - # max_num_seqs=2, - # auto_cls=AutoModelForImageTextToText, - # hf_output_post_proc=model_utils.idefics3_trunc_hf_output, - # image_size_factors=[(0.25, 0.5, 1.0)], - # vllm_runner_kwargs={ - # "model_impl": "transformers", - # }, - # marks=[pytest.mark.core_model], - # ), + "idefics3-transformers": VLMTestInfo( + models=["HuggingFaceTB/SmolVLM-256M-Instruct"], + test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), + prompt_formatter=lambda img_prompt:f"<|begin_of_text|>User:{img_prompt}\nAssistant:", # noqa: E501 + img_idx_to_prompt=lambda idx: "", + max_model_len=8192, + max_num_seqs=2, + auto_cls=AutoModelForImageTextToText, + hf_output_post_proc=model_utils.idefics3_trunc_hf_output, + image_size_factors=[(0.25, 0.5, 1.0)], + vllm_runner_kwargs={ + "model_impl": "transformers", + }, + marks=[pytest.mark.core_model], + ), # Pixel values from processor are not 4D or 5D arrays "qwen2_5_vl-transformers": VLMTestInfo( models=["Qwen/Qwen2.5-VL-3B-Instruct"], @@ -322,10 +320,6 @@ VLM_TEST_SETTINGS = { vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output, num_logprobs=10, image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], - # FIXME(Isotr0py): This model is broken in Transformers v4.54.1, we - # should enable this again after the fix is released: - # https://github.com/huggingface/transformers/pull/39915 - marks=[pytest.mark.skip("HF model is broken")], ), "gemma3": VLMTestInfo( models=["google/gemma-3-4b-it"],