[CI/Build] Avoid duplicate empty inputs test for common multimodal generation tests (#29907)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Isotr0py 2025-12-03 18:27:36 +08:00 committed by GitHub
parent a21cd9ed23
commit cc4e296ea6
3 changed files with 69 additions and 63 deletions
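Note: an empty size-factor tuple `()` expands to a case with zero images, so every per-model list that included `()` re-ran the same text-only case that the common suite already exercises once. A minimal sketch of that expansion, assuming Pillow and a hypothetical expand_size_factors helper (not the repo's actual code):

from PIL import Image

# Hypothetical sketch: each tuple in image_size_factors is one test case,
# and each element rescales the base image, so the empty tuple () is a
# case with no images at all -- the same text-only case a single shared
# empty-inputs test already covers.
def expand_size_factors(
    base: Image.Image, factors: tuple[float, ...]
) -> list[Image.Image]:
    return [
        base.resize((int(base.width * f), int(base.height * f)))
        for f in factors
    ]

base = Image.new("RGB", (640, 480))
assert expand_size_factors(base, ()) == []  # the duplicated empty-inputs case
assert len(expand_size_factors(base, (0.25, 0.2, 0.15))) == 3  # three scaled images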

@@ -137,7 +137,7 @@ VLM_TEST_SETTINGS = {
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
-image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
"qwen2_5_omni": VLMTestInfo(
@@ -152,7 +152,7 @@ VLM_TEST_SETTINGS = {
auto_cls=AutoModelForTextToWaveform,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner,
-image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
"qwen3_vl": VLMTestInfo(
@@ -173,7 +173,7 @@ VLM_TEST_SETTINGS = {
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
patch_hf_runner=model_utils.qwen3_vl_patch_hf_runner,
-image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[
pytest.mark.core_model,
],
@@ -350,7 +350,7 @@ VLM_TEST_SETTINGS = {
patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
stop_str=["<end▁of▁sentence>", "<begin▁of▁sentence>"],
-image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
+image_size_factors=[(1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
),
"fuyu": VLMTestInfo(
models=["adept/fuyu-8b"],
@@ -707,7 +707,7 @@ VLM_TEST_SETTINGS = {
max_model_len=8192,
max_num_seqs=2,
auto_cls=AutoModelForCausalLM,
-image_size_factors=[(), (0.25,)],
+image_size_factors=[(0.25,)],
marks=[
pytest.mark.skipif(
Version(TRANSFORMERS_VERSION) == Version("4.57.3"),
@@ -760,7 +760,7 @@ VLM_TEST_SETTINGS = {
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
-image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.cpu_model],
),
"skywork_r1v": VLMTestInfo(
@@ -812,7 +812,7 @@ VLM_TEST_SETTINGS = {
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
-image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.skip("Model initialization hangs")],
),
### Tensor parallel / multi-gpu broadcast tests

@@ -62,6 +62,65 @@ def get_filtered_test_settings(
return matching_tests
+def get_model_type_cases(
+    model_type: str,
+    test_info: VLMTestInfo,
+    test_type: VLMTestType,
+):
+    # Ensure that something is wrapped as an iterable if it's not already
+    ensure_wrapped = lambda e: e if isinstance(e, (list, tuple)) else (e,)
+    # This is essentially the same as nesting a bunch of mark.parametrize
+    # decorators, but we do it programmatically to allow overrides on a
+    # per-model basis, while still being able to execute each of these
+    # as individual test cases in pytest.
+    iter_kwargs = OrderedDict(
+        [
+            ("model", ensure_wrapped(test_info.models)),
+            ("max_tokens", ensure_wrapped(test_info.max_tokens)),
+            ("num_logprobs", ensure_wrapped(test_info.num_logprobs)),
+            ("dtype", ensure_wrapped(test_info.dtype)),
+            (
+                "distributed_executor_backend",
+                ensure_wrapped(test_info.distributed_executor_backend),
+            ),
+        ]
+    )
+    # num_frames is video only
+    if test_type == VLMTestType.VIDEO:
+        iter_kwargs["num_video_frames"] = ensure_wrapped(test_info.num_video_frames)
+        iter_kwargs["needs_video_metadata"] = ensure_wrapped(
+            test_info.needs_video_metadata
+        )
+    # No sizes passed for custom inputs, since inputs are directly provided
+    if test_type not in (
+        VLMTestType.CUSTOM_INPUTS,
+        VLMTestType.AUDIO,
+    ):
+        wrapped_sizes = get_wrapped_test_sizes(test_info, test_type)
+        if wrapped_sizes is None:
+            raise ValueError(f"Sizes must be set for test type {test_type}")
+        iter_kwargs["size_wrapper"] = wrapped_sizes
+    # Otherwise expand the custom test options instead
+    elif test_type == VLMTestType.CUSTOM_INPUTS:
+        if test_info.custom_test_opts is None:
+            raise ValueError("Test has type CUSTOM_INPUTS, but none given")
+        iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
+    # Wrap all model cases in a pytest parameter & pass marks through
+    return [
+        pytest.param(
+            model_type,
+            ExpandableVLMTestArgs(**{k: v for k, v in zip(iter_kwargs.keys(), case)}),
+            marks=test_info.marks if test_info.marks is not None else [],
+        )
+        for case in list(itertools.product(*iter_kwargs.values()))
+    ]
def get_parametrized_options(
test_settings: dict[str, VLMTestInfo],
test_type: VLMTestType,
@@ -76,64 +135,11 @@ def get_parametrized_options(
test_settings, test_type, create_new_process_for_each_test
)
-    # Ensure that something is wrapped as an iterable if it's not already
-    ensure_wrapped = lambda e: e if isinstance(e, (list, tuple)) else (e,)
-    def get_model_type_cases(model_type: str, test_info: VLMTestInfo):
-        # This is essentially the same as nesting a bunch of mark.parametrize
-        # decorators, but we do it programmatically to allow overrides on a
-        # per-model basis, while still being able to execute each of these
-        # as individual test cases in pytest.
-        iter_kwargs = OrderedDict(
-            [
-                ("model", ensure_wrapped(test_info.models)),
-                ("max_tokens", ensure_wrapped(test_info.max_tokens)),
-                ("num_logprobs", ensure_wrapped(test_info.num_logprobs)),
-                ("dtype", ensure_wrapped(test_info.dtype)),
-                (
-                    "distributed_executor_backend",
-                    ensure_wrapped(test_info.distributed_executor_backend),
-                ),
-            ]
-        )
-        # num_frames is video only
-        if test_type == VLMTestType.VIDEO:
-            iter_kwargs["num_video_frames"] = ensure_wrapped(test_info.num_video_frames)
-            iter_kwargs["needs_video_metadata"] = ensure_wrapped(
-                test_info.needs_video_metadata
-            )
-        # No sizes passed for custom inputs, since inputs are directly provided
-        if test_type not in (VLMTestType.CUSTOM_INPUTS, VLMTestType.AUDIO):
-            wrapped_sizes = get_wrapped_test_sizes(test_info, test_type)
-            if wrapped_sizes is None:
-                raise ValueError(f"Sizes must be set for test type {test_type}")
-            iter_kwargs["size_wrapper"] = wrapped_sizes
-        # Otherwise expand the custom test options instead
-        elif test_type == VLMTestType.CUSTOM_INPUTS:
-            if test_info.custom_test_opts is None:
-                raise ValueError("Test has type CUSTOM_INPUTS, but none given")
-            iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
-        # Wrap all model cases in a pytest parameter & pass marks through
-        return [
-            pytest.param(
-                model_type,
-                ExpandableVLMTestArgs(
-                    **{k: v for k, v in zip(iter_kwargs.keys(), case)}
-                ),
-                marks=test_info.marks if test_info.marks is not None else [],
-            )
-            for case in list(itertools.product(*iter_kwargs.values()))
-        ]
    # Get a list per model type, where each entry contains a tuple of all of
    # that model type's cases, then flatten them into the top level so that
    # we can consume them in one mark.parametrize call.
    cases_by_model_type = [
-        get_model_type_cases(model_type, test_info)
+        get_model_type_cases(model_type, test_info, test_type)
        for model_type, test_info in matching_tests.items()
    ]
    return list(itertools.chain(*cases_by_model_type))
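Note: the comment in get_model_type_cases says the helper is equivalent to nesting mark.parametrize decorators. A self-contained sketch of that pattern, under illustrative names (none of these are repo identifiers):

import itertools
from collections import OrderedDict

import pytest

# Illustrative option lists, standing in for VLMTestInfo fields.
iter_kwargs = OrderedDict(
    [
        ("model", ("model-a", "model-b")),
        ("max_tokens", (128,)),
        ("dtype", ("half",)),
    ]
)

# One pytest.param per point in the cartesian product -- the same cases
# that stacked @pytest.mark.parametrize("model", ...), ("max_tokens", ...),
# and ("dtype", ...) decorators would generate, but built programmatically
# so each model can substitute its own option lists.
CASES = [
    pytest.param(*case, id="-".join(map(str, case)))
    for case in itertools.product(*iter_kwargs.values())
]

@pytest.mark.parametrize(",".join(iter_kwargs.keys()), CASES)
def test_generation(model, max_tokens, dtype):
    assert max_tokens > 0  # placeholder body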

@@ -50,8 +50,8 @@ MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PL
VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
-IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
-EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)]
+IMAGE_SIZE_FACTORS = [(1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
+EMBEDDING_SIZE_FACTORS = [(1.0,), (1.0, 1.0, 1.0)]
RunnerOutput = tuple[list[int], str, SampleLogprobs | None]
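Note: these module-level lists act as the shared defaults when a model's VLMTestInfo doesn't set image_size_factors. A hedged sketch of that fallback (the resolution logic itself is assumed, not part of this diff):

# Hypothetical fallback resolution -- only the default lists above are in
# this diff; the real helper that applies them is not shown here.
def resolve_image_size_factors(
    overrides: list[tuple[float, ...]] | None,
) -> list[tuple[float, ...]]:
    return overrides if overrides is not None else IMAGE_SIZE_FACTORS

# Per-model override, e.g. the qwen2 entries in the first file above:
assert resolve_image_size_factors([(0.25,)]) == [(0.25,)]
# No override: fall back to the shared defaults (now without the empty tuple).
assert resolve_image_size_factors(None) == IMAGE_SIZE_FACTORS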