[Bugfix] Fix OOM tests in initialization test (#21921)

Signed-off-by: Isotr0py <2037008807@qq.com>
2026-05-24 13:51:18 +08:00 · 2025-07-30 22:35:47 +08:00 · 2025-07-30 22:35:47 +08:00 · 6e599eebe8
commit 6e599eebe8
parent 88edf5994c
2 changed files with 9 additions and 6 deletions
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -33,12 +33,6 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
    model_info.check_available_online(on_fail="skip")
    model_info.check_transformers_version(on_fail="skip")
    # FIXME: Possible memory leak in the previous tests?
    if model_arch in ("Glm4vForConditionalGeneration",
                      "GraniteSpeechForConditionalGeneration",
                      "KimiVLForConditionalGeneration"):
        pytest.skip("Avoid OOM")
    if model_arch in ("Llama4ForCausalLM", "EagleLlama4ForCausalLM"):
        from vllm.model_executor.models.llama4 import Llama4ForCausalLM
        from vllm.model_executor.models.registry import ModelRegistry
@@ -87,6 +81,14 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
                "num_hidden_layers": 1,
            })
        # e.g.: Qwen/Qwen2-Audio-7B-Instruct
        if hasattr(hf_config, "audio_config"):
            hf_config.audio_config.update({
                "num_layers": 1,
                "num_hidden_layers": 1,
                "encoder_layers": 1,
            })
        return hf_config
    # Avoid calling model.forward()
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -1275,6 +1275,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
            vllm_config=vllm_config,
            prefix=maybe_prefix(prefix, ""),
            architectures=["Glm4ForCausalLM"],
            hf_config=self.config.get_text_config(),
        )
        self.make_empty_intermediate_tensors = (