From 6e599eebe8655dab75462a8a165f6d811d0d845f Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Wed, 30 Jul 2025 22:35:47 +0800
Subject: [PATCH] [Bugfix] Fix OOM tests in initialization test (#21921)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 tests/models/test_initialization.py   | 14 ++++++++------
 vllm/model_executor/models/glm4_1v.py |  1 +
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py
index d5441540176e8..4c7da24fca32a 100644
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -33,12 +33,6 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
     model_info.check_available_online(on_fail="skip")
     model_info.check_transformers_version(on_fail="skip")
 
-    # FIXME: Possible memory leak in the previous tests?
-    if model_arch in ("Glm4vForConditionalGeneration",
-                      "GraniteSpeechForConditionalGeneration",
-                      "KimiVLForConditionalGeneration"):
-        pytest.skip("Avoid OOM")
-
     if model_arch in ("Llama4ForCausalLM", "EagleLlama4ForCausalLM"):
         from vllm.model_executor.models.llama4 import Llama4ForCausalLM
         from vllm.model_executor.models.registry import ModelRegistry
@@ -87,6 +81,14 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
                 "num_hidden_layers": 1,
             })
 
+        # e.g.: Qwen/Qwen2-Audio-7B-Instruct
+        if hasattr(hf_config, "audio_config"):
+            hf_config.audio_config.update({
+                "num_layers": 1,
+                "num_hidden_layers": 1,
+                "encoder_layers": 1,
+            })
+
         return hf_config
 
     # Avoid calling model.forward()
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 1fd65cc9099b7..ae1bf22c704e5 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -1275,6 +1275,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
             vllm_config=vllm_config,
             prefix=maybe_prefix(prefix, ""),
             architectures=["Glm4ForCausalLM"],
+            hf_config=self.config.get_text_config(),
         )
 
         self.make_empty_intermediate_tensors = (