mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 13:51:18 +08:00
[Bugfix] Fix OOM tests in initialization test (#21921)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
parent
88edf5994c
commit
6e599eebe8
@ -33,12 +33,6 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
|
|||||||
model_info.check_available_online(on_fail="skip")
|
model_info.check_available_online(on_fail="skip")
|
||||||
model_info.check_transformers_version(on_fail="skip")
|
model_info.check_transformers_version(on_fail="skip")
|
||||||
|
|
||||||
# FIXME: Possible memory leak in the previous tests?
|
|
||||||
if model_arch in ("Glm4vForConditionalGeneration",
|
|
||||||
"GraniteSpeechForConditionalGeneration",
|
|
||||||
"KimiVLForConditionalGeneration"):
|
|
||||||
pytest.skip("Avoid OOM")
|
|
||||||
|
|
||||||
if model_arch in ("Llama4ForCausalLM", "EagleLlama4ForCausalLM"):
|
if model_arch in ("Llama4ForCausalLM", "EagleLlama4ForCausalLM"):
|
||||||
from vllm.model_executor.models.llama4 import Llama4ForCausalLM
|
from vllm.model_executor.models.llama4 import Llama4ForCausalLM
|
||||||
from vllm.model_executor.models.registry import ModelRegistry
|
from vllm.model_executor.models.registry import ModelRegistry
|
||||||
@ -87,6 +81,14 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
|
|||||||
"num_hidden_layers": 1,
|
"num_hidden_layers": 1,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Some configs carry a nested audio_config (e.g. Qwen/Qwen2-Audio-7B-Instruct); reduce its layer counts to 1 as well
|
||||||
|
if hasattr(hf_config, "audio_config"):
|
||||||
|
hf_config.audio_config.update({
|
||||||
|
"num_layers": 1,
|
||||||
|
"num_hidden_layers": 1,
|
||||||
|
"encoder_layers": 1,
|
||||||
|
})
|
||||||
|
|
||||||
return hf_config
|
return hf_config
|
||||||
|
|
||||||
# Avoid calling model.forward()
|
# Avoid calling model.forward()
|
||||||
|
|||||||
@ -1275,6 +1275,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
prefix=maybe_prefix(prefix, ""),
|
prefix=maybe_prefix(prefix, ""),
|
||||||
architectures=["Glm4ForCausalLM"],
|
architectures=["Glm4ForCausalLM"],
|
||||||
|
hf_config=self.config.get_text_config(),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.make_empty_intermediate_tensors = (
|
self.make_empty_intermediate_tensors = (
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user