for glm-4.1V update (#22000)
Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
parent 58eee5f2e0
commit 25373b6c6c
@@ -591,7 +591,8 @@ See [this page](generative_models.md) for more information on how to use generative models.
 | `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ |
 | `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
-| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
 | `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎ |
 | `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
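Reviewer note: the new `Glm4v_moeForConditionalGeneration` row means GLM-4.5V checkpoints are meant to load through the standard multimodal path. A minimal offline-inference sketch, assuming the `zai-org/GLM-4.5V-Air` checkpoint is published and that vLLM's usual `LLM.generate` multimodal API applies to it (the prompt format below is a placeholder, not the model's real chat template):

from PIL import Image

from vllm import LLM, SamplingParams

# Sketch only: model name availability and prompt format are assumptions.
llm = LLM(model="zai-org/GLM-4.5V-Air", trust_remote_code=True)

image = Image.open("example.jpg")  # any local test image
outputs = llm.generate(
    {
        "prompt": "<|user|>\nDescribe this image.<|assistant|>\n",  # placeholder
        "multi_modal_data": {"image": image},
    },
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)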
@@ -377,9 +377,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
-    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking", min_transformers_version="4.53"),  # noqa: E501
-    "Glm4MoeForCausalLM": _HfExamplesInfo("THUDM/GLM-4.5",
-                                          min_transformers_version="4.54",
+    "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"),  # noqa: E501
+    "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
+                                          min_transformers_version="4.54"),  # noqa: E501
+    "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
                                           is_available_online=False),  # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
                                       extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
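For context, `_HfExamplesInfo` entries drive the model-registry tests: `min_transformers_version` skips a model on older Transformers releases, and `is_available_online=False` keeps CI from trying to download a checkpoint that is not yet published (as with `GLM-4.5V-Air` here). A simplified, illustrative version of that gating logic, not the repo's actual definition:

from dataclasses import dataclass
from typing import Optional

import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION


@dataclass
class ExampleInfo:  # illustrative stand-in for _HfExamplesInfo
    default: str
    min_transformers_version: Optional[str] = None
    is_available_online: bool = True

    def check_transformers_version(self) -> None:
        # Skip the calling test when installed Transformers is too old.
        if (self.min_transformers_version is not None
                and Version(TRANSFORMERS_VERSION)
                < Version(self.min_transformers_version)):
            pytest.skip(
                f"needs transformers>={self.min_transformers_version}")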
@@ -515,8 +516,8 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
                                          is_available_online=False,
                                          speculative_model="openbmb/MiniCPM-2B-sft-bf16",
                                          tokenizer="openbmb/MiniCPM-2B-sft-bf16"),
-    "Glm4MoeMTPModel": _HfExamplesInfo("THUDM/GLM-4.5",
-                                       speculative_model="THUDM/GLM-4.5",
+    "Glm4MoeMTPModel": _HfExamplesInfo("zai-org/GLM-4.5",
+                                       speculative_model="zai-org/GLM-4.5",
                                        min_transformers_version="4.54",
                                        is_available_online=False),
     "MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
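The `Glm4MoeMTPModel` hunk only swaps the repo org (`THUDM` → `zai-org`); note that `speculative_model` points at the same repo as the target model, since the MTP draft head ships inside the main checkpoint. A hedged sketch of wiring this up, where the `speculative_config` keys and values are assumptions rather than confirmed against this tree:

from vllm import LLM

# Sketch only: the exact speculative_config schema for GLM-4.5 MTP is an
# assumption; consult the vLLM speculative decoding docs for the real keys.
llm = LLM(
    model="zai-org/GLM-4.5",
    speculative_config={
        "model": "zai-org/GLM-4.5",   # draft (MTP) weights in the same repo
        "num_speculative_tokens": 1,  # hypothetical value
    },
    tensor_parallel_size=8,  # hypothetical sizing for a model this large
)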
@@ -12,7 +12,7 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
     pytest.skip("skip glm4_moe parser test", allow_module_level=True)

 # Use a common model that is likely to be available
-MODEL = "THUDM/GLM-4.5"
+MODEL = "zai-org/GLM-4.5"


 @pytest.fixture(scope="module")
@@ -1096,7 +1096,7 @@ class MRotaryEmbedding(RotaryEmbedding):
                 audio_feature_lengths=audio_feature_lengths,
                 use_audio_in_video=use_audio_in_video,
             )
-        elif "glm4v" in hf_config.model_type:
+        elif hf_config.model_type in ["glm4v", "glm4v_moe"]:
             return cls._glm4v_get_input_positions_tensor(
                 input_tokens=input_tokens,
                 hf_config=hf_config,
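Worth noting: the old substring test would already have matched the new model type, since `"glm4v" in "glm4v_moe"` is `True`. The explicit membership list trades that accident for stated intent and stops the branch from swallowing other types that merely contain the substring. A quick illustration:

# Substring check matches anything containing "glm4v", including the
# text-backbone config type "glm4v_text"; explicit membership does not.
for model_type in ("glm4v", "glm4v_moe", "glm4v_text"):
    substring_match = "glm4v" in model_type          # True, True, True
    explicit_match = model_type in ["glm4v", "glm4v_moe"]  # True, True, False
    print(model_type, substring_match, explicit_match)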
@@ -37,8 +37,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
 from transformers import BatchFeature
-from transformers.models.glm4v.configuration_glm4v import (Glm4vConfig,
-                                                           Glm4vVisionConfig)
+from transformers.models.glm4v.configuration_glm4v import Glm4vVisionConfig
 from transformers.models.glm4v.image_processing_glm4v import (
     Glm4vImageProcessor, smart_resize)
 from transformers.models.glm4v.video_processing_glm4v import (
@@ -801,7 +800,7 @@ class Glm4vVisionTransformer(nn.Module):
 class Glm4vProcessingInfo(BaseProcessingInfo):

     def get_hf_config(self):
-        return self.ctx.get_hf_config(Glm4vConfig)
+        return self.ctx.get_hf_config()

     def get_tokenizer(self):
         return self.ctx.tokenizer
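Dropping the explicit `Glm4vConfig` argument (and its import above) matters because `Glm4vProcessingInfo` is now shared between the dense and MoE variants, whose HF configs are different classes; called without an expected type, `get_hf_config` returns whatever config the context holds instead of type-checking it. A simplified sketch of that pattern, assuming the real `InputProcessingContext` method differs in detail:

from typing import Optional, Type

from transformers import PretrainedConfig


def get_hf_config(hf_config: PretrainedConfig,
                  expected_type: Optional[Type[PretrainedConfig]] = None):
    # With an expected type, reject configs of the wrong class; without
    # one, accept any config -- which is what lets Glm4vConfig and the
    # MoE variant's config flow through the same Glm4vProcessingInfo.
    if expected_type is not None and not isinstance(hf_config, expected_type):
        raise TypeError(f"expected {expected_type.__name__}, "
                        f"got {type(hf_config).__name__}")
    return hf_config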
@@ -1253,7 +1252,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
-        config: Glm4vConfig = vllm_config.model_config.hf_config
+        config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config

@@ -1267,12 +1266,18 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
             prefix=maybe_prefix(prefix, "visual"),
         )

+        if config.model_type == "glm4v":
+            architectures = ["Glm4ForCausalLM"]
+        elif config.model_type == "glm4v_moe":
+            architectures = ["Glm4MoeForCausalLM"]
+        else:
+            architectures = None
+
         self.language_model = init_vllm_registered_model(
             vllm_config=vllm_config,
-            prefix=maybe_prefix(prefix, ""),
-            architectures=["Glm4ForCausalLM"],
-            hf_config=self.config.get_text_config(),
-        )
+            hf_config=config.text_config,
+            prefix=maybe_prefix(prefix, "language_model"),
+            architectures=architectures)

         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
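The `model_type` branch selects which text backbone to instantiate behind the shared vision tower: GLM-4.1V keeps the dense `Glm4ForCausalLM`, GLM-4.5V gets the MoE `Glm4MoeForCausalLM`, and `None` defers to whatever the text config advertises. The prefix change to `"language_model"` presumably aligns parameter names with the checkpoint's weight layout. A behavior-equivalent, table-driven refactor of the branch, shown only as a sketch:

from typing import Optional

# Hypothetical alternative to the if/elif chain above.
_TEXT_BACKBONES = {
    "glm4v": ["Glm4ForCausalLM"],
    "glm4v_moe": ["Glm4MoeForCausalLM"],
}


def pick_text_architectures(model_type: str) -> Optional[list]:
    # None defers architecture resolution to the registered text config.
    return _TEXT_BACKBONES.get(model_type)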
@@ -206,6 +206,7 @@ _MULTIMODAL_MODELS = {
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
+    "Glm4v_moeForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
     "GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"),  # noqa: E501
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),
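Both architecture strings deliberately map to the same `(module, class)` pair, so GLM-4.5V checkpoints reuse the GLM-4.1V implementation; the `model_type` branch in `__init__` above handles the divergence. Resolution is essentially a dict lookup followed by an import, roughly as below (the real registry wraps this core idea in lazy-import machinery):

import importlib

_MULTIMODAL_MODELS = {
    # excerpt from the diff above
    "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),
    "Glm4v_moeForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),
}


def resolve(architecture: str):
    # Look up the implementing module and class, then import it.
    module_name, class_name = _MULTIMODAL_MODELS[architecture]
    module = importlib.import_module(
        f"vllm.model_executor.models.{module_name}")
    return getattr(module, class_name)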