diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index c8b6c6c861209..0143d137ff3f9 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -623,6 +623,12 @@ Specified using `--task generate`.
| `TarsierForConditionalGeneration` | Tarsier | T + IE+ | `omni-search/Tarsier-7b`, `omni-search/Tarsier-34b` | | ✅︎ | ✅︎ |
| `Tarsier2ForConditionalGeneration`^ | Tarsier2 | T + IE+ + VE+ | `omni-research/Tarsier2-Recap-7b`, `omni-research/Tarsier2-7b-0115` | | ✅︎ | ✅︎ |
+Some models are supported only via the [Transformers backend](#transformers). The table below lists the models we officially support in this way. The logs will say that the Transformers backend is being used, and there will be no warning that this is fallback behaviour. This means that, if you have issues with any of the models listed below, please [open an issue](https://github.com/vllm-project/vllm/issues/new/choose) and we'll do our best to fix it!
+
+| Architecture | Models | Inputs | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/distributed_serving.md) | [V1](gh-issue:8779) |
+|--------------|--------|--------|-------------------|-----------------------------|-----------------------------------------|---------------------|
+| `Emu3ForConditionalGeneration` | Emu3 | T + I | `BAAI/Emu3-Chat-hf` | ✅︎ | ✅︎ | ✅︎ |
+
^ You need to set the architecture name via `--hf-overrides` to match the one in vLLM.
• For example, to use DeepSeek-VL2 series models:
`--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`
diff --git a/tests/models/multimodal/test_mapping.py b/tests/models/multimodal/test_mapping.py
index 5f20452aff3d8..f323dfd04cb95 100644
--- a/tests/models/multimodal/test_mapping.py
+++ b/tests/models/multimodal/test_mapping.py
@@ -23,18 +23,14 @@ def create_repo_dummy_weights(repo: str) -> Iterable[tuple[str, torch.Tensor]]:
return ((name, torch.empty(0)) for name in weight_names)
-def create_model_dummy_weights(
- repo: str,
- model_arch: str,
-) -> Iterable[tuple[str, torch.Tensor]]:
+def create_dummy_model(repo: str, model_arch: str) -> PreTrainedModel:
"""
-    Create weights from a dummy meta deserialized hf model with name conversion
+    Create a dummy HF model deserialized on the meta device from its config
"""
model_cls: PreTrainedModel = getattr(transformers, model_arch)
config = AutoConfig.from_pretrained(repo)
with torch.device("meta"):
- model: PreTrainedModel = model_cls._from_config(config)
- return model.named_parameters()
+ return model_cls._from_config(config)
def model_architectures_for_test() -> list[str]:
@@ -70,14 +66,21 @@ def test_hf_model_weights_mapper(model_arch: str):
model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
original_weights = create_repo_dummy_weights(model_id)
- hf_converted_weights = create_model_dummy_weights(model_id, model_arch)
+ hf_dummy_model = create_dummy_model(model_id, model_arch)
+ hf_converted_weights = hf_dummy_model.named_parameters()
+ hf_converted_buffers = hf_dummy_model.named_buffers()
mapper: WeightsMapper = model_cls.hf_to_vllm_mapper
mapped_original_weights = mapper.apply(original_weights)
mapped_hf_converted_weights = mapper.apply(hf_converted_weights)
+ mapped_hf_converted_buffers = mapper.apply(hf_converted_buffers)
ref_weight_names = set(map(lambda x: x[0], mapped_original_weights))
weight_names = set(map(lambda x: x[0], mapped_hf_converted_weights))
+ buffer_names = set(map(lambda x: x[0], mapped_hf_converted_buffers))
+
+    # Some checkpoints may have buffers; we ignore them for this test
+ ref_weight_names -= buffer_names
weights_missing = ref_weight_names - weight_names
weights_unmapped = weight_names - ref_weight_names
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 84ca0bc60003e..3b92462e58a85 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -357,6 +357,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
max_transformers_version="4.48", # noqa: E501
transformers_version_reason="HF model is not compatible.", # noqa: E501
hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501
+ "Emu3ForConditionalGeneration": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
"FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
"Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
"GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-2b"), # noqa: E501
@@ -501,7 +502,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
speculative_model="XiaomiMiMo/MiMo-7B-RL")
}
-_TRANSFORMERS_MODELS = {
+_TRANSFORMERS_BACKEND_MODELS = {
"TransformersForCausalLM": _HfExamplesInfo("hmellor/Ilama-3.2-1B", trust_remote_code=True), # noqa: E501
"TransformersForMultimodalLM": _HfExamplesInfo("OpenGVLab/InternVL3-1B-hf"),
}
@@ -512,7 +513,7 @@ _EXAMPLE_MODELS = {
**_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS,
**_MULTIMODAL_EXAMPLE_MODELS,
**_SPECULATIVE_DECODING_EXAMPLE_MODELS,
- **_TRANSFORMERS_MODELS,
+ **_TRANSFORMERS_BACKEND_MODELS,
}
diff --git a/vllm/model_executor/model_loader/utils.py b/vllm/model_executor/model_loader/utils.py
index 4b30336f01324..a0cd94c969a1f 100644
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -26,7 +26,7 @@ from vllm.model_executor.models.adapters import (as_embedding_model,
as_seq_cls_model)
from vllm.model_executor.models.interfaces import SupportsQuant
from vllm.model_executor.models.registry import (_PREVIOUSLY_SUPPORTED_MODELS,
- _TRANSFORMERS_MODELS)
+ _TRANSFORMERS_BACKEND_MODELS)
from vllm.utils import is_pin_memory_available
logger = init_logger(__name__)
@@ -178,7 +178,7 @@ def resolve_transformers_arch(model_config: ModelConfig,
"happen.")
for i, arch in enumerate(architectures):
- if arch in _TRANSFORMERS_MODELS:
+ if arch in _TRANSFORMERS_BACKEND_MODELS:
continue
if model_config.model_impl == ModelImpl.AUTO:
@@ -241,7 +241,7 @@ def get_model_architecture(
vllm_supported_archs = ModelRegistry.get_supported_archs()
is_supported = lambda arch: (arch in vllm_supported_archs and arch not in
- _TRANSFORMERS_MODELS)
+ _TRANSFORMERS_BACKEND_MODELS)
vllm_not_supported = not any(is_supported(arch) for arch in architectures)
if vllm_not_supported:
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 2aaac7798fc01..7470b31e1253d 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -254,7 +254,11 @@ _SPECULATIVE_DECODING_MODELS = {
# "MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"),
}
-_TRANSFORMERS_MODELS = {
+_TRANSFORMERS_SUPPORTED_MODELS = {
+ "Emu3ForConditionalGeneration": ("transformers", "TransformersForMultimodalLM"), # noqa: E501
+}
+
+_TRANSFORMERS_BACKEND_MODELS = {
"TransformersForMultimodalLM": ("transformers", "TransformersForMultimodalLM"), # noqa: E501
"TransformersForCausalLM": ("transformers", "TransformersForCausalLM"),
}
@@ -266,7 +270,8 @@ _VLLM_MODELS = {
**_CROSS_ENCODER_MODELS,
**_MULTIMODAL_MODELS,
**_SPECULATIVE_DECODING_MODELS,
- **_TRANSFORMERS_MODELS,
+ **_TRANSFORMERS_SUPPORTED_MODELS,
+ **_TRANSFORMERS_BACKEND_MODELS,
}
# This variable is used as the args for subprocess.run(). We