From 80bb1e8afe950342e93b7262e7bf25eb6d29b287 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 12 Aug 2025 13:38:48 +0100
Subject: [PATCH] Officially support SmolLM3 using the Transformers backend
 (#22665)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/models/supported_models.md        | 6 ++++++
 tests/models/registry.py               | 1 +
 vllm/model_executor/models/registry.py | 3 +++
 3 files changed, 10 insertions(+)

diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index ea363315428f..a24fa4bcce33 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -409,6 +409,12 @@ th {
 | `MiniMaxText01ForCausalLM` | MiniMax-Text | `MiniMaxAI/MiniMax-Text-01`, etc. | | | ✅︎ |
 | `Zamba2ForCausalLM` | Zamba2 | `Zyphra/Zamba2-7B-instruct`, `Zyphra/Zamba2-2.7B-instruct`, `Zyphra/Zamba2-1.2B-instruct`, etc. | | | ✅︎ |
 
+Some models are supported only via the [Transformers backend](#transformers). The table below lists the models that we officially support in this way. The logs will say that the Transformers backend is being used, and you will see no warning that this is fallback behaviour. Because this is official support, if you have issues with any of the models listed below, please [make an issue](https://github.com/vllm-project/vllm/issues/new/choose) and we'll do our best to fix it!
+
+| Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/parallelism_scaling.md) | [V1](gh-issue:8779) |
+|--------------|--------|-------------------|-----------------------------|-----------------------------------------|---------------------|
+| `SmolLM3ForCausalLM` | SmolLM3 | `HuggingFaceTB/SmolLM3-3B` | ✅︎ | ✅︎ | ✅︎ |
+
 !!! note
     Currently, the ROCm version of vLLM supports Mistral and Mixtral only for context lengths up to 4096.
 
diff --git a/tests/models/registry.py b/tests/models/registry.py
index eae582903082..d7d20d1f3abf 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -291,6 +291,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     "Qwen3ForCausalLM": _HfExamplesInfo("Qwen/Qwen3-8B"),
     "Qwen3MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen3-30B-A3B"),
     "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"),
+    "SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
     "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),  # noqa: E501
     "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
     "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 279e045a707c..64dbde4916a2 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -271,6 +271,9 @@ _SPECULATIVE_DECODING_MODELS = {
 }
 
 _TRANSFORMERS_SUPPORTED_MODELS = {
+    # Text generation models
+    "SmolLM3ForCausalLM": ("transformers", "TransformersForCausalLM"),
+    # Multimodal models
     "Emu3ForConditionalGeneration": ("transformers", "TransformersForMultimodalLM"),  # noqa: E501
 }
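
For context on the last hunk: each `_TRANSFORMERS_SUPPORTED_MODELS` entry maps an architecture string to a `(module, class)` pair, where the module name refers to a file under `vllm/model_executor/models/` (here, `transformers.py`). A minimal sketch of that lookup, assuming a plain importlib-style resolution; the helper name `resolve_model_cls` is illustrative, not vLLM's actual registry API:

```python
# Hedged sketch: how a registry tuple like
# ("transformers", "TransformersForCausalLM") could be resolved.
# The helper is hypothetical; vLLM's real registry code differs.
import importlib


def resolve_model_cls(module_name: str, class_name: str) -> type:
    # e.g. "transformers" -> vllm/model_executor/models/transformers.py
    module = importlib.import_module(
        f"vllm.model_executor.models.{module_name}")
    return getattr(module, class_name)


if __name__ == "__main__":
    cls = resolve_model_cls("transformers", "TransformersForCausalLM")
    print(cls.__name__)  # TransformersForCausalLM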
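```

And a quick end-to-end check of the new entry using vLLM's public `LLM` API. The model ID comes from the patch; the prompt and sampling values are arbitrary examples:

```python
# Offline generation with SmolLM3; vLLM dispatches to the
# Transformers backend automatically for this architecture.
from vllm import LLM, SamplingParams

llm = LLM(model="HuggingFaceTB/SmolLM3-3B")
params = SamplingParams(temperature=0.8, max_tokens=64)
for out in llm.generate(["The capital of France is"], params):
    print(out.outputs[0].text)
```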