From 879a06579ea8a057c250e9b6cc4e632dabd87d2e Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Sat, 1 Nov 2025 13:11:07 +0800
Subject: [PATCH] [CI/Build] Bump transformers version (#27528)

Signed-off-by: DarkLight1337
Signed-off-by: Isotr0py
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Isotr0py
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 requirements/common.txt                             |  2 +-
 requirements/nightly_torch_test.txt                 |  2 +-
 requirements/test.in                                |  2 +-
 requirements/test.txt                               |  2 +-
 tests/models/multimodal/generation/test_maverick.py |  2 ++
 tests/models/registry.py                            | 12 ++++++------
 tests/models/test_transformers.py                   |  2 +-
 vllm/model_executor/models/moonvit.py               |  4 ++--
 vllm/model_executor/models/qwen2_vl.py              |  6 ++----
 9 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/requirements/common.txt b/requirements/common.txt
index 81c4d6675006d..724360f8bc9e4 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -7,7 +7,7 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.56.0
+transformers >= 4.56.0, < 5
 tokenizers >= 0.21.1 # Required for fast incremental detokenization.
 protobuf # Required by LlamaTokenizer.
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt
index 63c1908f024b3..d9c5d89c1d52f 100644
--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
@@ -29,7 +29,7 @@ opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
 mteb>=1.38.11, <2 # required for mteb test
-transformers==4.56.2
+transformers==4.57.1
 tokenizers==0.22.0
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
diff --git a/requirements/test.in b/requirements/test.in
index b1ab599ff16e5..f57ec31277ce9 100644
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -37,7 +37,7 @@ datamodel_code_generator # required for minicpm3 test
 # TODO: Use lm-eval[api]==0.4.10 once released
 lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
 mteb[bm25s]>=1.38.11, <2 # required for mteb test
-transformers==4.56.2
+transformers==4.57.1
 tokenizers==0.22.0
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
diff --git a/requirements/test.txt b/requirements/test.txt
index e54bb49fde684..a975f247065da 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1196,7 +1196,7 @@ tqdm==4.66.6
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.56.2
+transformers==4.57.1
     # via
     #   -r requirements/test.in
     #   genai-perf
diff --git a/tests/models/multimodal/generation/test_maverick.py b/tests/models/multimodal/generation/test_maverick.py
index fd3386ff67df2..6fc2efa418ddf 100644
--- a/tests/models/multimodal/generation/test_maverick.py
+++ b/tests/models/multimodal/generation/test_maverick.py
@@ -186,6 +186,8 @@ def create_reduced_config(
     if "text_config" in config_dict:
         original_text_layers = config_dict["text_config"]["num_hidden_layers"]
         config_dict["text_config"]["num_hidden_layers"] = text_layers
+        original_layer_types = config_dict["text_config"]["layer_types"]
+        config_dict["text_config"]["layer_types"] = original_layer_types[:text_layers]
         print(f"Reduced text layers from {original_text_layers} to {text_layers}")

         original_num_experts = config_dict["text_config"]["num_local_experts"]
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 7b5977ec58e53..8e1dd4ba91f1d 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -882,27 +882,27 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {

 _TRANSFORMERS_BACKEND_MODELS = {
     "TransformersEmbeddingModel": _HfExamplesInfo(
-        "BAAI/bge-base-en-v1.5", min_transformers_version="4.57.0.dev0"
+        "BAAI/bge-base-en-v1.5", min_transformers_version="5.0.0"
     ),
     "TransformersForSequenceClassification": _HfExamplesInfo(
         "papluca/xlm-roberta-base-language-detection",
-        min_transformers_version="4.57.0.dev0",
+        min_transformers_version="5.0.0",
     ),
     "TransformersForCausalLM": _HfExamplesInfo(
         "hmellor/Ilama-3.2-1B", trust_remote_code=True
     ),
     "TransformersMultiModalForCausalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
     "TransformersMoEForCausalLM": _HfExamplesInfo(
-        "allenai/OLMoE-1B-7B-0924", min_transformers_version="4.57.0.dev0"
+        "allenai/OLMoE-1B-7B-0924", min_transformers_version="5.0.0"
     ),
     "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
-        "Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="4.57.0.dev0"
+        "Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="5.0.0"
     ),
     "TransformersMoEEmbeddingModel": _HfExamplesInfo(
-        "Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
+        "Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0"
     ),
     "TransformersMoEForSequenceClassification": _HfExamplesInfo(
-        "Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
+        "Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0"
     ),
     "TransformersMultiModalEmbeddingModel": _HfExamplesInfo("google/gemma-3-4b-it"),
     "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py
index d8a1aace83325..06e51df32d184 100644
--- a/tests/models/test_transformers.py
+++ b/tests/models/test_transformers.py
@@ -82,7 +82,7 @@ def test_models(
     from packaging.version import Version

     installed = Version(transformers.__version__)
-    required = Version("4.57.0.dev0")
+    required = Version("5.0.0")
     if model == "allenai/OLMoE-1B-7B-0924" and installed < required:
         pytest.skip(
             "MoE models with the Transformers backend require "
diff --git a/vllm/model_executor/models/moonvit.py b/vllm/model_executor/models/moonvit.py
index 96ec6e6b56acb..8017c947bf9ad 100644
--- a/vllm/model_executor/models/moonvit.py
+++ b/vllm/model_executor/models/moonvit.py
@@ -49,7 +49,7 @@ from functools import cached_property

 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from transformers.activations import ACT2FN, PytorchGELUTanh
+from transformers.activations import ACT2FN
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import is_flash_attn_2_available
@@ -651,7 +651,7 @@ class MoonVitPretrainedModel(PreTrainedModel):
             "num_heads": config.num_attention_heads,
             "hidden_dim": config.hidden_size,
             "mlp_dim": config.intermediate_size,
-            "activation": PytorchGELUTanh(),
+            "activation": ACT2FN["gelu_pytorch_tanh"],
             "attn_bias": True,
             "attn_implementation": config._attn_implementation,
         },
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index a81acf9f9a36d..1ec12bdb55dfe 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -34,7 +34,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange, repeat
-from transformers import AutoConfig, BatchFeature, PretrainedConfig
+from transformers import BatchFeature, PretrainedConfig
 from transformers.models.qwen2_vl import Qwen2VLImageProcessor, Qwen2VLProcessor
 from transformers.models.qwen2_vl.configuration_qwen2_vl import (
     Qwen2VLConfig,
@@ -1651,9 +1651,7 @@ class Tarsier2Processor(Qwen2VLProcessor):


 class Tarsier2ProcessingInfo(Qwen2VLProcessingInfo):
     def get_hf_config(self) -> Qwen2VLConfig:
         model_path = self.ctx.model_config.model
-        original_config = AutoConfig.from_pretrained(model_path)
-        config_dict = original_config.to_dict()
-        correct_config = Qwen2VLConfig.from_dict(config_dict)
+        correct_config = Qwen2VLConfig.from_pretrained(model_path)
         return correct_config
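
Why test_maverick.py now truncates "layer_types" alongside "num_hidden_layers": recent transformers releases appear to validate that len(layer_types) equals num_hidden_layers, so a reduced-layer config that leaves layer_types at full length would be rejected before the test model is built. A minimal sketch of the invariant the change maintains, using illustrative layer counts rather than values from the patch:

    # Reduced-config invariant: layer_types must shrink with num_hidden_layers.
    original_layer_types = ["full_attention"] * 48  # illustrative full depth
    text_layers = 4                                 # illustrative reduced depth

    reduced_text_config = {
        "num_hidden_layers": text_layers,
        "layer_types": original_layer_types[:text_layers],
    }
    assert len(reduced_text_config["layer_types"]) == reduced_text_config["num_hidden_layers"]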
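
On the moonvit.py change: transformers v5 drops the standalone PytorchGELUTanh class, so the activation is now looked up in the ACT2FN registry instead. A sketch of why the two are interchangeable, assuming ACT2FN maps "gelu_pytorch_tanh" to the tanh-approximated GELU (true in current transformers releases):

    import torch
    from transformers.activations import ACT2FN

    # ACT2FN instantiates the registered activation module on lookup.
    act = ACT2FN["gelu_pytorch_tanh"]

    x = torch.randn(4, 8)
    # PytorchGELUTanh wrapped exactly this tanh-approximated GELU.
    assert torch.allclose(act(x), torch.nn.functional.gelu(x, approximate="tanh"))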
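
On the qwen2_vl.py change: PretrainedConfig.from_pretrained can parse a checkpoint's config.json directly into a subclass, so the old AutoConfig-to-dict round-trip was redundant. When the on-disk model_type differs from the target class, transformers logs a warning but still loads, which is what the Tarsier2 override relies on. A sketch with an illustrative checkpoint path (not taken from the patch):

    from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLConfig

    # Loads config.json straight into Qwen2VLConfig, skipping AutoConfig dispatch.
    config = Qwen2VLConfig.from_pretrained("path/to/tarsier2-checkpoint")
    assert isinstance(config, Qwen2VLConfig)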