From f946a8d74308f4a47bc1624c9307c1bce45874eb Mon Sep 17 00:00:00 2001
From: Isotr0py
Date: Sat, 29 Nov 2025 00:46:51 +0800
Subject: [PATCH] [Chore]: Reorganize model repo operating functions in
 `transformers_utils` (#29680)

Signed-off-by: Isotr0py
---
 tests/transformers_utils/test_config.py |   4 +-
 vllm/engine/arg_utils.py                |   2 +-
 .../model_loader/default_loader.py      |   2 +-
 vllm/model_executor/models/adapters.py  |   2 +-
 vllm/transformers_utils/config.py       | 279 +----------------
 vllm/transformers_utils/gguf_utils.py   |   2 +-
 vllm/transformers_utils/repo_utils.py   | 287 ++++++++++++++++++
 vllm/transformers_utils/tokenizer.py    |   6 +-
 8 files changed, 304 insertions(+), 280 deletions(-)
 create mode 100644 vllm/transformers_utils/repo_utils.py

diff --git a/tests/transformers_utils/test_config.py b/tests/transformers_utils/test_config.py
index de28ab5f99e8..7107ad0f7b99 100644
--- a/tests/transformers_utils/test_config.py
+++ b/tests/transformers_utils/test_config.py
@@ -8,7 +8,7 @@ from unittest.mock import MagicMock, call, patch
 
 import pytest
 
-from vllm.transformers_utils.config import list_filtered_repo_files
+from vllm.transformers_utils.repo_utils import list_filtered_repo_files
 
 
 @pytest.mark.parametrize(
@@ -44,7 +44,7 @@ def test_list_filtered_repo_files(
 
     # Patch list_repo_files called by fn
     with patch(
-        "vllm.transformers_utils.config.list_repo_files",
+        "vllm.transformers_utils.repo_utils.list_repo_files",
         MagicMock(return_value=_glob_path()),
     ) as mock_list_repo_files:
         out_files = sorted(
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 1126e9ce12db..8f0eb832064f 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -83,10 +83,10 @@ from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
 from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized
 from vllm.transformers_utils.config import (
-    get_model_path,
     is_interleaved,
     maybe_override_with_speculators,
 )
+from vllm.transformers_utils.repo_utils import get_model_path
 from vllm.transformers_utils.utils import is_cloud_storage, is_gguf
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.mem_constants import GiB_bytes
diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index 7401a7a0e2db..88c6d1e27e39 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -31,7 +31,7 @@ from vllm.model_executor.model_loader.weight_utils import (
     safetensors_weights_iterator,
 )
 from vllm.platforms import current_platform
-from vllm.transformers_utils.config import list_filtered_repo_files
+from vllm.transformers_utils.repo_utils import list_filtered_repo_files
 
 logger = init_logger(__name__)
 
diff --git a/vllm/model_executor/models/adapters.py b/vllm/model_executor/models/adapters.py
index 5aba46f8614b..738400ae864a 100644
--- a/vllm/model_executor/models/adapters.py
+++ b/vllm/model_executor/models/adapters.py
@@ -14,9 +14,9 @@ from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import get_act_fn
 from vllm.model_executor.models.config import VerifyAndUpdateConfig
 from vllm.transformers_utils.config import (
-    get_hf_file_bytes,
     try_get_dense_modules,
 )
+from vllm.transformers_utils.repo_utils import get_hf_file_bytes
 
 from .interfaces_base import VllmModelForPooling, is_pooling_model
 
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 224542c824ef..45c4358bbc8f 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -1,30 +1,17 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import fnmatch
-import json
 import os
-import time
 from collections.abc import Callable
 from dataclasses import asdict
 from functools import cache, partial
 from importlib.metadata import version
 from pathlib import Path
-from typing import Any, Literal, TypeAlias, TypeVar
+from typing import Any, Literal, TypeAlias
 
 import huggingface_hub
 from huggingface_hub import (
     get_safetensors_metadata,
-    hf_hub_download,
-    try_to_load_from_cache,
-)
-from huggingface_hub import list_repo_files as hf_list_repo_files
-from huggingface_hub.utils import (
-    EntryNotFoundError,
-    HfHubHTTPError,
-    LocalEntryNotFoundError,
-    RepositoryNotFoundError,
-    RevisionNotFoundError,
 )
 from packaging.version import Version
 from transformers import GenerationConfig, PretrainedConfig
@@ -40,6 +27,14 @@ from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
 from vllm import envs
 from vllm.logger import init_logger
 from vllm.transformers_utils.config_parser_base import ConfigParserBase
+from vllm.transformers_utils.repo_utils import (
+    _get_hf_token,
+    file_or_path_exists,
+    get_hf_file_to_dict,
+    list_repo_files,
+    try_get_local_file,
+    with_retry,
+)
 from vllm.transformers_utils.utils import (
     check_gguf_file,
     is_gguf,
@@ -58,21 +53,6 @@ MISTRAL_CONFIG_NAME = "params.json"
 logger = init_logger(__name__)
 
 
-def _get_hf_token() -> str | None:
-    """
-    Get the HuggingFace token from environment variable.
-
-    Returns None if the token is not set, is an empty string,
-    or contains only whitespace.
-    This follows the same pattern as huggingface_hub library which
-    treats empty string tokens as None to avoid authentication errors.
-    """
-    token = os.getenv("HF_TOKEN")
-    if token and token.strip():
-        return token
-    return None
-
-
 class LazyConfigDict(dict):
     def __getitem__(self, key):
         if isinstance(value := super().__getitem__(key), type):
@@ -308,143 +288,6 @@ def register_config_parser(config_format: str):
     return _wrapper
 
 
-_R = TypeVar("_R")
-
-
-def with_retry(
-    func: Callable[[], _R],
-    log_msg: str,
-    max_retries: int = 2,
-    retry_delay: int = 2,
-) -> _R:
-    for attempt in range(max_retries):
-        try:
-            return func()
-        except Exception as e:
-            if attempt == max_retries - 1:
-                logger.error("%s: %s", log_msg, e)
-                raise
-            logger.error(
-                "%s: %s, retrying %d of %d", log_msg, e, attempt + 1, max_retries
-            )
-            time.sleep(retry_delay)
-            retry_delay *= 2
-
-    raise AssertionError("Should not be reached")
-
-
-# @cache doesn't cache exceptions
-@cache
-def list_repo_files(
-    repo_id: str,
-    *,
-    revision: str | None = None,
-    repo_type: str | None = None,
-    token: str | bool | None = None,
-) -> list[str]:
-    def lookup_files() -> list[str]:
-        # directly list files if model is local
-        if (local_path := Path(repo_id)).exists():
-            return [
-                str(file.relative_to(local_path))
-                for file in local_path.rglob("*")
-                if file.is_file()
-            ]
-        # if model is remote, use hf_hub api to list files
-        try:
-            if envs.VLLM_USE_MODELSCOPE:
-                from vllm.transformers_utils.utils import modelscope_list_repo_files
-
-                return modelscope_list_repo_files(
-                    repo_id,
-                    revision=revision,
-                    token=os.getenv("MODELSCOPE_API_TOKEN", None),
-                )
-            return hf_list_repo_files(
-                repo_id, revision=revision, repo_type=repo_type, token=token
-            )
-        except huggingface_hub.errors.OfflineModeIsEnabled:
-            # Don't raise in offline mode,
-            # all we know is that we don't have this
-            # file cached.
-            return []
-
-    return with_retry(lookup_files, "Error retrieving file list")
-
-
-def list_filtered_repo_files(
-    model_name_or_path: str,
-    allow_patterns: list[str],
-    revision: str | None = None,
-    repo_type: str | None = None,
-    token: str | bool | None = None,
-) -> list[str]:
-    try:
-        all_files = list_repo_files(
-            repo_id=model_name_or_path,
-            revision=revision,
-            token=token,
-            repo_type=repo_type,
-        )
-    except Exception:
-        logger.error(
-            "Error retrieving file list. Please ensure your `model_name_or_path`"
-            "`repo_type`, `token` and `revision` arguments are correctly set. "
-            "Returning an empty list."
-        )
-        return []
-
-    file_list = []
-    # Filter patterns on filenames
-    for pattern in allow_patterns:
-        file_list.extend(
-            [
-                file
-                for file in all_files
-                if fnmatch.fnmatch(os.path.basename(file), pattern)
-            ]
-        )
-    return file_list
-
-
-def file_exists(
-    repo_id: str,
-    file_name: str,
-    *,
-    repo_type: str | None = None,
-    revision: str | None = None,
-    token: str | bool | None = None,
-) -> bool:
-    file_list = list_repo_files(
-        repo_id, repo_type=repo_type, revision=revision, token=token
-    )
-    return file_name in file_list
-
-
-# In offline mode the result can be a false negative
-def file_or_path_exists(
-    model: str | Path, config_name: str, revision: str | None
-) -> bool:
-    if (local_path := Path(model)).exists():
-        return (local_path / config_name).is_file()
-
-    # Offline mode support: Check if config file is cached already
-    cached_filepath = try_to_load_from_cache(
-        repo_id=model, filename=config_name, revision=revision
-    )
-    if isinstance(cached_filepath, str):
-        # The config file exists in cache- we can continue trying to load
-        return True
-
-    # NB: file_exists will only check for the existence of the config file on
-    # hf_hub. This will fail in offline mode.
-
-    # Call HF to check if the file exists
-    return file_exists(
-        str(model), config_name, revision=revision, token=_get_hf_token()
-    )
-
-
 def set_default_rope_theta(config: PretrainedConfig, default_theta: float) -> None:
     """Some models may have no rope_theta in their config but still use RoPE.
     This function sets a default rope_theta if it's missing."""
@@ -836,72 +679,6 @@ def get_config(
     return config
 
 
-def try_get_local_file(
-    model: str | Path, file_name: str, revision: str | None = "main"
-) -> Path | None:
-    file_path = Path(model) / file_name
-    if file_path.is_file():
-        return file_path
-    else:
-        try:
-            cached_filepath = try_to_load_from_cache(
-                repo_id=model, filename=file_name, revision=revision
-            )
-            if isinstance(cached_filepath, str):
-                return Path(cached_filepath)
-        except ValueError:
-            ...
-    return None
-
-
-def get_hf_file_to_dict(
-    file_name: str, model: str | Path, revision: str | None = "main"
-):
-    """
-    Downloads a file from the Hugging Face Hub and returns
-    its contents as a dictionary.
-
-    Parameters:
-    - file_name (str): The name of the file to download.
-    - model (str): The name of the model on the Hugging Face Hub.
-    - revision (str): The specific version of the model.
-
-    Returns:
-    - config_dict (dict): A dictionary containing
-      the contents of the downloaded file.
-    """
-
-    file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
-
-    if file_path is None:
-        try:
-            hf_hub_file = hf_hub_download(model, file_name, revision=revision)
-        except huggingface_hub.errors.OfflineModeIsEnabled:
-            return None
-        except (
-            RepositoryNotFoundError,
-            RevisionNotFoundError,
-            EntryNotFoundError,
-            LocalEntryNotFoundError,
-        ) as e:
-            logger.debug("File or repository not found in hf_hub_download", e)
-            return None
-        except HfHubHTTPError as e:
-            logger.warning(
-                "Cannot connect to Hugging Face Hub. Skipping file download for '%s':",
-                file_name,
-                exc_info=e,
-            )
-            return None
-        file_path = Path(hf_hub_file)
-
-    if file_path is not None and file_path.is_file():
-        with open(file_path) as file:
-            return json.load(file)
-
-    return None
-
-
 @cache
 def get_pooling_config(model: str, revision: str | None = "main") -> dict | None:
     """
@@ -1316,41 +1093,3 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
     )
 
     return max_position_embeddings
-
-
-def get_model_path(model: str | Path, revision: str | None = None):
-    if os.path.exists(model):
-        return model
-    assert huggingface_hub.constants.HF_HUB_OFFLINE
-    common_kwargs = {
-        "local_files_only": huggingface_hub.constants.HF_HUB_OFFLINE,
-        "revision": revision,
-    }
-
-    if envs.VLLM_USE_MODELSCOPE:
-        from modelscope.hub.snapshot_download import snapshot_download
-
-        return snapshot_download(model_id=model, **common_kwargs)
-
-    from huggingface_hub import snapshot_download
-
-    return snapshot_download(repo_id=model, **common_kwargs)
-
-
-def get_hf_file_bytes(
-    file_name: str, model: str | Path, revision: str | None = "main"
-) -> bytes | None:
-    """Get file contents from HuggingFace repository as bytes."""
-    file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
-
-    if file_path is None:
-        hf_hub_file = hf_hub_download(
-            model, file_name, revision=revision, token=_get_hf_token()
-        )
-        file_path = Path(hf_hub_file)
-
-    if file_path is not None and file_path.is_file():
-        with open(file_path, "rb") as file:
-            return file.read()
-
-    return None
diff --git a/vllm/transformers_utils/gguf_utils.py b/vllm/transformers_utils/gguf_utils.py
index f727b1b4726b..c5b4d3f00090 100644
--- a/vllm/transformers_utils/gguf_utils.py
+++ b/vllm/transformers_utils/gguf_utils.py
@@ -9,7 +9,7 @@ from gguf.constants import Keys, VisionProjectorType
 from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
 
 from vllm.logger import init_logger
-from vllm.transformers_utils.config import list_filtered_repo_files
+from vllm.transformers_utils.repo_utils import list_filtered_repo_files
 
 logger = init_logger(__name__)
 
diff --git a/vllm/transformers_utils/repo_utils.py b/vllm/transformers_utils/repo_utils.py
new file mode 100644
index 000000000000..3ccec04fc487
--- /dev/null
+++ b/vllm/transformers_utils/repo_utils.py
@@ -0,0 +1,287 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Utilities for model repo interaction."""
+
+import fnmatch
+import json
+import os
+import time
+from collections.abc import Callable
+from functools import cache
+from pathlib import Path
+from typing import TypeVar
+
+import huggingface_hub
+from huggingface_hub import (
+    hf_hub_download,
+    try_to_load_from_cache,
+)
+from huggingface_hub import list_repo_files as hf_list_repo_files
+from huggingface_hub.utils import (
+    EntryNotFoundError,
+    HfHubHTTPError,
+    LocalEntryNotFoundError,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+)
+
+from vllm import envs
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def _get_hf_token() -> str | None:
+    """
+    Get the HuggingFace token from environment variable.
+
+    Returns None if the token is not set, is an empty string,
+    or contains only whitespace.
+    This follows the same pattern as huggingface_hub library which
+    treats empty string tokens as None to avoid authentication errors.
+    """
+    token = os.getenv("HF_TOKEN")
+    if token and token.strip():
+        return token
+    return None
+
+
+_R = TypeVar("_R")
+
+
+def with_retry(
+    func: Callable[[], _R],
+    log_msg: str,
+    max_retries: int = 2,
+    retry_delay: int = 2,
+) -> _R:
+    for attempt in range(max_retries):
+        try:
+            return func()
+        except Exception as e:
+            if attempt == max_retries - 1:
+                logger.error("%s: %s", log_msg, e)
+                raise
+            logger.error(
+                "%s: %s, retrying %d of %d", log_msg, e, attempt + 1, max_retries
+            )
+            time.sleep(retry_delay)
+            retry_delay *= 2
+
+    raise AssertionError("Should not be reached")
+
+
+# @cache doesn't cache exceptions
+@cache
+def list_repo_files(
+    repo_id: str,
+    *,
+    revision: str | None = None,
+    repo_type: str | None = None,
+    token: str | bool | None = None,
+) -> list[str]:
+    def lookup_files() -> list[str]:
+        # directly list files if model is local
+        if (local_path := Path(repo_id)).exists():
+            return [
+                str(file.relative_to(local_path))
+                for file in local_path.rglob("*")
+                if file.is_file()
+            ]
+        # if model is remote, use hf_hub api to list files
+        try:
+            if envs.VLLM_USE_MODELSCOPE:
+                from vllm.transformers_utils.utils import modelscope_list_repo_files
+
+                return modelscope_list_repo_files(
+                    repo_id,
+                    revision=revision,
+                    token=os.getenv("MODELSCOPE_API_TOKEN", None),
+                )
+            return hf_list_repo_files(
+                repo_id, revision=revision, repo_type=repo_type, token=token
+            )
+        except huggingface_hub.errors.OfflineModeIsEnabled:
+            # Don't raise in offline mode,
+            # all we know is that we don't have this
+            # file cached.
+            return []
+
+    return with_retry(lookup_files, "Error retrieving file list")
+
+
+def list_filtered_repo_files(
+    model_name_or_path: str,
+    allow_patterns: list[str],
+    revision: str | None = None,
+    repo_type: str | None = None,
+    token: str | bool | None = None,
+) -> list[str]:
+    try:
+        all_files = list_repo_files(
+            repo_id=model_name_or_path,
+            revision=revision,
+            token=token,
+            repo_type=repo_type,
+        )
+    except Exception:
+        logger.error(
+            "Error retrieving file list. Please ensure your `model_name_or_path`, "
+            "`repo_type`, `token` and `revision` arguments are correctly set. "
+            "Returning an empty list."
+        )
+        return []
+
+    file_list = []
+    # Filter patterns on filenames
+    for pattern in allow_patterns:
+        file_list.extend(
+            [
+                file
+                for file in all_files
+                if fnmatch.fnmatch(os.path.basename(file), pattern)
+            ]
+        )
+    return file_list
+
+
+def file_exists(
+    repo_id: str,
+    file_name: str,
+    *,
+    repo_type: str | None = None,
+    revision: str | None = None,
+    token: str | bool | None = None,
+) -> bool:
+    file_list = list_repo_files(
+        repo_id, repo_type=repo_type, revision=revision, token=token
+    )
+    return file_name in file_list
+
+
+# In offline mode the result can be a false negative
+def file_or_path_exists(
+    model: str | Path, config_name: str, revision: str | None
+) -> bool:
+    if (local_path := Path(model)).exists():
+        return (local_path / config_name).is_file()
+
+    # Offline mode support: Check if config file is cached already
+    cached_filepath = try_to_load_from_cache(
+        repo_id=model, filename=config_name, revision=revision
+    )
+    if isinstance(cached_filepath, str):
+        # The config file exists in cache - we can continue trying to load
+        return True
+
+    # NB: file_exists will only check for the existence of the config file on
+    # hf_hub. This will fail in offline mode.
+
+    # Call HF to check if the file exists
+    return file_exists(
+        str(model), config_name, revision=revision, token=_get_hf_token()
+    )
+
+
+def get_model_path(model: str | Path, revision: str | None = None):
+    if os.path.exists(model):
+        return model
+    assert huggingface_hub.constants.HF_HUB_OFFLINE
+    common_kwargs = {
+        "local_files_only": huggingface_hub.constants.HF_HUB_OFFLINE,
+        "revision": revision,
+    }
+
+    if envs.VLLM_USE_MODELSCOPE:
+        from modelscope.hub.snapshot_download import snapshot_download
+
+        return snapshot_download(model_id=model, **common_kwargs)
+
+    from huggingface_hub import snapshot_download
+
+    return snapshot_download(repo_id=model, **common_kwargs)
+
+
+def get_hf_file_bytes(
+    file_name: str, model: str | Path, revision: str | None = "main"
+) -> bytes | None:
+    """Get file contents from HuggingFace repository as bytes."""
+    file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
+
+    if file_path is None:
+        hf_hub_file = hf_hub_download(
+            model, file_name, revision=revision, token=_get_hf_token()
+        )
+        file_path = Path(hf_hub_file)
+
+    if file_path is not None and file_path.is_file():
+        with open(file_path, "rb") as file:
+            return file.read()
+
+    return None
+
+
+def try_get_local_file(
+    model: str | Path, file_name: str, revision: str | None = "main"
+) -> Path | None:
+    file_path = Path(model) / file_name
+    if file_path.is_file():
+        return file_path
+    else:
+        try:
+            cached_filepath = try_to_load_from_cache(
+                repo_id=model, filename=file_name, revision=revision
+            )
+            if isinstance(cached_filepath, str):
+                return Path(cached_filepath)
+        except ValueError:
+            ...
+    return None
+
+
+def get_hf_file_to_dict(
+    file_name: str, model: str | Path, revision: str | None = "main"
+):
+    """
+    Downloads a file from the Hugging Face Hub and returns
+    its contents as a dictionary.
+
+    Parameters:
+    - file_name (str): The name of the file to download.
+    - model (str): The name of the model on the Hugging Face Hub.
+    - revision (str): The specific version of the model.
+
+    Returns:
+    - config_dict (dict): A dictionary containing
+      the contents of the downloaded file.
+    """
+
+    file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
+
+    if file_path is None:
+        try:
+            hf_hub_file = hf_hub_download(model, file_name, revision=revision)
+        except huggingface_hub.errors.OfflineModeIsEnabled:
+            return None
+        except (
+            RepositoryNotFoundError,
+            RevisionNotFoundError,
+            EntryNotFoundError,
+            LocalEntryNotFoundError,
+        ) as e:
+            logger.debug("File or repository not found in hf_hub_download: %s", e)
+            return None
+        except HfHubHTTPError as e:
+            logger.warning(
+                "Cannot connect to Hugging Face Hub. Skipping file download for '%s':",
+                file_name,
+                exc_info=e,
+            )
+            return None
+        file_path = Path(hf_hub_file)
+
+    if file_path is not None and file_path.is_file():
+        with open(file_path) as file:
+            return json.load(file)
+
+    return None
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 929dc8bf481c..9eb7fe37912b 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -15,11 +15,9 @@ from typing_extensions import assert_never
 
 from vllm import envs
 from vllm.logger import init_logger
-from vllm.transformers_utils.config import (
-    get_sentence_transformer_tokenizer_config,
-    list_filtered_repo_files,
-)
+from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
 from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
+from vllm.transformers_utils.repo_utils import list_filtered_repo_files
 from vllm.transformers_utils.tokenizers import MistralTokenizer
 from vllm.transformers_utils.utils import (
     check_gguf_file,
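--
Usage sketch (illustrative, not part of the diff): after this patch, downstream code
imports the repo helpers from vllm.transformers_utils.repo_utils instead of
vllm.transformers_utils.config. The repo id below is a placeholder, not a real model.

from vllm.transformers_utils.repo_utils import (
    file_or_path_exists,
    get_hf_file_to_dict,
    list_filtered_repo_files,
)

# Placeholder repo id; any Hugging Face Hub repo or local path works.
repo = "org/example-model"

# Keep only the safetensors shards and the top-level config.
files = list_filtered_repo_files(
    repo, allow_patterns=["*.safetensors", "config.json"]
)

# Checks a local path, then the HF cache, then the Hub
# (can be a false negative in offline mode, per the comment in repo_utils.py).
if file_or_path_exists(repo, "config.json", revision=None):
    config_dict = get_hf_file_to_dict("config.json", repo)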