Prevent unnecessary requests to huggingface hub (#12837)

This commit is contained in:
Maximilien de Bayser 2025-02-07 02:37:41 -03:00 committed by GitHub
parent aa375dca9f
commit 6e1fc61f0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 96 additions and 40 deletions

View File

@ -4,6 +4,7 @@ import importlib
import sys import sys
import pytest import pytest
import urllib3
from vllm import LLM from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory from vllm.distributed import cleanup_dist_env_and_memory
@ -28,6 +29,15 @@ MODEL_CONFIGS = [
"tensor_parallel_size": 1, "tensor_parallel_size": 1,
"tokenizer_mode": "mistral", "tokenizer_mode": "mistral",
}, },
{
"model": "sentence-transformers/all-MiniLM-L12-v2",
"enforce_eager": True,
"gpu_memory_utilization": 0.20,
"max_model_len": 64,
"max_num_batched_tokens": 64,
"max_num_seqs": 64,
"tensor_parallel_size": 1,
},
] ]
@ -47,6 +57,16 @@ def test_offline_mode(monkeypatch):
# Set HF to offline mode and ensure we can still construct an LLM # Set HF to offline mode and ensure we can still construct an LLM
try: try:
monkeypatch.setenv("HF_HUB_OFFLINE", "1") monkeypatch.setenv("HF_HUB_OFFLINE", "1")
monkeypatch.setenv("VLLM_NO_USAGE_STATS", "1")
def disable_connect(*args, **kwargs):
raise RuntimeError("No http calls allowed")
monkeypatch.setattr(urllib3.connection.HTTPConnection, "connect",
disable_connect)
monkeypatch.setattr(urllib3.connection.HTTPSConnection, "connect",
disable_connect)
# Need to re-import huggingface_hub and friends to setup offline mode # Need to re-import huggingface_hub and friends to setup offline mode
_re_import_modules() _re_import_modules()
# Cached model files should be used in offline mode # Cached model files should be used in offline mode
@ -56,6 +76,7 @@ def test_offline_mode(monkeypatch):
# Reset the environment after the test # Reset the environment after the test
# NB: Assuming tests are run in online mode # NB: Assuming tests are run in online mode
monkeypatch.delenv("HF_HUB_OFFLINE") monkeypatch.delenv("HF_HUB_OFFLINE")
monkeypatch.delenv("VLLM_NO_USAGE_STATS")
_re_import_modules() _re_import_modules()
pass pass

View File

@ -10,7 +10,7 @@ import huggingface_hub
from huggingface_hub import (file_exists, hf_hub_download, list_repo_files, from huggingface_hub import (file_exists, hf_hub_download, list_repo_files,
try_to_load_from_cache) try_to_load_from_cache)
from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError, from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
LocalEntryNotFoundError, HFValidationError, LocalEntryNotFoundError,
RepositoryNotFoundError, RepositoryNotFoundError,
RevisionNotFoundError) RevisionNotFoundError)
from torch import nn from torch import nn
@ -265,49 +265,66 @@ def get_config(
return config return config
def try_get_local_file(model: Union[str, Path],
                       file_name: str,
                       revision: Optional[str] = 'main') -> Optional[Path]:
    """Resolve *file_name* for *model* without any network access.

    Looks first for ``model`` as a local directory containing the file,
    then falls back to the local Hugging Face Hub cache via
    ``try_to_load_from_cache``.

    Returns:
        The resolved ``Path``, or ``None`` when the file is not
        available locally.
    """
    candidate = Path(model) / file_name
    if candidate.is_file():
        return candidate
    try:
        cached = try_to_load_from_cache(repo_id=model,
                                        filename=file_name,
                                        revision=revision)
    except HFValidationError:
        # Raised when *model* is not a valid repo id (e.g. a filesystem
        # path) — treat as "not cached" rather than failing.
        return None
    return Path(cached) if isinstance(cached, str) else None
def get_hf_file_to_dict(file_name: str,
                        model: Union[str, Path],
                        revision: Optional[str] = 'main'):
    """
    Downloads a file from the Hugging Face Hub and returns
    its contents as a dictionary.

    Parameters:
    - file_name (str): The name of the file to download.
    - model (str): The name of the model on the Hugging Face Hub.
    - revision (str): The specific version of the model.

    Returns:
    - config_dict (dict): A dictionary containing the contents of the
      downloaded file, or None if the file is unavailable.
    """
    # Prefer a purely local lookup (model directory or HF cache) so that
    # no network request is made when the file is already on disk.
    file_path = try_get_local_file(model=model,
                                   file_name=file_name,
                                   revision=revision)

    if file_path is None and file_or_path_exists(
            model=model, config_name=file_name, revision=revision):
        try:
            hf_hub_file = hf_hub_download(model, file_name, revision=revision)
        except (RepositoryNotFoundError, RevisionNotFoundError,
                EntryNotFoundError, LocalEntryNotFoundError) as e:
            # Use a lazy %-style placeholder for the exception; passing it
            # as a bare extra argument with no placeholder in the message
            # breaks logging's string formatting.
            logger.debug(
                "File or repository not found in hf_hub_download: %s", e)
            return None
        except HfHubHTTPError as e:
            logger.warning(
                "Cannot connect to Hugging Face Hub. Skipping file "
                "download for '%s':",
                file_name,
                exc_info=e)
            return None
        file_path = Path(hf_hub_file)

    if file_path is not None and file_path.is_file():
        with open(file_path) as file:
            return json.load(file)

    return None
@ -328,7 +345,12 @@ def get_pooling_config(model: str, revision: Optional[str] = 'main'):
""" """
modules_file_name = "modules.json" modules_file_name = "modules.json"
modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
modules_dict = None
if file_or_path_exists(model=model,
config_name=modules_file_name,
revision=revision):
modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
if modules_dict is None: if modules_dict is None:
return None return None
@ -382,17 +404,17 @@ def get_sentence_transformer_tokenizer_config(model: str,
revision: Optional[str] = 'main' revision: Optional[str] = 'main'
): ):
""" """
Returns the tokenization configuration dictionary for a Returns the tokenization configuration dictionary for a
given Sentence Transformer BERT model. given Sentence Transformer BERT model.
Parameters: Parameters:
- model (str): The name of the Sentence Transformer - model (str): The name of the Sentence Transformer
BERT model. BERT model.
- revision (str, optional): The revision of the m - revision (str, optional): The revision of the m
odel to use. Defaults to 'main'. odel to use. Defaults to 'main'.
Returns: Returns:
- dict: A dictionary containing the configuration parameters - dict: A dictionary containing the configuration parameters
for the Sentence Transformer BERT model. for the Sentence Transformer BERT model.
""" """
sentence_transformer_config_files = [ sentence_transformer_config_files = [
@ -404,20 +426,33 @@ def get_sentence_transformer_tokenizer_config(model: str,
"sentence_xlm-roberta_config.json", "sentence_xlm-roberta_config.json",
"sentence_xlnet_config.json", "sentence_xlnet_config.json",
] ]
try:
# If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN)
except Exception as e:
logger.debug("Error getting repo files", e)
repo_files = []
encoder_dict = None encoder_dict = None
for config_name in sentence_transformer_config_files:
if config_name in repo_files or Path(model).exists(): for config_file in sentence_transformer_config_files:
encoder_dict = get_hf_file_to_dict(config_name, model, revision) if try_get_local_file(model=model,
file_name=config_file,
revision=revision) is not None:
encoder_dict = get_hf_file_to_dict(config_file, model, revision)
if encoder_dict: if encoder_dict:
break break
if not encoder_dict:
try:
# If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model,
revision=revision,
token=HF_TOKEN)
except Exception as e:
logger.debug("Error getting repo files", e)
repo_files = []
for config_name in sentence_transformer_config_files:
if config_name in repo_files:
encoder_dict = get_hf_file_to_dict(config_name, model,
revision)
if encoder_dict:
break
if not encoder_dict: if not encoder_dict:
return None return None