Prevent unnecessary requests to huggingface hub (#12837)
parent aa375dca9f
commit 6e1fc61f0f
@@ -4,6 +4,7 @@ import importlib
 import sys
 
 import pytest
+import urllib3
 
 from vllm import LLM
 from vllm.distributed import cleanup_dist_env_and_memory
@@ -28,6 +29,15 @@ MODEL_CONFIGS = [
         "tensor_parallel_size": 1,
         "tokenizer_mode": "mistral",
     },
+    {
+        "model": "sentence-transformers/all-MiniLM-L12-v2",
+        "enforce_eager": True,
+        "gpu_memory_utilization": 0.20,
+        "max_model_len": 64,
+        "max_num_batched_tokens": 64,
+        "max_num_seqs": 64,
+        "tensor_parallel_size": 1,
+    },
 ]
 
 
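The new MODEL_CONFIGS entry adds a small sentence-transformers embedding model, so the offline test now also exercises the modules.json and pooling-config lookups changed further down. Assuming the test splats each config dict into the constructor (the usual pattern for such a table; not shown in this diff), the entry corresponds roughly to:

from vllm import LLM

# Illustrative only: kwargs mirror the MODEL_CONFIGS entry above.
llm = LLM(model="sentence-transformers/all-MiniLM-L12-v2",
          enforce_eager=True,
          gpu_memory_utilization=0.20,
          max_model_len=64,
          max_num_batched_tokens=64,
          max_num_seqs=64,
          tensor_parallel_size=1)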
@@ -47,6 +57,16 @@ def test_offline_mode(monkeypatch):
     # Set HF to offline mode and ensure we can still construct an LLM
     try:
         monkeypatch.setenv("HF_HUB_OFFLINE", "1")
+        monkeypatch.setenv("VLLM_NO_USAGE_STATS", "1")
+
+        def disable_connect(*args, **kwargs):
+            raise RuntimeError("No http calls allowed")
+
+        monkeypatch.setattr(urllib3.connection.HTTPConnection, "connect",
+                            disable_connect)
+        monkeypatch.setattr(urllib3.connection.HTTPSConnection, "connect",
+                            disable_connect)
+
         # Need to re-import huggingface_hub and friends to setup offline mode
         _re_import_modules()
         # Cached model files should be used in offline mode
@@ -56,6 +76,7 @@ def test_offline_mode(monkeypatch):
         # Reset the environment after the test
         # NB: Assuming tests are run in online mode
         monkeypatch.delenv("HF_HUB_OFFLINE")
+        monkeypatch.delenv("VLLM_NO_USAGE_STATS")
         _re_import_modules()
         pass
 
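Patching urllib3's connect methods is what turns "no unnecessary requests" into a hard test failure: huggingface_hub (via requests) bottoms out in urllib3's connection classes, so any stray HTTP call raises immediately instead of silently reaching the Hub. A minimal self-contained sketch of the same guard, independent of vLLM (test name and URL are illustrative):

import pytest
import urllib3


def disable_connect(*args, **kwargs):
    raise RuntimeError("No http calls allowed")


def test_no_network(monkeypatch):
    # Patch both plain and TLS connections so every urllib3-based
    # client fails at connect time, before any packet is sent.
    monkeypatch.setattr(urllib3.connection.HTTPConnection, "connect",
                        disable_connect)
    monkeypatch.setattr(urllib3.connection.HTTPSConnection, "connect",
                        disable_connect)

    with pytest.raises(RuntimeError, match="No http calls allowed"):
        urllib3.PoolManager().request("GET", "http://example.com/")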
@@ -10,7 +10,7 @@ import huggingface_hub
 from huggingface_hub import (file_exists, hf_hub_download, list_repo_files,
                              try_to_load_from_cache)
 from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
-                                   LocalEntryNotFoundError,
+                                   HFValidationError, LocalEntryNotFoundError,
                                    RepositoryNotFoundError,
                                    RevisionNotFoundError)
 from torch import nn
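try_to_load_from_cache, used by the new try_get_local_file below, is the huggingface_hub primitive that makes the local-first lookup possible: it consults only the on-disk cache and never issues a request, returning the cached file path as a str on a hit, a sentinel object when a previous lookup recorded that the file does not exist, or None when the cache knows nothing. A short sketch (the _CACHED_NO_EXIST export matches current huggingface_hub releases but is a private detail):

from huggingface_hub import _CACHED_NO_EXIST, try_to_load_from_cache

# Purely local: no HTTP traffic regardless of the outcome.
result = try_to_load_from_cache(
    repo_id="sentence-transformers/all-MiniLM-L12-v2",
    filename="modules.json",
    revision="main")

if isinstance(result, str):
    print("cached at", result)       # file present in the local cache
elif result is _CACHED_NO_EXIST:
    print("cached as non-existent")  # a 404 was cached earlier
else:
    print("unknown locally")         # result is None; a download may be needed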
@@ -265,49 +265,66 @@ def get_config(
     return config
 
 
+def try_get_local_file(model: Union[str, Path],
+                       file_name: str,
+                       revision: Optional[str] = 'main') -> Optional[Path]:
+    file_path = Path(model) / file_name
+    if file_path.is_file():
+        return file_path
+    else:
+        try:
+            cached_filepath = try_to_load_from_cache(repo_id=model,
+                                                     filename=file_name,
+                                                     revision=revision)
+            if isinstance(cached_filepath, str):
+                return Path(cached_filepath)
+        except HFValidationError:
+            ...
+    return None
+
+
 def get_hf_file_to_dict(file_name: str,
                         model: Union[str, Path],
                         revision: Optional[str] = 'main'):
     """
-    Downloads a file from the Hugging Face Hub and returns 
+    Downloads a file from the Hugging Face Hub and returns
     its contents as a dictionary.
 
     Parameters:
     - file_name (str): The name of the file to download.
     - model (str): The name of the model on the Hugging Face Hub.
-    - revision (str): The specific version of the model. 
+    - revision (str): The specific version of the model.
 
     Returns:
-    - config_dict (dict): A dictionary containing 
+    - config_dict (dict): A dictionary containing
     the contents of the downloaded file.
     """
-    file_path = Path(model) / file_name
 
-    if file_or_path_exists(model=model,
-                           config_name=file_name,
-                           revision=revision):
+    file_path = try_get_local_file(model=model,
+                                   file_name=file_name,
+                                   revision=revision)
 
-        if not file_path.is_file():
-            try:
-                hf_hub_file = hf_hub_download(model,
-                                              file_name,
-                                              revision=revision)
-            except (RepositoryNotFoundError, RevisionNotFoundError,
-                    EntryNotFoundError, LocalEntryNotFoundError) as e:
-                logger.debug("File or repository not found in hf_hub_download",
-                             e)
-                return None
-            except HfHubHTTPError as e:
-                logger.warning(
-                    "Cannot connect to Hugging Face Hub. Skipping file "
-                    "download for '%s':",
-                    file_name,
-                    exc_info=e)
-                return None
-            file_path = Path(hf_hub_file)
+    if file_path is None and file_or_path_exists(
+            model=model, config_name=file_name, revision=revision):
+        try:
+            hf_hub_file = hf_hub_download(model, file_name, revision=revision)
+        except (RepositoryNotFoundError, RevisionNotFoundError,
+                EntryNotFoundError, LocalEntryNotFoundError) as e:
+            logger.debug("File or repository not found in hf_hub_download", e)
+            return None
+        except HfHubHTTPError as e:
+            logger.warning(
+                "Cannot connect to Hugging Face Hub. Skipping file "
+                "download for '%s':",
+                file_name,
+                exc_info=e)
+            return None
+        file_path = Path(hf_hub_file)
 
-    if file_path.is_file():
+    if file_path is not None and file_path.is_file():
         with open(file_path) as file:
             return json.load(file)
 
     return None
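With try_get_local_file in place, get_hf_file_to_dict resolves a file in a fixed order: a plain on-disk path, then the local Hugging Face cache (both offline), and only as a last resort an hf_hub_download guarded by an existence check. A hedged usage sketch, assuming these helpers live in vllm.transformers_utils.config as the hunk context suggests:

from vllm.transformers_utils.config import (get_hf_file_to_dict,
                                            try_get_local_file)

model = "sentence-transformers/all-MiniLM-L12-v2"

# Step 1 in isolation: local directory or HF cache, never the network.
local_path = try_get_local_file(model, "modules.json", revision="main")

# Full resolution: falls back to a guarded hub download only if
# nothing was found locally.
modules = get_hf_file_to_dict("modules.json", model, revision="main")
print(local_path, modules is not None)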
@@ -328,7 +345,12 @@ def get_pooling_config(model: str, revision: Optional[str] = 'main'):
     """
 
     modules_file_name = "modules.json"
-    modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
+
+    modules_dict = None
+    if file_or_path_exists(model=model,
+                           config_name=modules_file_name,
+                           revision=revision):
+        modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
 
     if modules_dict is None:
         return None
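For get_pooling_config the practical effect is that models without a modules.json, such as ordinary decoder-only LLMs, no longer reach the download path at all: the existence check runs first and the function returns None early. An illustrative call, under the same module-path assumption as above:

from vllm.transformers_utils.config import get_pooling_config

# None when modules.json is absent; with HF_HUB_OFFLINE=1 set,
# the whole probe stays local.
print(get_pooling_config("facebook/opt-125m"))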
@@ -382,17 +404,17 @@ def get_sentence_transformer_tokenizer_config(model: str,
                                               revision: Optional[str] = 'main'
                                               ):
     """
-    Returns the tokenization configuration dictionary for a 
+    Returns the tokenization configuration dictionary for a
     given Sentence Transformer BERT model.
 
     Parameters:
-    - model (str): The name of the Sentence Transformer 
+    - model (str): The name of the Sentence Transformer
     BERT model.
     - revision (str, optional): The revision of the m
     odel to use. Defaults to 'main'.
 
     Returns:
-    - dict: A dictionary containing the configuration parameters 
+    - dict: A dictionary containing the configuration parameters
     for the Sentence Transformer BERT model.
     """
     sentence_transformer_config_files = [
@@ -404,20 +426,33 @@ def get_sentence_transformer_tokenizer_config(model: str,
         "sentence_xlm-roberta_config.json",
         "sentence_xlnet_config.json",
     ]
-    try:
-        # If model is on HuggingfaceHub, get the repo files
-        repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN)
-    except Exception as e:
-        logger.debug("Error getting repo files", e)
-        repo_files = []
 
     encoder_dict = None
-    for config_name in sentence_transformer_config_files:
-        if config_name in repo_files or Path(model).exists():
-            encoder_dict = get_hf_file_to_dict(config_name, model, revision)
+
+    for config_file in sentence_transformer_config_files:
+        if try_get_local_file(model=model,
+                              file_name=config_file,
+                              revision=revision) is not None:
+            encoder_dict = get_hf_file_to_dict(config_file, model, revision)
             if encoder_dict:
                 break
 
+    if not encoder_dict:
+        try:
+            # If model is on HuggingfaceHub, get the repo files
+            repo_files = list_repo_files(model,
+                                         revision=revision,
+                                         token=HF_TOKEN)
+        except Exception as e:
+            logger.debug("Error getting repo files", e)
+            repo_files = []
+
+        for config_name in sentence_transformer_config_files:
+            if config_name in repo_files:
+                encoder_dict = get_hf_file_to_dict(config_name, model,
+                                                   revision)
+                if encoder_dict:
+                    break
+
     if not encoder_dict:
         return None
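The rewritten lookup is two-phase: probe every candidate config file locally first, and only when none is found fall back to a single list_repo_files call, rather than consulting the repo listing up front for every model. The same shape as a standalone helper (all names here are hypothetical, not part of the commit):

from typing import Callable, Optional


def probe_configs(model: str,
                  candidates: list[str],
                  local_lookup: Callable[[str, str], Optional[str]],
                  remote_list: Callable[[str], list[str]]) -> Optional[str]:
    # Phase 1: purely local probes, zero HTTP requests.
    for name in candidates:
        if local_lookup(model, name) is not None:
            return name
    # Phase 2: one repo-listing request covers all remaining candidates.
    remote_files = set(remote_list(model))
    for name in candidates:
        if name in remote_files:
            return name
    return None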