mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 20:24:34 +08:00
[Chore]: Reorganize gguf utils functions under transformers_utils (#29891)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
52cb349fc0
commit
63b1da76ba
@ -203,7 +203,7 @@ class TestGGUFModelLoader:
|
||||
@patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
|
||||
@patch("vllm.config.model.get_config")
|
||||
@patch("vllm.config.model.is_gguf", return_value=False)
|
||||
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
|
||||
@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
|
||||
@patch("os.path.isfile", return_value=False)
|
||||
def test_prepare_weights_invalid_format(
|
||||
self,
|
||||
|
||||
@ -5,13 +5,15 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.transformers_utils.gguf_utils import (
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
split_remote_gguf,
|
||||
)
|
||||
from vllm.transformers_utils.utils import (
|
||||
is_cloud_storage,
|
||||
is_gcs,
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
is_s3,
|
||||
split_remote_gguf,
|
||||
)
|
||||
|
||||
|
||||
@ -132,7 +134,7 @@ class TestSplitRemoteGGUF:
|
||||
class TestIsGGUF:
|
||||
"""Test is_gguf utility function."""
|
||||
|
||||
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=True)
|
||||
@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=True)
|
||||
def test_is_gguf_with_local_file(self, mock_check_gguf):
|
||||
"""Test is_gguf with local GGUF file."""
|
||||
assert is_gguf("/path/to/model.gguf")
|
||||
@ -149,7 +151,7 @@ class TestIsGGUF:
|
||||
assert not is_gguf("repo/model:quant")
|
||||
assert not is_gguf("repo/model:INVALID")
|
||||
|
||||
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
|
||||
@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
|
||||
def test_is_gguf_false(self, mock_check_gguf):
|
||||
"""Test is_gguf returns False for non-GGUF models."""
|
||||
assert not is_gguf("unsloth/Qwen3-0.6B")
|
||||
|
||||
@ -37,15 +37,13 @@ from vllm.transformers_utils.config import (
|
||||
uses_xdrope_dim,
|
||||
)
|
||||
from vllm.transformers_utils.gguf_utils import (
|
||||
maybe_patch_hf_config_from_gguf,
|
||||
)
|
||||
from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
|
||||
from vllm.transformers_utils.utils import (
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
maybe_model_redirect,
|
||||
maybe_patch_hf_config_from_gguf,
|
||||
split_remote_gguf,
|
||||
)
|
||||
from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
|
||||
from vllm.transformers_utils.utils import maybe_model_redirect
|
||||
from vllm.utils.import_utils import LazyLoader
|
||||
from vllm.utils.torch_utils import common_broadcastable_dtype
|
||||
|
||||
|
||||
@ -86,8 +86,9 @@ from vllm.transformers_utils.config import (
|
||||
is_interleaved,
|
||||
maybe_override_with_speculators,
|
||||
)
|
||||
from vllm.transformers_utils.gguf_utils import is_gguf
|
||||
from vllm.transformers_utils.repo_utils import get_model_path
|
||||
from vllm.transformers_utils.utils import is_cloud_storage, is_gguf
|
||||
from vllm.transformers_utils.utils import is_cloud_storage
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.mem_constants import GiB_bytes
|
||||
from vllm.utils.network_utils import get_ip
|
||||
|
||||
@ -11,14 +11,14 @@ from typing_extensions import assert_never
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
|
||||
from vllm.transformers_utils.repo_utils import list_filtered_repo_files
|
||||
from vllm.transformers_utils.utils import (
|
||||
from vllm.transformers_utils.gguf_utils import (
|
||||
check_gguf_file,
|
||||
get_gguf_file_path_from_hf,
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
split_remote_gguf,
|
||||
)
|
||||
from vllm.transformers_utils.repo_utils import list_filtered_repo_files
|
||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
|
||||
from .protocol import TokenizerLike
|
||||
|
||||
@ -26,8 +26,15 @@ from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
|
||||
|
||||
from vllm import envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.transformers_utils.utils import parse_safetensors_file_metadata
|
||||
|
||||
from .config_parser_base import ConfigParserBase
|
||||
from .gguf_utils import (
|
||||
check_gguf_file,
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
split_remote_gguf,
|
||||
)
|
||||
from .repo_utils import (
|
||||
_get_hf_token,
|
||||
file_or_path_exists,
|
||||
@ -36,13 +43,6 @@ from .repo_utils import (
|
||||
try_get_local_file,
|
||||
with_retry,
|
||||
)
|
||||
from .utils import (
|
||||
check_gguf_file,
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
parse_safetensors_file_metadata,
|
||||
split_remote_gguf,
|
||||
)
|
||||
|
||||
if envs.VLLM_USE_MODELSCOPE:
|
||||
from modelscope import AutoConfig
|
||||
|
||||
@ -2,10 +2,14 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""GGUF utility functions."""
|
||||
|
||||
from functools import cache
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
|
||||
import gguf
|
||||
import regex as re
|
||||
from gguf.constants import Keys, VisionProjectorType
|
||||
from gguf.quants import GGMLQuantizationType
|
||||
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
|
||||
|
||||
from vllm.logger import init_logger
|
||||
@ -15,6 +19,73 @@ from .repo_utils import list_filtered_repo_files
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
@cache
|
||||
def check_gguf_file(model: str | PathLike) -> bool:
|
||||
"""Check if the file is a GGUF model."""
|
||||
model = Path(model)
|
||||
if not model.is_file():
|
||||
return False
|
||||
elif model.suffix == ".gguf":
|
||||
return True
|
||||
|
||||
try:
|
||||
with model.open("rb") as f:
|
||||
header = f.read(4)
|
||||
|
||||
return header == b"GGUF"
|
||||
except Exception as e:
|
||||
logger.debug("Error reading file %s: %s", model, e)
|
||||
return False
|
||||
|
||||
|
||||
@cache
|
||||
def is_remote_gguf(model: str | Path) -> bool:
|
||||
"""Check if the model is a remote GGUF model."""
|
||||
pattern = r"^[a-zA-Z0-9][a-zA-Z0-9._-]*/[a-zA-Z0-9][a-zA-Z0-9._-]*:[A-Za-z0-9_+-]+$"
|
||||
model = str(model)
|
||||
if re.fullmatch(pattern, model):
|
||||
_, quant_type = model.rsplit(":", 1)
|
||||
return is_valid_gguf_quant_type(quant_type)
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
    """Check if the quant type is a valid GGUF quant type.

    Args:
        gguf_quant_type: Candidate quant type name, e.g. the part after the
            last ``:`` in a ``repo_id:quant_type`` model string.

    Returns:
        True only if the name is a member of ``GGMLQuantizationType``.
    """
    # Look the name up among the enum members instead of using ``getattr``:
    # ``getattr`` also resolves non-member attributes of the enum class
    # (``mro``, ``__doc__``, ``_member_names_``, ...), so a string such as
    # "repo/model:mro" would otherwise be accepted as a valid quant type.
    return gguf_quant_type in GGMLQuantizationType.__members__
|
||||
|
||||
|
||||
def split_remote_gguf(model: str | Path) -> tuple[str, str]:
    """Separate a remote GGUF model string into ``(repo_id, quant_type)``.

    The split happens at the last ``:`` of the string.

    Raises:
        ValueError: If ``model`` is not accepted by ``is_remote_gguf``.
    """
    model = str(model)

    # Reject anything that is not a valid ``repo_id:quant_type`` reference.
    if not is_remote_gguf(model):
        raise ValueError(
            f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
            "- It should be in repo_id:quant_type format.\n"
            f"- Valid GGMLQuantizationType values: {GGMLQuantizationType._member_names_}",
        )

    repo_id, quant_type = model.rsplit(":", 1)
    return (repo_id, quant_type)
|
||||
|
||||
|
||||
def is_gguf(model: str | Path) -> bool:
    """Report whether ``model`` refers to a GGUF model.

    Args:
        model: Model name, path, or Path object to check.

    Returns:
        True when ``model`` is a local GGUF file or a remote GGUF reference
        in ``repo_id:quant_type`` form, False otherwise.
    """
    model_ref = str(model)

    # A local GGUF file short-circuits; otherwise the answer is whatever the
    # remote ``repo_id:quant_type`` check says.
    return True if check_gguf_file(model_ref) else is_remote_gguf(model_ref)
|
||||
|
||||
|
||||
def detect_gguf_multimodal(model: str) -> Path | None:
|
||||
"""Check if GGUF model has multimodal projector file.
|
||||
|
||||
|
||||
@ -18,7 +18,8 @@ from transformers.processing_utils import ProcessorMixin
|
||||
from transformers.video_processing_utils import BaseVideoProcessor
|
||||
from typing_extensions import TypeVar
|
||||
|
||||
from vllm.transformers_utils.utils import convert_model_repo_to_path, is_gguf
|
||||
from vllm.transformers_utils.gguf_utils import is_gguf
|
||||
from vllm.transformers_utils.utils import convert_model_repo_to_path
|
||||
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@ -9,8 +9,6 @@ from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from gguf import GGMLQuantizationType
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
@ -29,76 +27,6 @@ def is_cloud_storage(model_or_path: str) -> bool:
|
||||
return is_s3(model_or_path) or is_gcs(model_or_path)
|
||||
|
||||
|
||||
@cache
|
||||
def check_gguf_file(model: str | PathLike) -> bool:
|
||||
"""Check if the file is a GGUF model."""
|
||||
model = Path(model)
|
||||
if not model.is_file():
|
||||
return False
|
||||
elif model.suffix == ".gguf":
|
||||
return True
|
||||
|
||||
try:
|
||||
with model.open("rb") as f:
|
||||
header = f.read(4)
|
||||
|
||||
return header == b"GGUF"
|
||||
except Exception as e:
|
||||
logger.debug("Error reading file %s: %s", model, e)
|
||||
return False
|
||||
|
||||
|
||||
@cache
def is_remote_gguf(model: str | Path) -> bool:
    """Tell whether ``model`` names a remote GGUF model.

    Cloud-storage locations (per ``is_cloud_storage``) and ``http(s)://`` URLs
    are rejected, as is any string lacking a ``/`` or a ``:``. For the rest,
    the text after the last ``:`` must pass ``is_valid_gguf_quant_type``.
    Results are memoized per argument by ``functools.cache``.
    """
    model_id = str(model)

    if is_cloud_storage(model_id):
        return False
    if model_id.startswith(("http://", "https://")):
        return False
    # Both the repo separator and the quant-type separator must be present.
    if "/" not in model_id or ":" not in model_id:
        return False

    quant_type = model_id.rsplit(":", 1)[1]
    return is_valid_gguf_quant_type(quant_type)
|
||||
|
||||
|
||||
def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
    """Check if the quant type is a valid GGUF quant type.

    Args:
        gguf_quant_type: Candidate quant type name, e.g. the part after the
            last ``:`` in a ``repo_id:quant_type`` model string.

    Returns:
        True only if the name is a member of ``GGMLQuantizationType``.
    """
    # Look the name up among the enum members instead of using ``getattr``:
    # ``getattr`` also resolves non-member attributes of the enum class
    # (``mro``, ``__doc__``, ``_member_names_``, ...), so a string such as
    # "repo/model:mro" would otherwise be accepted as a valid quant type.
    return gguf_quant_type in GGMLQuantizationType.__members__
|
||||
|
||||
|
||||
def split_remote_gguf(model: str | Path) -> tuple[str, str]:
    """Split the model into repo_id and quant type.

    Args:
        model: Remote GGUF reference in ``repo_id:quant_type`` form.

    Returns:
        A ``(repo_id, quant_type)`` tuple, split at the last ``:``.

    Raises:
        ValueError: If ``model`` is not accepted by ``is_remote_gguf``.
    """
    model = str(model)
    if is_remote_gguf(model):
        repo_id, quant_type = model.rsplit(":", 1)
        return (repo_id, quant_type)

    # Exceptions do not perform logging-style ``%s`` substitution: passing the
    # values as extra constructor arguments leaves the placeholders unfilled
    # and renders the error as a raw tuple. Format the message explicitly.
    raise ValueError(
        f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
        "- It should be in repo_id:quant_type format.\n"
        f"- Valid GGMLQuantizationType values: {GGMLQuantizationType._member_names_}",
    )
|
||||
|
||||
|
||||
def is_gguf(model: str | Path) -> bool:
    """Report whether ``model`` refers to a GGUF model.

    Args:
        model: Model name, path, or Path object to check.

    Returns:
        True when ``model`` is a local GGUF file or a remote GGUF reference
        in ``repo_id:quant_type`` form, False otherwise.
    """
    model_ref = str(model)

    # A local GGUF file short-circuits; otherwise the answer is whatever the
    # remote ``repo_id:quant_type`` check says.
    return True if check_gguf_file(model_ref) else is_remote_gguf(model_ref)
|
||||
|
||||
|
||||
def modelscope_list_repo_files(
|
||||
repo_id: str,
|
||||
revision: str | None = None,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user