[Chore]: Reorganize gguf utils functions under transformers_utils (#29891)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Isotr0py 2025-12-03 01:33:23 +08:00 committed by GitHub
parent 52cb349fc0
commit 63b1da76ba
9 changed files with 96 additions and 95 deletions

View File

@@ -203,7 +203,7 @@ class TestGGUFModelLoader:
     @patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
     @patch("vllm.config.model.get_config")
     @patch("vllm.config.model.is_gguf", return_value=False)
-    @patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
+    @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
     @patch("os.path.isfile", return_value=False)
     def test_prepare_weights_invalid_format(
         self,

View File

@@ -5,13 +5,15 @@ from unittest.mock import patch
 import pytest
+from vllm.transformers_utils.gguf_utils import (
+    is_gguf,
+    is_remote_gguf,
+    split_remote_gguf,
+)
 from vllm.transformers_utils.utils import (
     is_cloud_storage,
     is_gcs,
-    is_gguf,
-    is_remote_gguf,
     is_s3,
-    split_remote_gguf,
 )
@@ -132,7 +134,7 @@ class TestSplitRemoteGGUF:
 class TestIsGGUF:
     """Test is_gguf utility function."""
-    @patch("vllm.transformers_utils.utils.check_gguf_file", return_value=True)
+    @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=True)
     def test_is_gguf_with_local_file(self, mock_check_gguf):
         """Test is_gguf with local GGUF file."""
         assert is_gguf("/path/to/model.gguf")
@@ -149,7 +151,7 @@ class TestIsGGUF:
         assert not is_gguf("repo/model:quant")
         assert not is_gguf("repo/model:INVALID")
-    @patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
+    @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
     def test_is_gguf_false(self, mock_check_gguf):
         """Test is_gguf returns False for non-GGUF models."""
         assert not is_gguf("unsloth/Qwen3-0.6B")
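
For downstream code, the practical effect of the two hunks above is that import and mock targets move from vllm.transformers_utils.utils to vllm.transformers_utils.gguf_utils. A minimal sketch of an external test updated accordingly (the test name is hypothetical; the patch target mirrors the one used in the hunks above):

# Hypothetical external test; after this refactor the helper must be patched
# at its new home in vllm.transformers_utils.gguf_utils.
from unittest.mock import patch

from vllm.transformers_utils.gguf_utils import is_gguf


@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=True)
def test_local_gguf_detected(mock_check_gguf):
    # is_gguf() consults check_gguf_file() first, so the patched value decides.
    assert is_gguf("/path/to/model.gguf")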

View File

@@ -37,15 +37,13 @@ from vllm.transformers_utils.config import (
     uses_xdrope_dim,
 )
 from vllm.transformers_utils.gguf_utils import (
-    maybe_patch_hf_config_from_gguf,
-)
-from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
-from vllm.transformers_utils.utils import (
     is_gguf,
     is_remote_gguf,
-    maybe_model_redirect,
+    maybe_patch_hf_config_from_gguf,
     split_remote_gguf,
 )
+from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
+from vllm.transformers_utils.utils import maybe_model_redirect
 from vllm.utils.import_utils import LazyLoader
 from vllm.utils.torch_utils import common_broadcastable_dtype

View File

@@ -86,8 +86,9 @@ from vllm.transformers_utils.config import (
     is_interleaved,
     maybe_override_with_speculators,
 )
+from vllm.transformers_utils.gguf_utils import is_gguf
 from vllm.transformers_utils.repo_utils import get_model_path
-from vllm.transformers_utils.utils import is_cloud_storage, is_gguf
+from vllm.transformers_utils.utils import is_cloud_storage
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.mem_constants import GiB_bytes
 from vllm.utils.network_utils import get_ip

View File

@@ -11,14 +11,14 @@ from typing_extensions import assert_never
 import vllm.envs as envs
 from vllm.logger import init_logger
-from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
-from vllm.transformers_utils.repo_utils import list_filtered_repo_files
-from vllm.transformers_utils.utils import (
+from vllm.transformers_utils.gguf_utils import (
     check_gguf_file,
+    get_gguf_file_path_from_hf,
     is_gguf,
     is_remote_gguf,
     split_remote_gguf,
 )
+from vllm.transformers_utils.repo_utils import list_filtered_repo_files
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from .protocol import TokenizerLike

View File

@@ -26,8 +26,15 @@ from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
 from vllm import envs
 from vllm.logger import init_logger
+from vllm.transformers_utils.utils import parse_safetensors_file_metadata
 from .config_parser_base import ConfigParserBase
+from .gguf_utils import (
+    check_gguf_file,
+    is_gguf,
+    is_remote_gguf,
+    split_remote_gguf,
+)
 from .repo_utils import (
     _get_hf_token,
     file_or_path_exists,
@@ -36,13 +43,6 @@ from .repo_utils import (
     try_get_local_file,
     with_retry,
 )
-from .utils import (
-    check_gguf_file,
-    is_gguf,
-    is_remote_gguf,
-    parse_safetensors_file_metadata,
-    split_remote_gguf,
-)
 if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig

View File

@@ -2,10 +2,14 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """GGUF utility functions."""
+from functools import cache
+from os import PathLike
 from pathlib import Path
 import gguf
+import regex as re
 from gguf.constants import Keys, VisionProjectorType
+from gguf.quants import GGMLQuantizationType
 from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
 from vllm.logger import init_logger
@@ -15,6 +19,73 @@ from .repo_utils import list_filtered_repo_files
 logger = init_logger(__name__)
+@cache
+def check_gguf_file(model: str | PathLike) -> bool:
+    """Check if the file is a GGUF model."""
+    model = Path(model)
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
+        return True
+    try:
+        with model.open("rb") as f:
+            header = f.read(4)
+        return header == b"GGUF"
+    except Exception as e:
+        logger.debug("Error reading file %s: %s", model, e)
+        return False
+@cache
+def is_remote_gguf(model: str | Path) -> bool:
+    """Check if the model is a remote GGUF model."""
+    pattern = r"^[a-zA-Z0-9][a-zA-Z0-9._-]*/[a-zA-Z0-9][a-zA-Z0-9._-]*:[A-Za-z0-9_+-]+$"
+    model = str(model)
+    if re.fullmatch(pattern, model):
+        _, quant_type = model.rsplit(":", 1)
+        return is_valid_gguf_quant_type(quant_type)
+    return False
+def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
+    """Check if the quant type is a valid GGUF quant type."""
+    return getattr(GGMLQuantizationType, gguf_quant_type, None) is not None
+def split_remote_gguf(model: str | Path) -> tuple[str, str]:
+    """Split the model into repo_id and quant type."""
+    model = str(model)
+    if is_remote_gguf(model):
+        parts = model.rsplit(":", 1)
+        return (parts[0], parts[1])
+    raise ValueError(
+        f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
+        "- It should be in repo_id:quant_type format.\n"
+        f"- Valid GGMLQuantizationType values: {GGMLQuantizationType._member_names_}",
+    )
+def is_gguf(model: str | Path) -> bool:
+    """Check if the model is a GGUF model.
+    Args:
+        model: Model name, path, or Path object to check.
+    Returns:
+        True if the model is a GGUF model, False otherwise.
+    """
+    model = str(model)
+    # Check if it's a local GGUF file
+    if check_gguf_file(model):
+        return True
+    # Check if it's a remote GGUF model (repo_id:quant_type format)
+    return is_remote_gguf(model)
 def detect_gguf_multimodal(model: str) -> Path | None:
     """Check if GGUF model has multimodal projector file.

View File

@@ -18,7 +18,8 @@ from transformers.processing_utils import ProcessorMixin
 from transformers.video_processing_utils import BaseVideoProcessor
 from typing_extensions import TypeVar
-from vllm.transformers_utils.utils import convert_model_repo_to_path, is_gguf
+from vllm.transformers_utils.gguf_utils import is_gguf
+from vllm.transformers_utils.utils import convert_model_repo_to_path
 from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
 if TYPE_CHECKING:

View File

@@ -9,8 +9,6 @@ from os import PathLike
 from pathlib import Path
 from typing import Any
-from gguf import GGMLQuantizationType
 import vllm.envs as envs
 from vllm.logger import init_logger
@@ -29,76 +27,6 @@ def is_cloud_storage(model_or_path: str) -> bool:
     return is_s3(model_or_path) or is_gcs(model_or_path)
-@cache
-def check_gguf_file(model: str | PathLike) -> bool:
-    """Check if the file is a GGUF model."""
-    model = Path(model)
-    if not model.is_file():
-        return False
-    elif model.suffix == ".gguf":
-        return True
-    try:
-        with model.open("rb") as f:
-            header = f.read(4)
-        return header == b"GGUF"
-    except Exception as e:
-        logger.debug("Error reading file %s: %s", model, e)
-        return False
-@cache
-def is_remote_gguf(model: str | Path) -> bool:
-    """Check if the model is a remote GGUF model."""
-    model = str(model)
-    return (
-        (not is_cloud_storage(model))
-        and (not model.startswith(("http://", "https://")))
-        and ("/" in model and ":" in model)
-        and is_valid_gguf_quant_type(model.rsplit(":", 1)[1])
-    )
-def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
-    """Check if the quant type is a valid GGUF quant type."""
-    return getattr(GGMLQuantizationType, gguf_quant_type, None) is not None
-def split_remote_gguf(model: str | Path) -> tuple[str, str]:
-    """Split the model into repo_id and quant type."""
-    model = str(model)
-    if is_remote_gguf(model):
-        parts = model.rsplit(":", 1)
-        return (parts[0], parts[1])
-    raise ValueError(
-        "Wrong GGUF model or invalid GGUF quant type: %s.\n"
-        "- It should be in repo_id:quant_type format.\n"
-        "- Valid GGMLQuantizationType values: %s",
-        model,
-        GGMLQuantizationType._member_names_,
-    )
-def is_gguf(model: str | Path) -> bool:
-    """Check if the model is a GGUF model.
-    Args:
-        model: Model name, path, or Path object to check.
-    Returns:
-        True if the model is a GGUF model, False otherwise.
-    """
-    model = str(model)
-    # Check if it's a local GGUF file
-    if check_gguf_file(model):
-        return True
-    # Check if it's a remote GGUF model (repo_id:quant_type format)
-    return is_remote_gguf(model)
 def modelscope_list_repo_files(
     repo_id: str,
     revision: str | None = None,