mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-13 05:19:07 +08:00
[Chore]: Reorganize gguf utils functions under transformers_utils (#29891)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
52cb349fc0
commit
63b1da76ba
@ -203,7 +203,7 @@ class TestGGUFModelLoader:
|
|||||||
@patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
|
@patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
|
||||||
@patch("vllm.config.model.get_config")
|
@patch("vllm.config.model.get_config")
|
||||||
@patch("vllm.config.model.is_gguf", return_value=False)
|
@patch("vllm.config.model.is_gguf", return_value=False)
|
||||||
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
|
@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
|
||||||
@patch("os.path.isfile", return_value=False)
|
@patch("os.path.isfile", return_value=False)
|
||||||
def test_prepare_weights_invalid_format(
|
def test_prepare_weights_invalid_format(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@ -5,13 +5,15 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from vllm.transformers_utils.gguf_utils import (
|
||||||
|
is_gguf,
|
||||||
|
is_remote_gguf,
|
||||||
|
split_remote_gguf,
|
||||||
|
)
|
||||||
from vllm.transformers_utils.utils import (
|
from vllm.transformers_utils.utils import (
|
||||||
is_cloud_storage,
|
is_cloud_storage,
|
||||||
is_gcs,
|
is_gcs,
|
||||||
is_gguf,
|
|
||||||
is_remote_gguf,
|
|
||||||
is_s3,
|
is_s3,
|
||||||
split_remote_gguf,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -132,7 +134,7 @@ class TestSplitRemoteGGUF:
|
|||||||
class TestIsGGUF:
|
class TestIsGGUF:
|
||||||
"""Test is_gguf utility function."""
|
"""Test is_gguf utility function."""
|
||||||
|
|
||||||
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=True)
|
@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=True)
|
||||||
def test_is_gguf_with_local_file(self, mock_check_gguf):
|
def test_is_gguf_with_local_file(self, mock_check_gguf):
|
||||||
"""Test is_gguf with local GGUF file."""
|
"""Test is_gguf with local GGUF file."""
|
||||||
assert is_gguf("/path/to/model.gguf")
|
assert is_gguf("/path/to/model.gguf")
|
||||||
@ -149,7 +151,7 @@ class TestIsGGUF:
|
|||||||
assert not is_gguf("repo/model:quant")
|
assert not is_gguf("repo/model:quant")
|
||||||
assert not is_gguf("repo/model:INVALID")
|
assert not is_gguf("repo/model:INVALID")
|
||||||
|
|
||||||
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
|
@patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False)
|
||||||
def test_is_gguf_false(self, mock_check_gguf):
|
def test_is_gguf_false(self, mock_check_gguf):
|
||||||
"""Test is_gguf returns False for non-GGUF models."""
|
"""Test is_gguf returns False for non-GGUF models."""
|
||||||
assert not is_gguf("unsloth/Qwen3-0.6B")
|
assert not is_gguf("unsloth/Qwen3-0.6B")
|
||||||
|
|||||||
@ -37,15 +37,13 @@ from vllm.transformers_utils.config import (
|
|||||||
uses_xdrope_dim,
|
uses_xdrope_dim,
|
||||||
)
|
)
|
||||||
from vllm.transformers_utils.gguf_utils import (
|
from vllm.transformers_utils.gguf_utils import (
|
||||||
maybe_patch_hf_config_from_gguf,
|
|
||||||
)
|
|
||||||
from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
|
|
||||||
from vllm.transformers_utils.utils import (
|
|
||||||
is_gguf,
|
is_gguf,
|
||||||
is_remote_gguf,
|
is_remote_gguf,
|
||||||
maybe_model_redirect,
|
maybe_patch_hf_config_from_gguf,
|
||||||
split_remote_gguf,
|
split_remote_gguf,
|
||||||
)
|
)
|
||||||
|
from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri
|
||||||
|
from vllm.transformers_utils.utils import maybe_model_redirect
|
||||||
from vllm.utils.import_utils import LazyLoader
|
from vllm.utils.import_utils import LazyLoader
|
||||||
from vllm.utils.torch_utils import common_broadcastable_dtype
|
from vllm.utils.torch_utils import common_broadcastable_dtype
|
||||||
|
|
||||||
|
|||||||
@ -86,8 +86,9 @@ from vllm.transformers_utils.config import (
|
|||||||
is_interleaved,
|
is_interleaved,
|
||||||
maybe_override_with_speculators,
|
maybe_override_with_speculators,
|
||||||
)
|
)
|
||||||
|
from vllm.transformers_utils.gguf_utils import is_gguf
|
||||||
from vllm.transformers_utils.repo_utils import get_model_path
|
from vllm.transformers_utils.repo_utils import get_model_path
|
||||||
from vllm.transformers_utils.utils import is_cloud_storage, is_gguf
|
from vllm.transformers_utils.utils import is_cloud_storage
|
||||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||||
from vllm.utils.mem_constants import GiB_bytes
|
from vllm.utils.mem_constants import GiB_bytes
|
||||||
from vllm.utils.network_utils import get_ip
|
from vllm.utils.network_utils import get_ip
|
||||||
|
|||||||
@ -11,14 +11,14 @@ from typing_extensions import assert_never
|
|||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
|
from vllm.transformers_utils.gguf_utils import (
|
||||||
from vllm.transformers_utils.repo_utils import list_filtered_repo_files
|
|
||||||
from vllm.transformers_utils.utils import (
|
|
||||||
check_gguf_file,
|
check_gguf_file,
|
||||||
|
get_gguf_file_path_from_hf,
|
||||||
is_gguf,
|
is_gguf,
|
||||||
is_remote_gguf,
|
is_remote_gguf,
|
||||||
split_remote_gguf,
|
split_remote_gguf,
|
||||||
)
|
)
|
||||||
|
from vllm.transformers_utils.repo_utils import list_filtered_repo_files
|
||||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||||
|
|
||||||
from .protocol import TokenizerLike
|
from .protocol import TokenizerLike
|
||||||
|
|||||||
@ -26,8 +26,15 @@ from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
|
|||||||
|
|
||||||
from vllm import envs
|
from vllm import envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
from vllm.transformers_utils.utils import parse_safetensors_file_metadata
|
||||||
|
|
||||||
from .config_parser_base import ConfigParserBase
|
from .config_parser_base import ConfigParserBase
|
||||||
|
from .gguf_utils import (
|
||||||
|
check_gguf_file,
|
||||||
|
is_gguf,
|
||||||
|
is_remote_gguf,
|
||||||
|
split_remote_gguf,
|
||||||
|
)
|
||||||
from .repo_utils import (
|
from .repo_utils import (
|
||||||
_get_hf_token,
|
_get_hf_token,
|
||||||
file_or_path_exists,
|
file_or_path_exists,
|
||||||
@ -36,13 +43,6 @@ from .repo_utils import (
|
|||||||
try_get_local_file,
|
try_get_local_file,
|
||||||
with_retry,
|
with_retry,
|
||||||
)
|
)
|
||||||
from .utils import (
|
|
||||||
check_gguf_file,
|
|
||||||
is_gguf,
|
|
||||||
is_remote_gguf,
|
|
||||||
parse_safetensors_file_metadata,
|
|
||||||
split_remote_gguf,
|
|
||||||
)
|
|
||||||
|
|
||||||
if envs.VLLM_USE_MODELSCOPE:
|
if envs.VLLM_USE_MODELSCOPE:
|
||||||
from modelscope import AutoConfig
|
from modelscope import AutoConfig
|
||||||
|
|||||||
@ -2,10 +2,14 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
"""GGUF utility functions."""
|
"""GGUF utility functions."""
|
||||||
|
|
||||||
|
from functools import cache
|
||||||
|
from os import PathLike
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import gguf
|
import gguf
|
||||||
|
import regex as re
|
||||||
from gguf.constants import Keys, VisionProjectorType
|
from gguf.constants import Keys, VisionProjectorType
|
||||||
|
from gguf.quants import GGMLQuantizationType
|
||||||
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
|
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
|
||||||
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
@ -15,6 +19,73 @@ from .repo_utils import list_filtered_repo_files
|
|||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@cache
|
||||||
|
def check_gguf_file(model: str | PathLike) -> bool:
|
||||||
|
"""Check if the file is a GGUF model."""
|
||||||
|
model = Path(model)
|
||||||
|
if not model.is_file():
|
||||||
|
return False
|
||||||
|
elif model.suffix == ".gguf":
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
with model.open("rb") as f:
|
||||||
|
header = f.read(4)
|
||||||
|
|
||||||
|
return header == b"GGUF"
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("Error reading file %s: %s", model, e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@cache
|
||||||
|
def is_remote_gguf(model: str | Path) -> bool:
|
||||||
|
"""Check if the model is a remote GGUF model."""
|
||||||
|
pattern = r"^[a-zA-Z0-9][a-zA-Z0-9._-]*/[a-zA-Z0-9][a-zA-Z0-9._-]*:[A-Za-z0-9_+-]+$"
|
||||||
|
model = str(model)
|
||||||
|
if re.fullmatch(pattern, model):
|
||||||
|
_, quant_type = model.rsplit(":", 1)
|
||||||
|
return is_valid_gguf_quant_type(quant_type)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
    """Check if the quant type is a valid GGUF quant type.

    Args:
        gguf_quant_type: Candidate quantization type name.

    Returns:
        True only when the name is a member of ``GGMLQuantizationType``.
    """
    # Look the name up in ``__members__`` instead of using ``getattr``: the
    # enum class also exposes non-member attributes (``mro``, ``__doc__``,
    # ``_member_names_``, ...) which ``getattr`` would wrongly report as
    # valid quant types.
    return gguf_quant_type in GGMLQuantizationType.__members__
|
||||||
|
|
||||||
|
|
||||||
|
def split_remote_gguf(model: str | Path) -> tuple[str, str]:
    """Break a remote GGUF reference into ``(repo_id, quant_type)``.

    Raises:
        ValueError: If ``is_remote_gguf`` rejects the string form of
            ``model``.
    """
    model = str(model)
    if not is_remote_gguf(model):
        raise ValueError(
            f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
            "- It should be in repo_id:quant_type format.\n"
            f"- Valid GGMLQuantizationType values: {GGMLQuantizationType._member_names_}",
        )
    # Split on the last colon only; everything before it is the repo id.
    repo_id, _, quant_type = model.rpartition(":")
    return (repo_id, quant_type)
|
||||||
|
|
||||||
|
|
||||||
|
def is_gguf(model: str | Path) -> bool:
    """Tell whether ``model`` refers to a GGUF model.

    Args:
        model: Model name, filesystem path, or ``Path`` object.

    Returns:
        True when ``check_gguf_file`` accepts it as a local GGUF file or
        ``is_remote_gguf`` accepts it as a ``repo_id:quant_type``
        reference; False otherwise.
    """
    target = str(model)
    # Local file first; the remote check only runs when that fails.
    return check_gguf_file(target) or is_remote_gguf(target)
|
||||||
|
|
||||||
|
|
||||||
def detect_gguf_multimodal(model: str) -> Path | None:
|
def detect_gguf_multimodal(model: str) -> Path | None:
|
||||||
"""Check if GGUF model has multimodal projector file.
|
"""Check if GGUF model has multimodal projector file.
|
||||||
|
|
||||||
|
|||||||
@ -18,7 +18,8 @@ from transformers.processing_utils import ProcessorMixin
|
|||||||
from transformers.video_processing_utils import BaseVideoProcessor
|
from transformers.video_processing_utils import BaseVideoProcessor
|
||||||
from typing_extensions import TypeVar
|
from typing_extensions import TypeVar
|
||||||
|
|
||||||
from vllm.transformers_utils.utils import convert_model_repo_to_path, is_gguf
|
from vllm.transformers_utils.gguf_utils import is_gguf
|
||||||
|
from vllm.transformers_utils.utils import convert_model_repo_to_path
|
||||||
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
|
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
|||||||
@ -9,8 +9,6 @@ from os import PathLike
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from gguf import GGMLQuantizationType
|
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
@ -29,76 +27,6 @@ def is_cloud_storage(model_or_path: str) -> bool:
|
|||||||
return is_s3(model_or_path) or is_gcs(model_or_path)
|
return is_s3(model_or_path) or is_gcs(model_or_path)
|
||||||
|
|
||||||
|
|
||||||
@cache
|
|
||||||
def check_gguf_file(model: str | PathLike) -> bool:
|
|
||||||
"""Check if the file is a GGUF model."""
|
|
||||||
model = Path(model)
|
|
||||||
if not model.is_file():
|
|
||||||
return False
|
|
||||||
elif model.suffix == ".gguf":
|
|
||||||
return True
|
|
||||||
|
|
||||||
try:
|
|
||||||
with model.open("rb") as f:
|
|
||||||
header = f.read(4)
|
|
||||||
|
|
||||||
return header == b"GGUF"
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug("Error reading file %s: %s", model, e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
@cache
def is_remote_gguf(model: str | Path) -> bool:
    """Decide whether ``model`` looks like a remote GGUF reference.

    The string form of ``model`` qualifies when it is not a cloud-storage
    location (per ``is_cloud_storage``), does not start with ``http://`` or
    ``https://``, contains both ``/`` and ``:``, and the text after its
    last colon passes ``is_valid_gguf_quant_type``.

    Results are memoized per argument by ``functools.cache``.
    """
    name = str(model)
    if is_cloud_storage(name):
        return False
    if name.startswith(("http://", "https://")):
        return False
    if "/" not in name or ":" not in name:
        return False
    return is_valid_gguf_quant_type(name.rsplit(":", 1)[1])
|
|
||||||
|
|
||||||
|
|
||||||
def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
    """Check if the quant type is a valid GGUF quant type.

    Args:
        gguf_quant_type: Candidate quantization type name.

    Returns:
        True only when the name is a member of ``GGMLQuantizationType``.
    """
    # ``getattr`` on the enum class also resolves non-member attributes
    # (``mro``, ``__doc__``, ``_member_names_``, ...), so it would accept
    # those strings as quant types. ``__members__`` holds member names only.
    return gguf_quant_type in GGMLQuantizationType.__members__
|
|
||||||
|
|
||||||
|
|
||||||
def split_remote_gguf(model: str | Path) -> tuple[str, str]:
    """Split the model into repo_id and quant type.

    Args:
        model: Remote GGUF reference in ``repo_id:quant_type`` form.

    Returns:
        A ``(repo_id, quant_type)`` tuple, split on the last colon.

    Raises:
        ValueError: If ``is_remote_gguf`` rejects the string form of
            ``model``.
    """
    model = str(model)
    if is_remote_gguf(model):
        parts = model.rsplit(":", 1)
        return (parts[0], parts[1])
    # Exceptions do not apply logging-style ``%s`` arguments, so the message
    # has to be interpolated before it is handed to ValueError; otherwise
    # callers see a raw tuple with unfilled placeholders.
    raise ValueError(
        f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
        "- It should be in repo_id:quant_type format.\n"
        f"- Valid GGMLQuantizationType values: {GGMLQuantizationType._member_names_}"
    )
|
|
||||||
|
|
||||||
|
|
||||||
def is_gguf(model: str | Path) -> bool:
    """Determine whether ``model`` designates a GGUF model.

    Args:
        model: Model name, filesystem path, or ``Path`` object.

    Returns:
        True if ``check_gguf_file`` recognises it as a local GGUF file, or
        failing that, if ``is_remote_gguf`` recognises it as a
        ``repo_id:quant_type`` reference. False otherwise.
    """
    as_text = str(model)
    # Short-circuit: the remote-format check is skipped for local files.
    return check_gguf_file(as_text) or is_remote_gguf(as_text)
|
|
||||||
|
|
||||||
|
|
||||||
def modelscope_list_repo_files(
|
def modelscope_list_repo_files(
|
||||||
repo_id: str,
|
repo_id: str,
|
||||||
revision: str | None = None,
|
revision: str | None = None,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user