diff --git a/tests/models/test_gguf_download.py b/tests/models/test_gguf_download.py index 155768ac9bff7..b1674cdf77178 100644 --- a/tests/models/test_gguf_download.py +++ b/tests/models/test_gguf_download.py @@ -203,7 +203,7 @@ class TestGGUFModelLoader: @patch("vllm.config.model.get_hf_image_processor_config", return_value=None) @patch("vllm.config.model.get_config") @patch("vllm.config.model.is_gguf", return_value=False) - @patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False) + @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False) @patch("os.path.isfile", return_value=False) def test_prepare_weights_invalid_format( self, diff --git a/tests/transformers_utils/test_utils.py b/tests/transformers_utils/test_utils.py index a8d0b9be9ec29..0a6a65b4133c9 100644 --- a/tests/transformers_utils/test_utils.py +++ b/tests/transformers_utils/test_utils.py @@ -5,13 +5,15 @@ from unittest.mock import patch import pytest +from vllm.transformers_utils.gguf_utils import ( + is_gguf, + is_remote_gguf, + split_remote_gguf, +) from vllm.transformers_utils.utils import ( is_cloud_storage, is_gcs, - is_gguf, - is_remote_gguf, is_s3, - split_remote_gguf, ) @@ -132,7 +134,7 @@ class TestSplitRemoteGGUF: class TestIsGGUF: """Test is_gguf utility function.""" - @patch("vllm.transformers_utils.utils.check_gguf_file", return_value=True) + @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=True) def test_is_gguf_with_local_file(self, mock_check_gguf): """Test is_gguf with local GGUF file.""" assert is_gguf("/path/to/model.gguf") @@ -149,7 +151,7 @@ class TestIsGGUF: assert not is_gguf("repo/model:quant") assert not is_gguf("repo/model:INVALID") - @patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False) + @patch("vllm.transformers_utils.gguf_utils.check_gguf_file", return_value=False) def test_is_gguf_false(self, mock_check_gguf): """Test is_gguf returns False for non-GGUF models.""" assert not is_gguf("unsloth/Qwen3-0.6B") diff --git a/vllm/config/model.py b/vllm/config/model.py index ef592ac001535..5de97697698a1 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -37,15 +37,13 @@ from vllm.transformers_utils.config import ( uses_xdrope_dim, ) from vllm.transformers_utils.gguf_utils import ( - maybe_patch_hf_config_from_gguf, -) -from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri -from vllm.transformers_utils.utils import ( is_gguf, is_remote_gguf, - maybe_model_redirect, + maybe_patch_hf_config_from_gguf, split_remote_gguf, ) +from vllm.transformers_utils.runai_utils import ObjectStorageModel, is_runai_obj_uri +from vllm.transformers_utils.utils import maybe_model_redirect from vllm.utils.import_utils import LazyLoader from vllm.utils.torch_utils import common_broadcastable_dtype diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 5a2836668174f..83029e09ceaad 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -86,8 +86,9 @@ from vllm.transformers_utils.config import ( is_interleaved, maybe_override_with_speculators, ) +from vllm.transformers_utils.gguf_utils import is_gguf from vllm.transformers_utils.repo_utils import get_model_path -from vllm.transformers_utils.utils import is_cloud_storage, is_gguf +from vllm.transformers_utils.utils import is_cloud_storage from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.mem_constants import GiB_bytes from vllm.utils.network_utils import get_ip diff --git a/vllm/tokenizers/registry.py b/vllm/tokenizers/registry.py index bf9d295de23ae..87048f2ec7845 100644 --- a/vllm/tokenizers/registry.py +++ b/vllm/tokenizers/registry.py @@ -11,14 +11,14 @@ from typing_extensions import assert_never import vllm.envs as envs from vllm.logger import init_logger -from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf -from vllm.transformers_utils.repo_utils import list_filtered_repo_files -from vllm.transformers_utils.utils import ( +from vllm.transformers_utils.gguf_utils import ( check_gguf_file, + get_gguf_file_path_from_hf, is_gguf, is_remote_gguf, split_remote_gguf, ) +from vllm.transformers_utils.repo_utils import list_filtered_repo_files from vllm.utils.import_utils import resolve_obj_by_qualname from .protocol import TokenizerLike diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 1bb5791e19016..0cceab90ba9a2 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -26,8 +26,15 @@ from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME from vllm import envs from vllm.logger import init_logger +from vllm.transformers_utils.utils import parse_safetensors_file_metadata from .config_parser_base import ConfigParserBase +from .gguf_utils import ( + check_gguf_file, + is_gguf, + is_remote_gguf, + split_remote_gguf, +) from .repo_utils import ( _get_hf_token, file_or_path_exists, @@ -36,13 +43,6 @@ from .repo_utils import ( try_get_local_file, with_retry, ) -from .utils import ( - check_gguf_file, - is_gguf, - is_remote_gguf, - parse_safetensors_file_metadata, - split_remote_gguf, -) if envs.VLLM_USE_MODELSCOPE: from modelscope import AutoConfig diff --git a/vllm/transformers_utils/gguf_utils.py b/vllm/transformers_utils/gguf_utils.py index cb1fc2d092e01..f3fd43c6ace51 100644 --- a/vllm/transformers_utils/gguf_utils.py +++ b/vllm/transformers_utils/gguf_utils.py @@ -2,10 +2,14 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """GGUF utility functions.""" +from functools import cache +from os import PathLike from pathlib import Path import gguf +import regex as re from gguf.constants import Keys, VisionProjectorType +from gguf.quants import GGMLQuantizationType from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig from vllm.logger import init_logger @@ -15,6 +19,73 @@ from .repo_utils import list_filtered_repo_files logger = init_logger(__name__) +@cache +def check_gguf_file(model: str | PathLike) -> bool: + """Check if the file is a GGUF model.""" + model = Path(model) + if not model.is_file(): + return False + elif model.suffix == ".gguf": + return True + + try: + with model.open("rb") as f: + header = f.read(4) + + return header == b"GGUF" + except Exception as e: + logger.debug("Error reading file %s: %s", model, e) + return False + + +@cache +def is_remote_gguf(model: str | Path) -> bool: + """Check if the model is a remote GGUF model.""" + pattern = r"^[a-zA-Z0-9][a-zA-Z0-9._-]*/[a-zA-Z0-9][a-zA-Z0-9._-]*:[A-Za-z0-9_+-]+$" + model = str(model) + if re.fullmatch(pattern, model): + _, quant_type = model.rsplit(":", 1) + return is_valid_gguf_quant_type(quant_type) + return False + + +def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool: + """Check if the quant type is a valid GGUF quant type.""" + return getattr(GGMLQuantizationType, gguf_quant_type, None) is not None + + +def split_remote_gguf(model: str | Path) -> tuple[str, str]: + """Split the model into repo_id and quant type.""" + model = str(model) + if is_remote_gguf(model): + parts = model.rsplit(":", 1) + return (parts[0], parts[1]) + raise ValueError( + f"Wrong GGUF model or invalid GGUF quant type: {model}.\n" + "- It should be in repo_id:quant_type format.\n" + f"- Valid GGMLQuantizationType values: {GGMLQuantizationType._member_names_}", + ) + + +def is_gguf(model: str | Path) -> bool: + """Check if the model is a GGUF model. + + Args: + model: Model name, path, or Path object to check. + + Returns: + True if the model is a GGUF model, False otherwise. + """ + model = str(model) + + # Check if it's a local GGUF file + if check_gguf_file(model): + return True + + # Check if it's a remote GGUF model (repo_id:quant_type format) + return is_remote_gguf(model) + + def detect_gguf_multimodal(model: str) -> Path | None: """Check if GGUF model has multimodal projector file. diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py index 63cdf63370342..e9864b0c1531d 100644 --- a/vllm/transformers_utils/processor.py +++ b/vllm/transformers_utils/processor.py @@ -18,7 +18,8 @@ from transformers.processing_utils import ProcessorMixin from transformers.video_processing_utils import BaseVideoProcessor from typing_extensions import TypeVar -from vllm.transformers_utils.utils import convert_model_repo_to_path, is_gguf +from vllm.transformers_utils.gguf_utils import is_gguf +from vllm.transformers_utils.utils import convert_model_repo_to_path from vllm.utils.func_utils import get_allowed_kwarg_only_overrides if TYPE_CHECKING: diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index 45a873c9f7001..96f292f4c949e 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -9,8 +9,6 @@ from os import PathLike from pathlib import Path from typing import Any -from gguf import GGMLQuantizationType - import vllm.envs as envs from vllm.logger import init_logger @@ -29,76 +27,6 @@ def is_cloud_storage(model_or_path: str) -> bool: return is_s3(model_or_path) or is_gcs(model_or_path) -@cache -def check_gguf_file(model: str | PathLike) -> bool: - """Check if the file is a GGUF model.""" - model = Path(model) - if not model.is_file(): - return False - elif model.suffix == ".gguf": - return True - - try: - with model.open("rb") as f: - header = f.read(4) - - return header == b"GGUF" - except Exception as e: - logger.debug("Error reading file %s: %s", model, e) - return False - - -@cache -def is_remote_gguf(model: str | Path) -> bool: - """Check if the model is a remote GGUF model.""" - model = str(model) - return ( - (not is_cloud_storage(model)) - and (not model.startswith(("http://", "https://"))) - and ("/" in model and ":" in model) - and is_valid_gguf_quant_type(model.rsplit(":", 1)[1]) - ) - - -def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool: - """Check if the quant type is a valid GGUF quant type.""" - return getattr(GGMLQuantizationType, gguf_quant_type, None) is not None - - -def split_remote_gguf(model: str | Path) -> tuple[str, str]: - """Split the model into repo_id and quant type.""" - model = str(model) - if is_remote_gguf(model): - parts = model.rsplit(":", 1) - return (parts[0], parts[1]) - raise ValueError( - "Wrong GGUF model or invalid GGUF quant type: %s.\n" - "- It should be in repo_id:quant_type format.\n" - "- Valid GGMLQuantizationType values: %s", - model, - GGMLQuantizationType._member_names_, - ) - - -def is_gguf(model: str | Path) -> bool: - """Check if the model is a GGUF model. - - Args: - model: Model name, path, or Path object to check. - - Returns: - True if the model is a GGUF model, False otherwise. - """ - model = str(model) - - # Check if it's a local GGUF file - if check_gguf_file(model): - return True - - # Check if it's a remote GGUF model (repo_id:quant_type format) - return is_remote_gguf(model) - - def modelscope_list_repo_files( repo_id: str, revision: str | None = None,