vllm/tests/models/test_gguf_download.py
Injae Ryou 794029f012
[Feature]: Improve GGUF loading from HuggingFace user experience like repo_id:quant_type (#29137)
Signed-off-by: Injae Ryou <injaeryou@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2025-11-25 14:28:53 +00:00

241 lines
9.4 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock, patch
import pytest
from vllm.config import ModelConfig
from vllm.config.load import LoadConfig
from vllm.model_executor.model_loader.gguf_loader import GGUFModelLoader
from vllm.model_executor.model_loader.weight_utils import download_gguf
class TestGGUFDownload:
"""Test GGUF model downloading functionality."""
@patch("vllm.model_executor.model_loader.weight_utils.download_weights_from_hf")
def test_download_gguf_single_file(self, mock_download):
"""Test downloading a single GGUF file."""
# Setup mock
mock_folder = "/tmp/mock_cache"
mock_download.return_value = mock_folder
# Mock glob to return a single file
with patch("glob.glob") as mock_glob:
mock_glob.side_effect = lambda pattern, **kwargs: (
[f"{mock_folder}/model-IQ1_S.gguf"] if "IQ1_S" in pattern else []
)
result = download_gguf("unsloth/Qwen3-0.6B-GGUF", "IQ1_S")
# Verify download_weights_from_hf was called with correct patterns
mock_download.assert_called_once_with(
model_name_or_path="unsloth/Qwen3-0.6B-GGUF",
cache_dir=None,
allow_patterns=[
"*-IQ1_S.gguf",
"*-IQ1_S-*.gguf",
"*/*-IQ1_S.gguf",
"*/*-IQ1_S-*.gguf",
],
revision=None,
ignore_patterns=None,
)
# Verify result is the file path, not folder
assert result == f"{mock_folder}/model-IQ1_S.gguf"
@patch("vllm.model_executor.model_loader.weight_utils.download_weights_from_hf")
def test_download_gguf_sharded_files(self, mock_download):
"""Test downloading sharded GGUF files."""
mock_folder = "/tmp/mock_cache"
mock_download.return_value = mock_folder
# Mock glob to return sharded files
with patch("glob.glob") as mock_glob:
mock_glob.side_effect = lambda pattern, **kwargs: (
[
f"{mock_folder}/model-Q2_K-00001-of-00002.gguf",
f"{mock_folder}/model-Q2_K-00002-of-00002.gguf",
]
if "Q2_K" in pattern
else []
)
result = download_gguf("unsloth/gpt-oss-120b-GGUF", "Q2_K")
# Should return the first file after sorting
assert result == f"{mock_folder}/model-Q2_K-00001-of-00002.gguf"
@patch("vllm.model_executor.model_loader.weight_utils.download_weights_from_hf")
def test_download_gguf_subdir(self, mock_download):
"""Test downloading GGUF files from subdirectory."""
mock_folder = "/tmp/mock_cache"
mock_download.return_value = mock_folder
with patch("glob.glob") as mock_glob:
mock_glob.side_effect = lambda pattern, **kwargs: (
[f"{mock_folder}/Q2_K/model-Q2_K.gguf"]
if "Q2_K" in pattern or "**/*.gguf" in pattern
else []
)
result = download_gguf("unsloth/gpt-oss-120b-GGUF", "Q2_K")
assert result == f"{mock_folder}/Q2_K/model-Q2_K.gguf"
@patch("vllm.model_executor.model_loader.weight_utils.download_weights_from_hf")
@patch("glob.glob", return_value=[])
def test_download_gguf_no_files_found(self, mock_glob, mock_download):
"""Test error when no GGUF files are found."""
mock_folder = "/tmp/mock_cache"
mock_download.return_value = mock_folder
with pytest.raises(ValueError, match="Downloaded GGUF files not found"):
download_gguf("unsloth/Qwen3-0.6B-GGUF", "IQ1_S")
class TestGGUFModelLoader:
"""Test GGUFModelLoader class methods."""
@patch("os.path.isfile", return_value=True)
def test_prepare_weights_local_file(self, mock_isfile):
"""Test _prepare_weights with local file."""
load_config = LoadConfig(load_format="gguf")
loader = GGUFModelLoader(load_config)
# Create a simple mock ModelConfig with only the model attribute
model_config = MagicMock()
model_config.model = "/path/to/model.gguf"
result = loader._prepare_weights(model_config)
assert result == "/path/to/model.gguf"
mock_isfile.assert_called_once_with("/path/to/model.gguf")
@patch("vllm.model_executor.model_loader.gguf_loader.hf_hub_download")
@patch("os.path.isfile", return_value=False)
def test_prepare_weights_https_url(self, mock_isfile, mock_hf_download):
"""Test _prepare_weights with HTTPS URL."""
load_config = LoadConfig(load_format="gguf")
loader = GGUFModelLoader(load_config)
mock_hf_download.return_value = "/downloaded/model.gguf"
# Create a simple mock ModelConfig with only the model attribute
model_config = MagicMock()
model_config.model = "https://huggingface.co/model.gguf"
result = loader._prepare_weights(model_config)
assert result == "/downloaded/model.gguf"
mock_hf_download.assert_called_once_with(
url="https://huggingface.co/model.gguf"
)
@patch("vllm.model_executor.model_loader.gguf_loader.hf_hub_download")
@patch("os.path.isfile", return_value=False)
def test_prepare_weights_repo_filename(self, mock_isfile, mock_hf_download):
"""Test _prepare_weights with repo_id/filename.gguf format."""
load_config = LoadConfig(load_format="gguf")
loader = GGUFModelLoader(load_config)
mock_hf_download.return_value = "/downloaded/model.gguf"
# Create a simple mock ModelConfig with only the model attribute
model_config = MagicMock()
model_config.model = "unsloth/Qwen3-0.6B-GGUF/model.gguf"
result = loader._prepare_weights(model_config)
assert result == "/downloaded/model.gguf"
mock_hf_download.assert_called_once_with(
repo_id="unsloth/Qwen3-0.6B-GGUF", filename="model.gguf"
)
@patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
@patch("vllm.transformers_utils.config.file_or_path_exists", return_value=True)
@patch("vllm.config.model.get_config")
@patch("vllm.config.model.is_gguf", return_value=True)
@patch("vllm.model_executor.model_loader.gguf_loader.download_gguf")
@patch("os.path.isfile", return_value=False)
def test_prepare_weights_repo_quant_type(
self,
mock_isfile,
mock_download_gguf,
mock_is_gguf,
mock_get_config,
mock_file_exists,
mock_get_image_config,
):
"""Test _prepare_weights with repo_id:quant_type format."""
mock_hf_config = MagicMock()
mock_hf_config.architectures = ["Qwen3ForCausalLM"]
class MockTextConfig:
max_position_embeddings = 4096
sliding_window = None
model_type = "qwen3"
num_attention_heads = 32
mock_text_config = MockTextConfig()
mock_hf_config.get_text_config.return_value = mock_text_config
mock_hf_config.dtype = "bfloat16"
mock_get_config.return_value = mock_hf_config
load_config = LoadConfig(load_format="gguf")
loader = GGUFModelLoader(load_config)
mock_download_gguf.return_value = "/downloaded/model-IQ1_S.gguf"
model_config = ModelConfig(
model="unsloth/Qwen3-0.6B-GGUF:IQ1_S", tokenizer="Qwen/Qwen3-0.6B"
)
result = loader._prepare_weights(model_config)
# The actual result will be the downloaded file path from mock
assert result == "/downloaded/model-IQ1_S.gguf"
mock_download_gguf.assert_called_once_with(
"unsloth/Qwen3-0.6B-GGUF",
"IQ1_S",
cache_dir=None,
revision=None,
ignore_patterns=["original/**/*"],
)
@patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
@patch("vllm.config.model.get_config")
@patch("vllm.config.model.is_gguf", return_value=False)
@patch("vllm.transformers_utils.utils.check_gguf_file", return_value=False)
@patch("os.path.isfile", return_value=False)
def test_prepare_weights_invalid_format(
self,
mock_isfile,
mock_check_gguf,
mock_is_gguf,
mock_get_config,
mock_get_image_config,
):
"""Test _prepare_weights with invalid format."""
mock_hf_config = MagicMock()
mock_hf_config.architectures = ["Qwen3ForCausalLM"]
class MockTextConfig:
max_position_embeddings = 4096
sliding_window = None
model_type = "qwen3"
num_attention_heads = 32
mock_text_config = MockTextConfig()
mock_hf_config.get_text_config.return_value = mock_text_config
mock_hf_config.dtype = "bfloat16"
mock_get_config.return_value = mock_hf_config
load_config = LoadConfig(load_format="gguf")
loader = GGUFModelLoader(load_config)
# Create ModelConfig with a valid repo_id to avoid validation errors
# Then test _prepare_weights with invalid format
model_config = ModelConfig(model="unsloth/Qwen3-0.6B")
# Manually set model to invalid format after creation
model_config.model = "invalid-format"
with pytest.raises(ValueError, match="Unrecognised GGUF reference"):
loader._prepare_weights(model_config)