# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""GGUF utility functions."""

from pathlib import Path

import gguf
from gguf.constants import Keys, VisionProjectorType
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig

from vllm.logger import init_logger
from vllm.transformers_utils.config import list_filtered_repo_files

logger = init_logger(__name__)


def detect_gguf_multimodal(model: str) -> Path | None:
    """Check if GGUF model has multimodal projector file.

    Args:
        model: Model path string

    Returns:
        Path to mmproj file if found, None otherwise
    """
    if not model.endswith(".gguf"):
        return None
    try:
        model_path = Path(model)
        if not model_path.is_file():
            return None
        model_dir = model_path.parent
        mmproj_patterns = ["mmproj.gguf", "mmproj-*.gguf", "*mmproj*.gguf"]
        for pattern in mmproj_patterns:
            mmproj_files = list(model_dir.glob(pattern))
            if mmproj_files:
                return mmproj_files[0]
        return None
    except Exception:
        return None
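
# Illustrative usage sketch (not part of the module; the checkpoint paths are
# hypothetical): given "/models/gemma-3-4b-it-Q4_K_M.gguf" with a sibling
# "mmproj-F16.gguf" in the same directory, the helper returns the projector
# path; for a text-only checkpoint (or a non-.gguf path) it returns None.
#
#     mmproj = detect_gguf_multimodal("/models/gemma-3-4b-it-Q4_K_M.gguf")
#     if mmproj is not None:
#         print(f"Found multimodal projector: {mmproj}")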


def extract_vision_config_from_gguf(mmproj_path: str) -> "SiglipVisionConfig | None":
    """Extract vision config parameters from mmproj.gguf metadata.

    Reads vision encoder configuration from GGUF metadata fields using
    standardized GGUF constants. Automatically detects the projector type
    (e.g., gemma3, llama4) and applies model-specific parameters accordingly.

    The function extracts standard CLIP vision parameters from GGUF metadata
    and applies projector-type-specific customizations. For unknown projector
    types, it uses safe defaults from SiglipVisionConfig.

    Args:
        mmproj_path: Path to mmproj.gguf file (str or Path)

    Returns:
        SiglipVisionConfig if extraction succeeds, None if any required
        field is missing from the GGUF metadata

    Raises:
        Exception: Exceptions from GGUF reading (file not found, corrupted
            file, etc.) propagate directly from gguf.GGUFReader
    """
    reader = gguf.GGUFReader(str(mmproj_path))

    # Detect projector type to apply model-specific parameters
    projector_type = None
    projector_type_field = reader.get_field(Keys.Clip.PROJECTOR_TYPE)
    if projector_type_field:
        try:
            projector_type = bytes(projector_type_field.parts[-1]).decode("utf-8")
        except (AttributeError, UnicodeDecodeError) as e:
            logger.warning("Failed to decode projector type from GGUF: %s", e)

    # Map GGUF field constants to SiglipVisionConfig parameters.
    # Uses official GGUF constants from gguf-py for standardization.
    # Format: {gguf_constant: (param_name, dtype)}
    VISION_CONFIG_FIELDS = {
        Keys.ClipVision.EMBEDDING_LENGTH: ("hidden_size", int),
        Keys.ClipVision.FEED_FORWARD_LENGTH: ("intermediate_size", int),
        Keys.ClipVision.BLOCK_COUNT: ("num_hidden_layers", int),
        Keys.ClipVision.Attention.HEAD_COUNT: ("num_attention_heads", int),
        Keys.ClipVision.IMAGE_SIZE: ("image_size", int),
        Keys.ClipVision.PATCH_SIZE: ("patch_size", int),
        Keys.ClipVision.Attention.LAYERNORM_EPS: ("layer_norm_eps", float),
    }

    # Extract and validate all required fields
    config_params = {}
    for gguf_key, (param_name, dtype) in VISION_CONFIG_FIELDS.items():
        field = reader.get_field(gguf_key)
        if field is None:
            logger.warning(
                "Missing required vision config field '%s' in mmproj.gguf",
                gguf_key,
            )
            return None
        # Extract scalar value from GGUF field and convert to target type
        config_params[param_name] = dtype(field.parts[-1])

    # Apply model-specific parameters based on projector type
    if projector_type == VisionProjectorType.GEMMA3:
        # Gemma3 doesn't use the vision pooling head (multihead attention)
        # This is a vLLM-specific parameter used in SiglipVisionTransformer
        config_params["vision_use_head"] = False
        logger.info("Detected Gemma3 projector, disabling vision pooling head")
    # Add other projector-type-specific customizations here as needed
    # elif projector_type == VisionProjectorType.LLAMA4:
    #     config_params["vision_use_head"] = ...

    # Create config with extracted parameters
    # Note: num_channels and attention_dropout use SiglipVisionConfig defaults
    # (3 and 0.0 respectively) which are correct for all models
    config = SiglipVisionConfig(**config_params)
    if projector_type:
        logger.info(
            "Extracted vision config from mmproj.gguf (projector_type: %s)",
            projector_type,
        )
    else:
        logger.info("Extracted vision config from mmproj.gguf metadata")
    return config
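
# Illustrative usage sketch (the mmproj file name is an assumption; any valid
# mmproj GGUF works): the returned SiglipVisionConfig exposes standard fields
# such as hidden_size and num_hidden_layers, and is None when a required
# vision metadata key is missing from the file.
#
#     vision_config = extract_vision_config_from_gguf("/models/mmproj-F16.gguf")
#     if vision_config is not None:
#         print(vision_config.hidden_size, vision_config.num_hidden_layers)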


def maybe_patch_hf_config_from_gguf(
    model: str,
    hf_config: PretrainedConfig,
) -> PretrainedConfig:
    """Patch HF config for GGUF models.

    Applies GGUF-specific patches to HuggingFace config:
    1. For multimodal models: patches architecture and vision config
    2. For all GGUF models: overrides vocab_size from embedding tensor

    This ensures compatibility with GGUF models that have extended
    vocabularies (e.g., Unsloth) where the GGUF file contains more
    tokens than the HuggingFace tokenizer config specifies.

    Args:
        model: Model path string
        hf_config: HuggingFace config to patch in-place

    Returns:
        Updated HuggingFace config
    """
    # Patch multimodal config if mmproj.gguf exists
    mmproj_path = detect_gguf_multimodal(model)
    if mmproj_path is not None:
        vision_config = extract_vision_config_from_gguf(str(mmproj_path))
        # Create HF config for Gemma3 multimodal
        text_config = hf_config.get_text_config()
        is_gemma3 = hf_config.model_type in ("gemma3", "gemma3_text")
        if vision_config is not None and is_gemma3:
            new_hf_config = Gemma3Config.from_text_vision_configs(
                text_config=text_config,
                vision_config=vision_config,
                architectures=["Gemma3ForConditionalGeneration"],
            )
            hf_config = new_hf_config
    return hf_config
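
# Illustrative usage sketch (the HF repo id and GGUF path are assumptions for
# illustration): load the base HF config as usual, then patch it so a Gemma3
# GGUF checkpoint that ships an mmproj file is treated as
# Gemma3ForConditionalGeneration with the vision config extracted above.
#
#     from transformers import AutoConfig
#
#     hf_config = AutoConfig.from_pretrained("google/gemma-3-4b-it")
#     hf_config = maybe_patch_hf_config_from_gguf(
#         "/models/gemma-3-4b-it-Q4_K_M.gguf", hf_config
#     )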


def get_gguf_file_path_from_hf(
    repo_id: str | Path,
    quant_type: str,
    revision: str | None = None,
) -> str:
    """Get the GGUF file path from HuggingFace Hub based on repo_id and quant_type.

    Args:
        repo_id: The HuggingFace repository ID (e.g., "Qwen/Qwen3-0.6B")
        quant_type: The quantization type (e.g., "Q4_K_M", "F16")
        revision: Optional revision/branch name

    Returns:
        The path to the GGUF file on HuggingFace Hub (e.g., "filename.gguf")
    """
    repo_id = str(repo_id)
    gguf_patterns = [
        f"*-{quant_type}.gguf",
        f"*-{quant_type}-*.gguf",
        f"*/*-{quant_type}.gguf",
        f"*/*-{quant_type}-*.gguf",
    ]
    matching_files = list_filtered_repo_files(
        repo_id,
        allow_patterns=gguf_patterns,
        revision=revision,
    )
    if len(matching_files) == 0:
        raise ValueError(
            f"Could not find GGUF file for repo {repo_id} "
            f"with quantization {quant_type}."
        )
    # Sort to ensure consistent ordering (prefer non-sharded files)
    matching_files.sort(key=lambda x: (x.count("-"), x))
    gguf_filename = matching_files[0]
    return gguf_filename
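
# Illustrative usage sketch (the repo id follows the docstring example and is
# assumed to host GGUF files; the resolved filename depends on the repo's
# contents):
#
#     gguf_file = get_gguf_file_path_from_hf("Qwen/Qwen3-0.6B", "Q4_K_M")
#     print(gguf_file)  # e.g. "Qwen3-0.6B-Q4_K_M.gguf"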