[BugFix] Optional tokenizer argument when loading GGUF models (#29582)
Signed-off-by: Injae Ryou <injaeryou@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent e1f262337b
commit 0840abdd24
@@ -439,13 +439,6 @@ class ModelConfig:
         self.model = maybe_model_redirect(self.model)
         # The tokenizer is consistent with the model by default.
         if self.tokenizer is None:
-            # Check if this is a GGUF model (either local file or remote GGUF)
-            if is_gguf(self.model):
-                raise ValueError(
-                    "Using a tokenizer is mandatory when loading a GGUF model. "
-                    "Please specify the tokenizer path or name using the "
-                    "--tokenizer argument."
-                )
             self.tokenizer = self.model
         if self.tokenizer_revision is None:
             self.tokenizer_revision = self.revision
@@ -699,6 +692,14 @@ class ModelConfig:
         self.multimodal_config = MultiModalConfig(**mm_config_kwargs)
 
+        # Multimodal GGUF models must use original repo for mm processing
+        if is_gguf(self.tokenizer) and self.is_multimodal_model:
+            raise ValueError(
+                "Loading a multimodal GGUF model needs to use original "
+                "tokenizer. Please specify the unquantized hf model's "
+                "repo name or path using the --tokenizer argument."
+            )
+
         if self.disable_sliding_window:
             # Set after get_and_verify_max_len to ensure that max_model_len
             # can be correctly capped to sliding window size
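Taken together, the two ModelConfig hunks make the tokenizer argument optional for text-only GGUF models (the tokenizer now falls back to the GGUF path itself), while multimodal GGUF models must still point at the original unquantized repo. A minimal usage sketch; the model paths and repo names below are placeholders and not part of this commit:

from vllm import LLM

# Text-only GGUF: the tokenizer now defaults to the GGUF model itself
# (previously this raised "Using a tokenizer is mandatory ...").
llm = LLM(model="./qwen3-0.6b-Q4_K_M.gguf")  # placeholder path

# Multimodal GGUF: still requires tokenizer= to name the original
# unquantized repo, otherwise the new ValueError above is raised.
mm_llm = LLM(
    model="./gemma-3-4b-it-Q4_K_M.gguf",  # placeholder path
    tokenizer="google/gemma-3-4b-it",     # placeholder repo id
)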
@@ -9,6 +9,7 @@ from gguf.constants import Keys, VisionProjectorType
 from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
 
 from vllm.logger import init_logger
+from vllm.transformers_utils.config import list_filtered_repo_files
 
 logger = init_logger(__name__)
 
@@ -164,3 +165,44 @@ def maybe_patch_hf_config_from_gguf(
         hf_config = new_hf_config
 
     return hf_config
+
+
+def get_gguf_file_path_from_hf(
+    repo_id: str | Path,
+    quant_type: str,
+    revision: str | None = None,
+) -> str:
+    """Get the GGUF file path from HuggingFace Hub based on repo_id and quant_type.
+
+    Args:
+        repo_id: The HuggingFace repository ID (e.g., "Qwen/Qwen3-0.6B")
+        quant_type: The quantization type (e.g., "Q4_K_M", "F16")
+        revision: Optional revision/branch name
+
+    Returns:
+        The path to the GGUF file on HuggingFace Hub (e.g., "filename.gguf").
+    """
+    repo_id = str(repo_id)
+    gguf_patterns = [
+        f"*-{quant_type}.gguf",
+        f"*-{quant_type}-*.gguf",
+        f"*/*-{quant_type}.gguf",
+        f"*/*-{quant_type}-*.gguf",
+    ]
+    matching_files = list_filtered_repo_files(
+        repo_id,
+        allow_patterns=gguf_patterns,
+        revision=revision,
+    )
+
+    if len(matching_files) == 0:
+        raise ValueError(
+            f"Could not find GGUF file for repo {repo_id} "
+            f"with quantization {quant_type}."
+        )
+
+    # Sort to ensure consistent ordering (prefer non-sharded files)
+    matching_files.sort(key=lambda x: (x.count("-"), x))
+    gguf_filename = matching_files[0]
+    return gguf_filename
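A small illustration of how the new helper's glob patterns and sort key behave, assuming a repo that publishes both sharded and single-file GGUF weights; the file names below are made up for the example and are not taken from the commit:

# Names an HF repo might return for the patterns "*-Q4_K_M.gguf" and
# "*-Q4_K_M-*.gguf" (illustrative only):
candidates = [
    "qwen3-0.6b-Q4_K_M-00001-of-00002.gguf",  # sharded
    "qwen3-0.6b-Q4_K_M-00002-of-00002.gguf",  # sharded
    "qwen3-0.6b-Q4_K_M.gguf",                 # single file
]

# The sort key (hyphen count, then name) orders the non-sharded file first,
# so get_gguf_file_path_from_hf would return "qwen3-0.6b-Q4_K_M.gguf".
candidates.sort(key=lambda x: (x.count("-"), x))
print(candidates[0])  # -> qwen3-0.6b-Q4_K_M.gguf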
@@ -19,6 +19,7 @@ from vllm.transformers_utils.config import (
     get_sentence_transformer_tokenizer_config,
     list_filtered_repo_files,
 )
+from vllm.transformers_utils.gguf_utils import get_gguf_file_path_from_hf
 from vllm.transformers_utils.tokenizers import MistralTokenizer
 from vllm.transformers_utils.utils import (
     check_gguf_file,
@@ -190,7 +191,14 @@ def get_tokenizer(
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
     elif is_remote_gguf(tokenizer_name):
-        tokenizer_name, _ = split_remote_gguf(tokenizer_name)
+        tokenizer_name, quant_type = split_remote_gguf(tokenizer_name)
+        # Get the HuggingFace Hub path for the GGUF file
+        gguf_file = get_gguf_file_path_from_hf(
+            tokenizer_name,
+            quant_type,
+            revision=revision,
+        )
+        kwargs["gguf_file"] = gguf_file
 
     # if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
     # first to use official Mistral tokenizer if possible.
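For remote GGUF tokenizers, the quantization type returned by split_remote_gguf is no longer discarded: it is resolved to a concrete file name and forwarded as gguf_file, which transformers' AutoTokenizer accepts for loading a tokenizer directly from a GGUF checkpoint. A hedged sketch of the resulting downstream call; the repo id and file name are placeholders, and the exact remote-GGUF spec format handled by is_remote_gguf is not shown in this diff:

from transformers import AutoTokenizer

# Both names below are assumptions for illustration only.
tok = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-0.6B-GGUF",              # repo id as split by split_remote_gguf
    gguf_file="Qwen3-0.6B-Q4_K_M.gguf",  # file resolved by get_gguf_file_path_from_hf
)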