[Docs] Lazy import gguf (#20785)

Signed-off-by: simon-mo <simon.mo@hey.com>
2025-12-14 16:27:27 +08:00 · 2025-07-10 16:06:37 -07:00 · 2025-07-10 16:06:37 -07:00 · b854321ffe
commit b854321ffe
parent 5b6fe23d05
2 changed files with 10 additions and 2 deletions
--- a/vllm/entrypoints/score_utils.py
+++ b/vllm/entrypoints/score_utils.py
@ -11,7 +11,6 @@ from vllm.entrypoints.chat_utils import (
    ChatCompletionContentPartImageParam, ChatCompletionContentPartTextParam,
    MultiModalItemTracker, _ContentPart, _parse_chat_message_content_part)
 from vllm.inputs import TokensPrompt
 from vllm.model_executor.model_loader import get_model_cls
 from vllm.model_executor.models.interfaces import supports_score_template
 from vllm.multimodal.inputs import MultiModalDataDict
 from vllm.outputs import PoolingRequestOutput
@ -140,6 +139,8 @@ def apply_score_template(
    prompt_1: str,
    prompt_2: str,
 ) -> str:
    # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
    from vllm.model_executor.model_loader import get_model_cls
    model = get_model_cls(model_config)
    if supports_score_template(model):
@ -162,6 +163,9 @@ def post_process_tokens(
    Note:
        This is an in-place operation.
    """
    # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
    from vllm.model_executor.model_loader import get_model_cls
    model = get_model_cls(model_config)
    if supports_score_template(model):
        model.post_process_tokens(prompt)
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@ -14,7 +14,6 @@ from pathlib import Path
 from typing import Any, Callable, Optional, Union
 import filelock
 import gguf
 import huggingface_hub.constants
 import numpy as np
 import torch
@ -40,6 +39,11 @@ except (ImportError, OSError):
    SafetensorsStreamer = runai_model_streamer.placeholder_attr(
        "SafetensorsStreamer")
 try:
    import gguf
 except ImportError:
    gguf = PlaceholderModule("gguf")
 try:
    from fastsafetensors import SafeTensorsFileLoader, SingleGroup
 except ImportError: