From b854321ffe50fd04c6b1ac58eecdab4caf5b4295 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Thu, 10 Jul 2025 16:06:37 -0700 Subject: [PATCH] [Docs] Lazy import gguf (#20785) Signed-off-by: simon-mo --- vllm/entrypoints/score_utils.py | 6 +++++- vllm/model_executor/model_loader/weight_utils.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/score_utils.py b/vllm/entrypoints/score_utils.py index 3fc4ed606b8a9..f3f042355c9eb 100644 --- a/vllm/entrypoints/score_utils.py +++ b/vllm/entrypoints/score_utils.py @@ -11,7 +11,6 @@ from vllm.entrypoints.chat_utils import ( ChatCompletionContentPartImageParam, ChatCompletionContentPartTextParam, MultiModalItemTracker, _ContentPart, _parse_chat_message_content_part) from vllm.inputs import TokensPrompt -from vllm.model_executor.model_loader import get_model_cls from vllm.model_executor.models.interfaces import supports_score_template from vllm.multimodal.inputs import MultiModalDataDict from vllm.outputs import PoolingRequestOutput @@ -140,6 +139,8 @@ def apply_score_template( prompt_1: str, prompt_2: str, ) -> str: + # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf) + from vllm.model_executor.model_loader import get_model_cls model = get_model_cls(model_config) if supports_score_template(model): @@ -162,6 +163,9 @@ def post_process_tokens( Note: This is an in-place operation. """ + # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g.
gguf) + from vllm.model_executor.model_loader import get_model_cls + model = get_model_cls(model_config) if supports_score_template(model): model.post_process_tokens(prompt) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 857f4bca68245..1058ae140b5b4 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -14,7 +14,6 @@ from pathlib import Path from typing import Any, Callable, Optional, Union import filelock -import gguf import huggingface_hub.constants import numpy as np import torch @@ -40,6 +39,11 @@ except (ImportError, OSError): SafetensorsStreamer = runai_model_streamer.placeholder_attr( "SafetensorsStreamer") +try: + import gguf +except ImportError: + gguf = PlaceholderModule("gguf") + try: from fastsafetensors import SafeTensorsFileLoader, SingleGroup except ImportError: