mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 16:27:27 +08:00
[Docs] Lazy import gguf (#20785)
Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
parent
5b6fe23d05
commit
b854321ffe
@ -11,7 +11,6 @@ from vllm.entrypoints.chat_utils import (
|
|||||||
ChatCompletionContentPartImageParam, ChatCompletionContentPartTextParam,
|
ChatCompletionContentPartImageParam, ChatCompletionContentPartTextParam,
|
||||||
MultiModalItemTracker, _ContentPart, _parse_chat_message_content_part)
|
MultiModalItemTracker, _ContentPart, _parse_chat_message_content_part)
|
||||||
from vllm.inputs import TokensPrompt
|
from vllm.inputs import TokensPrompt
|
||||||
from vllm.model_executor.model_loader import get_model_cls
|
|
||||||
from vllm.model_executor.models.interfaces import supports_score_template
|
from vllm.model_executor.models.interfaces import supports_score_template
|
||||||
from vllm.multimodal.inputs import MultiModalDataDict
|
from vllm.multimodal.inputs import MultiModalDataDict
|
||||||
from vllm.outputs import PoolingRequestOutput
|
from vllm.outputs import PoolingRequestOutput
|
||||||
@ -140,6 +139,8 @@ def apply_score_template(
|
|||||||
prompt_1: str,
|
prompt_1: str,
|
||||||
prompt_2: str,
|
prompt_2: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
# NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
|
||||||
|
from vllm.model_executor.model_loader import get_model_cls
|
||||||
|
|
||||||
model = get_model_cls(model_config)
|
model = get_model_cls(model_config)
|
||||||
if supports_score_template(model):
|
if supports_score_template(model):
|
||||||
@ -162,6 +163,9 @@ def post_process_tokens(
|
|||||||
Note:
|
Note:
|
||||||
This is an in-place operation.
|
This is an in-place operation.
|
||||||
"""
|
"""
|
||||||
|
# NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
|
||||||
|
from vllm.model_executor.model_loader import get_model_cls
|
||||||
|
|
||||||
model = get_model_cls(model_config)
|
model = get_model_cls(model_config)
|
||||||
if supports_score_template(model):
|
if supports_score_template(model):
|
||||||
model.post_process_tokens(prompt)
|
model.post_process_tokens(prompt)
|
||||||
|
|||||||
@ -14,7 +14,6 @@ from pathlib import Path
|
|||||||
from typing import Any, Callable, Optional, Union
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
import filelock
|
import filelock
|
||||||
import gguf
|
|
||||||
import huggingface_hub.constants
|
import huggingface_hub.constants
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
@ -40,6 +39,11 @@ except (ImportError, OSError):
|
|||||||
SafetensorsStreamer = runai_model_streamer.placeholder_attr(
|
SafetensorsStreamer = runai_model_streamer.placeholder_attr(
|
||||||
"SafetensorsStreamer")
|
"SafetensorsStreamer")
|
||||||
|
|
||||||
|
try:
|
||||||
|
import gguf
|
||||||
|
except ImportError:
|
||||||
|
gguf = PlaceholderModule("gguf")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from fastsafetensors import SafeTensorsFileLoader, SingleGroup
|
from fastsafetensors import SafeTensorsFileLoader, SingleGroup
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user