Better errors for Transformers backend missing features (#23759)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

parent 885ca6d31d
commit 934bebf192
@@ -17,6 +17,7 @@
 """Wrapper around `transformers` models"""
 from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
+from pathlib import Path
 from typing import Literal, Optional, Union

 import regex as re
@@ -60,6 +61,21 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
 logger = init_logger(__name__)


+def get_feature_request_tip(
+    model: str,
+    trust_remote_code: bool,
+) -> str:
+    hf_url = f"a discussion at https://huggingface.co/{model}/discussions/new"
+    gh_url = "an issue at https://github.com/huggingface/transformers/issues/new/choose"
+    url = hf_url if trust_remote_code else gh_url
+    prefix = f"Please open {url} to request support for this feature. "
+    if Path(model).exists():
+        prefix = ""
+    doc_url = "https://docs.vllm.ai/en/latest/models/supported_models.html#writing-custom-models"
+    tip = f"See {doc_url} for instructions on how to add support yourself."
+    return f"{prefix}{tip}"
+
+
 def vllm_flash_attention_forward(
     # Transformers args
     module: torch.nn.Module,
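The helper picks its suggestion from three cases: with trust_remote_code=True the modeling code ships with the checkpoint on the Hugging Face Hub, so the tip points at a Hub discussion; without it the architecture comes from transformers itself, so the tip points at a transformers GitHub issue; and when the model resolves to a local path there is no upstream to petition, so only the docs link remains. A minimal sketch of the three outputs (the model names are illustrative; the helper is copied from the hunk above so the snippet runs standalone):

from pathlib import Path

# Copied from the hunk above so this snippet is self-contained.
def get_feature_request_tip(model: str, trust_remote_code: bool) -> str:
    hf_url = f"a discussion at https://huggingface.co/{model}/discussions/new"
    gh_url = "an issue at https://github.com/huggingface/transformers/issues/new/choose"
    url = hf_url if trust_remote_code else gh_url
    prefix = f"Please open {url} to request support for this feature. "
    if Path(model).exists():
        prefix = ""  # local checkpoint: nothing upstream to open a request against
    doc_url = "https://docs.vllm.ai/en/latest/models/supported_models.html#writing-custom-models"
    tip = f"See {doc_url} for instructions on how to add support yourself."
    return f"{prefix}{tip}"

# Remote-code model: ask on the model's Hub discussions page.
print(get_feature_request_tip("my-org/my-model", trust_remote_code=True))
# Stock transformers architecture: ask on the transformers issue tracker.
print(get_feature_request_tip("my-org/my-model", trust_remote_code=False))
# Local checkpoint directory: docs link only.
print(get_feature_request_tip(".", trust_remote_code=False))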
@@ -480,8 +496,11 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
             return

         if not self.model.supports_pp_plan:
+            tip = get_feature_request_tip(self.model_config.model,
+                                          self.model_config.trust_remote_code)
             raise ValueError(
-                f"{type(self.model)} does not support pipeline parallel yet!")
+                f"{type(self.model)} does not support pipeline parallel. {tip}"
+            )

         module_lists = []
         module_list_idx = None
@@ -535,8 +554,10 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
         models_with_tp_plan = filter(supports_tp_plan, pretrained_models)

         if not any(models_with_tp_plan) and self.tp_size > 1:
+            tip = get_feature_request_tip(self.model_config.model,
+                                          self.model_config.trust_remote_code)
             raise ValueError(
-                f"{type(self.model)} does not support tensor parallel yet!")
+                f"{type(self.model)} does not support tensor parallel. {tip}")

         def _tensor_parallel(module: nn.Module,
                              prefix: str = "",
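With both call sites updated, the formerly terse "does not support ... yet!" errors now end with an actionable next step. For a model without a tensor-parallel plan launched with tp_size > 1, the raised error reads along these lines (the class name here is illustrative):

ValueError: <class 'transformers.FooForCausalLM'> does not support tensor parallel. Please open an issue at https://github.com/huggingface/transformers/issues/new/choose to request support for this feature. See https://docs.vllm.ai/en/latest/models/supported_models.html#writing-custom-models for instructions on how to add support yourself.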