diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py
index f7ced6134da52..5ad0482330ecd 100644
--- a/vllm/model_executor/models/transformers.py
+++ b/vllm/model_executor/models/transformers.py
@@ -17,6 +17,7 @@
 """Wrapper around `transformers` models"""
 from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
+from pathlib import Path
 from typing import Literal, Optional, Union
 
 import regex as re
@@ -60,6 +61,21 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
 logger = init_logger(__name__)
 
 
+def get_feature_request_tip(
+    model: str,
+    trust_remote_code: bool,
+) -> str:
+    hf_url = f"a discussion at https://huggingface.co/{model}/discussions/new"
+    gh_url = "an issue at https://github.com/huggingface/transformers/issues/new/choose"
+    url = hf_url if trust_remote_code else gh_url
+    prefix = f"Please open {url} to request support for this feature. "
+    if Path(model).exists():
+        prefix = ""
+    doc_url = "https://docs.vllm.ai/en/latest/models/supported_models.html#writing-custom-models"
+    tip = f"See {doc_url} for instructions on how to add support yourself."
+    return f"{prefix}{tip}"
+
+
 def vllm_flash_attention_forward(
     # Transformers args
     module: torch.nn.Module,
@@ -480,8 +496,11 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
             return
 
         if not self.model.supports_pp_plan:
+            tip = get_feature_request_tip(self.model_config.model,
+                                          self.model_config.trust_remote_code)
             raise ValueError(
-                f"{type(self.model)} does not support pipeline parallel yet!")
+                f"{type(self.model)} does not support pipeline parallel. {tip}"
+            )
 
         module_lists = []
         module_list_idx = None
@@ -535,8 +554,10 @@ class TransformersBase(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
         models_with_tp_plan = filter(supports_tp_plan, pretrained_models)
 
         if not any(models_with_tp_plan) and self.tp_size > 1:
+            tip = get_feature_request_tip(self.model_config.model,
+                                          self.model_config.trust_remote_code)
             raise ValueError(
-                f"{type(self.model)} does not support tensor parallel yet!")
+                f"{type(self.model)} does not support tensor parallel. {tip}")
 
         def _tensor_parallel(module: nn.Module,
                              prefix: str = "",
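
For reference, a minimal sketch of the three branches the new helper covers, assuming it is imported from the module this diff edits. The model names and the local path below are illustrative placeholders, not real checkpoints:

    # Sketch of get_feature_request_tip's behavior (placeholder model names).
    from vllm.model_executor.models.transformers import get_feature_request_tip

    # Hub model without trust_remote_code: the architecture lives in
    # Transformers itself, so the tip points at the Transformers issue tracker.
    print(get_feature_request_tip("some-org/hub-model", trust_remote_code=False))

    # Hub model with trust_remote_code: the modeling code lives in the model
    # repo, so the tip points at that repo's discussions page instead.
    print(get_feature_request_tip("some-org/custom-model", trust_remote_code=True))

    # Local checkpoint (assuming the path exists on disk): there is no upstream
    # to file against, so the "Please open ..." prefix is dropped and only the
    # link to the custom-model docs remains.
    print(get_feature_request_tip("/path/to/local/checkpoint", trust_remote_code=False))

The routing reflects who could actually add support: Transformers maintainers for in-library architectures, the model repo's authors for remote code, and the user themselves for purely local checkpoints.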