mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-31 07:56:31 +08:00
[Misc] Remove unused encoder-decoder error strings (#25374)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
cbba9bd0b0
commit
ddf4e1f56f
@ -18,11 +18,6 @@ from vllm.utils import async_tensor_h2d, make_tensor_with_pad
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# Error string(s) for encoder/decoder
|
||||
# unsupported attention scenarios
|
||||
STR_NOT_IMPL_ENC_DEC_ROCM_HIP = ("ROCm/HIP is not currently supported "
|
||||
"with encoder/decoder models.")
|
||||
|
||||
PAD_SLOT_ID = -1
|
||||
|
||||
# Switch to numpy implementation of compute_slot_mapping
|
||||
|
||||
@ -88,64 +88,6 @@ DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
|
||||
POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
|
||||
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
|
||||
|
||||
# Exception strings for non-implemented encoder/decoder scenarios
|
||||
|
||||
# Reminder: Please update docs/features/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_SWA = \
|
||||
"Sliding window attention for encoder/decoder models " + \
|
||||
"is not currently supported."
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = \
|
||||
"Prefix caching for encoder/decoder models " + \
|
||||
"is not currently supported."
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL = \
|
||||
"Chunked prefill for encoder/decoder models " + \
|
||||
"is not currently supported."
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP = (
|
||||
"Models with logits_soft_cap "
|
||||
"require FlashInfer backend, which is "
|
||||
"currently not supported for encoder/decoder "
|
||||
"models.")
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_LORA = ("LoRA is not currently "
|
||||
"supported with encoder/decoder "
|
||||
"models.")
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_PP = ("Pipeline parallelism is not "
|
||||
"currently supported with "
|
||||
"encoder/decoder models.")
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_MM = ("Multimodal is not currently "
|
||||
"supported with encoder/decoder "
|
||||
"models.")
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_SPEC_DEC = ("Speculative decoding is not "
|
||||
"currently supported with encoder/"
|
||||
"decoder models.")
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_BACKEND = ("XFormers and Flash-Attention are the only "
|
||||
"backends currently supported with encoder/"
|
||||
"decoder models.")
|
||||
|
||||
# Efficiently import all enc/dec error strings
|
||||
# rather than having to import all of the above
|
||||
STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
|
||||
"STR_NOT_IMPL_ENC_DEC_SWA": STR_NOT_IMPL_ENC_DEC_SWA,
|
||||
"STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE": STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
|
||||
"STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL":
|
||||
STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL,
|
||||
"STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP": STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP,
|
||||
"STR_NOT_IMPL_ENC_DEC_LORA": STR_NOT_IMPL_ENC_DEC_LORA,
|
||||
"STR_NOT_IMPL_ENC_DEC_PP": STR_NOT_IMPL_ENC_DEC_PP,
|
||||
"STR_NOT_IMPL_ENC_DEC_MM": STR_NOT_IMPL_ENC_DEC_MM,
|
||||
"STR_NOT_IMPL_ENC_DEC_SPEC_DEC": STR_NOT_IMPL_ENC_DEC_SPEC_DEC,
|
||||
"STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND,
|
||||
}
|
||||
|
||||
# Constants related to forcing the attention backend selection
|
||||
|
||||
# String name of register which may be set in order to
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user