mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-13 13:46:51 +08:00
[Misc] Remove unused encoder-decoder error strings (#25374)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
cbba9bd0b0
commit
ddf4e1f56f
@@ -18,11 +18,6 @@ from vllm.utils import async_tensor_h2d, make_tensor_with_pad
|
|||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
# Error string(s) for encoder/decoder
|
|
||||||
# unsupported attention scenarios
|
|
||||||
STR_NOT_IMPL_ENC_DEC_ROCM_HIP = ("ROCm/HIP is not currently supported "
|
|
||||||
"with encoder/decoder models.")
|
|
||||||
|
|
||||||
PAD_SLOT_ID = -1
|
PAD_SLOT_ID = -1
|
||||||
|
|
||||||
# Switch to numpy implementation of compute_slot_mapping
|
# Switch to numpy implementation of compute_slot_mapping
|
||||||
|
|||||||
@@ -88,64 +88,6 @@ DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
|
|||||||
POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
|
POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
|
||||||
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
|
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
|
||||||
|
|
||||||
# Exception strings for non-implemented encoder/decoder scenarios
|
|
||||||
|
|
||||||
# Reminder: Please update docs/features/compatibility_matrix.md
|
|
||||||
# If the feature combo become valid
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_SWA = \
|
|
||||||
"Sliding window attention for encoder/decoder models " + \
|
|
||||||
"is not currently supported."
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = \
|
|
||||||
"Prefix caching for encoder/decoder models " + \
|
|
||||||
"is not currently supported."
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL = \
|
|
||||||
"Chunked prefill for encoder/decoder models " + \
|
|
||||||
"is not currently supported."
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP = (
|
|
||||||
"Models with logits_soft_cap "
|
|
||||||
"require FlashInfer backend, which is "
|
|
||||||
"currently not supported for encoder/decoder "
|
|
||||||
"models.")
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_LORA = ("LoRA is not currently "
|
|
||||||
"supported with encoder/decoder "
|
|
||||||
"models.")
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_PP = ("Pipeline parallelism is not "
|
|
||||||
"currently supported with "
|
|
||||||
"encoder/decoder models.")
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_MM = ("Multimodal is not currently "
|
|
||||||
"supported with encoder/decoder "
|
|
||||||
"models.")
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_SPEC_DEC = ("Speculative decoding is not "
|
|
||||||
"currently supported with encoder/"
|
|
||||||
"decoder models.")
|
|
||||||
|
|
||||||
STR_NOT_IMPL_ENC_DEC_BACKEND = ("XFormers and Flash-Attention are the only "
|
|
||||||
"backends currently supported with encoder/"
|
|
||||||
"decoder models.")
|
|
||||||
|
|
||||||
# Efficiently import all enc/dec error strings
|
|
||||||
# rather than having to import all of the above
|
|
||||||
STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_SWA": STR_NOT_IMPL_ENC_DEC_SWA,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE": STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL":
|
|
||||||
STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP": STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_LORA": STR_NOT_IMPL_ENC_DEC_LORA,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_PP": STR_NOT_IMPL_ENC_DEC_PP,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_MM": STR_NOT_IMPL_ENC_DEC_MM,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_SPEC_DEC": STR_NOT_IMPL_ENC_DEC_SPEC_DEC,
|
|
||||||
"STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Constants related to forcing the attention backend selection
|
# Constants related to forcing the attention backend selection
|
||||||
|
|
||||||
# String name of register which may be set in order to
|
# String name of register which may be set in order to
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user