mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 15:55:43 +08:00
Merge 4ab91bcc015efb6ad9edf8d80133d11208b3dddf into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
e6bca90aac
@ -294,8 +294,13 @@ class RocmPlatform(Platform):
|
|||||||
attn_selector_config.attn_type is not None
|
attn_selector_config.attn_type is not None
|
||||||
and attn_selector_config.attn_type == AttentionType.ENCODER_ONLY
|
and attn_selector_config.attn_type == AttentionType.ENCODER_ONLY
|
||||||
):
|
):
|
||||||
logger.info("Using FlexAttention backend.")
|
# Use generic FlashAttention for encoder-only models
|
||||||
return AttentionBackendEnum.FLEX_ATTENTION.get_path()
|
# ROCM_AITER_FA doesn't support encoder-only (causal-only limitation)
|
||||||
|
# Generic FLASH_ATTN supports all attention types including ENCODER_ONLY
|
||||||
|
logger.info(
|
||||||
|
"Using FlashAttention backend for encoder-only model on ROCm."
|
||||||
|
)
|
||||||
|
return AttentionBackendEnum.FLASH_ATTN.get_path()
|
||||||
|
|
||||||
# Default: Triton Unified Attention
|
# Default: Triton Unified Attention
|
||||||
logger.info("Using Triton Attention backend.")
|
logger.info("Using Triton Attention backend.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user