Mirror of https://git.datalinker.icu/vllm-project/vllm.git
Fix: MI100 Support By Bypassing Custom Paged Attention (#9560)
parent 5cbdccd151
commit 55137e8ee3
@@ -21,7 +21,10 @@ if TYPE_CHECKING:
 logger = init_logger(__name__)

 _PARTITION_SIZE_ROCM = 512
-_ON_NAVI = "gfx1" in torch.cuda.get_device_properties("cuda").gcnArchName
+_GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
+_ON_NAVI = "gfx1" in _GPU_ARCH
+_ON_MI250_MI300 = any(arch in _GPU_ARCH
+                      for arch in ["gfx90a", "gfx940", "gfx941", "gfx942"])


 class ROCmFlashAttentionBackend(AttentionBackend):
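
For context: MI100 reports gcnArchName "gfx908", which is neither a Navi part ("gfx1*") nor one of the MI250/MI300 architectures listed above, so both new flags come out False on that GPU. A minimal standalone sketch of the detection logic, using hypothetical gcnArchName strings in place of torch.cuda.get_device_properties("cuda").gcnArchName:

# Sketch only: the arch strings below are illustrative examples,
# not values read from real hardware.
for gpu_arch in ("gfx908:sramecc+:xnack-",   # MI100
                 "gfx90a:sramecc+:xnack-",   # MI250
                 "gfx942",                   # MI300
                 "gfx1100"):                 # Navi 3x
    on_navi = "gfx1" in gpu_arch
    on_mi250_mi300 = any(arch in gpu_arch
                         for arch in ["gfx90a", "gfx940", "gfx941", "gfx942"])
    # MI100 (gfx908) sets neither flag, so the ROCm custom paged-attention
    # kernel is bypassed on it -- the fix this commit makes.
    print(f"{gpu_arch}: navi={on_navi} mi250_mi300={on_mi250_mi300}")
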
@@ -662,7 +665,8 @@ def _use_rocm_custom_paged_attention(qtype: torch.dtype, head_size: int,
                                      block_size: int, gqa_ratio: int,
                                      max_seq_len: int) -> bool:
     # rocm custom page attention not support on navi (gfx1*)
-    return (not _ON_NAVI and (qtype == torch.half or qtype == torch.bfloat16)
+    return (_ON_MI250_MI300 and not _ON_NAVI
+            and (qtype == torch.half or qtype == torch.bfloat16)
             and (head_size == 64 or head_size == 128)
             and (block_size == 16 or block_size == 32)
             and (gqa_ratio >= 1 and gqa_ratio <= 16) and max_seq_len <= 32768)
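
A hedged sketch of the guard's effect, written as a hypothetical wrapper that takes the architecture flags as parameters (the real helper reads the module-level _ON_MI250_MI300 and _ON_NAVI flags), so the predicate can be exercised without ROCm hardware:

import torch

def use_rocm_custom_paged_attention(on_mi250_mi300: bool, on_navi: bool,
                                    qtype: torch.dtype, head_size: int,
                                    block_size: int, gqa_ratio: int,
                                    max_seq_len: int) -> bool:
    # Same predicate as the patched helper, with the arch flags passed in
    # explicitly instead of being derived from gcnArchName.
    return (on_mi250_mi300 and not on_navi
            and (qtype == torch.half or qtype == torch.bfloat16)
            and (head_size == 64 or head_size == 128)
            and (block_size == 16 or block_size == 32)
            and (gqa_ratio >= 1 and gqa_ratio <= 16) and max_seq_len <= 32768)

# MI100 (gfx908): not in the MI250/MI300 list, so the custom kernel is bypassed
# even when dtype, head size, block size, GQA ratio, and sequence length all match.
assert not use_rocm_custom_paged_attention(False, False, torch.half, 128, 16, 8, 4096)
# MI250/MI300: the custom kernel remains enabled under the same conditions.
assert use_rocm_custom_paged_attention(True, False, torch.half, 128, 16, 8, 4096)
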