[Bug] Fix is_flashmla_supported Check Error (#24774)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

parent d96e11167d
commit b42566f440
@@ -17,7 +17,6 @@ from vllm.attention.backends.mla.common import (MLACommonBackend,
 from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
                                          get_mla_metadata,
                                          is_flashmla_supported)
-from vllm.platforms.cuda import CudaPlatform
 
 
 class FlashMLABackend(MLACommonBackend):
@@ -179,18 +178,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
                          logits_soft_cap, attn_type,
                          kv_sharing_target_layer_name, **mla_args)
 
-        assert is_flashmla_supported(), \
-            "FlashMLA is not supported on this device"
-
-        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
-        # context:
-        # https://github.com/deepseek-ai/FlashMLA/issues/83
-        # https://github.com/vllm-project/vllm/issues/24513
-        if CudaPlatform.has_device_capability(100):
-            raise NotImplementedError(
-                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
-                "Please use CUTLASS_MLA or TRITON_MLA instead. "
-                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+        is_supported, reason = is_flashmla_supported()
+        assert is_supported, reason
 
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
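The hunk above is the substance of the fix: `is_flashmla_supported()` evidently returns a `(supported, reason)` pair (the replacement code unpacks two values), so asserting on the call directly could never fail, because any non-empty tuple is truthy. Below is a minimal, self-contained sketch of the failure mode and the corrected pattern, with a hypothetical `probe()` standing in for the real helper; it is an illustration, not vLLM code:

    from typing import Optional, Tuple


    def probe() -> Tuple[bool, Optional[str]]:
        # Hypothetical stand-in for is_flashmla_supported(); pretend the
        # current device is unsupported so both patterns can be compared.
        return False, "FlashMLA is not supported on this device"


    # Old pattern: a 2-tuple is always truthy, so this assert can never fire,
    # even though probe() reported False.
    assert probe(), "FlashMLA is not supported on this device"  # passes silently

    # New pattern: unpack first, assert on the boolean, surface the reason.
    is_supported, reason = probe()
    try:
        assert is_supported, reason
    except AssertionError as exc:
        print(f"correctly rejected: {exc}")

The Blackwell-specific guard is dropped in the same hunk, leaving the probe's verdict (and its reason string) as the single gate for this backend.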
@@ -12,7 +12,6 @@ from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
                                          is_flashmla_supported)
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.platforms.cuda import CudaPlatform
 from vllm.v1.attention.backends.mla.common import (MLACommonBackend,
                                                    MLACommonDecodeMetadata,
                                                    MLACommonImpl,
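In this second file (evidently the v1 FlashMLA backend, given the `vllm.v1.attention.backends.mla.common` import), the `CudaPlatform` import is dropped for the same reason as in the first file: its only visible use was the `has_device_capability(100)` Blackwell guard removed in the next hunk. For reference, a small sketch of how that capability integer decodes, assuming vLLM's major * 10 + minor convention; `decode_capability` is an illustrative helper, not part of vLLM:

    def decode_capability(cap: int) -> str:
        # Illustrative helper (not part of vLLM): split an integer-encoded
        # CUDA compute capability, assuming the major * 10 + minor convention
        # behind checks like has_device_capability(100).
        major, minor = divmod(cap, 10)
        return f"SM {major}.{minor}"


    assert decode_capability(100) == "SM 10.0"  # Blackwell, per the removed comment
    assert decode_capability(90) == "SM 9.0"    # Hopper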
@@ -156,18 +155,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
                          logits_soft_cap, attn_type,
                          kv_sharing_target_layer_name, **mla_args)
 
-        assert is_flashmla_supported(), \
-            "FlashMLA is not supported on this device"
-
-        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
-        # context:
-        # https://github.com/deepseek-ai/FlashMLA/issues/83
-        # https://github.com/vllm-project/vllm/issues/24513
-        if CudaPlatform.has_device_capability(100):
-            raise NotImplementedError(
-                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
-                "Please use CUTLASS_MLA or TRITON_MLA instead. "
-                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+        is_supported, reason = is_flashmla_supported()
+        assert is_supported, reason
 
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
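Since the hard-coded Blackwell rejection is gone from both backends, anything that needs a graceful fallback has to act on the probe's `(supported, reason)` result itself. A hedged usage sketch under those assumptions: `pick_mla_backend` is illustrative and not vLLM's actual dispatch logic; the `CUTLASS_MLA` fallback and the `VLLM_ATTENTION_BACKEND` variable come from the error message removed above, while the `FLASHMLA` value is an assumption:

    import os

    # Assumes a vLLM install; the import path matches the one in the diff above.
    from vllm.attention.ops.flashmla import is_flashmla_supported


    def pick_mla_backend() -> str:
        # Illustrative selection, not vLLM's real dispatch logic.
        is_supported, reason = is_flashmla_supported()
        if is_supported:
            return "FLASHMLA"  # assumed backend name; not confirmed by the diff
        # Fall back to one of the alternatives named in the removed error message.
        print(f"FlashMLA unavailable ({reason}); falling back to CUTLASS_MLA")
        return "CUTLASS_MLA"


    if __name__ == "__main__":
        os.environ.setdefault("VLLM_ATTENTION_BACKEND", pick_mla_backend())
        print("VLLM_ATTENTION_BACKEND =", os.environ["VLLM_ATTENTION_BACKEND"])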