mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 17:05:37 +08:00
[Bugfix] Fallback ViT attn backend to SDPA for blackwell (#25851)
Signed-off-by: Roger Wang <hey@rogerw.io> Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
parent
8ce5d3198d
commit
ab5b6459df
@ -66,7 +66,7 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
|
||||
PromptReplacement, PromptUpdate,
|
||||
PromptUpdateDetails)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.platforms import _Backend, current_platform
|
||||
from vllm.platforms import _Backend
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.config import uses_mrope
|
||||
from vllm.utils import is_list_of
|
||||
@ -335,14 +335,6 @@ class Qwen3_VisionTransformer(nn.Module):
|
||||
}:
|
||||
raise RuntimeError(
|
||||
f"Qwen3-VL does not support {self.attn_backend} backend now.")
|
||||
if current_platform.is_device_capability(
|
||||
100) and self.attn_backend != _Backend.TORCH_SDPA:
|
||||
# TODO(Roger/Wentao): remove this after FA
|
||||
# or XFORMERS's issue fixed on Blackwell
|
||||
logger.info_once("Qwen3-VL vision attention does not support "
|
||||
f"{self.attn_backend} backend on Blackwell now. "
|
||||
"Vision attention backend is set to TORCH_SDPA.")
|
||||
self.attn_backend = _Backend.TORCH_SDPA
|
||||
|
||||
self.blocks = nn.ModuleList([
|
||||
Qwen3_VisionBlock(
|
||||
|
||||
@ -205,6 +205,12 @@ class CudaPlatformBase(Platform):
|
||||
@classmethod
|
||||
def get_vit_attn_backend(cls, head_size: int,
|
||||
dtype: torch.dtype) -> _Backend:
|
||||
|
||||
# For Blackwell GPUs, force TORCH_SDPA for now.
|
||||
# See https://github.com/facebookresearch/xformers/issues/1317#issuecomment-3199392579 # noqa: E501
|
||||
if cls.has_device_capability(100):
|
||||
return _Backend.TORCH_SDPA
|
||||
|
||||
if dtype not in (torch.float16, torch.bfloat16):
|
||||
return _Backend.XFORMERS
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user