mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 01:35:01 +08:00
[Bugfix] Fix Mistral3 support on SM100/SM120 (#20998)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
e9534c7202
commit
f46098335b
@ -43,6 +43,7 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
|
|||||||
PromptReplacement, PromptUpdate,
|
PromptReplacement, PromptUpdate,
|
||||||
PromptUpdateDetails)
|
PromptUpdateDetails)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||||
|
from vllm.platforms import current_platform
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.tokenizer import (MistralTokenizer,
|
from vllm.transformers_utils.tokenizer import (MistralTokenizer,
|
||||||
cached_tokenizer_from_config)
|
cached_tokenizer_from_config)
|
||||||
@ -54,7 +55,12 @@ from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from xformers import ops as xops
|
from xformers import ops as xops
|
||||||
USE_XFORMERS_OPS = True
|
if (current_platform.is_cuda()
|
||||||
|
and current_platform.has_device_capability(100)):
|
||||||
|
# Xformers FA is not compatible with B200
|
||||||
|
USE_XFORMERS_OPS = False
|
||||||
|
else:
|
||||||
|
USE_XFORMERS_OPS = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
USE_XFORMERS_OPS = False
|
USE_XFORMERS_OPS = False
|
||||||
|
|
||||||
@ -1082,7 +1088,6 @@ class PixtralHFAttention(nn.Module):
|
|||||||
# Transpose q and k back for attention
|
# Transpose q and k back for attention
|
||||||
q = q.transpose(1, 2).contiguous()
|
q = q.transpose(1, 2).contiguous()
|
||||||
k = k.transpose(1, 2).contiguous()
|
k = k.transpose(1, 2).contiguous()
|
||||||
|
|
||||||
out = xops.memory_efficient_attention(q,
|
out = xops.memory_efficient_attention(q,
|
||||||
k,
|
k,
|
||||||
v,
|
v,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user