[Misc] Allow passing logits_soft_cap for xformers backend (#11252)

Signed-off-by: Isotr0py <2037008807@qq.com>
Author: Isotr0py, 2024-12-17 16:37:04 +08:00 (committed via GitHub)
parent 02222a0256
commit f9ecbb18bf

vllm/attention/backends/xformers.py

@@ -17,9 +17,7 @@ from vllm.attention.backends.utils import (
     is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set)
 from vllm.attention.ops.paged_attn import (PagedAttention,
                                            PagedAttentionMetadata)
-from vllm.logger import init_logger
-
-logger = init_logger(__name__)
+from vllm.utils import print_warning_once
 
 
 class XFormersBackend(AttentionBackend):
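
Note on the import swap: print_warning_once emits a given warning message only once per process, which matters here because the constructor below runs once per attention layer. A minimal sketch of such a helper (hypothetical; vLLM's actual implementation in vllm.utils may differ):

    import functools
    import logging

    logger = logging.getLogger(__name__)

    @functools.lru_cache(maxsize=None)
    def print_warning_once(msg: str) -> None:
        # lru_cache keys on the message string, so a repeated call with the
        # same msg is a cache hit and the warning is logged only once.
        logger.warning(msg)
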
@@ -386,8 +384,8 @@ class XFormersImpl(AttentionImpl[XFormersMetadata]):
             raise ValueError(
                 "XFormers does not support block-sparse attention.")
         if logits_soft_cap is not None:
-            raise ValueError(
-                "XFormers does not support attention logits soft capping.")
+            print_warning_once("XFormers does not support logits soft cap. "
+                               "Outputs may be slightly off.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
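
Background on the behavioral change: logits soft capping (used by models such as Gemma 2) bounds raw attention scores with a tanh before the softmax. The xformers kernels do not implement this step, so with this commit such models now run on the backend with a one-time warning instead of failing at construction, at the cost of slightly different outputs. A minimal sketch of the capping that capable backends apply (hypothetical helper, not taken from this diff):

    import torch

    def soft_cap_scores(scores: torch.Tensor,
                        logits_soft_cap: float) -> torch.Tensor:
        # Bounds raw attention scores to (-cap, cap) via cap * tanh(scores / cap);
        # this is the step the xformers backend skips.
        return logits_soft_cap * torch.tanh(scores / logits_soft_cap)

    # Example: large scores are squashed toward +/- cap (Gemma 2 uses cap = 50.0).
    scores = torch.tensor([-100.0, 0.0, 100.0])
    print(soft_cap_scores(scores, logits_soft_cap=50.0))
    # tensor([-48.2014,   0.0000,  48.2014])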