[Misc] Allow passing logits_soft_cap for xformers backend (#11252)
Signed-off-by: Isotr0py <2037008807@qq.com>
parent 02222a0256
commit f9ecbb18bf

@@ -17,9 +17,7 @@ from vllm.attention.backends.utils import (
     is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set)
 from vllm.attention.ops.paged_attn import (PagedAttention,
                                            PagedAttentionMetadata)
-from vllm.logger import init_logger
-
-logger = init_logger(__name__)
+from vllm.utils import print_warning_once
 
 
 class XFormersBackend(AttentionBackend):
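
For context, print_warning_once (imported above from vllm.utils) emits a given warning only the first time it is called with that message. A minimal sketch of such a once-only helper, assuming a standard logging setup; the real vllm.utils implementation may differ in detail:

import functools
import logging

logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=None)
def print_warning_once(msg: str) -> None:
    # functools.lru_cache memoizes on the message string, so a repeated
    # call with the same message is a cache hit and logs nothing.
    logger.warning(msg)
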
@@ -386,8 +384,8 @@ class XFormersImpl(AttentionImpl[XFormersMetadata]):
             raise ValueError(
                 "XFormers does not support block-sparse attention.")
         if logits_soft_cap is not None:
-            raise ValueError(
-                "XFormers does not support attention logits soft capping.")
+            print_warning_once("XFormers does not support logits soft cap. "
+                               "Outputs may be slightly off.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
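
Why the outputs "may be slightly off": logits soft capping bounds attention scores to (-cap, cap) with a scaled tanh before the softmax, and the xformers kernels have no hook to apply it, so the cap is simply skipped. A hypothetical standalone sketch of the transform (not code from this repository):

import torch


def soft_cap(attn_logits: torch.Tensor, cap: float) -> torch.Tensor:
    # Smoothly squash logits into (-cap, cap): values far beyond the cap
    # saturate toward +/-cap, while small values pass through almost
    # unchanged since tanh(x) ~ x near zero.
    return cap * torch.tanh(attn_logits / cap)

Backends whose kernels support the cap apply this inside the attention computation; after this change the xformers backend accepts the argument but runs uncapped, warning once instead of raising.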