mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 20:37:05 +08:00
fa format
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
d46397661f
commit
e34e4411b9
@ -324,12 +324,14 @@ class FlashAttentionMetadataBuilder:
|
||||
scheduler_output: "SchedulerOutput") -> bool:
|
||||
return False
|
||||
|
||||
def build_slice(self, req_slice: slice,
|
||||
token_slice: slice,
|
||||
max_query_len: int,
|
||||
common_prefix_len: int,
|
||||
common_attn_metadata: CommonAttentionMetadata,
|
||||
) -> FlashAttentionMetadata:
|
||||
def build_slice(
|
||||
self,
|
||||
req_slice: slice,
|
||||
token_slice: slice,
|
||||
max_query_len: int,
|
||||
common_prefix_len: int,
|
||||
common_attn_metadata: CommonAttentionMetadata,
|
||||
) -> FlashAttentionMetadata:
|
||||
num_reqs = req_slice.stop - req_slice.start
|
||||
num_tokens = token_slice.stop - token_slice.start
|
||||
|
||||
@ -482,7 +484,7 @@ class FlashAttentionMetadataBuilder:
|
||||
)
|
||||
|
||||
def use_cascade_attention(self, *args, **kwargs) -> bool:
|
||||
return False #use_cascade_attention(*args, **kwargs)
|
||||
return False #use_cascade_attention(*args, **kwargs)
|
||||
|
||||
|
||||
class FlashAttentionImpl(AttentionImpl):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user