mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-18 15:56:59 +08:00
fa format
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
d46397661f
commit
e34e4411b9
@ -324,12 +324,14 @@ class FlashAttentionMetadataBuilder:
|
|||||||
scheduler_output: "SchedulerOutput") -> bool:
|
scheduler_output: "SchedulerOutput") -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def build_slice(self, req_slice: slice,
|
def build_slice(
|
||||||
token_slice: slice,
|
self,
|
||||||
max_query_len: int,
|
req_slice: slice,
|
||||||
common_prefix_len: int,
|
token_slice: slice,
|
||||||
common_attn_metadata: CommonAttentionMetadata,
|
max_query_len: int,
|
||||||
) -> FlashAttentionMetadata:
|
common_prefix_len: int,
|
||||||
|
common_attn_metadata: CommonAttentionMetadata,
|
||||||
|
) -> FlashAttentionMetadata:
|
||||||
num_reqs = req_slice.stop - req_slice.start
|
num_reqs = req_slice.stop - req_slice.start
|
||||||
num_tokens = token_slice.stop - token_slice.start
|
num_tokens = token_slice.stop - token_slice.start
|
||||||
|
|
||||||
@ -482,7 +484,7 @@ class FlashAttentionMetadataBuilder:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def use_cascade_attention(self, *args, **kwargs) -> bool:
|
def use_cascade_attention(self, *args, **kwargs) -> bool:
|
||||||
return False #use_cascade_attention(*args, **kwargs)
|
return False #use_cascade_attention(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class FlashAttentionImpl(AttentionImpl):
|
class FlashAttentionImpl(AttentionImpl):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user