From e34e4411b97a8481a2fc797930d84f16b073e142 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Mon, 2 Jun 2025 19:17:50 +0000 Subject: [PATCH] fa format Signed-off-by: Sage Moore --- vllm/v1/attention/backends/flash_attn.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 9d83e155864b9..fde44a24998a2 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -324,12 +324,14 @@ class FlashAttentionMetadataBuilder: scheduler_output: "SchedulerOutput") -> bool: return False - def build_slice(self, req_slice: slice, - token_slice: slice, - max_query_len: int, - common_prefix_len: int, - common_attn_metadata: CommonAttentionMetadata, - ) -> FlashAttentionMetadata: + def build_slice( + self, + req_slice: slice, + token_slice: slice, + max_query_len: int, + common_prefix_len: int, + common_attn_metadata: CommonAttentionMetadata, + ) -> FlashAttentionMetadata: num_reqs = req_slice.stop - req_slice.start num_tokens = token_slice.stop - token_slice.start @@ -482,7 +484,7 @@ class FlashAttentionMetadataBuilder: ) def use_cascade_attention(self, *args, **kwargs) -> bool: - return False #use_cascade_attention(*args, **kwargs) + return False #use_cascade_attention(*args, **kwargs) class FlashAttentionImpl(AttentionImpl):