From fb4983e112a81f4df25b92ab98c9c84a5babfec9 Mon Sep 17 00:00:00 2001
From: Ning Xie
Date: Sat, 30 Aug 2025 21:41:45 +0800
Subject: [PATCH] [Misc] add reorder_batch AttentionMetadataBuilder (#23798)

Signed-off-by: Andy Xie
---
 vllm/v1/attention/backends/utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index ad53b2e80bc73..011a90ece01bd 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -212,6 +212,23 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
         """
         raise NotImplementedError
 
+    def reorder_batch(self, input_batch: "InputBatch",
+                      scheduler_output: "SchedulerOutput") -> bool:
+        """
+        Update the order of requests in the batch based on the attention
+        backend's needs. For example, some attention backends (namely MLA) may
+        want to separate requests based on if the attention computation will be
+        compute-bound or memory-bound.
+
+        Args:
+            input_batch: input batch
+            scheduler_output: scheduler output.
+
+        Returns:
+            True if the batch was modified, False otherwise.
+        """
+        raise NotImplementedError
+
     def build_for_cudagraph_capture(
             self, common_attn_metadata: CommonAttentionMetadata) -> M:
         """