From fb4983e112a81f4df25b92ab98c9c84a5babfec9 Mon Sep 17 00:00:00 2001
From: Ning Xie <andy.xning@gmail.com>
Date: Sat, 30 Aug 2025 21:41:45 +0800
Subject: [PATCH] [Misc] add reorder_batch to AttentionMetadataBuilder (#23798)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
---
 vllm/v1/attention/backends/utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index ad53b2e80bc73..011a90ece01bd 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -212,6 +212,23 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
         """
         raise NotImplementedError
 
+    def reorder_batch(self, input_batch: "InputBatch",
+                      scheduler_output: "SchedulerOutput") -> bool:
+        """
+        Update the order of requests in the batch based on the attention
+        backend's needs. For example, some backends (namely MLA) may want to
+        separate requests based on whether the attention computation will be
+        compute-bound or memory-bound. This base method is not implemented.
+
+        Args:
+            input_batch: The input batch whose requests may be reordered.
+            scheduler_output: The scheduler output.
+
+        Returns:
+            True if the batch was modified, False otherwise.
+        """
+        raise NotImplementedError
+
     def build_for_cudagraph_capture(
             self, common_attn_metadata: CommonAttentionMetadata) -> M:
         """