From 145c00a4d32b7a681f7fb936c9575812c7aa7880 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Mon, 3 Nov 2025 15:17:10 -0500
Subject: [PATCH] [Bugfix] change FlashMLA reorder_batch_threshold (#27777)

Signed-off-by: Matthew Bonanni
---
 vllm/v1/attention/backends/mla/flashmla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py
index 1f98204031ed5..bc17307532093 100644
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -71,7 +71,7 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
 class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
     cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
     query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
-    reorder_batch_threshold: int = 512  # process small prefills with decode pathway
+    reorder_batch_threshold: int = 128  # process small prefills with decode pathway
     # ^ TODO(matt): tune this
 
     def __init__(
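
For context, `reorder_batch_threshold` controls which requests the MLA backend routes
through its decode pathway: requests whose query length is at or below the threshold
are grouped with decodes, while longer prefills take the prefill pathway (the inline
comment "process small prefills with decode pathway" describes exactly this). The
sketch below is a minimal illustration of that split under those assumptions, not
vLLM's actual implementation; the helper `split_by_query_len` and the standalone
constant are hypothetical names introduced here for illustration.

    # Minimal sketch (hypothetical, not vLLM's code): how a reorder-batch
    # threshold partitions a mixed batch into decode-pathway and
    # prefill-pathway requests.

    REORDER_BATCH_THRESHOLD = 128  # new FlashMLA default (was 512)


    def split_by_query_len(query_lens: list[int]) -> tuple[list[int], list[int]]:
        """Return (decode_indices, prefill_indices) for one batch."""
        decode_idx: list[int] = []
        prefill_idx: list[int] = []
        for i, qlen in enumerate(query_lens):
            if qlen <= REORDER_BATCH_THRESHOLD:
                # Small queries (true decodes and short prefills) share the
                # decode kernel path.
                decode_idx.append(i)
            else:
                prefill_idx.append(i)
        return decode_idx, prefill_idx


    if __name__ == "__main__":
        # Two decodes, one 300-token prefill, one 4096-token prefill.
        print(split_by_query_len([1, 1, 300, 4096]))
        # -> ([0, 1], [2, 3])
        # With the old threshold of 512 the 300-token prefill would have
        # taken the decode pathway; at 128 it moves to the prefill pathway.

The practical effect of lowering the default from 512 to 128 is that only genuinely
small prefills ride the decode kernel, keeping it within its efficient regime; the
`TODO(matt): tune this` in the patch signals that 128 is an interim value.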