From 933cdea44061cb19a99421d2d2e51535e7f21216 Mon Sep 17 00:00:00 2001
From: Huamin Li <3ericli@gmail.com>
Date: Fri, 31 Oct 2025 04:36:18 -0700
Subject: [PATCH] =?UTF-8?q?[BugFix]=20Don=E2=80=99t=20compute=20reorder=20?=
 =?UTF-8?q?threshold=20when=20there=20are=20no=20attention=20groups=20(#27?=
 =?UTF-8?q?861)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 vllm/v1/worker/gpu_model_runner.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 04814b5991ebc..747a7b377e401 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4149,6 +4149,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             group.get_metadata_builder().reorder_batch_threshold
             for group in self._attn_group_iterator()
         ]
+        # With no attention groups (attention-free model) the list is empty and
+        # reduce() with no initial value would raise TypeError; disable reordering.
+        if len(reorder_batch_thresholds) == 0:
+            self.reorder_batch_threshold = None
+            return
         self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
 
     def _find_compatible_block_sizes(