[BugFix] Don’t compute reorder threshold when there are no attention groups (#27861)

This commit is contained in:
Huamin Li 2025-10-31 04:36:18 -07:00 committed by GitHub
parent 3933f18a5e
commit 933cdea440
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -4149,6 +4149,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
group.get_metadata_builder().reorder_batch_threshold
for group in self._attn_group_iterator()
]
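# If there are no attention groups (e.g. an attention-free model), the list
# is empty and reduce() without an initial value would raise TypeError, so
# leave reordering disabled. This guard only covers the empty-list case;
# non-empty lists are still folded with min_none_high below.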
if len(reorder_batch_thresholds) == 0:
self.reorder_batch_threshold = None
return
self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
def _find_compatible_block_sizes(