mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 17:55:50 +08:00
[BugFix] Don’t compute reorder threshold when there are no attention groups (#27861)
This commit is contained in:
parent
3933f18a5e
commit
933cdea440
@@ -4149,6 +4149,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
group.get_metadata_builder().reorder_batch_threshold
|
||||
for group in self._attn_group_iterator()
|
||||
]
|
||||
# If there are no attention groups (attention-free model) or no backend
|
||||
# reports a threshold, leave reordering disabled.
|
||||
if len(reorder_batch_thresholds) == 0:
|
||||
self.reorder_batch_threshold = None
|
||||
return
|
||||
self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
|
||||
|
||||
def _find_compatible_block_sizes(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user