mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 02:35:26 +08:00
[Perf] Optimize _update_states for GPU model runner (#16910)
Signed-off-by: snowcharm <snowcharmqq@gmail.com>
This commit is contained in:
parent
3097ce3a32
commit
a114bf20a3
@ -454,7 +454,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
||||
|
||||
# Add the new or resumed requests to the persistent batch.
|
||||
# The smaller empty indices are filled first.
|
||||
removed_req_indices = sorted(removed_req_indices, reverse=True)
|
||||
removed_req_indices.sort(reverse=True)
|
||||
for req_id in req_ids_to_add:
|
||||
req_state = self.requests[req_id]
|
||||
if removed_req_indices:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user