mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-20 05:25:01 +08:00
[Bugfix] Free cross attention block table for preempted-for-recompute sequence group. (#10013)
Signed-off-by: Kathy Yu <feiyangyu@google.com>
This commit is contained in:
parent
84c35c374a
commit
2f385183f3
@ -1579,6 +1579,7 @@ class Scheduler:
|
|||||||
seq.status = SequenceStatus.WAITING
|
seq.status = SequenceStatus.WAITING
|
||||||
self.free_seq(seq)
|
self.free_seq(seq)
|
||||||
seq.reset_state_for_recompute()
|
seq.reset_state_for_recompute()
|
||||||
|
self._free_seq_group_cross_attn_blocks(seq_group)
|
||||||
|
|
||||||
def _preempt_by_swap(
|
def _preempt_by_swap(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user