mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:15:01 +08:00
[V1][Bugfix] Clean up requests when aborted (#9629)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
836e8ef6ee
commit
3770071eb4
@ -300,6 +300,7 @@ class LLMEngine:
|
||||
def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
    """Abort one or more requests and release the engine's state for them.

    The scheduler is asked to finish the request(s) with
    ``RequestStatus.FINISHED_ABORTED``; afterwards the engine's own
    per-request bookkeeping is dropped via ``_free_request``.

    Args:
        request_id: A single request ID, or an iterable of request IDs.
    """
    if not isinstance(request_id, str):
        # The IDs are consumed twice (scheduler, then local cleanup), so
        # materialize them once; a one-shot iterator would otherwise be
        # exhausted by the first consumer.
        request_id = list(request_id)
    self.scheduler.finish_requests(request_id,
                                   RequestStatus.FINISHED_ABORTED)

    # ``_free_request`` works on exactly one ID (it is used as a dict key),
    # so a collection of IDs has to be freed element by element.
    request_ids = [request_id] if isinstance(request_id, str) else request_id
    for req_id in request_ids:
        self._free_request(req_id)
|
||||
|
||||
def get_num_unfinished_requests(self) -> int:
|
||||
"""Gets the number of unfinished requests."""
|
||||
@ -361,6 +362,11 @@ class LLMEngine:
|
||||
num_reqs = len(detokenizer_output.req_ids)
|
||||
for i in range(num_reqs):
|
||||
req_id = detokenizer_output.req_ids[i]
|
||||
if req_id not in self.requests:
|
||||
# The request has been aborted while the detokenizer was
|
||||
# processing the outputs.
|
||||
continue
|
||||
|
||||
req = self.requests[req_id]
|
||||
req.output_text += detokenizer_output.detokenized_texts[i]
|
||||
|
||||
@ -373,9 +379,7 @@ class LLMEngine:
|
||||
req_outputs.append(req_output)
|
||||
|
||||
if finished:
|
||||
del self.requests[req_id]
|
||||
del self.num_lagged_steps[req_id]
|
||||
del self.request_outputs[req_id]
|
||||
self._free_request(req_id)
|
||||
return req_outputs
|
||||
|
||||
def terminate_detokenizer(self) -> None:
|
||||
@ -440,6 +444,11 @@ class LLMEngine:
|
||||
req_output.finished = finished
|
||||
return req_output
|
||||
|
||||
def _free_request(self, request_id: str) -> None:
|
||||
self.requests.pop(request_id, None)
|
||||
self.num_lagged_steps.pop(request_id, None)
|
||||
self.request_outputs.pop(request_id, None)
|
||||
|
||||
def check_health(self) -> None:
|
||||
if self.tokenizer:
|
||||
self.tokenizer.check_health()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user