mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-20 06:35:01 +08:00
[V1][Bugfix] Clean up requests when aborted (#9629)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
836e8ef6ee
commit
3770071eb4
@ -300,6 +300,7 @@ class LLMEngine:
|
|||||||
def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
|
def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
|
||||||
self.scheduler.finish_requests(request_id,
|
self.scheduler.finish_requests(request_id,
|
||||||
RequestStatus.FINISHED_ABORTED)
|
RequestStatus.FINISHED_ABORTED)
|
||||||
|
self._free_request(request_id)
|
||||||
|
|
||||||
def get_num_unfinished_requests(self) -> int:
|
def get_num_unfinished_requests(self) -> int:
|
||||||
"""Gets the number of unfinished requests."""
|
"""Gets the number of unfinished requests."""
|
||||||
@ -361,6 +362,11 @@ class LLMEngine:
|
|||||||
num_reqs = len(detokenizer_output.req_ids)
|
num_reqs = len(detokenizer_output.req_ids)
|
||||||
for i in range(num_reqs):
|
for i in range(num_reqs):
|
||||||
req_id = detokenizer_output.req_ids[i]
|
req_id = detokenizer_output.req_ids[i]
|
||||||
|
if req_id not in self.requests:
|
||||||
|
# The request has been aborted while the detokenizer was
|
||||||
|
# processing the outputs.
|
||||||
|
continue
|
||||||
|
|
||||||
req = self.requests[req_id]
|
req = self.requests[req_id]
|
||||||
req.output_text += detokenizer_output.detokenized_texts[i]
|
req.output_text += detokenizer_output.detokenized_texts[i]
|
||||||
|
|
||||||
@ -373,9 +379,7 @@ class LLMEngine:
|
|||||||
req_outputs.append(req_output)
|
req_outputs.append(req_output)
|
||||||
|
|
||||||
if finished:
|
if finished:
|
||||||
del self.requests[req_id]
|
self._free_request(req_id)
|
||||||
del self.num_lagged_steps[req_id]
|
|
||||||
del self.request_outputs[req_id]
|
|
||||||
return req_outputs
|
return req_outputs
|
||||||
|
|
||||||
def terminate_detokenizer(self) -> None:
|
def terminate_detokenizer(self) -> None:
|
||||||
@ -440,6 +444,11 @@ class LLMEngine:
|
|||||||
req_output.finished = finished
|
req_output.finished = finished
|
||||||
return req_output
|
return req_output
|
||||||
|
|
||||||
|
def _free_request(self, request_id: str) -> None:
|
||||||
|
self.requests.pop(request_id, None)
|
||||||
|
self.num_lagged_steps.pop(request_id, None)
|
||||||
|
self.request_outputs.pop(request_id, None)
|
||||||
|
|
||||||
def check_health(self) -> None:
|
def check_health(self) -> None:
|
||||||
if self.tokenizer:
|
if self.tokenizer:
|
||||||
self.tokenizer.check_health()
|
self.tokenizer.check_health()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user