mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 04:44:59 +08:00
[V1][PP] Do not block engine core when no requests to schedule (#14585)
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
parent
c91b64f749
commit
4290b704ff
@@ -205,23 +205,18 @@ class EngineCore:
|
|||||||
self.batch_queue.put_nowait(
|
self.batch_queue.put_nowait(
|
||||||
(future, scheduler_output)) # type: ignore
|
(future, scheduler_output)) # type: ignore
|
||||||
|
|
||||||
# If all requests are scheduled or the job queue is full,
|
scheduled_batch = (scheduler_output is not None
|
||||||
|
and scheduler_output.total_num_scheduled_tokens > 0)
|
||||||
|
|
||||||
|
# If no more requests can be scheduled and the job queue is not empty,
|
||||||
# block until the first batch in the job queue is finished.
|
# block until the first batch in the job queue is finished.
|
||||||
if (scheduler_output is None
|
if not scheduled_batch and not self.batch_queue.empty():
|
||||||
or scheduler_output.total_num_scheduled_tokens == 0):
|
future, scheduler_output = self.batch_queue.get_nowait()
|
||||||
try:
|
|
||||||
future, scheduler_output = self.batch_queue.get(
|
|
||||||
timeout=POLLING_TIMEOUT_S)
|
|
||||||
# Blocking until the first result is available.
|
# Blocking until the first result is available.
|
||||||
model_output = future.result()
|
model_output = future.result()
|
||||||
self.batch_queue.task_done()
|
self.batch_queue.task_done()
|
||||||
engine_core_outputs = self.scheduler.update_from_output(
|
engine_core_outputs = self.scheduler.update_from_output(
|
||||||
scheduler_output, model_output)
|
scheduler_output, model_output)
|
||||||
except queue.Empty:
|
|
||||||
# If the queue is empty (timeout at .get), return
|
|
||||||
# an empty EngineCoreOutputs for logging.
|
|
||||||
engine_core_outputs = EngineCoreOutputs(
|
|
||||||
outputs=[], scheduler_stats=self.scheduler.make_stats())
|
|
||||||
|
|
||||||
return engine_core_outputs
|
return engine_core_outputs
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user