mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 11:06:08 +08:00
align llm_engine and async_engine. (#1081)
This commit is contained in:
parent
fbe66e1d0b
commit
95592fa00a
@ -183,10 +183,9 @@ class _AsyncLLMEngine(LLMEngine):
|
|||||||
and updates the scheduler with the model outputs. Finally, it decodes
|
and updates the scheduler with the model outputs. Finally, it decodes
|
||||||
the sequences and returns the newly generated results.
|
the sequences and returns the newly generated results.
|
||||||
"""
|
"""
|
||||||
(seq_group_metadata_list, scheduler_outputs,
|
seq_group_metadata_list, scheduler_outputs, ignored = self._schedule()
|
||||||
early_return) = self._schedule()
|
if scheduler_outputs.is_empty():
|
||||||
if early_return is not None:
|
return ignored
|
||||||
return early_return
|
|
||||||
|
|
||||||
# Execute the model.
|
# Execute the model.
|
||||||
output = await self._run_workers_async(
|
output = await self._run_workers_async(
|
||||||
@ -197,7 +196,7 @@ class _AsyncLLMEngine(LLMEngine):
|
|||||||
blocks_to_copy=scheduler_outputs.blocks_to_copy,
|
blocks_to_copy=scheduler_outputs.blocks_to_copy,
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._process_model_outputs(output, scheduler_outputs)
|
return self._process_model_outputs(output, scheduler_outputs) + ignored
|
||||||
|
|
||||||
async def _run_workers_async(
|
async def _run_workers_async(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user