mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-22 07:11:18 +08:00
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
0e74d797ce
commit
6dd94dbe94
@ -455,7 +455,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
|||||||
self.enable_prompt_adapter = (self.runner.prompt_adapter_config
|
self.enable_prompt_adapter = (self.runner.prompt_adapter_config
|
||||||
is not None)
|
is not None)
|
||||||
self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
|
self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
|
||||||
self.decode_only = True
|
|
||||||
|
|
||||||
# Attention metadata inputs.
|
# Attention metadata inputs.
|
||||||
if self.attn_backend is not None:
|
if self.attn_backend is not None:
|
||||||
@ -477,6 +476,10 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
|||||||
finished_requests_ids: Optional[List[str]] = None) -> None:
|
finished_requests_ids: Optional[List[str]] = None) -> None:
|
||||||
self.finished_requests_ids = finished_requests_ids
|
self.finished_requests_ids = finished_requests_ids
|
||||||
|
|
||||||
|
# if the current batch is decode-only.
|
||||||
|
# will be set to False if there is any non-decode request.
|
||||||
|
self.decode_only = True
|
||||||
|
|
||||||
# Intermediate data (data in CPU before going to GPU) for
|
# Intermediate data (data in CPU before going to GPU) for
|
||||||
# the current sequence group.
|
# the current sequence group.
|
||||||
self.inter_data_list: List[
|
self.inter_data_list: List[
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user