[Core] Eliminate redundant is_encoder_decoder lookups (20-40us/step) (#29800)
Signed-off-by: Wushi Dong <dongws@meta.com>
commit 0037b5746a
parent f5b0846ba0
@@ -2439,16 +2439,13 @@ class GPUModelRunner(
     ]:
         num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
         is_first_rank = get_pp_group().is_first_rank
+        is_encoder_decoder = self.model_config.is_encoder_decoder

         # _prepare_inputs may reorder the batch, so we must gather multi
         # modal outputs after that to ensure the correct order
         ec_connector_output = None

-        if (
-            self.supports_mm_inputs
-            and is_first_rank
-            and not self.model_config.is_encoder_decoder
-        ):
+        if self.supports_mm_inputs and is_first_rank and not is_encoder_decoder:
             # Run the multimodal encoder if any.
             with self.maybe_get_ec_connector_output(
                 scheduler_output,
@@ -2526,10 +2523,7 @@ class GPUModelRunner(
                 num_input_tokens, intermediate_tensors, True
             )

-        if (
-            self.model_config.is_encoder_decoder
-            and scheduler_output.scheduled_encoder_inputs
-        ):
+        if is_encoder_decoder and scheduler_output.scheduled_encoder_inputs:
             # Run the encoder, just like we do with other multimodal inputs.
             # For an encoder-decoder model, our processing here is a bit
             # simpler, because the outputs are just passed to the decoder.
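The 20-40us/step figure comes from the commit title. To get a rough feel for the relative cost of repeated property reads versus a cached local, a timeit micro-benchmark along the lines of the sketch below can be used; the classes are stand-ins, and absolute numbers depend on the interpreter version and hardware.

    import timeit


    class HFConfig:
        is_encoder_decoder = True


    class ModelConfig:
        def __init__(self) -> None:
            self.hf_config = HFConfig()

        @property
        def is_encoder_decoder(self) -> bool:
            return self.hf_config.is_encoder_decoder


    cfg = ModelConfig()

    # Two property reads per iteration vs. one read cached in a local.
    repeated = timeit.timeit(
        "cfg.is_encoder_decoder; cfg.is_encoder_decoder",
        globals=globals(), number=1_000_000,
    )
    cached = timeit.timeit(
        "x = cfg.is_encoder_decoder; x; x",
        globals=globals(), number=1_000_000,
    )
    print(f"repeated property reads: {repeated:.3f}s, cached local: {cached:.3f}s")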