[Core] Eliminate redundant is_encoder_decoder lookups (20-40us/step) (#29800)

Signed-off-by: Wushi Dong <dongws@meta.com>
This commit is contained in:
Wushi Dong 2025-12-01 23:08:07 -08:00 committed by GitHub
parent f5b0846ba0
commit 0037b5746a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2439,16 +2439,13 @@ class GPUModelRunner(
]:
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
is_first_rank = get_pp_group().is_first_rank
is_encoder_decoder = self.model_config.is_encoder_decoder
# _prepare_inputs may reorder the batch, so we must gather multi
# modal outputs after that to ensure the correct order
ec_connector_output = None
if (
self.supports_mm_inputs
and is_first_rank
and not self.model_config.is_encoder_decoder
):
if self.supports_mm_inputs and is_first_rank and not is_encoder_decoder:
# Run the multimodal encoder if any.
with self.maybe_get_ec_connector_output(
scheduler_output,
@@ -2526,10 +2523,7 @@ class GPUModelRunner(
num_input_tokens, intermediate_tensors, True
)
if (
self.model_config.is_encoder_decoder
and scheduler_output.scheduled_encoder_inputs
):
if is_encoder_decoder and scheduler_output.scheduled_encoder_inputs:
# Run the encoder, just like we do with other multimodal inputs.
# For an encoder-decoder model, our processing here is a bit
# simpler, because the outputs are just passed to the decoder.