diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 9eacd2138978b..ee28f477a26ad 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2439,16 +2439,13 @@ class GPUModelRunner( ]: num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens is_first_rank = get_pp_group().is_first_rank + is_encoder_decoder = self.model_config.is_encoder_decoder # _prepare_inputs may reorder the batch, so we must gather multi # modal outputs after that to ensure the correct order ec_connector_output = None - if ( - self.supports_mm_inputs - and is_first_rank - and not self.model_config.is_encoder_decoder - ): + if self.supports_mm_inputs and is_first_rank and not is_encoder_decoder: # Run the multimodal encoder if any. with self.maybe_get_ec_connector_output( scheduler_output, @@ -2526,10 +2523,7 @@ class GPUModelRunner( num_input_tokens, intermediate_tensors, True ) - if ( - self.model_config.is_encoder_decoder - and scheduler_output.scheduled_encoder_inputs - ): + if is_encoder_decoder and scheduler_output.scheduled_encoder_inputs: # Run the encoder, just like we do with other multimodal inputs. # For an encoder-decoder model, our processing here is a bit # simpler, because the outputs are just passed to the decoder.