mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 01:57:45 +08:00
Skip MM Encoder for non-first PP ranks (#24387)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
0661cb9df3
commit
2e5d21378d
@@ -1620,14 +1620,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
|
|
||||||
# _prepare_inputs may reorder the batch, so we must gather multi
|
# _prepare_inputs may reorder the batch, so we must gather multi
|
||||||
# modal outputs after that to ensure the correct order
|
# modal outputs after that to ensure the correct order
|
||||||
if self.supports_mm_inputs:
|
if self.supports_mm_inputs and get_pp_group().is_first_rank:
|
||||||
# Run the multimodal encoder if any.
|
# Run the multimodal encoder if any.
|
||||||
self._execute_mm_encoder(scheduler_output)
|
self._execute_mm_encoder(scheduler_output)
|
||||||
mm_embeds = self._gather_mm_embeddings(scheduler_output)
|
mm_embeds = self._gather_mm_embeddings(scheduler_output)
|
||||||
else:
|
|
||||||
mm_embeds = []
|
|
||||||
|
|
||||||
if self.supports_mm_inputs and get_pp_group().is_first_rank:
|
|
||||||
# NOTE(woosuk): To unify token ids and soft tokens (vision
|
# NOTE(woosuk): To unify token ids and soft tokens (vision
|
||||||
# embeddings), we always use embeddings (rather than token ids)
|
# embeddings), we always use embeddings (rather than token ids)
|
||||||
# as input to the multimodal model, even when the input is text.
|
# as input to the multimodal model, even when the input is text.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user