mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-21 01:45:54 +08:00
[Core] Cleanup TPU model runner for MM (#23894)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
9748c5198b
commit
f1bddbd852
@ -808,31 +808,6 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
return per_layer_attn_metadata, logits_indices, padded_num_reqs,\
|
||||
num_reqs, end_index
|
||||
|
||||
def _scatter_placeholders(
|
||||
self,
|
||||
embeds: torch.Tensor,
|
||||
is_embed: Optional[torch.Tensor],
|
||||
) -> torch.Tensor:
|
||||
if is_embed is None:
|
||||
return embeds
|
||||
|
||||
placeholders = embeds.new_full(
|
||||
(is_embed.shape[0], embeds.shape[-1]),
|
||||
fill_value=torch.nan,
|
||||
)
|
||||
placeholders[is_embed] = embeds
|
||||
return placeholders
|
||||
|
||||
def _gather_placeholders(
|
||||
self,
|
||||
placeholders: torch.Tensor,
|
||||
is_embed: Optional[torch.Tensor],
|
||||
) -> torch.Tensor:
|
||||
if is_embed is None:
|
||||
return placeholders
|
||||
|
||||
return placeholders[is_embed]
|
||||
|
||||
def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"):
|
||||
scheduled_encoder_inputs = scheduler_output.scheduled_encoder_inputs
|
||||
if not scheduled_encoder_inputs:
|
||||
@ -892,12 +867,7 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
# NOTE (NickLucche) here we diverge from logic in other runners, as we
|
||||
# assume to only have whole mm items to process. Hence we avoid the
|
||||
# intrinsic dynamism that `scatter_mm_placeholders` introduces.
|
||||
for (mm_hash, pos_info), output in zip(
|
||||
mm_hashes_pos,
|
||||
encoder_outputs,
|
||||
):
|
||||
if req_id not in self.encoder_cache:
|
||||
self.encoder_cache[req_id] = {}
|
||||
for (mm_hash, pos_info), output in zip(mm_hashes_pos, encoder_outputs):
|
||||
assert pos_info.is_embed is None, "Expected all positions to be"\
|
||||
" contiguous and embeddings."
|
||||
self.encoder_cache[mm_hash] = output
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user