[Model] Use in-place adds in SigLIP (#18922)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
Lukas Geiger 2025-05-30 10:12:59 +01:00 committed by GitHub
parent 8f8900cee9
commit c3bb9f2331
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -130,11 +130,10 @@ class SiglipVisionEmbeddings(nn.Module):
         embeddings = patch_embeds.flatten(2).transpose(1, 2)
         if interpolate_pos_encoding:
-            embeddings = embeddings + self.interpolate_pos_encoding(
+            embeddings += self.interpolate_pos_encoding(
                 embeddings, height, width)
         else:
-            embeddings = embeddings + self.position_embedding(
-                self.position_ids)
+            embeddings += self.position_embedding(self.position_ids)
         return embeddings
@@ -271,12 +270,12 @@ class SiglipEncoderLayer(nn.Module):
         hidden_states = self.layer_norm1(hidden_states)
         hidden_states, _ = self.self_attn(hidden_states=hidden_states)
-        hidden_states = residual + hidden_states
+        hidden_states += residual
         residual = hidden_states
         hidden_states = self.layer_norm2(hidden_states)
         hidden_states = self.mlp(hidden_states)
-        hidden_states = residual + hidden_states
+        hidden_states += residual
         return hidden_states, None
@@ -354,7 +353,8 @@ class SiglipMultiheadAttentionPoolingHead(nn.Module):
         residual = hidden_state
         hidden_state = self.layernorm(hidden_state)
-        hidden_state = residual + self.mlp(hidden_state)
+        hidden_state = self.mlp(hidden_state)
+        hidden_state += residual
         return hidden_state[:, 0]