Fix weights loading for Apertus (#24100)

Signed-off-by: Nathan Ranchin <nranchin@student.ethz.ch>
This commit is contained in:
nathan 2025-09-02 20:34:28 +02:00 committed by GitHub
parent 2417798471
commit 598bd74cf8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -415,6 +415,12 @@ class ApertusModel(nn.Module):
(".qkv_proj", ".v_proj", "v"),
]
params_dict = dict(self.named_parameters())
# XIELU's beta and eps are buffers, not parameters, so named_parameters() misses them; add them so they can be loaded too
for name, buffer in self.named_buffers():
if name.endswith(".beta") or name.endswith(".eps"):
params_dict[name] = buffer
loaded_params: set[str] = set()
for name, loaded_weight in weights:
if "rotary_emb.inv_freq" in name: