Fix hard-coded parameter name in gemma3n.py (#27946)

Signed-off-by: Seungduk Kim <seungduk.kim@yanolja.com>
Signed-off-by: Biswa Panda <biswa.panda@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Biswa Panda <biswa.panda@gmail.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
This commit is contained in:
Seungduk Kim 2025-11-06 16:07:36 +09:00 committed by GitHub
parent a404e2c0f1
commit 201dc98acc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -357,8 +357,27 @@ class Gemma3nAttention(nn.Module):
offset = 2 if self.sliding_window is not None else 1
kv_shared_layer_index = first_kv_shared_layer_idx - offset
if kv_shared_layer_index >= 0:
# Different model wrappers expose layer parameters under
# different parent attributes.
# For example:
# - Gemma3nForCausalLM → parameters live under "model.layers"
# - Gemma3nForConditionalGeneration →
# under "language_model.model.layers"
# This logic extracts the portion of the parameter name
# *before* ".layers."
# so downstream code can consistently reference the correct
# model root regardless of which wrapper class was used.
if ".layers." in prefix:
param_name_before_layers = prefix.split(".layers.")[0]
else:
raise ValueError(
"Unexpected prefix format for Gemma3nAttention: "
f"'{prefix}'. The prefix is expected to contain "
"'.layers.' to correctly determine the KV sharing "
"target layer."
)
# Only the greater layer is required to specify sharing.
kv_sharing_target_layer_name = f"language_model.model.layers.{kv_shared_layer_index}.self_attn.attn" # noqa: E501
kv_sharing_target_layer_name = f"{param_name_before_layers}.layers.{kv_shared_layer_index}.self_attn.attn" # noqa: E501
self.rotary_emb = get_rope(
self.head_dim,