diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py
index f7a732e3a601c..547884f393eb0 100644
--- a/vllm/model_executor/models/gemma3n.py
+++ b/vllm/model_executor/models/gemma3n.py
@@ -357,8 +357,27 @@ class Gemma3nAttention(nn.Module):
             offset = 2 if self.sliding_window is not None else 1
             kv_shared_layer_index = first_kv_shared_layer_idx - offset
             if kv_shared_layer_index >= 0:
+                # Different model wrappers expose layer parameters under
+                # different parent attributes. For example:
+                #   - Gemma3nForCausalLM → parameters live under
+                #     "model.layers"
+                #   - Gemma3nForConditionalGeneration → parameters live
+                #     under "language_model.model.layers"
+                # This extracts the portion of the prefix *before*
+                # ".layers." so downstream code can consistently
+                # reference the correct model root regardless of which
+                # wrapper class was used.
+                if ".layers." in prefix:
+                    param_name_before_layers = prefix.split(".layers.")[0]
+                else:
+                    raise ValueError(
+                        "Unexpected prefix format for Gemma3nAttention: "
+                        f"'{prefix}'. The prefix is expected to contain "
+                        "'.layers.' to correctly determine the KV sharing "
+                        "target layer.")
                 # Only the greater layer is required to specify sharing.
-                kv_sharing_target_layer_name = f"language_model.model.layers.{kv_shared_layer_index}.self_attn.attn"  # noqa: E501
+                kv_sharing_target_layer_name = f"{param_name_before_layers}.layers.{kv_shared_layer_index}.self_attn.attn"  # noqa: E501
 
         self.rotary_emb = get_rope(
             self.head_dim,
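
Note (illustrative, not part of the patch): a minimal standalone sketch of how the prefix splitting above resolves the KV sharing target layer name for both wrapper classes. The helper name and the example prefixes/indices below are hypothetical and only mirror the logic added in the diff.

# Hypothetical helper mirroring the prefix handling in the diff; not a vLLM API.
def resolve_kv_sharing_target(prefix: str, kv_shared_layer_index: int) -> str:
    if ".layers." not in prefix:
        raise ValueError(f"Unexpected prefix format: '{prefix}'")
    # Everything before ".layers." is the model root, regardless of wrapper.
    root = prefix.split(".layers.")[0]
    return f"{root}.layers.{kv_shared_layer_index}.self_attn.attn"

# Gemma3nForCausalLM-style prefix (assumed example values):
assert (resolve_kv_sharing_target("model.layers.30.self_attn", 19)
        == "model.layers.19.self_attn.attn")

# Gemma3nForConditionalGeneration-style prefix (assumed example values):
assert (resolve_kv_sharing_target("language_model.model.layers.30.self_attn", 19)
        == "language_model.model.layers.19.self_attn.attn")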