expand coverage of gpt2 model loading (#271)

twaka 2023-06-27 22:27:41 +09:00 committed by GitHub
parent 43710e8d09
commit 4026a049d3

@@ -228,11 +228,13 @@ class GPT2LMHeadModel(nn.Module):
                 # GPT-2 ties the weights of the embedding layer and the final
                 # linear layer.
                 continue
-            if ".attn.bias" in name:
+            if ".attn.bias" in name or ".attn.masked_bias" in name:
                 # Skip attention mask.
                 # NOTE: "c_attn.bias" should not be skipped.
                 continue
-            name = "transformer." + name
+            if not name.startswith("transformer."):
+                name = "transformer." + name
             # The HF's GPT-2 implementation uses Conv1D instead of Linear.
             # Because of this, we need to transpose the weights.
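
For context: Hugging Face GPT-2 checkpoints register the causal mask as non-weight buffers named "attn.bias" and "attn.masked_bias", and checkpoints saved from GPT2LMHeadModel typically already carry the "transformer." prefix on parameter names, while bare GPT2Model checkpoints do not. The hunk above skips both mask buffers and adds the prefix only when it is missing, so both checkpoint layouts load. Below is a minimal standalone sketch of that name-handling logic, assuming a plain dict of HF GPT-2 weights; the normalize_gpt2_names helper and hf_state_dict argument are illustrative, not vLLM's actual loader API:

def normalize_gpt2_names(hf_state_dict):
    """Yield (name, tensor) pairs with GPT-2 checkpoint names normalized."""
    for name, tensor in hf_state_dict.items():
        if "lm_head.weight" in name:
            # GPT-2 ties lm_head to the token embedding, so the duplicate
            # weight is skipped.
            continue
        if ".attn.bias" in name or ".attn.masked_bias" in name:
            # Causal-mask buffers, not learned weights. The leading dot in
            # the pattern keeps the real weight "c_attn.bias" from matching.
            continue
        if not name.startswith("transformer."):
            # Add the prefix only when it is missing, so names that already
            # have it are not doubled to "transformer.transformer.".
            name = "transformer." + name
        yield name, tensor

Checking startswith rather than unconditionally prepending is what expands coverage here: a checkpoint whose keys already begin with "transformer." would previously end up with a doubled prefix and fail to match any model parameter.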