expand coverage of gpt2 model loading (#271)

2025-12-10 07:57:45 +08:00 · 2023-06-27 22:27:41 +09:00 · 2023-06-27 22:27:41 +09:00 · 4026a049d3
commit 4026a049d3
parent 43710e8d09
1 changed file with 4 additions and 2 deletions
--- a/vllm/model_executor/models/gpt2.py
+++ b/vllm/model_executor/models/gpt2.py
@@ -228,10 +228,12 @@ class GPT2LMHeadModel(nn.Module):
                # GPT-2 ties the weights of the embedding layer and the final
                # linear layer.
                continue
-            if ".attn.bias" in name:
+            if ".attn.bias" in name or ".attn.masked_bias" in name:
                # Skip attention mask.
                # NOTE: "c_attn.bias" should not be skipped.
                continue
            if not name.startswith("transformer."):
                name = "transformer." + name
            # The HF's GPT-2 implementation uses Conv1D instead of Linear.