From 4026a049d3ad510bea8e177bf71722bd510fbb46 Mon Sep 17 00:00:00 2001
From: twaka
Date: Tue, 27 Jun 2023 22:27:41 +0900
Subject: [PATCH] expand coverage of gpt2 model loading (#271)

---
 vllm/model_executor/models/gpt2.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py
index 9537daa235ae7..6c2890015c8ce 100644
--- a/vllm/model_executor/models/gpt2.py
+++ b/vllm/model_executor/models/gpt2.py
@@ -228,11 +228,13 @@ class GPT2LMHeadModel(nn.Module):
                 # GPT-2 ties the weights of the embedding layer and the final
                 # linear layer.
                 continue
-            if ".attn.bias" in name:
+            if ".attn.bias" in name or ".attn.masked_bias" in name:
                 # Skip attention mask.
                 # NOTE: "c_attn.bias" should not be skipped.
                 continue
-            name = "transformer." + name
+
+            if not name.startswith("transformer."):
+                name = "transformer." + name
             # The HF's GPT-2 implementation uses Conv1D instead of Linear.
             # Because of this, we need to transpose the weights.
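
For context, below is a minimal standalone sketch of the checkpoint-key handling this patch introduces: skipping the attention-mask buffers (".attn.bias" / ".attn.masked_bias") and adding the "transformer." prefix only when a checkpoint key does not already carry it. The normalize_gpt2_name helper and the example keys are illustrative and not part of vLLM's loader, which additionally handles tensor parallelism and Conv1D weight transposition.

from typing import Optional


def normalize_gpt2_name(name: str) -> Optional[str]:
    """Map a HF GPT-2 checkpoint key to a "transformer."-prefixed name.

    Returns None for keys that should be skipped entirely.
    Illustrative helper only; not vLLM's actual load_weights logic.
    """
    if "lm_head.weight" in name:
        # GPT-2 ties the embedding and output projection weights,
        # so the separate lm_head tensor is not loaded.
        return None
    if ".attn.bias" in name or ".attn.masked_bias" in name:
        # Skip the causal-mask buffers that HF stores inside the
        # attention module as "bias" / "masked_bias".
        # NOTE: "c_attn.bias" is a real parameter and is NOT skipped.
        return None
    # Some checkpoints already prefix parameter names with "transformer."
    # while others do not; only add the prefix when it is missing.
    if not name.startswith("transformer."):
        name = "transformer." + name
    return name


if __name__ == "__main__":
    # Hypothetical keys as they might appear in different GPT-2 checkpoints.
    checkpoint_keys = [
        "h.0.attn.bias",                       # mask buffer -> skipped
        "h.0.attn.masked_bias",                # mask buffer -> skipped (new)
        "h.0.attn.c_attn.bias",                # real bias -> kept
        "transformer.h.0.attn.c_attn.weight",  # already prefixed -> unchanged
        "wte.weight",                          # unprefixed -> prefixed
        "lm_head.weight",                      # tied weight -> skipped
    ]
    for key in checkpoint_keys:
        print(f"{key!r:40} -> {normalize_gpt2_name(key)!r}")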