mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:57:45 +08:00
expand coverage of gpt2 model loading (#271)
This commit is contained in:
parent
43710e8d09
commit
4026a049d3
@@ -228,10 +228,12 @@ class GPT2LMHeadModel(nn.Module):
|
|||||||
# GPT-2 ties the weights of the embedding layer and the final
|
# GPT-2 ties the weights of the embedding layer and the final
|
||||||
# linear layer.
|
# linear layer.
|
||||||
continue
|
continue
|
||||||
if ".attn.bias" in name:
|
if ".attn.bias" in name or ".attn.masked_bias" in name:
|
||||||
# Skip attention mask.
|
# Skip attention mask.
|
||||||
# NOTE: "c_attn.bias" should not be skipped.
|
# NOTE: "c_attn.bias" should not be skipped.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if not name.startswith("transformer."):
|
||||||
name = "transformer." + name
|
name = "transformer." + name
|
||||||
|
|
||||||
# The HF's GPT-2 implementation uses Conv1D instead of Linear.
|
# The HF's GPT-2 implementation uses Conv1D instead of Linear.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user