mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:15:51 +08:00
Support GPT-NeoX Models without attention biases (#2301)
This commit is contained in:
parent
358c328d69
commit
4934d49274
@@ -54,6 +54,7 @@ class GPTNeoXAttention(nn.Module):
|
|||||||
self.total_num_heads = config.num_attention_heads
|
self.total_num_heads = config.num_attention_heads
|
||||||
self.hidden_size = config.hidden_size
|
self.hidden_size = config.hidden_size
|
||||||
self.head_size = self.hidden_size // self.total_num_heads
|
self.head_size = self.hidden_size // self.total_num_heads
|
||||||
|
self.bias = getattr(config, "attention_bias", True)
|
||||||
|
|
||||||
tensor_model_parallel_world_size = (
|
tensor_model_parallel_world_size = (
|
||||||
get_tensor_model_parallel_world_size())
|
get_tensor_model_parallel_world_size())
|
||||||
@@ -65,11 +66,13 @@ class GPTNeoXAttention(nn.Module):
|
|||||||
config.hidden_size,
|
config.hidden_size,
|
||||||
self.head_size,
|
self.head_size,
|
||||||
self.total_num_heads,
|
self.total_num_heads,
|
||||||
|
bias=self.bias,
|
||||||
linear_method=linear_method,
|
linear_method=linear_method,
|
||||||
)
|
)
|
||||||
self.dense = RowParallelLinear(
|
self.dense = RowParallelLinear(
|
||||||
config.hidden_size,
|
config.hidden_size,
|
||||||
config.hidden_size,
|
config.hidden_size,
|
||||||
|
bias=self.bias,
|
||||||
linear_method=linear_method,
|
linear_method=linear_method,
|
||||||
)
|
)
|
||||||
scaling = self.head_size**-0.5
|
scaling = self.head_size**-0.5
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user