Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Quantization] Add prefix for Command-A quantized model (#17017)
This commit is contained in:
parent b07d741661
commit 6b2427f995
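Quantized Command-A (Cohere) checkpoints typically key their per-layer quantization settings on fully qualified module names such as "model.layers.0.mlp.gate_up_proj". Before this change, CohereMLP was constructed without a prefix, so its linear sublayers registered without their dotted names and could not be matched by the quantization config. The sketch below only illustrates how the prefix strings compose after this change; the helper name is hypothetical and not part of vLLM.

# Hypothetical helper, for illustration only: shows the fully qualified
# names the MLP projections register under once the prefix is threaded through.
def mlp_layer_names(layer_prefix: str) -> list[str]:
    mlp_prefix = f"{layer_prefix}.mlp"        # CohereDecoderLayer -> CohereMLP
    return [
        f"{mlp_prefix}.gate_up_proj",         # MergedColumnParallelLinear
        f"{mlp_prefix}.down_proj",            # RowParallelLinear
    ]

print(mlp_layer_names("model.layers.0"))
# ['model.layers.0.mlp.gate_up_proj', 'model.layers.0.mlp.down_proj']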
@@ -89,6 +89,7 @@ class CohereMLP(nn.Module):
         self,
         config: CohereConfig,
         quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.config = config
@@ -99,12 +100,14 @@ class CohereMLP(nn.Module):
             [self.intermediate_size] * 2,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
         )
         self.down_proj = RowParallelLinear(
             self.intermediate_size,
             self.hidden_size,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.down_proj",
         )
         self.act_fn = SiluAndMul()

@@ -158,12 +161,14 @@ class CohereAttention(nn.Module):
             self.total_num_kv_heads,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             self.hidden_size,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
         )
         self.rotary_emb = get_rope(
             self.head_dim,
@@ -244,7 +249,9 @@ class CohereDecoderLayer(nn.Module):
                                          quant_config=quant_config,
                                          prefix=f"{prefix}.self_attn")

-        self.mlp = CohereMLP(config, quant_config=quant_config)
+        self.mlp = CohereMLP(config,
+                             quant_config=quant_config,
+                             prefix=f"{prefix}.mlp")
         self.input_layernorm = LayerNorm(param_shape=(config.hidden_size),
                                          eps=config.layer_norm_eps)

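As a usage-level illustration (not vLLM's actual API), a quantization config generally decides per layer whether to quantize by matching the module's dotted name against target or ignore patterns; with the prefixes wired through above, Command-A's MLP and attention projections become matchable. Everything named below is made up for the sketch.

from fnmatch import fnmatch

# Hypothetical matcher, for illustration only: quantize a module unless its
# dotted name matches one of the ignore patterns.
def should_quantize(module_name: str, ignore_patterns: list[str]) -> bool:
    return not any(fnmatch(module_name, pat) for pat in ignore_patterns)

ignore = ["model.layers.*.mlp.down_proj"]   # example ignore list, invented for the sketch
print(should_quantize("model.layers.0.mlp.down_proj", ignore))     # False -> left unquantized
print(should_quantize("model.layers.0.mlp.gate_up_proj", ignore))  # True  -> quantized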