[Bugfix] Fix GLM rotary_dim issue and support v1 (#16912)
Signed-off-by: isotr0py <2037008807@qq.com>
commit 55d6d3fdb8
parent 7272bfae77
@@ -3,13 +3,13 @@
 from vllm.config import VllmConfig
 from vllm.model_executor.models.llama import LlamaForCausalLM
 
-from .interfaces import SupportsV0Only
 from .utils import PPMissingLayer
 
 
-class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only):
+class GlmForCausalLM(LlamaForCausalLM):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        vllm_config.model_config.hf_config.partial_rotary_factor = 0.5
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         # Hack Llama model to fit HF format GLM implementation
         # Attention difference between GLM and Llama:
@@ -17,7 +17,6 @@ class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only):
         # 2. There is no bias for o_proj in attention
         for layer in self.model.layers:
             if not isinstance(layer, PPMissingLayer):
-                layer.self_attn.rotary_emb.rotary_dim //= 2
                 layer.self_attn.rotary_emb.is_neox_style = False
                 layer.self_attn.o_proj.bias = None
                 layer.self_attn.o_proj.skip_bias_add = True
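The key change: instead of halving rotary_emb.rotary_dim in place after the model is built, the fix sets partial_rotary_factor = 0.5 on the HF config before calling super().__init__(), so the Llama layers construct their rotary embeddings with the correct dimension from the start. In vLLM the rotary embedding precomputes its cos/sin cache at construction, so the old post-hoc mutation left a cache sized for the full rotary_dim; building with the halved dimension removes that hack and lets the model run on the v1 engine. A minimal sketch of the arithmetic (the helper below is hypothetical, not vLLM API), assuming the rotary dimension is derived as head_dim * partial_rotary_factor:

def rotary_dim_for(head_dim: int, partial_rotary_factor: float = 1.0) -> int:
    # Only the first `rotary_dim` channels of each head receive rotary
    # position embeddings; the remaining channels pass through unrotated.
    return int(head_dim * partial_rotary_factor)

# GLM-4 rotates half of each head, e.g. with head_dim = 128:
assert rotary_dim_for(128, partial_rotary_factor=0.5) == 64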
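The is_neox_style = False override remains: GLM applies GPT-J-style RoPE, pairing adjacent channels (2i, 2i+1), whereas Neox style pairs channel i with channel i + rotary_dim // 2. A toy sketch of the two layouts (not vLLM's implementation):

import torch

def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor,
               neox_style: bool) -> torch.Tensor:
    # x:        (..., rotary_dim) slice of one head that receives RoPE
    # cos, sin: (..., rotary_dim // 2) precomputed frequencies
    if neox_style:
        # Neox style: split the rotary channels into two halves.
        x1, x2 = torch.chunk(x, 2, dim=-1)
    else:
        # GPT-J / GLM style: pair adjacent channels.
        x1, x2 = x[..., ::2], x[..., 1::2]
    o1 = x1 * cos - x2 * sin
    o2 = x2 * cos + x1 * sin
    if neox_style:
        return torch.cat((o1, o2), dim=-1)
    # Re-interleave the rotated pairs back into the original layout.
    return torch.stack((o1, o2), dim=-1).flatten(-2)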