From 4b657d32922cb6d3179f901e312715279ad9c728 Mon Sep 17 00:00:00 2001
From: sixgod
Date: Sun, 12 Jan 2025 03:05:56 +0800
Subject: [PATCH] [Model] Add cogagent model support vLLM (#11742)

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
---
 vllm/model_executor/models/chatglm.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py
index ffd6891b25965..7e37ce3086e6b 100644
--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -1,6 +1,6 @@
 # Adapted from
-# https://github.com/THUDM/GLM-4
-"""Inference-only ChatGLM model compatible with THUDM weights."""
+# https://github.com/THUDM/CogAgent
+"""Inference-only CogAgent model compatible with THUDM weights."""
 from argparse import Namespace
 from array import array
 from typing import (Dict, Iterable, List, Mapping, Optional, Set, Tuple,
@@ -201,7 +201,6 @@ def input_processor_for_glmv(ctx: InputContext, inputs: DecoderOnlyInputs):
 
     new_input_ids = []
     final_processed_position = 0
-    final_processed_position = 0
 
     for boi_position, eoi_position in zip(boi_positions, eoi_positions):
         assert boi_position < eoi_position
@@ -275,12 +274,15 @@ class GLMAttention(nn.Module):
         # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
         rope_ratio = getattr(config, "rope_ratio", 1.0)
         max_positions = getattr(config, "seq_length", 8192)
+        # NOTE: THUDM/cogagent-9b-20241220 uses original_rope=False,
+        # which is equivalent to is_neox_style=True
+        is_neox_style = not config.original_rope
         self.rotary_emb = get_rope(
             self.head_dim,
             rotary_dim=self.head_dim // 2,
             max_position=max_positions,
             base=10000 * rope_ratio,
-            is_neox_style=False,
+            is_neox_style=is_neox_style,
         )
         self.attn = Attention(self.num_heads,
                               self.head_dim,
@@ -779,4 +781,4 @@ class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP,
             return ChatGLMV(vllm_config=vllm_config, prefix=prefix)
         # Initialize LLM
         else:
-            return ChatGLM(vllm_config=vllm_config, prefix=prefix)
+            return ChatGLM(vllm_config=vllm_config, prefix=prefix)
\ No newline at end of file
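
The NOTE in the third hunk states that original_rope=False on the CogAgent
checkpoint is equivalent to is_neox_style=True in vLLM. To make that
equivalence concrete, below is a minimal, self-contained sketch of the two
rotary-embedding layouts the flag selects between. This is not vLLM's
actual get_rope kernel; apply_rope and its arguments are illustrative
names only.

    import torch

    def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor,
                   neox_style: bool) -> torch.Tensor:
        """Rotate x of shape [..., rotary_dim]; cos/sin have shape
        [..., rotary_dim // 2]."""
        if neox_style:
            # NeoX layout: pair element i with element i + d/2
            # (the layout CogAgent's weights expect).
            x1, x2 = torch.chunk(x, 2, dim=-1)
            return torch.cat((x1 * cos - x2 * sin, x2 * cos + x1 * sin),
                             dim=-1)
        # GPT-J / interleaved layout: pair adjacent elements (2i, 2i + 1)
        # (what the previously hard-coded is_neox_style=False assumed).
        x1, x2 = x[..., 0::2], x[..., 1::2]
        out = torch.stack((x1 * cos - x2 * sin, x2 * cos + x1 * sin), dim=-1)
        return out.flatten(-2)

    d = 8
    inv_freq = 1.0 / (10000 ** (torch.arange(0, d, 2).float() / d))
    freqs = torch.outer(torch.arange(1).float(), inv_freq)  # one position
    cos, sin = freqs.cos(), freqs.sin()
    q = torch.randn(1, d)
    q_neox = apply_rope(q, cos, sin, neox_style=True)   # original_rope=False
    q_gptj = apply_rope(q, cos, sin, neox_style=False)  # original_rope=True

Both layouts rotate with the same frequencies but pair different elements,
so weights trained under one layout produce wrong attention scores under
the other; that is why the patch derives is_neox_style from the
checkpoint's original_rope flag rather than keeping the hard-coded False.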
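
For reference, a hedged usage sketch of running the newly supported
checkpoint through vLLM's offline API once this patch is applied. The
model ID comes from the NOTE in the patch; the prompt template shown is an
assumption, so check the checkpoint's model card for the real chat format.

    from PIL import Image
    from vllm import LLM, SamplingParams

    llm = LLM(model="THUDM/cogagent-9b-20241220", trust_remote_code=True)
    image = Image.open("screenshot.png")  # placeholder input image

    outputs = llm.generate(
        {
            # Assumed prompt format; verify against the model card.
            "prompt": "<|user|>\nDescribe this screenshot.<|assistant|>\n",
            "multi_modal_data": {"image": image},
        },
        SamplingParams(temperature=0.0, max_tokens=128),
    )
    print(outputs[0].outputs[0].text)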