Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-24 13:35:01 +08:00)
[Model] Ignore rotary embed load for Cohere model (#17319)
This commit is contained in:
parent 4464109219
commit 97cc8729f0
@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
|
||||
loaded_params: Set[str] = set()
|
||||
for name, loaded_weight in weights:
|
||||
|
||||
# Skip loading rotary embeddings since vLLM has its own
|
||||
if "rotary_emb.inv_freq" in name:
|
||||
continue
|
||||
|
||||
if (self.quant_config is not None and
|
||||
(scale_name := self.quant_config.get_cache_scale(name))):
|
||||
# Loading kv cache quantization scales
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user