mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 20:47:08 +08:00
[Model] Ignore rotary embed load for Cohere model (#17319)
This commit is contained in:
parent
4464109219
commit
97cc8729f0
@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
|
|||||||
loaded_params: Set[str] = set()
|
loaded_params: Set[str] = set()
|
||||||
for name, loaded_weight in weights:
|
for name, loaded_weight in weights:
|
||||||
|
|
||||||
|
# Skip loading rotary embeddings since vLLM has its own
|
||||||
|
if "rotary_emb.inv_freq" in name:
|
||||||
|
continue
|
||||||
|
|
||||||
if (self.quant_config is not None and
|
if (self.quant_config is not None and
|
||||||
(scale_name := self.quant_config.get_cache_scale(name))):
|
(scale_name := self.quant_config.get_cache_scale(name))):
|
||||||
# Loading kv cache quantization scales
|
# Loading kv cache quantization scales
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user