diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py
index 25b1d5a1955f5..8f64e5d5c966c 100644
--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
         loaded_params: Set[str] = set()
         for name, loaded_weight in weights:
+            # Skip loading rotary embeddings since vLLM has its own
+            if "rotary_emb.inv_freq" in name:
+                continue
+
             if (self.quant_config is not None and
                     (scale_name := self.quant_config.get_cache_scale(name))):
                 # Loading kv cache quantization scales