[Model] Ignore rotary embed load for Cohere model (#17319)

Author: Ekagra Ranjan, 2025-04-29 03:30:40 -04:00 (committed by GitHub)
parent 4464109219
commit 97cc8729f0


@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
        loaded_params: Set[str] = set()
        for name, loaded_weight in weights:
            # Skip loading rotary embeddings since vLLM has its own
            if "rotary_emb.inv_freq" in name:
                continue
            if (self.quant_config is not None and
                    (scale_name := self.quant_config.get_cache_scale(name))):
                # Loading kv cache quantization scales
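
For context, here is a minimal, self-contained sketch of the weight-loading pattern this change relies on: checkpoint entries such as "rotary_emb.inv_freq" have no matching parameter in the model because vLLM recomputes rotary embeddings from the config, so the loader simply skips them. The function name and the bare params_dict copy below are illustrative assumptions, not vLLM's actual loader implementation.

    from typing import Dict, Iterable, Set, Tuple

    import torch


    def load_weights_skipping_rotary(
        params_dict: Dict[str, torch.nn.Parameter],
        weights: Iterable[Tuple[str, torch.Tensor]],
    ) -> Set[str]:
        """Copy checkpoint tensors into model parameters, skipping rotary buffers."""
        loaded_params: Set[str] = set()
        for name, loaded_weight in weights:
            # inv_freq is derived from the rope base and head dim at runtime,
            # so there is no parameter to copy it into; skip it.
            if "rotary_emb.inv_freq" in name:
                continue
            param = params_dict[name]
            param.data.copy_(loaded_weight)
            loaded_params.add(name)
        return loaded_params

Skipping these buffers avoids KeyError-style failures when a checkpoint ships rotary inv_freq tensors that the runtime model never registers as parameters.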