diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index bd4d5d0b6b28..507a9206c428 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -273,6 +273,10 @@ class GraniteModel(nn.Module): self.vocab_size, config.hidden_size, org_num_embeddings=config.vocab_size, + padding_size=DEFAULT_VOCAB_PADDING_SIZE + # We need bigger padding if using lora for kernel + # compatibility + if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, ) else: