From c2a2f19abad77e8a8b97c178c4ea1684c2747348 Mon Sep 17 00:00:00 2001
From: Alex Brooks
Date: Sat, 12 Jul 2025 07:11:30 -0600
Subject: [PATCH] [Bugfix] Fix Tensor Parallelism Padding Consistency in Granite Models (#20843)

Signed-off-by: Alex-Brooks
---
 vllm/model_executor/models/granite.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py
index bd4d5d0b6b28..507a9206c428 100644
--- a/vllm/model_executor/models/granite.py
+++ b/vllm/model_executor/models/granite.py
@@ -273,6 +273,10 @@ class GraniteModel(nn.Module):
                 self.vocab_size,
                 config.hidden_size,
                 org_num_embeddings=config.vocab_size,
+                padding_size=DEFAULT_VOCAB_PADDING_SIZE
+                # We need bigger padding if using lora for kernel
+                # compatibility
+                if not lora_config else lora_config.lora_vocab_padding_size,
                 quant_config=quant_config,
             )
         else: