diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index b5dddd5192194..4e7e6c47f0a0b 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -155,7 +155,8 @@ class LlavaForConditionalGeneration(nn.Module, SupportsVision):
             quant_config=quant_config)
         logit_scale = getattr(config, "logit_scale", 1.0)
         self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
-                                                config.vocab_size, logit_scale)
+                                                config.text_config.vocab_size,
+                                                logit_scale)
         self.sampler = Sampler()
 
     def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor:
diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 0c89eed88f21a..5abb55c2cc415 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -249,7 +249,8 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsVision):
             quant_config=quant_config)
         logit_scale = getattr(config, "logit_scale", 1.0)
         self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
-                                                config.vocab_size, logit_scale)
+                                                config.text_config.vocab_size,
+                                                logit_scale)
         self.sampler = Sampler()
         self.image_newline = nn.Parameter(
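For context on why both hunks switch from config.vocab_size to config.text_config.vocab_size: LLaVA-style Hugging Face configs are composite, wrapping a vision config and a text config, and the language model's vocabulary size lives on the nested text config rather than reliably on the top level. Below is a minimal sketch of the distinction; it assumes the Hugging Face transformers package and uses the llava-hf/llava-1.5-7b-hf checkpoint purely as an example, not as anything referenced by this patch.

    from transformers import AutoConfig

    # Load a composite LLaVA config (example checkpoint, requires network access).
    config = AutoConfig.from_pretrained("llava-hf/llava-1.5-7b-hf")

    # The authoritative vocabulary size for the language model is nested
    # under `text_config`; a top-level `vocab_size` attribute is not
    # guaranteed to exist (or to agree) across transformers versions,
    # which is what the change above guards against.
    print(config.text_config.vocab_size)

Reading the value from config.text_config.vocab_size also matches how the surrounding constructors already derive self.unpadded_vocab_size and the lm_head dimensions from the text config, so the LogitsProcessor is kept consistent with the rest of the model setup.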