diff --git a/vllm/model_executor/models/florence2.py b/vllm/model_executor/models/florence2.py index 4a8cb35a54dc8..d0881231fb1e7 100644 --- a/vllm/model_executor/models/florence2.py +++ b/vllm/model_executor/models/florence2.py @@ -647,7 +647,8 @@ class Florence2LanguageModel(nn.Module): encoder_hidden_states = None - if inputs_embeds is not None or encoder_input_ids.numel() > 0: + if ((inputs_embeds is not None and inputs_embeds.numel() > 0) + or encoder_input_ids.numel() > 0): # Run encoder attention if a non-zero number of encoder tokens # are provided as input encoder_hidden_states = self.encoder(input_ids=encoder_input_ids, @@ -681,6 +682,8 @@ class Florence2LanguageForConditionalGeneration(nn.Module, SupportsV0Only): self.lm_head = BartParallelLMHead(self.vocab_size, config.d_model, embed_scale=embed_scale) + if self.config.tie_word_embeddings: + self.lm_head.tie_weights(self.model.shared) self.logits_processor = LogitsProcessor(self.vocab_size, config.vocab_size) @@ -749,7 +752,8 @@ class Florence2LanguageForConditionalGeneration(nn.Module, SupportsV0Only): else: if "final_logits_bias" in name: continue - if self.config.tie_word_embeddings and "embed_tokens" in name: + if self.config.tie_word_embeddings and ("embed_tokens" in name + or "lm_head" in name): continue param = params_dict[name] weight_loader = getattr(param, "weight_loader",