From 344e193b7ddb654c95d1195278edd9e129717979 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Mon, 28 Apr 2025 19:09:57 +0800
Subject: [PATCH] [Bugfix] Add missing `get_language_model` to new MLLMs (#17300)

Signed-off-by: DarkLight1337
---
 vllm/model_executor/models/kimi_vl.py              | 3 +++
 vllm/model_executor/models/qwen2_5_omni_thinker.py | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py
index 8cb8bc22fc0e7..0629266860fd3 100644
--- a/vllm/model_executor/models/kimi_vl.py
+++ b/vllm/model_executor/models/kimi_vl.py
@@ -369,6 +369,9 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal):
         return self.multi_modal_projector(
             torch.cat(image_features)).split(lengths)
 
+    def get_language_model(self) -> torch.nn.Module:
+        return self.language_model
+
     def get_multimodal_embeddings(self,
                                   **kwargs: object) -> Optional[NestedTensors]:
         # Validate the multimodal input keyword arguments
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index 93c21fc55c5ca..c3988ee570813 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -809,6 +809,9 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
                     "audio"] = self._parse_and_validate_audio_input(**kwargs)
         return mm_input_by_modality
 
+    def get_language_model(self) -> torch.nn.Module:
+        return self.language_model
+
     def get_multimodal_embeddings(
             self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
 