[Model] Future-proof Qwen2-Audio multi-modal processor (#11776)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-01-07 11:05:17 +08:00 committed by GitHub
parent 08fb75c72e
commit d0169e1b0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -227,12 +227,14 @@ class Qwen2AudioMultiModalProcessor(Qwen2AudioProcessingMixin,
         ]
 
     def _always_apply_prompt_replacements(self) -> bool:
-        # HF never applies prompt replacements, so we have to do it ourselves.
+        # Qwen2-Audio processor will start inserting placeholder tokens
+        # in an upcoming release:
+        # https://github.com/huggingface/transformers/pull/35534
         # NOTE: `_find_placeholders_by_modality` may incorrectly think that HF
         # has already performed processing for multi-audio input when the input
         # audios are short (the corresponding placeholders may take up fewer
         # tokens than the number of audio items)
-        return True
+        return not hasattr(self._get_hf_processor(), "audio_token")
 
 
 @MULTIMODAL_REGISTRY.register_processor(Qwen2AudioMultiModalProcessor)