# UltravoxProcessingInfo.get_feature_extractor
# (vllm/model_executor/models/ultravox.py)
def get_feature_extractor(self, **kwargs: object) -> WhisperFeatureExtractor:
    """Return the Whisper feature extractor held by the HF processor.

    Two processor layouts are supported:

    * ``hf_processor.audio_processor`` is itself a
      ``WhisperFeatureExtractor`` and is returned directly.
    * ``hf_processor.audio_processor`` wraps the extractor, which is then
      read from its ``feature_extractor`` attribute.

    Args:
        **kwargs: Forwarded unchanged to ``self.get_hf_processor``.

    Returns:
        The ``WhisperFeatureExtractor`` instance found on the processor.

    Raises:
        TypeError: If the wrapped ``feature_extractor`` is not a
            ``WhisperFeatureExtractor``.
    """
    hf_processor = self.get_hf_processor(**kwargs)

    # The processor layout changed in the checkpoint commit below;
    # presumably `audio_processor` can now be the feature extractor itself
    # rather than a wrapper around it, so accept both shapes.
    # https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_2-1b/commit/9a3c571b8fdaf1e66dd3ea61bbcb6db5c70a438e
    audio_processor = hf_processor.audio_processor  # type: ignore
    if isinstance(audio_processor, WhisperFeatureExtractor):
        return audio_processor

    feature_extractor = audio_processor.feature_extractor  # type: ignore
    # Explicit check instead of `assert`: asserts are removed under
    # `python -O`, which would let a wrong-typed object through silently.
    if not isinstance(feature_extractor, WhisperFeatureExtractor):
        raise TypeError(
            "Expected `audio_processor.feature_extractor` to be a "
            "WhisperFeatureExtractor, got "
            f"{type(feature_extractor).__name__}"
        )
    return feature_extractor