diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py
index f3b273eb41e8..d7e9d402a1f9 100644
--- a/vllm/multimodal/audio.py
+++ b/vllm/multimodal/audio.py
@@ -106,7 +106,7 @@ class AudioMediaIO(MediaIO[tuple[npt.NDArray, float]]):
     def load_file(self, filepath: Path) -> tuple[npt.NDArray, float]:
         return librosa.load(filepath, sr=None)
 
-    def encode_base64(self, media: tuple[npt.NDArray, float]) -> str:
+    def encode_base64(self, media: tuple[npt.NDArray, int]) -> str:
         audio, sr = media
 
         with BytesIO() as buffer:
diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py
index e09c97de576e..b308366fca28 100644
--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -310,7 +310,7 @@ class MediaConnector:
 
 def encode_audio_base64(
     audio: np.ndarray,
-    sampling_rate: float,
+    sampling_rate: int,
 ) -> str:
     """Encode audio as base64."""
     audio_io = AudioMediaIO()