diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py
index f3b273eb41e8..d7e9d402a1f9 100644
--- a/vllm/multimodal/audio.py
+++ b/vllm/multimodal/audio.py
@@ -106,7 +106,7 @@ class AudioMediaIO(MediaIO[tuple[npt.NDArray, float]]):
     def load_file(self, filepath: Path) -> tuple[npt.NDArray, float]:
         return librosa.load(filepath, sr=None)
 
-    def encode_base64(self, media: tuple[npt.NDArray, float]) -> str:
+    def encode_base64(self, media: tuple[npt.NDArray, int]) -> str:
         audio, sr = media
 
         with BytesIO() as buffer:
diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py
index e09c97de576e..b308366fca28 100644
--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -310,7 +310,7 @@ class MediaConnector:
 
 def encode_audio_base64(
     audio: np.ndarray,
-    sampling_rate: float,
+    sampling_rate: int,
 ) -> str:
     """Encode audio as base64."""
     audio_io = AudioMediaIO()