mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 10:35:01 +08:00
42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
import numpy as np
|
|
import numpy.typing as npt
|
|
|
|
from vllm.inputs.registry import InputContext
|
|
|
|
from .base import MultiModalPlugin
|
|
from .inputs import AudioItem, MultiModalData, MultiModalKwargs
|
|
|
|
|
|
class AudioPlugin(MultiModalPlugin):
    """Multi-modal plugin for the ``"audio"`` data key.

    Neither of the default hooks defined here has an implementation:
    both raise ``NotImplementedError`` when called.
    """

    def get_data_key(self) -> str:
        """Return the data key handled by this plugin, ``"audio"``."""
        return "audio"

    def _default_input_mapper(
        self,
        ctx: InputContext,
        data: MultiModalData[AudioItem],
        **mm_processor_kwargs,
    ) -> MultiModalKwargs:
        """Always fail: no default input mapper is provided for audio.

        Args:
            ctx: Input context (unused here).
            data: Audio multi-modal data (unused here).
            **mm_processor_kwargs: Extra processor options (unused here).

        Raises:
            NotImplementedError: Unconditionally.
        """
        message = "There is no default audio input mapper"
        raise NotImplementedError(message)

    def _default_max_multimodal_tokens(self, ctx: InputContext) -> int:
        """Always fail: no default maximum token count exists for audio.

        Args:
            ctx: Input context (unused here).

        Raises:
            NotImplementedError: Unconditionally.
        """
        message = "There is no default maximum multimodal tokens"
        raise NotImplementedError(message)
|
|
|
|
|
|
def resample_audio(
    audio: npt.NDArray[np.floating],
    *,
    orig_sr: float,
    target_sr: float,
) -> npt.NDArray[np.floating]:
    """Resample ``audio`` from ``orig_sr`` to ``target_sr`` with librosa.

    ``librosa`` is imported lazily inside the function so that it is only
    required when this function is actually called.

    Args:
        audio: Floating-point audio samples, passed to ``librosa.resample``
            unchanged.
        orig_sr: Sampling rate of ``audio`` (keyword-only).
        target_sr: Sampling rate to resample to (keyword-only).

    Returns:
        Whatever ``librosa.resample`` returns for the given arguments.

    Raises:
        ImportError: If ``librosa`` cannot be imported; the original
            import error is chained as the cause.
    """
    try:
        import librosa
    except ImportError as import_err:
        # Re-raise with an actionable hint while keeping the root cause.
        raise ImportError(
            "Please install vllm[audio] for audio support.") from import_err

    resampled = librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
    return resampled
|