mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 22:55:35 +08:00
Migrate whisper inputs to TensorSchema (#23505)
Signed-off-by: Benji Beck <benjibeck@meta.com>
This commit is contained in:
parent
e9b92dcd89
commit
731a6940e3
@ -4,7 +4,7 @@
|
|||||||
import math
|
import math
|
||||||
from collections.abc import Iterable, Mapping, Sequence
|
from collections.abc import Iterable, Mapping, Sequence
|
||||||
from contextlib import nullcontext
|
from contextlib import nullcontext
|
||||||
from typing import Literal, Optional, TypedDict, Union, cast
|
from typing import Annotated, Literal, Optional, Union, cast
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
@ -40,6 +40,7 @@ from vllm.multimodal.processing import (BaseProcessingInfo,
|
|||||||
PromptReplacement, PromptUpdate)
|
PromptReplacement, PromptUpdate)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.transformers_utils.processor import cached_get_processor
|
from vllm.transformers_utils.processor import cached_get_processor
|
||||||
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
|
|
||||||
from .interfaces import (MultiModalEmbeddings, SupportsMultiModal,
|
from .interfaces import (MultiModalEmbeddings, SupportsMultiModal,
|
||||||
SupportsTranscription, SupportsV0Only)
|
SupportsTranscription, SupportsV0Only)
|
||||||
@ -111,9 +112,16 @@ ISO639_1_SUPPORTED_LANGS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WhisperAudioInputs(TypedDict):
|
class WhisperAudioInputs(TensorSchema):
|
||||||
input_features: NestedTensors
|
"""
|
||||||
"""Shape: `(batch_size, 128, M)`"""
|
Dimensions:
|
||||||
|
- b: Batch size
|
||||||
|
- nmb: Number of mel bins
|
||||||
|
- t: Time frames (M)
|
||||||
|
"""
|
||||||
|
|
||||||
|
input_features: Annotated[Optional[NestedTensors],
|
||||||
|
TensorShape("b", "nmb", "t")]
|
||||||
|
|
||||||
|
|
||||||
class WhisperPositionalEmbedding(nn.Embedding):
|
class WhisperPositionalEmbedding(nn.Embedding):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user