mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 03:05:02 +08:00
[Bugfix] Fix Qwen Omni audio inference (#27920)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
758ea2e980
commit
853a8eb53b
@ -130,6 +130,8 @@ class Qwen2_5OmniAudioFeatureInputs(TensorSchema):
|
||||
TensorShape("nmb", "tsl", dynamic_dims={"tsl"}),
|
||||
]
|
||||
|
||||
audio_feature_lengths: Annotated[torch.Tensor, TensorShape("na")]
|
||||
|
||||
feature_attention_mask: Annotated[
|
||||
torch.Tensor | list[torch.Tensor],
|
||||
TensorShape("na", "msl", dynamic_dims={"msl"}),
|
||||
@ -732,13 +734,6 @@ class Qwen2_5OmniConditionalGenerationMixin:
|
||||
input_features = audio_input["input_features"]
|
||||
audio_feature_lengths = audio_input["audio_feature_lengths"]
|
||||
|
||||
if audio_feature_lengths.shape[0] == 1:
|
||||
audio_feature_lengths = audio_feature_lengths.squeeze(0)
|
||||
elif audio_feature_lengths.shape[1] == 1:
|
||||
audio_feature_lengths = audio_feature_lengths.squeeze(1)
|
||||
else:
|
||||
raise AssertionError(audio_feature_lengths.shape)
|
||||
|
||||
audio_feat_lengths, audio_output_lengths = (
|
||||
self.audio_tower._get_feat_extract_output_lengths(audio_feature_lengths)
|
||||
)
|
||||
|
||||
@ -99,7 +99,6 @@ from .utils import (
|
||||
AutoWeightsLoader,
|
||||
WeightsMapper,
|
||||
_merge_multimodal_embeddings,
|
||||
flatten_bn,
|
||||
maybe_prefix,
|
||||
)
|
||||
from .vision import (
|
||||
@ -1065,8 +1064,6 @@ class Qwen3OmniMoeConditionalGenerationMixin(Qwen2_5OmniConditionalGenerationMix
|
||||
input_features = audio_input["input_features"]
|
||||
audio_feature_lengths = audio_input["audio_feature_lengths"]
|
||||
|
||||
audio_feature_lengths = flatten_bn(audio_feature_lengths, concat=True)
|
||||
|
||||
audio_feat_lengths, audio_output_lengths = _get_feat_extract_output_lengths(
|
||||
audio_feature_lengths
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user