diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py
index 1ea317c2f95f9..16a97389cd21b 100644
--- a/vllm/model_executor/models/voxtral.py
+++ b/vllm/model_executor/models/voxtral.py
@@ -585,12 +585,12 @@ class VoxtralForConditionalGeneration(nn.Module, SupportsMultiModal,
          r"language_model.model.layers.\1.mlp.down_proj"),
         (r"layers\.(\d+)\.feed_forward\.w3",
          r"language_model.model.layers.\1.mlp.up_proj"),
-        (r"mm_whisper_embeddings\.whisper_encoder\.transformer\.layers\.(\d+)\.attention.wo",
-         r"whisper_encoder.whisper_encoder.layers.\1.layers.self_attn.out_proj"
-         ),
         (r"mm_whisper_embeddings\.whisper_encoder\.transformer\.layers\.(\d+)\.attention.w(.*)",
          r"whisper_encoder.whisper_encoder.layers.\1.layers.self_attn.\2_proj"
          ),
+        (r"mm_whisper_embeddings\.whisper_encoder\.transformer\.layers\.(\d+)\.attention.wo",
+         r"whisper_encoder.whisper_encoder.layers.\1.layers.self_attn.out_proj"
+         ),
         (r"mm_whisper_embeddings\.whisper_encoder\.transformer\.layers\.(\d+)\.feed_forward.w(\d+)",
          r"whisper_encoder.whisper_encoder.layers.\1.layers.mlp.fc\2"),
         (r"mm_whisper_embeddings\.whisper_encoder\.conv_layers\.0",
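
Note on the reordering: the generic rule `attention.w(.*)` also matches the `attention.wo` weights, so the relative order of these two entries determines whether the output projection is renamed to `out_proj` or `o_proj`. The snippet below is a minimal, self-contained sketch of that interaction; the `remap` helper and its sequential `re.sub` loop are illustrative assumptions, not vLLM's actual `load_weights` logic.

```python
import re

# The two whisper-encoder attention rules from the hunk above.
WO_RULE = (
    r"mm_whisper_embeddings\.whisper_encoder\.transformer\.layers\.(\d+)\.attention.wo",
    r"whisper_encoder.whisper_encoder.layers.\1.layers.self_attn.out_proj",
)
GENERIC_RULE = (
    r"mm_whisper_embeddings\.whisper_encoder\.transformer\.layers\.(\d+)\.attention.w(.*)",
    r"whisper_encoder.whisper_encoder.layers.\1.layers.self_attn.\2_proj",
)


def remap(name: str, rules) -> str:
    """Toy remapper (assumption, not vLLM's loader): apply each rule in order."""
    for pattern, repl in rules:
        name = re.sub(pattern, repl, name)
    return name


src = "mm_whisper_embeddings.whisper_encoder.transformer.layers.0.attention.wo"

# Specific "wo" rule first: the name is rewritten to out_proj before the
# generic rule runs, and the generic rule then no longer matches.
print(remap(src, [WO_RULE, GENERIC_RULE]))
# -> whisper_encoder.whisper_encoder.layers.0.layers.self_attn.out_proj

# Generic rule first: "wo" is captured by w(.*) and becomes o_proj instead.
print(remap(src, [GENERIC_RULE, WO_RULE]))
# -> whisper_encoder.whisper_encoder.layers.0.layers.self_attn.o_proj
```

Which ordering the loader actually needs depends on how it applies these rules; the hunk above only swaps the position of the two entries in the remapping list.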