diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py
index 712667b1e274..ed9d6c0ab4ce 100644
--- a/vllm/model_executor/models/transformers.py
+++ b/vllm/model_executor/models/transformers.py
@@ -709,6 +709,13 @@ def flatten_and_concat(x: list[torch.Tensor]) -> torch.Tensor:
     MultiModalProcessor,
     info=MultiModalProcessingInfo,
     dummy_inputs=MultiModalDummyInputsBuilder)
+@support_torch_compile(
+    dynamic_arg_dims={
+        "input_ids": 0,
+        "positions": -1,
+        "intermediate_tensors": 0,
+        "inputs_embeds": 0,
+    })  # set `positions` to last dim to support Qwen-mrope
 class TransformersForMultimodalLM(TransformersForCausalLM, SupportsMultiModal):
     # Backwards compatibility for prev released models. State dicts back then
     # had different formats and cannot be loaded with `AutoModel` mapping as is
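
For context on the `"positions": -1` entry: `dynamic_arg_dims` tells `support_torch_compile` which dimension of each `forward()` argument varies with sequence length, so Dynamo treats it as dynamic instead of recompiling per shape. Below is a minimal sketch of the effect, assuming (not confirmed by this diff) that the decorator resolves negative dims against the actual tensor and forwards them to `torch._dynamo.mark_dynamic`; tensor shapes and the hidden size are illustrative only.

```python
import torch

num_tokens = 8  # illustrative sequence length
input_ids = torch.zeros(num_tokens, dtype=torch.long)     # (num_tokens,)
positions = torch.zeros(3, num_tokens, dtype=torch.long)  # mrope: (3, num_tokens)
inputs_embeds = torch.zeros(num_tokens, 4096)             # (num_tokens, hidden_size)

# "input_ids": 0 and "inputs_embeds": 0 -- the token dimension is dim 0.
torch._dynamo.mark_dynamic(input_ids, 0)
torch._dynamo.mark_dynamic(inputs_embeds, 0)

# "positions": -1 -- with Qwen-style mrope, positions is (3, num_tokens)
# rather than (num_tokens,), so the varying dim is the *last* one. A
# negative index covers both layouts once resolved against the tensor.
torch._dynamo.mark_dynamic(positions, positions.ndim - 1)

# `intermediate_tensors` is omitted here: it is a container of tensors
# (used for pipeline parallelism), presumably handled per-tensor by the
# decorator rather than being a tensor itself.
```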