diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py
index b2fc7be1af224..5d999a02b4e65 100644
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -76,20 +76,22 @@ class MolmoImageInputs(TensorSchema):
     """
     Dimensions:
         - bn: Batch size * number of images
-        - nc: Number of crops
+        - nc: Number of crops (dynamic)
         - np: Number of patches
+        - tp: Token sequence positions
         - pd: Patch dimension
     """
     images: Annotated[Union[torch.Tensor, list[torch.Tensor]],
-                      TensorShape("bn", "nc", "np", "pd")]
+                      TensorShape("bn", "nc", "np", "pd", dynamic_dims={"nc"})]
+    # Number of crops may vary per batch and image, so `images` may be a list.
 
     image_masks: Annotated[Optional[Union[torch.Tensor, list[torch.Tensor]]],
-                           TensorShape("bn", "nc", "np")]
+                           TensorShape("bn", "nc", "np", dynamic_dims={"nc"})]
 
-    feat_is_patch: Annotated[Union[torch.Tensor, list[torch.Tensor]],
-                             TensorShape("bn", "nc", "np")]
+    feat_is_patch: Annotated[
+        Union[torch.Tensor, list[torch.Tensor]],
+        TensorShape("bn", "nc", "tp", dynamic_dims={"nc"})]
     # A boolean mask indicating which image features correspond to patch tokens.
-
     num_crops: Annotated[torch.Tensor, TensorShape("bn")]