From 0eebd74842d56a2c84374ee2559ef99f9c841774 Mon Sep 17 00:00:00 2001
From: Lukas Geiger
Date: Tue, 27 May 2025 04:13:37 +0100
Subject: [PATCH] [Model][Gemma3] Simplify image input validation (#18710)

Signed-off-by: Lukas Geiger
---
 vllm/model_executor/models/gemma3_mm.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py
index 743542ec8dfa..c4ae5b50c451 100644
--- a/vllm/model_executor/models/gemma3_mm.py
+++ b/vllm/model_executor/models/gemma3_mm.py
@@ -504,18 +504,12 @@ class Gemma3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP,
         return next(self.parameters()).dtype
 
     def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor:
-        h = w = self.config.vision_config.image_size
-        expected_dims = (3, h, w)
-
-        def _validate_shape(d: torch.Tensor):
-            if d.shape != expected_dims:
-                raise ValueError(
-                    "The expected shape of pixel values per image per batch "
-                    f"is {expected_dims}. You supplied {tuple(d.shape)}.")
-
-        for d in data:
-            _validate_shape(d)
-
+        image_size = self.config.vision_config.image_size
+        expected_dims = (3, image_size, image_size)
+        if data.shape[1:] != expected_dims:
+            raise ValueError(
+                "The expected shape of pixel values per image per batch is "
+                f"{expected_dims}. You supplied {tuple(data.shape)}.")
         return data
 
     def _parse_and_validate_image_input(