From 7c3a0741c67007dd759f52d07f7aca854628b81b Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Thu, 14 Aug 2025 17:35:43 +0800 Subject: [PATCH] [Bugfix] Fix `PixtralHFImagePixelInputs` dynamic shape check (#22827) Signed-off-by: Isotr0py --- tests/models/multimodal/test_tensor_schema.py | 2 +- vllm/model_executor/models/llava.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/models/multimodal/test_tensor_schema.py b/tests/models/multimodal/test_tensor_schema.py index a4cb1a68833a..92390d8c2f7e 100644 --- a/tests/models/multimodal/test_tensor_schema.py +++ b/tests/models/multimodal/test_tensor_schema.py @@ -153,4 +153,4 @@ def test_model_tensor_schema(model_arch: str, vllm_runner: type[VllmRunner], if hasattr(model, method_name): getattr(model, method_name)(**mm_kwargs) - vllm_model.apply_model(validate_model_input) + vllm_model.apply_model(validate_model_input) \ No newline at end of file diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 89d2817b57e0..4927d6b62c6d 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -72,8 +72,9 @@ class PixtralHFImagePixelInputs(TensorSchema): in which case the data is passed as a list instead of a batched tensor. """ type: Literal["pixel_values_pixtral"] = "pixel_values_pixtral" - pixel_values: Annotated[Union[torch.Tensor, list[torch.Tensor]], - TensorShape("bn", "c", "h", "w")] + pixel_values: Annotated[ + Union[torch.Tensor, list[torch.Tensor]], + TensorShape("bn", "c", "h", "w", dynamic_dims={"h", "w"})] class LlavaImageEmbeddingInputs(TensorSchema):