diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py index 84a5260ad9a08..a0f1229f0af5a 100644 --- a/tests/models/decoder_only/vision_language/test_models.py +++ b/tests/models/decoder_only/vision_language/test_models.py @@ -9,7 +9,8 @@ from pathlib import PosixPath import pytest from packaging.version import Version -from transformers import AutoModelForPreTraining, AutoModelForVision2Seq +from transformers import (AutoModelForImageTextToText, AutoModelForPreTraining, + AutoModelForVision2Seq) from transformers import __version__ as TRANSFORMERS_VERSION from vllm.platforms import current_platform @@ -163,6 +164,7 @@ VLM_TEST_SETTINGS = { img_idx_to_prompt=lambda idx: "<|img|>\n", max_model_len=4096, max_num_seqs=2, + auto_cls=AutoModelForImageTextToText, single_image_prompts=IMAGE_ASSETS.prompts({ "stop_sign": "Please describe the image shortly.", "cherry_blossom": "Please infer the season with reason.",