mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-05 08:56:41 +08:00
[Fix] improve aspect ratio in dummy image generation and add common VLM tests for PaddleOCR-VL (#28711)
Signed-off-by: dongbo910220 <1275604947@qq.com>
This commit is contained in:
parent
3f8a874065
commit
c934caee88
@@ -12,6 +12,7 @@ import pytest
|
|||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoModel,
|
AutoModel,
|
||||||
|
AutoModelForCausalLM,
|
||||||
AutoModelForImageTextToText,
|
AutoModelForImageTextToText,
|
||||||
AutoModelForTextToWaveform,
|
AutoModelForTextToWaveform,
|
||||||
)
|
)
|
||||||
@@ -691,6 +692,23 @@ VLM_TEST_SETTINGS = {
|
|||||||
patch_hf_runner=model_utils.ovis2_5_patch_hf_runner,
|
patch_hf_runner=model_utils.ovis2_5_patch_hf_runner,
|
||||||
hf_model_kwargs={"revision": "refs/pr/5"},
|
hf_model_kwargs={"revision": "refs/pr/5"},
|
||||||
),
|
),
|
||||||
|
"paddleocr_vl": VLMTestInfo(
|
||||||
|
models=["PaddlePaddle/PaddleOCR-VL"],
|
||||||
|
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||||
|
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
||||||
|
img_idx_to_prompt=lambda idx: (
|
||||||
|
"<|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>"
|
||||||
|
),
|
||||||
|
multi_image_prompt=(
|
||||||
|
"Image-1: <|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>\n"
|
||||||
|
"Image-2: <|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>\n"
|
||||||
|
"Describe these two images separately."
|
||||||
|
),
|
||||||
|
max_model_len=8192,
|
||||||
|
max_num_seqs=2,
|
||||||
|
auto_cls=AutoModelForCausalLM,
|
||||||
|
image_size_factors=[(), (0.25,)],
|
||||||
|
),
|
||||||
"phi3v": VLMTestInfo(
|
"phi3v": VLMTestInfo(
|
||||||
models=["microsoft/Phi-3.5-vision-instruct"],
|
models=["microsoft/Phi-3.5-vision-instruct"],
|
||||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||||
|
|||||||
@@ -232,8 +232,7 @@ class PaddleOCRVLProcessingInfo(BaseProcessingInfo):
|
|||||||
# Find factors of max_num_tokens close to its square root
|
# Find factors of max_num_tokens close to its square root
|
||||||
# to create a dummy image with a reasonable aspect ratio.
|
# to create a dummy image with a reasonable aspect ratio.
|
||||||
h_patches = int(math.sqrt(max_num_tokens))
|
h_patches = int(math.sqrt(max_num_tokens))
|
||||||
while max_num_tokens % h_patches != 0:
|
max_num_tokens -= max_num_tokens % h_patches
|
||||||
h_patches -= 1
|
|
||||||
w_patches = max_num_tokens // h_patches
|
w_patches = max_num_tokens // h_patches
|
||||||
return ImageSize(height=h_patches * factor, width=w_patches * factor)
|
return ImageSize(height=h_patches * factor, width=w_patches * factor)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user