# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.multimodal import MULTIMODAL_REGISTRY

from ....conftest import ImageTestAssets
from ...utils import build_model_context


@pytest.mark.parametrize("model_id", ["google/gemma-3-4b-it"])
def test_get_image_size_with_most_features(
    image_assets: ImageTestAssets, model_id: str
):
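    # Enable pan-and-scan so the processor can split an image into multiple
    # crops, exercising the configuration that yields the most image features.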
    ctx = build_model_context(
        model_id,
        mm_processor_kwargs={"do_pan_and_scan": True},
        limit_mm_per_prompt={"image": 1},
    )
    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)

    hf_processor_mm_kwargs: dict[str, object] = {}
    hf_processor = processor.info.get_hf_processor(**hf_processor_mm_kwargs)
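
    # Compute the token count at the feature-maximizing image size; this is
    # the processor's claimed upper bound on image tokens per prompt.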
    max_image_size = processor.info.get_image_size_with_most_features()
    max_tokens = processor.info.get_num_image_tokens(
        image_width=max_image_size.width,
        image_height=max_image_size.height,
        processor=hf_processor,
    )

    prompt = "<start_of_image>"
    image_seq_length = hf_processor.image_seq_length
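
    # No real test image should produce more image tokens than the claimed
    # upper bound.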
    for asset in image_assets:
        mm_data = {"image": [asset.pil_image]}
        processed_inputs = processor.apply(prompt, mm_data, hf_processor_mm_kwargs)
        mm_kwargs_data = processed_inputs["mm_kwargs"].get_data()
        num_patches_tensor = mm_kwargs_data["num_patches"]
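        # Each pan-and-scan crop contributes image_seq_length placeholder tokens.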
        tokens = int(num_patches_tensor.item()) * image_seq_length
        assert tokens <= max_tokens