vllm/tests/v1/tpu/test_multimodal.py
Cyrus Leung bb62dda2c3
[Misc] Introduce encode_*_url utility function (#31208)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-12-23 13:45:21 +00:00

77 lines
2.5 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import openai
import pytest
from vllm.multimodal.utils import encode_image_url
from vllm.platforms import current_platform
from ...entrypoints.openai.test_vision import TEST_IMAGE_ASSETS
from ...utils import RemoteOpenAIServer
@pytest.fixture(scope="session")
def url_encoded_image(local_asset_server) -> dict[str, str]:
return {
image_asset: encode_image_url(local_asset_server.get_image_asset(image_asset))
for image_asset in TEST_IMAGE_ASSETS
}
@pytest.mark.asyncio
@pytest.mark.skipif(not current_platform.is_tpu(), reason="This test needs a TPU")
@pytest.mark.parametrize("model_name", ["llava-hf/llava-1.5-7b-hf"])
async def test_basic_vision(model_name: str, url_encoded_image: dict[str, str]):
pytest.skip("Skip this test until it's fixed.")
def whats_in_this_image_msg(url):
return [
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": url}},
],
}
]
server_args = [
"--max-model-len",
"1024",
"--max-num-seqs",
"16",
"--gpu-memory-utilization",
"0.95",
"--trust-remote-code",
"--max-num-batched-tokens",
"576",
# NOTE: max-num-batched-tokens>=mm_item_size
"--disable_chunked_mm_input",
]
# Server will pre-compile on first startup (takes a long time).
with RemoteOpenAIServer(
model_name, server_args, max_wait_seconds=600
) as remote_server:
client: openai.AsyncOpenAI = remote_server.get_async_client()
# Other requests now should be much faster
for image_url in TEST_IMAGE_ASSETS:
image_url = url_encoded_image[image_url]
chat_completion_from_url = await client.chat.completions.create(
model=model_name,
messages=whats_in_this_image_msg(image_url),
max_completion_tokens=24,
temperature=0.0,
)
result = chat_completion_from_url
assert result
choice = result.choices[0]
assert choice.finish_reason == "length"
message = choice.message
message = result.choices[0].message
assert message.content is not None and len(message.content) >= 10
assert message.role == "assistant"