Validate the URL sub-object of image/audio/video chat content parts.

chat_utils.py: _ImageParser, _AudioParser and _VideoParser change from
partial(cast, ...) to pydantic TypeAdapter(...).validate_python.
tests: each modality gains a test that sends "<modality>_url" as a plain
string instead of {"url": ...} and expects openai.BadRequestError.

diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py
index 29d5a85af6132..72e616656775e 100644
--- a/tests/entrypoints/openai/test_audio.py
+++ b/tests/entrypoints/openai/test_audio.py
@@ -104,6 +104,35 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
     assert message.content is not None and len(message.content) >= 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("audio_url", [TEST_AUDIO_URLS[0]])
+async def test_error_on_invalid_audio_url_type(client: openai.AsyncOpenAI,
+                                               model_name: str,
+                                               audio_url: str):
+    messages = [{
+        "role":
+        "user",
+        "content": [
+            {
+                "type": "audio_url",
+                "audio_url": audio_url
+            },
+            {
+                "type": "text",
+                "text": "What's happening in this audio?"
+            },
+        ],
+    }]
+
+    # audio_url should be a dict {"url": "some url"}, not directly a string
+    with pytest.raises(openai.BadRequestError):
+        _ = await client.chat.completions.create(model=model_name,
+                                                 messages=messages,
+                                                 max_completion_tokens=10,
+                                                 temperature=0.0)
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("audio_url", [TEST_AUDIO_URLS[0]])
diff --git a/tests/entrypoints/openai/test_video.py b/tests/entrypoints/openai/test_video.py
index 8679c2f25db40..53f057a294c0a 100644
--- a/tests/entrypoints/openai/test_video.py
+++ b/tests/entrypoints/openai/test_video.py
@@ -108,6 +108,35 @@ async def test_single_chat_session_video(client: openai.AsyncOpenAI,
     assert message.content is not None and len(message.content) >= 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
+async def test_error_on_invalid_video_url_type(client: openai.AsyncOpenAI,
+                                               model_name: str,
+                                               video_url: str):
+    messages = [{
+        "role":
+        "user",
+        "content": [
+            {
+                "type": "video_url",
+                "video_url": video_url
+            },
+            {
+                "type": "text",
+                "text": "What's in this video?"
+            },
+        ],
+    }]
+
+    # video_url should be a dict {"url": "some url"}, not directly a string
+    with pytest.raises(openai.BadRequestError):
+        _ = await client.chat.completions.create(model=model_name,
+                                                 messages=messages,
+                                                 max_completion_tokens=10,
+                                                 temperature=0.0)
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py
index 87b5cee73ecb0..1ab50b41c7ecb 100644
--- a/tests/entrypoints/openai/test_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
@@ -137,6 +137,36 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     assert message.content is not None and len(message.content) >= 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
+async def test_error_on_invalid_image_url_type(client: openai.AsyncOpenAI,
+                                               model_name: str,
+                                               image_url: str):
+    content_text = "What's in this image?"
+    messages = [{
+        "role":
+        "user",
+        "content": [
+            {
+                "type": "image_url",
+                "image_url": image_url
+            },
+            {
+                "type": "text",
+                "text": content_text
+            },
+        ],
+    }]
+
+    # image_url should be a dict {"url": "some url"}, not directly a string
+    with pytest.raises(openai.BadRequestError):
+        _ = await client.chat.completions.create(model=model_name,
+                                                 messages=messages,
+                                                 max_completion_tokens=10,
+                                                 temperature=0.0)
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 2e19ebcdd61f3..bd2c3357cdc00 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -27,10 +27,11 @@ from openai.types.chat import (ChatCompletionMessageToolCallParam,
                                ChatCompletionToolMessageParam)
 from openai.types.chat.chat_completion_content_part_input_audio_param import (
     InputAudio)
+from pydantic import TypeAdapter
 # yapf: enable
-# pydantic needs the TypedDict from typing_extensions
 from transformers import (PreTrainedTokenizer, PreTrainedTokenizerFast,
                           ProcessorMixin)
+# pydantic needs the TypedDict from typing_extensions
 from typing_extensions import Required, TypeAlias, TypedDict
 
 from vllm.config import ModelConfig
@@ -879,12 +880,13 @@ def _get_full_multimodal_text_prompt(placeholder_counts: dict[str, int],
 
 # No need to validate using Pydantic again
 _TextParser = partial(cast, ChatCompletionContentPartTextParam)
-_ImageParser = partial(cast, ChatCompletionContentPartImageParam)
 _ImageEmbedsParser = partial(cast, ChatCompletionContentPartImageEmbedsParam)
-_AudioParser = partial(cast, ChatCompletionContentPartAudioParam)
 _InputAudioParser = partial(cast, ChatCompletionContentPartInputAudioParam)
 _RefusalParser = partial(cast, ChatCompletionContentPartRefusalParam)
-_VideoParser = partial(cast, ChatCompletionContentPartVideoParam)
+# Need to validate url objects
+_ImageParser = TypeAdapter(ChatCompletionContentPartImageParam).validate_python
+_AudioParser = TypeAdapter(ChatCompletionContentPartAudioParam).validate_python
+_VideoParser = TypeAdapter(ChatCompletionContentPartVideoParam).validate_python
 
 _ContentPart: TypeAlias = Union[str, dict[str, str], InputAudio]
 