diff --git a/docs/source/serving/multimodal_inputs.md b/docs/source/serving/multimodal_inputs.md index d9a093e8d145..bcaa4f9b96cd 100644 --- a/docs/source/serving/multimodal_inputs.md +++ b/docs/source/serving/multimodal_inputs.md @@ -216,7 +216,7 @@ A chat template is **required** to use Chat Completions API. Although most models come with a chat template, for others you have to define one yourself. The chat template can be inferred based on the documentation on the model's HuggingFace repo. -For example, LLaVA-1.5 (`llava-hf/llava-1.5-7b-hf`) requires a chat template that can be found here: +For example, DeepSeek-VL2 requires a chat template that can be found here: ::: ### Image Inputs diff --git a/examples/online_serving/openai_chat_completion_client_for_multimodal.py b/examples/online_serving/openai_chat_completion_client_for_multimodal.py index 70db4d95e649..cffd093c983a 100644 --- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py +++ b/examples/online_serving/openai_chat_completion_client_for_multimodal.py @@ -5,7 +5,7 @@ and run online serving with OpenAI client. Launch the vLLM server with the following command: (single image inference with Llava) -vllm serve llava-hf/llava-1.5-7b-hf --chat-template template_llava.jinja +vllm serve llava-hf/llava-1.5-7b-hf (multi-image inference with Phi-3.5-vision-instruct) vllm serve microsoft/Phi-3.5-vision-instruct --task generate \ diff --git a/examples/template_chameleon.jinja b/examples/template_chameleon.jinja new file mode 100644 index 000000000000..3fa2cccc2406 --- /dev/null +++ b/examples/template_chameleon.jinja @@ -0,0 +1,3 @@ +{%- for message in messages -%} + {{- message['content'] -}} +{%- endfor -%} diff --git a/examples/template_florence2.jinja b/examples/template_florence2.jinja index d257aed6a85b..3fa2cccc2406 100644 --- a/examples/template_florence2.jinja +++ b/examples/template_florence2.jinja @@ -1,7 +1,3 @@ {%- for message in messages -%} - {%- if message['role'] == 'user' -%} - {{- message['content'] -}} - {%- elif message['role'] == 'assistant' -%} - {{- message['content'] -}} - {%- endif -%} + {{- message['content'] -}} {%- endfor -%} diff --git a/examples/template_fuyu.jinja b/examples/template_fuyu.jinja new file mode 100644 index 000000000000..ec337d0c6447 --- /dev/null +++ b/examples/template_fuyu.jinja @@ -0,0 +1,3 @@ +{%- for message in messages -%} + {{- message['content'] + '\n' -}} +{%- endfor -%} diff --git a/examples/template_llava.jinja b/examples/template_llava.jinja deleted file mode 100644 index 6a902ee16772..000000000000 --- a/examples/template_llava.jinja +++ /dev/null @@ -1,23 +0,0 @@ -{%- if messages[0]['role'] == 'system' -%} - {%- set system_message = messages[0]['content'] -%} - {%- set messages = messages[1:] -%} -{%- else -%} - {% set system_message = '' -%} -{%- endif -%} - -{{ bos_token + system_message }} -{%- for message in messages -%} - {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%} - {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} - {%- endif -%} - - {%- if message['role'] == 'user' -%} - {{ 'USER: ' + message['content'] + '\n' }} - {%- elif message['role'] == 'assistant' -%} - {{ 'ASSISTANT: ' + message['content'] + eos_token + '\n' }} - {%- endif -%} -{%- endfor -%} - -{%- if add_generation_prompt -%} - {{ 'ASSISTANT:' }} -{% endif %} diff --git a/examples/template_paligemma.jinja b/examples/template_paligemma.jinja new file mode 100644 index 000000000000..3fa2cccc2406 --- /dev/null +++ b/examples/template_paligemma.jinja @@ -0,0 +1,3 @@ +{%- for message in messages -%} + {{- message['content'] -}} +{%- endfor -%} diff --git a/examples/template_qwen_vl.jinja b/examples/template_qwen_vl.jinja new file mode 100644 index 000000000000..3fa2cccc2406 --- /dev/null +++ b/examples/template_qwen_vl.jinja @@ -0,0 +1,3 @@ +{%- for message in messages -%} + {{- message['content'] -}} +{%- endfor -%} diff --git a/examples/template_qwen_vl_chat.jinja b/examples/template_qwen_vl_chat.jinja new file mode 100644 index 000000000000..e76ab0c2d25a --- /dev/null +++ b/examples/template_qwen_vl_chat.jinja @@ -0,0 +1,10 @@ +{%- for message in messages -%} + {{- '<|im_start|>' + message['role'] + '\n' + message['content'] -}} + {%- if (loop.last and add_generation_prompt) or not loop.last -%} + {{- '<|im_end|>' + '\n' -}} + {%- endif -%} +{%- endfor -%} + +{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%} + {{- '<|im_start|>assistant\n' -}} +{%- endif -%} diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 92c1e0fec6b7..1de30f0ac057 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -900,6 +900,7 @@ def test_resolve_content_format_hf_defined(model, expected_format): [("template_alpaca.jinja", "string"), ("template_baichuan.jinja", "string"), ("template_blip2.jinja", "string"), + ("template_chameleon.jinja", "string"), ("template_chatglm.jinja", "string"), ("template_chatglm2.jinja", "string"), ("template_chatml.jinja", "string"), @@ -908,9 +909,12 @@ def test_resolve_content_format_hf_defined(model, expected_format): ("template_falcon_180b.jinja", "string"), ("template_falcon.jinja", "string"), ("template_florence2.jinja", "string"), + ("template_fuyu.jinja", "string"), ("template_inkbot.jinja", "string"), - ("template_llava.jinja", "string"), + ("template_paligemma.jinja", "string"), ("template_teleflm.jinja", "string"), + ("template_qwen_vl.jinja", "string"), + ("template_qwen_vl_chat.jinja", "string"), ("template_vlm2vec.jinja", "openai"), ("tool_chat_template_granite_20b_fc.jinja", "string"), ("tool_chat_template_hermes.jinja", "string"), diff --git a/tests/v1/tpu/test_multimodal.py b/tests/v1/tpu/test_multimodal.py index dbd2e220451c..8c87fc836b51 100644 --- a/tests/v1/tpu/test_multimodal.py +++ b/tests/v1/tpu/test_multimodal.py @@ -64,8 +64,6 @@ async def test_basic_vision(model_name: str, base64_encoded_image: dict[str, "576", # NOTE: max-num-batched-tokens>=mm_item_size "--disable_chunked_mm_input", - "--chat-template", - "examples/template_llava.jinja" ] # Server will pre-compile on first startup (takes a long time).