[Frontend] Add missing chat templates for various MLLMs (#17758)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-05-07 15:10:01 +08:00 committed by GitHub
parent 043e4c4955
commit 8a15c2603a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 30 additions and 33 deletions

View File

@ -216,7 +216,7 @@ A chat template is **required** to use Chat Completions API.
Although most models come with a chat template, for others you have to define one yourself. Although most models come with a chat template, for others you have to define one yourself.
The chat template can be inferred based on the documentation on the model's HuggingFace repo. The chat template can be inferred based on the documentation on the model's HuggingFace repo.
For example, LLaVA-1.5 (`llava-hf/llava-1.5-7b-hf`) requires a chat template that can be found here: <gh-file:examples/template_llava.jinja> For example, DeepSeek-VL2 requires a chat template that can be found here: <gh-file:examples/template_deepseek_vl2.jinja>
::: :::
### Image Inputs ### Image Inputs

View File

@ -5,7 +5,7 @@ and run online serving with OpenAI client.
Launch the vLLM server with the following command: Launch the vLLM server with the following command:
(single image inference with Llava) (single image inference with Llava)
vllm serve llava-hf/llava-1.5-7b-hf --chat-template template_llava.jinja vllm serve llava-hf/llava-1.5-7b-hf
(multi-image inference with Phi-3.5-vision-instruct) (multi-image inference with Phi-3.5-vision-instruct)
vllm serve microsoft/Phi-3.5-vision-instruct --task generate \ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \

View File

@ -0,0 +1,3 @@
{%- for message in messages -%}
{{- message['content'] -}}
{%- endfor -%}

View File

@ -1,7 +1,3 @@
{%- for message in messages -%} {%- for message in messages -%}
{%- if message['role'] == 'user' -%} {{- message['content'] -}}
{{- message['content'] -}}
{%- elif message['role'] == 'assistant' -%}
{{- message['content'] -}}
{%- endif -%}
{%- endfor -%} {%- endfor -%}

View File

@ -0,0 +1,3 @@
{%- for message in messages -%}
{{- message['content'] + '\n' -}}
{%- endfor -%}

View File

@ -1,23 +0,0 @@
{%- if messages[0]['role'] == 'system' -%}
{%- set system_message = messages[0]['content'] -%}
{%- set messages = messages[1:] -%}
{%- else -%}
{% set system_message = '' -%}
{%- endif -%}
{{ bos_token + system_message }}
{%- for message in messages -%}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{%- endif -%}
{%- if message['role'] == 'user' -%}
{{ 'USER: ' + message['content'] + '\n' }}
{%- elif message['role'] == 'assistant' -%}
{{ 'ASSISTANT: ' + message['content'] + eos_token + '\n' }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{ 'ASSISTANT:' }}
{% endif %}

View File

@ -0,0 +1,3 @@
{%- for message in messages -%}
{{- message['content'] -}}
{%- endfor -%}

View File

@ -0,0 +1,3 @@
{%- for message in messages -%}
{{- message['content'] -}}
{%- endfor -%}

View File

@ -0,0 +1,10 @@
{%- for message in messages -%}
{{- '<|im_start|>' + message['role'] + '\n' + message['content'] -}}
{%- if (loop.last and add_generation_prompt) or not loop.last -%}
{{- '<|im_end|>' + '\n' -}}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
{{- '<|im_start|>assistant\n' -}}
{%- endif -%}

View File

@ -900,6 +900,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
[("template_alpaca.jinja", "string"), [("template_alpaca.jinja", "string"),
("template_baichuan.jinja", "string"), ("template_baichuan.jinja", "string"),
("template_blip2.jinja", "string"), ("template_blip2.jinja", "string"),
("template_chameleon.jinja", "string"),
("template_chatglm.jinja", "string"), ("template_chatglm.jinja", "string"),
("template_chatglm2.jinja", "string"), ("template_chatglm2.jinja", "string"),
("template_chatml.jinja", "string"), ("template_chatml.jinja", "string"),
@ -908,9 +909,12 @@ def test_resolve_content_format_hf_defined(model, expected_format):
("template_falcon_180b.jinja", "string"), ("template_falcon_180b.jinja", "string"),
("template_falcon.jinja", "string"), ("template_falcon.jinja", "string"),
("template_florence2.jinja", "string"), ("template_florence2.jinja", "string"),
("template_fuyu.jinja", "string"),
("template_inkbot.jinja", "string"), ("template_inkbot.jinja", "string"),
("template_llava.jinja", "string"), ("template_paligemma.jinja", "string"),
("template_teleflm.jinja", "string"), ("template_teleflm.jinja", "string"),
("template_qwen_vl.jinja", "string"),
("template_qwen_vl_chat.jinja", "string"),
("template_vlm2vec.jinja", "openai"), ("template_vlm2vec.jinja", "openai"),
("tool_chat_template_granite_20b_fc.jinja", "string"), ("tool_chat_template_granite_20b_fc.jinja", "string"),
("tool_chat_template_hermes.jinja", "string"), ("tool_chat_template_hermes.jinja", "string"),

View File

@ -64,8 +64,6 @@ async def test_basic_vision(model_name: str, base64_encoded_image: dict[str,
"576", "576",
# NOTE: max-num-batched-tokens>=mm_item_size # NOTE: max-num-batched-tokens>=mm_item_size
"--disable_chunked_mm_input", "--disable_chunked_mm_input",
"--chat-template",
"examples/template_llava.jinja"
] ]
# Server will pre-compile on first startup (takes a long time). # Server will pre-compile on first startup (takes a long time).