[Frontend] Add missing chat templates for various MLLMs (#17758)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung authored 2025-05-07 15:10:01 +08:00, committed by GitHub
parent 043e4c4955
commit 8a15c2603a
11 changed files with 30 additions and 33 deletions


@@ -216,7 +216,7 @@ A chat template is **required** to use Chat Completions API.
 Although most models come with a chat template, for others you have to define one yourself.
 The chat template can be inferred based on the documentation on the model's HuggingFace repo.
-For example, LLaVA-1.5 (`llava-hf/llava-1.5-7b-hf`) requires a chat template that can be found here: <gh-file:examples/template_llava.jinja>
+For example, DeepSeek-VL2 requires a chat template that can be found here: <gh-file:examples/template_deepseek_vl2.jinja>
 :::
 ### Image Inputs
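
To preview what a template file actually produces before serving with it, the file can be rendered directly. A minimal sketch using plain jinja2 (the template path comes from the docs change above; the message list is made up):

# Sketch: preview a chat template's output, independent of vLLM.
import jinja2

with open("examples/template_deepseek_vl2.jinja") as f:
    template = jinja2.Template(f.read())

messages = [{"role": "user", "content": "What is in this image?"}]
print(template.render(messages=messages, add_generation_prompt=True))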


@@ -5,7 +5,7 @@ and run online serving with OpenAI client.
 Launch the vLLM server with the following command:
 (single image inference with Llava)
-vllm serve llava-hf/llava-1.5-7b-hf --chat-template template_llava.jinja
+vllm serve llava-hf/llava-1.5-7b-hf
 (multi-image inference with Phi-3.5-vision-instruct)
 vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
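
Once a server is up, requests go through the standard OpenAI client. A sketch of a single-image request (base_url assumes vLLM's default port; the image URL is a placeholder):

# Sketch: query the served model via the OpenAI-compatible API.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="llava-hf/llava-1.5-7b-hf",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
        ],
    }],
)
print(response.choices[0].message.content)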


@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] -}}
+{%- endfor -%}
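
This new template simply concatenates message contents, with no role markers or separators. A quick jinja2 sketch (message list made up) shows the effect:

# Sketch: render the bare-concatenation template above with plain jinja2.
import jinja2

tmpl = jinja2.Template(
    "{%- for message in messages -%}{{- message['content'] -}}{%- endfor -%}"
)
out = tmpl.render(messages=[{"role": "user", "content": "Describe the image."}])
print(repr(out))  # 'Describe the image.'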


@@ -1,7 +1,3 @@
 {%- for message in messages -%}
-{%- if message['role'] == 'user' -%}
-{{- message['content'] -}}
-{%- elif message['role'] == 'assistant' -%}
-{{- message['content'] -}}
-{%- endif -%}
+{{- message['content'] -}}
 {%- endfor -%}


@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] + '\n' -}}
+{%- endfor -%}
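
Unlike the bare-concatenation template, this one appends '\n' after each message, which separates turns in multi-message conversations. A jinja2 sketch with a two-turn conversation (contents made up):

# Sketch: each message's content is followed by a newline.
import jinja2

tmpl = jinja2.Template(
    "{%- for message in messages -%}{{- message['content'] + '\\n' -}}{%- endfor -%}"
)
out = tmpl.render(messages=[
    {"role": "user", "content": "Describe the image."},
    {"role": "assistant", "content": "A cat on a sofa."},
])
print(repr(out))  # 'Describe the image.\nA cat on a sofa.\n'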


@@ -1,23 +0,0 @@
-{%- if messages[0]['role'] == 'system' -%}
-{%- set system_message = messages[0]['content'] -%}
-{%- set messages = messages[1:] -%}
-{%- else -%}
-{% set system_message = '' -%}
-{%- endif -%}
-{{ bos_token + system_message }}
-{%- for message in messages -%}
-{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
-{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
-{%- endif -%}
-{%- if message['role'] == 'user' -%}
-{{ 'USER: ' + message['content'] + '\n' }}
-{%- elif message['role'] == 'assistant' -%}
-{{ 'ASSISTANT: ' + message['content'] + eos_token + '\n' }}
-{%- endif -%}
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-{{ 'ASSISTANT:' }}
-{% endif %}
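
This deleted template depended on bos_token, eos_token, and raise_exception, which HF's chat-template runtime injects; rendering it stand-alone means supplying them yourself. A sketch, assuming a hypothetical saved copy of the old file:

# Sketch: render the removed template outside HF's tokenizer machinery.
import jinja2

def raise_exception(message):
    # HF's template runtime provides this helper; plain jinja2 does not.
    raise jinja2.exceptions.TemplateError(message)

env = jinja2.Environment()
env.globals["raise_exception"] = raise_exception
with open("template_llava_old.jinja") as f:  # hypothetical saved copy
    tmpl = env.from_string(f.read())

print(tmpl.render(
    messages=[{"role": "user", "content": "What is shown here?"}],
    bos_token="<s>",
    eos_token="</s>",
    add_generation_prompt=True,
))
# <s>USER: What is shown here?
# ASSISTANT: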


@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] -}}
+{%- endfor -%}


@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] -}}
+{%- endfor -%}


@@ -0,0 +1,10 @@
+{%- for message in messages -%}
+{{- '<|im_start|>' + message['role'] + '\n' + message['content'] -}}
+{%- if (loop.last and add_generation_prompt) or not loop.last -%}
+{{- '<|im_end|>' + '\n' -}}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+{{- '<|im_start|>assistant\n' -}}
+{%- endif -%}
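
This one is a ChatML-style template: each turn is wrapped in <|im_start|>/<|im_end|> markers, and a generation prompt opens an assistant turn. Rendered for a single user message (jinja2 sketch, template string copied from above):

# Sketch: render the ChatML-style template above for one user turn.
import jinja2

chatml = (
    "{%- for message in messages -%}"
    "{{- '<|im_start|>' + message['role'] + '\\n' + message['content'] -}}"
    "{%- if (loop.last and add_generation_prompt) or not loop.last -%}"
    "{{- '<|im_end|>' + '\\n' -}}"
    "{%- endif -%}"
    "{%- endfor -%}"
    "{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}"
    "{{- '<|im_start|>assistant\\n' -}}"
    "{%- endif -%}"
)
out = jinja2.Template(chatml).render(
    messages=[{"role": "user", "content": "Describe the image."}],
    add_generation_prompt=True,
)
print(out)
# <|im_start|>user
# Describe the image.<|im_end|>
# <|im_start|>assistant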


@@ -900,6 +900,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
 [("template_alpaca.jinja", "string"),
 ("template_baichuan.jinja", "string"),
 ("template_blip2.jinja", "string"),
+("template_chameleon.jinja", "string"),
 ("template_chatglm.jinja", "string"),
 ("template_chatglm2.jinja", "string"),
 ("template_chatml.jinja", "string"),
@@ -908,9 +909,12 @@ def test_resolve_content_format_hf_defined(model, expected_format):
 ("template_falcon_180b.jinja", "string"),
 ("template_falcon.jinja", "string"),
 ("template_florence2.jinja", "string"),
+("template_fuyu.jinja", "string"),
 ("template_inkbot.jinja", "string"),
-("template_llava.jinja", "string"),
+("template_paligemma.jinja", "string"),
 ("template_teleflm.jinja", "string"),
+("template_qwen_vl.jinja", "string"),
+("template_qwen_vl_chat.jinja", "string"),
 ("template_vlm2vec.jinja", "openai"),
 ("tool_chat_template_granite_20b_fc.jinja", "string"),
 ("tool_chat_template_hermes.jinja", "string"),


@@ -64,8 +64,6 @@ async def test_basic_vision(model_name: str, base64_encoded_image: dict[str,
 "576",
 # NOTE: max-num-batched-tokens>=mm_item_size
 "--disable_chunked_mm_input",
-"--chat-template",
-"examples/template_llava.jinja"
 ]
 # Server will pre-compile on first startup (takes a long time).