[Frontend] Add missing chat templates for various MLLMs (#17758)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 043e4c4955
commit 8a15c2603a
@@ -216,7 +216,7 @@ A chat template is **required** to use Chat Completions API.

 Although most models come with a chat template, for others you have to define one yourself.
 The chat template can be inferred based on the documentation on the model's HuggingFace repo.
-For example, LLaVA-1.5 (`llava-hf/llava-1.5-7b-hf`) requires a chat template that can be found here: <gh-file:examples/template_llava.jinja>
+For example, DeepSeek-VL2 requires a chat template that can be found here: <gh-file:examples/template_deepseek_vl2.jinja>
 :::

 ### Image Inputs
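If you define a template yourself, one way to sanity-check it is to render it directly with jinja2 before handing it to the server. A minimal sketch, assuming the repo's examples/template_deepseek_vl2.jinja and a toy string-content conversation (if a template uses HF-only helpers such as raise_exception, pass them in as render variables):

import jinja2

# Load a chat template from the examples directory (path is illustrative).
with open("examples/template_deepseek_vl2.jinja") as f:
    template = jinja2.Environment().from_string(f.read())

# Render a toy conversation to inspect the prompt the server would build.
prompt = template.render(
    messages=[
        {"role": "user", "content": "What is in this image?"},
        {"role": "assistant", "content": "A duck."},
    ],
    add_generation_prompt=True,
)
print(prompt)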
@@ -5,7 +5,7 @@ and run online serving with OpenAI client.
 Launch the vLLM server with the following command:

 (single image inference with Llava)
-vllm serve llava-hf/llava-1.5-7b-hf --chat-template template_llava.jinja
+vllm serve llava-hf/llava-1.5-7b-hf

 (multi-image inference with Phi-3.5-vision-instruct)
 vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
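Once the server is up, it can be queried through the OpenAI-compatible Chat Completions endpoint. A minimal sketch, assuming the default local port and a placeholder image URL:

from openai import OpenAI

# vLLM exposes an OpenAI-compatible server; the API key is unused locally.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

chat_response = client.chat.completions.create(
    model="llava-hf/llava-1.5-7b-hf",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            # Any image URL reachable from the server works here.
            {"type": "image_url",
             "image_url": {"url": "https://example.com/duck.jpg"}},
        ],
    }],
)
print(chat_response.choices[0].message.content)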
examples/template_chameleon.jinja (new file)
@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] -}}
+{%- endfor -%}
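This template (and the identical template_paligemma.jinja and template_qwen_vl.jinja below) simply concatenates message contents with no role markers; template_fuyu.jinja does the same but appends a newline per message. A quick render, assuming plain-string contents:

import jinja2

TEMPLATE = (
    "{%- for message in messages -%}"
    "{{- message['content'] -}}"
    "{%- endfor -%}"
)

messages = [
    {"role": "user", "content": "Describe the image."},
    {"role": "assistant", "content": "A red square."},
]
# Prints "Describe the image.A red square."
print(jinja2.Environment().from_string(TEMPLATE).render(messages=messages))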
@@ -1,7 +1,3 @@
 {%- for message in messages -%}
-{%- if message['role'] == 'user' -%}
-{{- message['content'] -}}
-{%- elif message['role'] == 'assistant' -%}
-{{- message['content'] -}}
-{%- endif -%}
+{{- message['content'] -}}
 {%- endfor -%}
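The simplified loop is behavior-preserving for alternating user/assistant turns, and additionally passes through any other role (e.g. system) instead of silently dropping it. A quick equivalence check, assuming plain-string contents:

import jinja2

env = jinja2.Environment()
OLD = (
    "{%- for message in messages -%}"
    "{%- if message['role'] == 'user' -%}{{- message['content'] -}}"
    "{%- elif message['role'] == 'assistant' -%}{{- message['content'] -}}"
    "{%- endif -%}"
    "{%- endfor -%}"
)
NEW = "{%- for message in messages -%}{{- message['content'] -}}{%- endfor -%}"

msgs = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello"}]
assert env.from_string(OLD).render(messages=msgs) == env.from_string(NEW).render(messages=msgs)

# A system turn is dropped by the old template but kept by the new one.
msgs = [{"role": "system", "content": "Be brief. "}] + msgs
assert env.from_string(OLD).render(messages=msgs) == "HiHello"
assert env.from_string(NEW).render(messages=msgs) == "Be brief. HiHello"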
examples/template_fuyu.jinja (new file)
@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] + '\n' -}}
+{%- endfor -%}
examples/template_llava.jinja (deleted file)
@@ -1,23 +0,0 @@
-{%- if messages[0]['role'] == 'system' -%}
-{%- set system_message = messages[0]['content'] -%}
-{%- set messages = messages[1:] -%}
-{%- else -%}
-{% set system_message = '' -%}
-{%- endif -%}
-
-{{ bos_token + system_message }}
-{%- for message in messages -%}
-{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
-{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
-{%- endif -%}
-
-{%- if message['role'] == 'user' -%}
-{{ 'USER: ' + message['content'] + '\n' }}
-{%- elif message['role'] == 'assistant' -%}
-{{ 'ASSISTANT: ' + message['content'] + eos_token + '\n' }}
-{%- endif -%}
-{%- endfor -%}
-
-{%- if add_generation_prompt -%}
-{{ 'ASSISTANT:' }}
-{% endif %}
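For reference, the removed template produced a Vicuna-style USER:/ASSISTANT: prompt. A render sketch with plain jinja2 (HF's templating normally supplies raise_exception, so it is passed in here; the special tokens are illustrative):

import jinja2

# The removed template, inlined and escaped for Python.
TEMPLATE = (
    "{%- if messages[0]['role'] == 'system' -%}"
    "{%- set system_message = messages[0]['content'] -%}"
    "{%- set messages = messages[1:] -%}"
    "{%- else -%}"
    "{% set system_message = '' -%}"
    "{%- endif -%}"
    "{{ bos_token + system_message }}"
    "{%- for message in messages -%}"
    "{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}"
    "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
    "{%- endif -%}"
    "{%- if message['role'] == 'user' -%}"
    "{{ 'USER: ' + message['content'] + '\\n' }}"
    "{%- elif message['role'] == 'assistant' -%}"
    "{{ 'ASSISTANT: ' + message['content'] + eos_token + '\\n' }}"
    "{%- endif -%}"
    "{%- endfor -%}"
    "{%- if add_generation_prompt -%}"
    "{{ 'ASSISTANT:' }}"
    "{% endif %}"
)

def raise_exception(message):
    raise jinja2.TemplateError(message)

prompt = jinja2.Environment().from_string(TEMPLATE).render(
    messages=[{"role": "user", "content": "What is shown?"}],
    bos_token="<s>", eos_token="</s>",  # illustrative special tokens
    add_generation_prompt=True,
    raise_exception=raise_exception,
)
print(repr(prompt))  # '<s>USER: What is shown?\nASSISTANT:'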
examples/template_paligemma.jinja (new file)
@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] -}}
+{%- endfor -%}
examples/template_qwen_vl.jinja (new file)
@@ -0,0 +1,3 @@
+{%- for message in messages -%}
+{{- message['content'] -}}
+{%- endfor -%}
examples/template_qwen_vl_chat.jinja (new file)
@@ -0,0 +1,10 @@
+{%- for message in messages -%}
+{{- '<|im_start|>' + message['role'] + '\n' + message['content'] -}}
+{%- if (loop.last and add_generation_prompt) or not loop.last -%}
+{{- '<|im_end|>' + '\n' -}}
+{%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+{{- '<|im_start|>assistant\n' -}}
+{%- endif -%}
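Rendering this template yields ChatML-style turns plus a generation prompt. A sketch, assuming the file is read from the examples directory and a toy string-content conversation:

import jinja2

with open("examples/template_qwen_vl_chat.jinja") as f:
    template = jinja2.Environment().from_string(f.read())

prompt = template.render(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is in this picture?"},
    ],
    add_generation_prompt=True,
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# What is in this picture?<|im_end|>
# <|im_start|>assistant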
@@ -900,6 +900,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
 [("template_alpaca.jinja", "string"),
 ("template_baichuan.jinja", "string"),
 ("template_blip2.jinja", "string"),
+("template_chameleon.jinja", "string"),
 ("template_chatglm.jinja", "string"),
 ("template_chatglm2.jinja", "string"),
 ("template_chatml.jinja", "string"),
@@ -908,9 +909,12 @@ def test_resolve_content_format_hf_defined(model, expected_format):
 ("template_falcon_180b.jinja", "string"),
 ("template_falcon.jinja", "string"),
 ("template_florence2.jinja", "string"),
+("template_fuyu.jinja", "string"),
 ("template_inkbot.jinja", "string"),
-("template_llava.jinja", "string"),
+("template_paligemma.jinja", "string"),
 ("template_teleflm.jinja", "string"),
+("template_qwen_vl.jinja", "string"),
+("template_qwen_vl_chat.jinja", "string"),
 ("template_vlm2vec.jinja", "openai"),
 ("tool_chat_template_granite_20b_fc.jinja", "string"),
 ("tool_chat_template_hermes.jinja", "string"),
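The second tuple element is the content format the test expects vLLM to resolve for each template: "string" templates consume message['content'] as one plain string, while "openai" templates (like template_vlm2vec.jinja above) iterate over the OpenAI-style list of typed parts. A sketch of the two message shapes, with illustrative values:

# "string" content format: content is a single string.
string_message = {"role": "user", "content": "What's in this image?"}

# "openai" content format: content is a list of typed parts.
openai_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url",
         "image_url": {"url": "https://example.com/duck.jpg"}},
    ],
}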
@@ -64,8 +64,6 @@ async def test_basic_vision(model_name: str, base64_encoded_image: dict[str,
 "576",
 # NOTE: max-num-batched-tokens>=mm_item_size
 "--disable_chunked_mm_input",
-"--chat-template",
-"examples/template_llava.jinja"
 ]

 # Server will pre-compile on first startup (takes a long time).