Add chat template for Llama 4 models (#16428)
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
parent fe92176321
commit 05e1fbfc52
@@ -152,10 +152,11 @@ Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_cha

 Supported models:

-All Llama 3.1 and 3.2 models should be supported.
+All Llama 3.1, 3.2 and 4 models should be supported.

 * `meta-llama/Llama-3.1-*`
 * `meta-llama/Llama-3.2-*`
+* `meta-llama/Llama-4-*`

 The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) introduced by the Llama-3.2 models, see the `pythonic` tool parser below.
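To make the distinction concrete: a JSON based tool call is a single JSON object naming the function and its parameters, which is what the `llama3_json`/`llama4_json` parsers decode. A minimal sketch (the `get_weather` function and its arguments are hypothetical):

```python
import json

# A JSON based tool call as emitted by the model: one JSON object per call.
# The function name and arguments here are hypothetical.
raw = '{"name": "get_weather", "parameters": {"city": "Paris"}}'
call = json.loads(raw)
assert call["name"] == "get_weather"
assert call["parameters"] == {"city": "Paris"}

# Pythonic tool calling (Llama 3.2) emits call syntax instead, e.g.
#   [get_weather(city="Paris")]
# which is why it needs the separate `pythonic` parser.
```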
@@ -176,6 +177,12 @@ images.

 Recommended flags: `--tool-call-parser llama3_json --chat-template {see_above}`

+vLLM also provides a JSON based chat template for Llama 4:
+
+* `examples/tool_chat_template_llama4_json.jinja` - this is based on the "official" chat template for the Llama 4
+  models, but tweaked so that it works better with vLLM.
+
+For Llama 4, use `--tool-call-parser llama4_json --chat-template examples/tool_chat_template_llama4_json.jinja`.

 #### IBM Granite

 Supported models:
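For context, a request against a server launched with the flags above could look like the following. This is a sketch only: it assumes the server listens on localhost:8000 and uses a hypothetical `get_weather` tool schema.

```python
from openai import OpenAI

# Assumes a vLLM server started with --tool-call-parser llama4_json
# --chat-template examples/tool_chat_template_llama4_json.jinja;
# base_url/port and the tool schema are illustrative assumptions.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)
```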
examples/tool_chat_template_llama4_json.jinja (new file, 116 lines)
@@ -0,0 +1,116 @@
{%- macro is_array_of_type_objects(var) -%}
    {%- if var is iterable and var is not string -%}
        {#- A set inside the loop would not escape the loop scope, and break
            requires the loopcontrols extension, so track validity in a namespace. #}
        {%- set ns = namespace(valid=true) -%}
        {%- for item in var -%}
            {%- if 'type' not in item -%}
                {%- set ns.valid = false -%}
            {%- endif -%}
        {%- endfor -%}
        {{ ns.valid }}
    {%- else -%}
        {{ false }}
    {%- endif -%}
{%- endmacro %}

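{#- render_message handles the three content shapes a message may carry: a plain string, a list of typed parts (text or image), or anything else, which falls back to a raw JSON dump. #}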
{%- macro render_message(message) %}
    {%- if message['content'] is string %}
        {{- message['content']|trim }}
    {%- elif is_array_of_type_objects(message['content']) == 'True' %}
        {%- for content in message['content'] %}
            {%- if content['type'] == 'image' %}
                {{- '<|image|>' }}
            {%- elif content['type'] == 'text' %}
                {{- content['text']|trim }}
            {%- endif %}
        {%- endfor %}
    {%- else %}
        {{- message['content']|tojson }}
    {%- endif %}
{%- endmacro %}

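{#- Template body: emit the BOS token and normalize the tools / tools_in_user_message variables before rendering. #}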
{{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if tools_in_user_message is not defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if tools is not defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0] %}
    {%- set messages = messages[1:] %}
{%- else %}
    {%- set system_message = ({ "content": "You are a helpful assistant with tool calling "
                                "capabilities. Only reply with a tool call if the function exists in the "
                                "library provided by the user. If it doesn't exist, just reply directly in "
                                "natural language. When you receive a tool call response, use the output to "
                                "format an answer to the original user question."}) %}
{%- endif %}

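{#- Guidance prepended to the JSON tool library; it spells out the call syntax that the llama4_json parser expects back from the model. #}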
{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one '
                            'or more function/tool calls to fulfill the task. \n'
                            'If none are needed, then proceed to the response.\n\n'
                            'Tool Call Syntax: You can call tools using the following syntax:\n'
                            '{"name": function name, "parameters": dictionary of argument name and its value}.\n'
                            'Separate multiple function calls by "; ". Do not use variables.\n'
                            'Do not include anything else when calling the tools with the syntax above.\n\n'
                            'Here is a list of functions in JSON format that you can invoke.\n' %}

{{- "<|header_start|>system<|header_end|>\n\n" }}
{%- if tools is not none and not tools_in_user_message %}
    {{- tool_lib_preamble }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
{%- endif %}
{{- render_message(system_message) }}
{{ "<|eot|>\n" }}

{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and tools is not none %}
    {#- Extract the first user message so we can plug it in here #}
    {%- if messages | length != 0 %}
        {%- set first_user_message = messages[0] %}
        {%- set messages = messages[1:] %}
    {%- else %}
        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
    {%- endif %}
    {{- '<|header_start|>user<|header_end|>\n\n' }}
    {{- tool_lib_preamble }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
    {{- render_message(first_user_message) + "\n<|eot|>" }}
{%- endif %}

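{#- Render the conversation: plain messages, assistant tool calls (re-serialized into the JSON call syntax), and tool results as ipython messages closed with <|eom|>. #}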
{%- for message in messages %}
    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
        {{- render_message(message) }}
        {{- "\n<|eot|>" }}
    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
        {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}}
        {{- render_message(message) }}
        {%- for tool_call in message.tool_calls %}
            {{- '{"name": "' + tool_call.function.name + '", ' }}
            {{- '"parameters": ' }}
            {{- tool_call.function.arguments | tojson }}
            {{- "}" }}
        {%- endfor %}
        {{- "\n<|eot|>" }}
    {%- elif message.role == "tool" or message.role == "ipython" %}
        {{- "\n<|header_start|>ipython<|header_end|>\n\n" }}
        {{- render_message(message) }}
        {{- "\n<|eom|>" }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '\n<|header_start|>assistant<|header_end|>\n\n' }}
{%- endif %}
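To eyeball the prompt this template produces, it can be rendered standalone. A minimal sketch, assuming the path above and that the model's BOS token is `<|begin_of_text|>`; plain Jinja doesn't define `raise_exception`, so the sketch injects one the way vLLM and Transformers do:

```python
import jinja2

env = jinja2.Environment(loader=jinja2.FileSystemLoader("examples"))

# The template calls raise_exception() when tools are requested but no
# user message exists; plain Jinja has no such global, so provide one.
def raise_exception(message):
    raise jinja2.exceptions.TemplateError(message)

env.globals["raise_exception"] = raise_exception

template = env.get_template("tool_chat_template_llama4_json.jinja")
prompt = template.render(
    bos_token="<|begin_of_text|>",  # assumed BOS token for Llama 4
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    tools=None,
    add_generation_prompt=True,
)
print(prompt)
```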
@@ -98,6 +98,20 @@ CONFIGS: dict[str, ServerConfig] = {
         "extended":
         True
     },
+    "llama4_json": {
+        "model":
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        "arguments": [
+            "--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
+            "--distributed-executor-backend", "mp", "--tool-call-parser",
+            "llama4_json", "--chat-template",
+            str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja")
+        ],
+        "supports_parallel":
+        True,
+        "extended":
+        True
+    },
     "mistral": {
         "model":
         "mistralai/Mistral-7B-Instruct-v0.3",
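For reference, the arguments above correspond to an ordinary server launch. A sketch of starting the same configuration outside the test harness (the port is an assumption):

```python
import subprocess
import sys

# Mirrors the "llama4_json" CONFIGS entry above; port 8000 is an assumption.
subprocess.run([
    sys.executable, "-m", "vllm.entrypoints.openai.api_server",
    "--model", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
    "--distributed-executor-backend", "mp",
    "--tool-call-parser", "llama4_json",
    "--chat-template", "examples/tool_chat_template_llama4_json.jinja",
    "--port", "8000",
], check=True)
```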
@@ -27,6 +27,7 @@ logger = init_logger(__name__)


 @ToolParserManager.register_module("llama3_json")
+@ToolParserManager.register_module("llama4_json")
 class Llama3JsonToolParser(ToolParser):
     """
     Tool call parser for Llama 3.1 models intended for use with the
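Since the registration above aliases the existing Llama 3 JSON parser, the decoded format is unchanged. A rough sketch (not vLLM's implementation) of splitting and decoding the call syntax the template's preamble asks the model to produce:

```python
import json

def parse_json_tool_calls(text: str) -> list[dict]:
    """Decode '{"name": ..., "parameters": ...}' calls separated by '; '."""
    calls = []
    for chunk in text.split("; "):
        chunk = chunk.strip()
        if not chunk:
            continue
        try:
            obj = json.loads(chunk)
        except json.JSONDecodeError:
            continue  # not a tool call; plain text is handled elsewhere
        if isinstance(obj, dict) and "name" in obj and "parameters" in obj:
            calls.append(obj)
    return calls

print(parse_json_tool_calls(
    '{"name": "get_weather", "parameters": {"city": "Paris"}}; '
    '{"name": "get_weather", "parameters": {"city": "Tokyo"}}'
))
```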