Add chat template for Llama 4 models (#16428)
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
parent fe92176321
commit 05e1fbfc52
@@ -152,10 +152,11 @@ Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_cha

 Supported models:

-All Llama 3.1 and 3.2 models should be supported.
+All Llama 3.1, 3.2 and 4 models should be supported.

 * `meta-llama/Llama-3.1-*`
 * `meta-llama/Llama-3.2-*`
+* `meta-llama/Llama-4-*`

 The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) introduced by the Llama-3.2 models, see the `pythonic` tool parser below.
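To make the distinction concrete: a JSON based tool call is a single JSON object naming the function and its parameters, which is what the `llama3_json`/`llama4_json` parsers decode. A minimal sketch (the `get_weather` function and its arguments are hypothetical):

```python
import json

# A JSON based tool call as emitted by the model: one JSON object per call.
# The function name and arguments here are hypothetical.
raw = '{"name": "get_weather", "parameters": {"city": "Paris"}}'
call = json.loads(raw)
assert call["name"] == "get_weather"
assert call["parameters"] == {"city": "Paris"}

# Pythonic tool calling (Llama 3.2) emits call syntax instead, e.g.
#   [get_weather(city="Paris")]
# which is why it needs the separate `pythonic` parser.
```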
@@ -176,6 +177,12 @@ images.

 Recommended flags: `--tool-call-parser llama3_json --chat-template {see_above}`

+vLLM also provides a JSON based chat template for Llama 4:
+
+* `examples/tool_chat_template_llama4_json.jinja` - this is based on the "official" chat template for the Llama 4
+  models, but tweaked so that it works better with vLLM.
+
+For Llama 4, use `--tool-call-parser llama4_json --chat-template examples/tool_chat_template_llama4_json.jinja`.

 #### IBM Granite

 Supported models:
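For context, a request against a server launched with the flags above could look like the following. This is a sketch only: it assumes the server listens on localhost:8000 and uses a hypothetical `get_weather` tool schema.

```python
from openai import OpenAI

# Assumes a vLLM server started with --tool-call-parser llama4_json
# --chat-template examples/tool_chat_template_llama4_json.jinja;
# base_url/port and the tool schema are illustrative assumptions.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)
```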
examples/tool_chat_template_llama4_json.jinja (new file, 116 lines)
@@ -0,0 +1,116 @@
{%- macro is_array_of_type_objects(var) -%}
    {%- if var is iterable and var is not string -%}
        {#- A set inside the loop would not escape the loop scope, and break
            requires the loopcontrols extension, so track validity in a namespace. #}
        {%- set ns = namespace(valid=true) -%}
        {%- for item in var -%}
            {%- if 'type' not in item -%}
                {%- set ns.valid = false -%}
            {%- endif -%}
        {%- endfor -%}
        {{ ns.valid }}
    {%- else -%}
        {{ false }}
    {%- endif -%}
{%- endmacro %}

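{#- render_message handles the three content shapes a message may carry: a plain string, a list of typed parts (text or image), or anything else, which falls back to a raw JSON dump. #}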
{%- macro render_message(message) %}
    {%- if message['content'] is string %}
        {{- message['content']|trim }}
    {%- elif is_array_of_type_objects(message['content']) == 'True' %}
        {%- for content in message['content'] %}
            {%- if content['type'] == 'image' %}
                {{- '<|image|>' }}
            {%- elif content['type'] == 'text' %}
                {{- content['text']|trim }}
            {%- endif %}
        {%- endfor %}
    {%- else %}
        {{- message['content']|tojson }}
    {%- endif %}
{%- endmacro %}

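{#- Template body: emit the BOS token and normalize the tools / tools_in_user_message variables before rendering. #}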
{{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if tools_in_user_message is not defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if tools is not defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0] %}
    {%- set messages = messages[1:] %}
{%- else %}
    {%- set system_message = ({ "content": "You are a helpful assistant with tool calling "
                                "capabilities. Only reply with a tool call if the function exists in the "
                                "library provided by the user. If it doesn't exist, just reply directly in "
                                "natural language. When you receive a tool call response, use the output to "
                                "format an answer to the original user question."}) %}
{%- endif %}

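{#- Guidance prepended to the JSON tool library; it spells out the call syntax that the llama4_json parser expects back from the model. #}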
{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one '
                            'or more function/tool calls to fulfill the task. \n'
                            'If none are needed, then proceed to the response.\n\n'
                            'Tool Call Syntax: You can call tools using the following syntax:\n'
                            '{"name": function name, "parameters": dictionary of argument name and its value}.\n'
                            'Separate multiple function calls by "; ". Do not use variables.\n'
                            'Do not include anything else when calling the tools with the syntax above.\n\n'
                            'Here is a list of functions in JSON format that you can invoke.\n' %}

{{- "<|header_start|>system<|header_end|>\n\n" }}
{%- if tools is not none and not tools_in_user_message %}
    {{- tool_lib_preamble }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
{%- endif %}
{{- render_message(system_message) }}
{{ "<|eot|>\n" }}

{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and tools is not none %}
    {#- Extract the first user message so we can plug it in here #}
    {%- if messages | length != 0 %}
        {%- set first_user_message = messages[0] %}
        {%- set messages = messages[1:] %}
    {%- else %}
        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
    {%- endif %}
    {{- '<|header_start|>user<|header_end|>\n\n' }}
    {{- tool_lib_preamble }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
    {{- render_message(first_user_message) + "\n<|eot|>" }}
{%- endif %}

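{#- Render the conversation: plain messages, assistant tool calls (re-serialized into the JSON call syntax), and tool results as ipython messages closed with <|eom|>. #}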
{%- for message in messages %}
    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
        {{- render_message(message) }}
        {{- "\n<|eot|>" }}
    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
        {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}}
        {{- render_message(message) }}
        {%- for tool_call in message.tool_calls %}
            {{- '{"name": "' + tool_call.function.name + '", ' }}
            {{- '"parameters": ' }}
            {{- tool_call.function.arguments | tojson }}
            {{- "}" }}
        {%- endfor %}
        {{- "\n<|eot|>" }}
    {%- elif message.role == "tool" or message.role == "ipython" %}
        {{- "\n<|header_start|>ipython<|header_end|>\n\n" }}
        {{- render_message(message) }}
        {{- "\n<|eom|>" }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '\n<|header_start|>assistant<|header_end|>\n\n' }}
{%- endif %}
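To eyeball the prompt this template produces, it can be rendered standalone. A minimal sketch, assuming the path above and that the model's BOS token is `<|begin_of_text|>`; plain Jinja doesn't define `raise_exception`, so the sketch injects one the way vLLM and Transformers do:

```python
import jinja2

env = jinja2.Environment(loader=jinja2.FileSystemLoader("examples"))

# The template calls raise_exception() when tools are requested but no
# user message exists; plain Jinja has no such global, so provide one.
def raise_exception(message):
    raise jinja2.exceptions.TemplateError(message)

env.globals["raise_exception"] = raise_exception

template = env.get_template("tool_chat_template_llama4_json.jinja")
prompt = template.render(
    bos_token="<|begin_of_text|>",  # assumed BOS token for Llama 4
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    tools=None,
    add_generation_prompt=True,
)
print(prompt)
```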
@@ -98,6 +98,20 @@ CONFIGS: dict[str, ServerConfig] = {
         "extended":
         True
     },
+    "llama4_json": {
+        "model":
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        "arguments": [
+            "--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
+            "--distributed-executor-backend", "mp", "--tool-call-parser",
+            "llama4_json", "--chat-template",
+            str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja")
+        ],
+        "supports_parallel":
+        True,
+        "extended":
+        True
+    },
     "mistral": {
         "model":
         "mistralai/Mistral-7B-Instruct-v0.3",
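For reference, the arguments above correspond to an ordinary server launch. A sketch of starting the same configuration outside the test harness (the port is an assumption):

```python
import subprocess
import sys

# Mirrors the "llama4_json" CONFIGS entry above; port 8000 is an assumption.
subprocess.run([
    sys.executable, "-m", "vllm.entrypoints.openai.api_server",
    "--model", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
    "--distributed-executor-backend", "mp",
    "--tool-call-parser", "llama4_json",
    "--chat-template", "examples/tool_chat_template_llama4_json.jinja",
    "--port", "8000",
], check=True)
```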
@@ -27,6 +27,7 @@ logger = init_logger(__name__)


 @ToolParserManager.register_module("llama3_json")
+@ToolParserManager.register_module("llama4_json")
 class Llama3JsonToolParser(ToolParser):
     """
     Tool call parser for Llama 3.1 models intended for use with the
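Since the registration above aliases the existing Llama 3 JSON parser, the decoded format is unchanged. A rough sketch (not vLLM's implementation) of splitting and decoding the call syntax the template's preamble asks the model to produce:

```python
import json

def parse_json_tool_calls(text: str) -> list[dict]:
    """Decode '{"name": ..., "parameters": ...}' calls separated by '; '."""
    calls = []
    for chunk in text.split("; "):
        chunk = chunk.strip()
        if not chunk:
            continue
        try:
            obj = json.loads(chunk)
        except json.JSONDecodeError:
            continue  # not a tool call; plain text is handled elsewhere
        if isinstance(obj, dict) and "name" in obj and "parameters" in obj:
            calls.append(obj)
    return calls

print(parse_json_tool_calls(
    '{"name": "get_weather", "parameters": {"city": "Paris"}}; '
    '{"name": "get_weather", "parameters": {"city": "Tokyo"}}'
))
```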