[Fix] correct tool_id for kimi-k2 when using tool_choice=required (#21259)

Co-authored-by: wangzhengtao <wangzhengtao@msh.team>
bigmoyan 2025-08-21 03:59:54 +08:00 committed by GitHub
parent 0cdbf5e61c
commit 582bbe6bd7
15 changed files with 283 additions and 166 deletions
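
In short: when the served model's hf_config.model_type is "kimi_k2", tool calls (notably with tool_choice="required") now get Kimi-K2's deterministic "functions.<function_name>:<index>" ID instead of the default random ID. A minimal sketch of the two formats this change distinguishes; sketch_tool_call_id is an illustrative stand-in, not vLLM code (the real helper added below is make_tool_call_id in vllm/entrypoints/chat_utils.py, which uses vLLM's random_uuid()):

import uuid

def sketch_tool_call_id(model_type: str, func_name: str, idx: int) -> str:
    # kimi_k2 models get a deterministic id: function name plus a running index
    if model_type == "kimi_k2":
        return f"functions.{func_name}:{idx}"
    # every other model keeps the random OpenAI-style id (uuid4 used here as a stand-in)
    return f"chatcmpl-tool-{uuid.uuid4().hex}"

print(sketch_tool_call_id("kimi_k2", "get_current_weather", 0))  # functions.get_current_weather:0
print(sketch_tool_call_id("qwen3", "get_current_weather", 0))    # chatcmpl-tool-<random hex>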

View File

@ -13,48 +13,7 @@ from ...utils import RemoteOpenAIServer
# any model with a chat template should work here
MODEL_NAME = "Qwen/Qwen3-0.6B"
tools = [
@pytest.fixture(scope="module")
def server(): # noqa: F811
args = [
# use half precision for speed and memory savings in CI environment
"--dtype",
"half",
"--enable-auto-tool-choice",
"--guided-decoding-backend",
"xgrammar",
"--tool-call-parser",
"hermes",
"--reasoning-parser",
"qwen3",
]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server
@pytest_asyncio.fixture
async def client(server):
async with server.get_async_client() as async_client:
yield async_client
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("tool_choice", [
"auto", "required", {
"type": "function",
"function": {
"name": "get_current_weather"
}
}
])
@pytest.mark.parametrize("enable_thinking", [True, False])
async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
stream: bool, tool_choice: Union[str, dict],
enable_thinking: bool):
tools = [
{
"type": "function",
"function": {
@ -77,14 +36,12 @@ async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
},
"unit": {
"type": "string",
"description": "The unit to fetch the temperature in",
"enum": ["celsius", "fahrenheit"],
},
"options": {
"$ref": "#/$defs/WeatherOptions",
"description": "Optional parameters for weather query",
},
},
"required": ["country", "unit"],
@ -149,8 +106,7 @@ async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
},
"unit": {
"type": "string",
"description": "The unit to fetch the temperature in",
"enum": ["celsius", "fahrenheit"],
},
},
@ -158,9 +114,9 @@ async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
},
},
},
]
messages = [
{
"role": "user",
"content": "Hi! How are you doing today?"
@ -176,7 +132,51 @@ async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
"Can you tell me what the current weather is in Berlin and the "\ "Can you tell me what the current weather is in Berlin and the "\
"forecast for the next 5 days, in fahrenheit?", "forecast for the next 5 days, in fahrenheit?",
}, },
]
@pytest.fixture(scope="module")
def server(): # noqa: F811
args = [
# use half precision for speed and memory savings in CI environment
"--dtype",
"half",
"--enable-auto-tool-choice",
"--guided-decoding-backend",
"xgrammar",
"--tool-call-parser",
"hermes",
"--reasoning-parser",
"qwen3",
"--gpu-memory-utilization",
"0.4"
]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server
@pytest_asyncio.fixture
async def client(server):
async with server.get_async_client() as async_client:
yield async_client
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("tool_choice", [
"auto", "required", {
"type": "function",
"function": {
"name": "get_current_weather"
}
}
])
@pytest.mark.parametrize("enable_thinking", [True, False])
async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
stream: bool, tool_choice: Union[str, dict],
enable_thinking: bool):
if not stream:
# Non-streaming test
chat_completion = await client.chat.completions.create(
@ -216,3 +216,71 @@ async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
output.extend(chunk.choices[0].delta.tool_calls)
assert len(output) > 0
@pytest.fixture(scope="module")
def k2_server(): # noqa: F811
args = [
# use half precision for speed and memory savings in CI environment
"--dtype",
"half",
"--enable-auto-tool-choice",
"--guided-decoding-backend",
"xgrammar",
"--tool-call-parser",
"hermes",
"--reasoning-parser",
"qwen3",
"--gpu-memory-utilization",
"0.4",
]
# Hack to test the kimi_k2 tool-call ID format;
# setting kv_lora_rank=None avoids an error in the is_deepseek_mla check.
with RemoteOpenAIServer(MODEL_NAME,
args,
override_hf_configs={
"model_type": 'kimi_k2',
'kv_lora_rank': None
}) as remote_server:
yield remote_server
@pytest_asyncio.fixture
async def k2_client(k2_server):
async with k2_server.get_async_client() as async_client:
yield async_client
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("tool_choice", ["required"])
async def test_tool_id_kimi_k2(k2_client: openai.AsyncOpenAI, model_name: str,
stream: bool, tool_choice: str):
if not stream:
# Non-streaming test
chat_completion = await k2_client.chat.completions.create(
messages=messages,
model=model_name,
tools=tools,
tool_choice=tool_choice)
assert chat_completion.choices[0].message.tool_calls is not None
assert len(chat_completion.choices[0].message.tool_calls) > 0
assert chat_completion.choices[0].message.tool_calls[
0].id == 'functions.get_current_weather:0'
else:
# Streaming test
output_stream = await k2_client.chat.completions.create(
messages=messages,
model=model_name,
tools=tools,
tool_choice=tool_choice,
stream=True)
output = []
async for chunk in output_stream:
if chunk.choices and chunk.choices[0].delta.tool_calls:
output.extend(chunk.choices[0].delta.tool_calls)
for o in output:
assert o.id is None or o.id == 'functions.get_current_weather:0'

View File

@ -5,6 +5,7 @@ import asyncio
import copy
import functools
import importlib
import json
import os
import signal
import subprocess
@ -101,7 +102,8 @@ class RemoteOpenAIServer:
env_dict: Optional[dict[str, str]] = None,
seed: Optional[int] = 0,
auto_port: bool = True,
max_wait_seconds: Optional[float] = None,
override_hf_configs: Optional[dict[str, Any]] = None) -> None:
if auto_port:
if "-p" in vllm_serve_args or "--port" in vllm_serve_args:
raise ValueError("You have manually specified the port "
@ -120,6 +122,12 @@ class RemoteOpenAIServer:
vllm_serve_args = vllm_serve_args + ["--seed", str(seed)]
if override_hf_configs is not None:
vllm_serve_args = vllm_serve_args + [
"--hf-overrides",
json.dumps(override_hf_configs)
]
parser = FlexibleArgumentParser(
description="vLLM's remote OpenAI server.")
subparsers = parser.add_subparsers(required=False, dest="subparser")
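
For reference, a usage sketch of the new override_hf_configs hook in RemoteOpenAIServer (values taken from the k2_server fixture above): the dict is serialized with json.dumps and appended as a single --hf-overrides argument.

import json

override_hf_configs = {"model_type": "kimi_k2", "kv_lora_rank": None}
# RemoteOpenAIServer effectively appends the following to the serve args:
extra_args = ["--hf-overrides", json.dumps(override_hf_configs)]
print(extra_args)
# ['--hf-overrides', '{"model_type": "kimi_k2", "kv_lora_rank": null}']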

View File

@ -1345,5 +1345,18 @@ def apply_mistral_chat_template(
"template") "template")
raise ValueError(str(e)) from e raise ValueError(str(e)) from e
def get_history_tool_calls_cnt(conversation: list[ConversationMessage]):
idx = 0
for msg in conversation:
if msg['role'] == 'assistant':
tool_calls = msg.get('tool_calls')
idx += len(list(tool_calls)) if tool_calls is not None else 0  # noqa
return idx
def make_tool_call_id(id_type: str = 'random', func_name=None, idx=None):
if id_type == 'kimi_k2':
return f'functions.{func_name}:{idx}'
else:
# by default return random
return f"chatcmpl-tool-{random_uuid()}"

View File

@ -38,7 +38,7 @@ from typing_extensions import TypeAlias
from vllm import envs
from vllm.entrypoints.chat_utils import (ChatCompletionMessageParam,
make_tool_call_id)
from vllm.entrypoints.score_utils import (ScoreContentPartParam,
ScoreMultiModalParam)
from vllm.logger import init_logger
@ -1634,7 +1634,7 @@ class FunctionCall(OpenAIBaseModel):
class ToolCall(OpenAIBaseModel):
id: str = Field(default_factory=make_tool_call_id)
type: Literal["function"] = "function"
function: FunctionCall
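
Note: because make_tool_call_id defaults to id_type='random', using it as a default_factory keeps the previous behaviour for ToolCall. A quick check (assuming the protocol classes are importable as shown in this file):

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall

tc = ToolCall(function=FunctionCall(name="get_current_weather", arguments="{}"))
print(tc.id)  # chatcmpl-tool-<random uuid>, same shape as before this change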

View File

@ -19,7 +19,8 @@ from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
ConversationMessage,
get_history_tool_calls_cnt,
make_tool_call_id)
from vllm.entrypoints.harmony_utils import (
get_developer_message, get_stop_tokens_for_assistant_actions,
get_streamable_parser_for_assistant, get_system_message, parse_chat_input,
@ -133,6 +134,10 @@ class OpenAIServingChat(OpenAIServing):
source = "model" if source == "auto" else source source = "model" if source == "auto" else source
logger.info("Using default chat sampling params from %s: %s", logger.info("Using default chat sampling params from %s: %s",
source, self.default_sampling_params) source, self.default_sampling_params)
if self.model_config.hf_config.model_type == 'kimi_k2':
self.tool_call_id_type = 'kimi_k2'
else:
self.tool_call_id_type = 'random'
self.use_harmony = model_config.hf_config.model_type == "gpt_oss"
if self.use_harmony:
@ -379,6 +384,7 @@ class OpenAIServingChat(OpenAIServing):
current_text: Optional[str],
delta_text: str,
function_name_returned: bool,
tool_call_idx: Optional[int] = None
) -> tuple[Optional[DeltaMessage], bool]:
if current_text is None or current_text == "":
# if the current text is empty, we cannot parse it
@ -424,8 +430,12 @@ class OpenAIServingChat(OpenAIServing):
current_tool_call = obj[-2]
function_name_returned = True
tool_call_id = make_tool_call_id(
id_type=self.tool_call_id_type,
func_name=current_tool_call["name"],
idx=tool_call_idx)
delta_message = DeltaMessage(tool_calls=[
DeltaToolCall(id=tool_call_id,
function=DeltaFunctionCall(
name=current_tool_call["name"],
arguments=arguments),
@ -491,6 +501,10 @@ class OpenAIServingChat(OpenAIServing):
all_previous_token_ids: Optional[list[list[int]]]
function_name_returned = [False] * num_choices
if self.tool_call_id_type == 'kimi_k2':
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
else:
history_tool_call_cnt = 0
# Always track previous_texts for comprehensive output logging
previous_texts = [""] * num_choices
@ -673,7 +687,6 @@ class OpenAIServingChat(OpenAIServing):
previous_text = previous_texts[i]
previous_token_ids = all_previous_token_ids[i]
current_text = previous_text + delta_text
# avoid the None + list error. # avoid the None + list error.
if previous_token_ids:
current_token_ids = previous_token_ids + as_list(
@ -733,7 +746,7 @@ class OpenAIServingChat(OpenAIServing):
index=i)
else:
delta_tool_call = DeltaToolCall(
id=make_tool_call_id(),
type="function",
function=DeltaFunctionCall(
name=tool_choice_function_name,
@ -764,7 +777,11 @@ class OpenAIServingChat(OpenAIServing):
previous_text=previous_text,
current_text=content,
delta_text=delta_text,
function_name_returned=fn_name_returned,
tool_call_idx=history_tool_call_cnt))
if (delta_message and delta_message.tool_calls and
delta_message.tool_calls[0].id is not None):
history_tool_call_cnt += 1
# update the previous values for the next iteration
previous_texts[i] = current_text
@ -1089,6 +1106,10 @@ class OpenAIServingChat(OpenAIServing):
assert final_res is not None
choices: list[ChatCompletionResponseChoice] = []
if self.tool_call_id_type == 'kimi_k2':
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
else:
history_tool_call_cnt = 0
role = self.get_chat_request_role(request)
for output in final_res.outputs:
@ -1194,17 +1215,26 @@ class OpenAIServingChat(OpenAIServing):
assert content is not None
tool_calls = TypeAdapter(
list[FunctionDefinition]).validate_json(content)
tool_call_ids = []
for tool_call in tool_calls:
tool_call_ids.append(
make_tool_call_id(id_type=self.tool_call_id_type,
func_name=tool_call.name,
idx=history_tool_call_cnt))
history_tool_call_cnt += 1
message = ChatMessage(
role=role,
content="",
reasoning_content=reasoning_content,
tool_calls=[
tool_call_class(id=tool_call_ids[i],
function=FunctionCall(
name=tool_call.name,
arguments=json.dumps(
tool_call.parameters,
ensure_ascii=False)))
for i, tool_call in enumerate(tool_calls)
],
reasoning_content=reasoning_content)
# if the request doesn't use tool choice
# OR specifies to not use a tool
@ -1248,7 +1278,6 @@ class OpenAIServingChat(OpenAIServing):
if (tool_call_info.content
and len(tool_call_info.content) > 0):
ret_content = tool_call_info.content
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=ret_content)
@ -1327,12 +1356,11 @@ class OpenAIServingChat(OpenAIServing):
elif choice.message.tool_calls:
# For tool calls, log the function name and arguments
tool_call_descriptions = []
for tc in choice.message.tool_calls:
if hasattr(tc.function, "name") and hasattr(
tc.function, "arguments"):
tool_call_descriptions.append(
f"{tc.function.name}({tc.function.arguments})")
)
tool_calls_str = ", ".join(tool_call_descriptions) tool_calls_str = ", ".join(tool_call_descriptions)
output_text = f"[tool_calls: {tool_calls_str}]" output_text = f"[tool_calls: {tool_calls_str}]"

View File

@ -6,7 +6,7 @@ from typing import Union
import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -267,7 +267,7 @@ class DeepSeekV3ToolParser(ToolParser):
DeltaToolCall(
index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True),

View File

@ -10,7 +10,7 @@ import partial_json_parser
import regex as re
from partial_json_parser.core.options import Allow
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -203,7 +203,7 @@ class Granite20bFCToolParser(ToolParser):
delta = DeltaMessage(tool_calls=[
DeltaToolCall(index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True))

View File

@ -8,7 +8,7 @@ from typing import Union
import partial_json_parser
from partial_json_parser.core.options import Allow
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -185,7 +185,7 @@ class GraniteToolParser(ToolParser):
delta = DeltaMessage(tool_calls=[
DeltaToolCall(index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True))

View File

@ -9,7 +9,7 @@ import partial_json_parser
import regex as re
from partial_json_parser.core.options import Allow
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -307,7 +307,7 @@ class Hermes2ProToolParser(ToolParser):
return DeltaMessage(tool_calls=[
DeltaToolCall(index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True))

View File

@ -8,7 +8,7 @@ from typing import Union
import partial_json_parser
from partial_json_parser.core.options import Allow
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -107,7 +107,7 @@ class Internlm2ToolParser(ToolParser):
delta = DeltaMessage(tool_calls=[
DeltaToolCall(index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True))

View File

@ -9,7 +9,7 @@ import partial_json_parser
import regex as re
from partial_json_parser.core.options import Allow
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -222,7 +222,7 @@ class JambaToolParser(ToolParser):
delta = DeltaMessage(tool_calls=[
DeltaToolCall(index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True))

View File

@ -10,7 +10,7 @@ import regex as re
from partial_json_parser.core.options import Allow
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -213,7 +213,7 @@ class Llama3JsonToolParser(ToolParser):
delta = DeltaMessage(tool_calls=[
DeltaToolCall(index=self.current_tool_id,
type="function",
id=make_tool_call_id(),
function=DeltaFunctionCall(
name=function_name).model_dump(
exclude_none=True))

View File

@ -7,7 +7,7 @@ from typing import Any, Optional, Union
import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -394,7 +394,7 @@ class MinimaxToolParser(ToolParser):
sent_tools.append({
"sent_name": False,
"sent_arguments": "",
"id": make_tool_call_id(),
})
while len(tool_ids) < tool_count:

View File

@ -8,7 +8,7 @@ from typing import Any, Optional
import regex as re
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaMessage,
ExtractedToolCallInformation,
@ -74,7 +74,7 @@ class Phi4MiniJsonToolParser(ToolParser):
tool_calls: list[ToolCall] = [
ToolCall(
id=make_tool_call_id(),
type="function",
function=FunctionCall(
name=raw_function_call["name"],

View File

@ -7,7 +7,7 @@ from typing import Any, Optional, Union
import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaFunctionCall, DeltaMessage,
DeltaToolCall,
@ -226,7 +226,7 @@ class xLAMToolParser(ToolParser):
function_name = name_match.group(1)
# The test expects us to send just the name first
tool_id = make_tool_call_id()
delta = DeltaMessage(tool_calls=[
DeltaToolCall(
index=0,