diff --git a/tests/entrypoints/openai/test_tokenization.py b/tests/entrypoints/openai/test_tokenization.py
index 663b722426c58..9773f3e45b99c 100644
--- a/tests/entrypoints/openai/test_tokenization.py
+++ b/tests/entrypoints/openai/test_tokenization.py
@@ -145,6 +145,83 @@ async def test_tokenize_chat(
     }
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name,tokenizer_name",
+    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    indirect=["tokenizer_name"],
+)
+async def test_tokenize_chat_with_tools(
+    server: RemoteOpenAIServer,
+    model_name: str,
+    tokenizer_name: str,
+):
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
+
+    for add_generation in [False, True]:
+        for add_special in [False, True]:
+            conversation = [{
+                "role":
+                "user",
+                "content":
+                "What's the weather like in Paris today?",
+            }]
+
+            tools = [{
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string"
+                            }
+                        },
+                    },
+                },
+            }]
+
+            for continue_final in [False, True]:
+                if add_generation and continue_final:
+                    continue
+                if continue_final:
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
+
+                prompt = tokenizer.apply_chat_template(
+                    add_generation_prompt=add_generation,
+                    continue_final_message=continue_final,
+                    conversation=conversation,
+                    tools=tools,
+                    tokenize=False,
+                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
+
+                response = requests.post(
+                    server.url_for("tokenize"),
+                    json={
+                        "add_generation_prompt": add_generation,
+                        "continue_final_message": continue_final,
+                        "add_special_tokens": add_special,
+                        "messages": conversation,
+                        "model": model_name,
+                        "tools": tools,
+                    },
+                )
+                response.raise_for_status()
+
+                assert response.json() == {
+                    "tokens": tokens,
+                    "count": len(tokens),
+                    "max_model_len": 8192,
+                }
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name,tokenizer_name",
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 8ac6534875dd1..cd6ee36701179 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1593,6 +1593,10 @@ class TokenizeChatRequest(OpenAIBaseModel):
         default=None,
         description=("Additional kwargs to pass to the HF processor."),
     )
+    tools: Optional[list[ChatCompletionToolsParam]] = Field(
+        default=None,
+        description=("A list of tools the model may call."),
+    )
 
     @model_validator(mode="before")
     @classmethod
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index 5f4678cb0e690..349e0ac9e68b3 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -65,6 +65,8 @@ class OpenAIServingTokenization(OpenAIServing):
         tokenizer = await self.engine_client.get_tokenizer(lora_request)
 
         if isinstance(request, TokenizeChatRequest):
+            tool_dicts = (None if request.tools is None else
+                          [tool.model_dump() for tool in request.tools])
             (
                 _,
                 request_prompts,
@@ -73,6 +75,7 @@
                 request,
                 tokenizer,
                 request.messages,
+                tool_dicts=tool_dicts,
                 chat_template=request.chat_template or self.chat_template,
                 chat_template_content_format=self.
                 chat_template_content_format,
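
For reviewers who want to try the change by hand, below is a minimal sketch of a request against the extended `/tokenize` endpoint. The base URL and model name are illustrative assumptions (not part of this diff); the request and response fields (`messages`, `tools`, `add_generation_prompt`, `add_special_tokens`, `tokens`, `count`, `max_model_len`) mirror the test above.

```python
# Minimal sketch. Assumptions: a vLLM OpenAI-compatible server is listening on
# localhost:8000 and serves a chat model named "my-chat-model"; the tool
# definition is the same illustrative one used in the new test.
import requests

payload = {
    "model": "my-chat-model",  # hypothetical model name
    "messages": [
        {"role": "user", "content": "What's the weather like in Paris today?"},
    ],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "parameters": {
                    "type": "object",
                    "properties": {"location": {"type": "string"}},
                },
            },
        },
    ],
    "add_generation_prompt": True,
    "add_special_tokens": False,
}

# With this PR, the tool definitions are folded into the chat template before
# tokenization, so the returned token count also covers the tool schema.
resp = requests.post("http://localhost:8000/tokenize", json=payload)
resp.raise_for_status()
result = resp.json()
print(result["count"], result["max_model_len"])
print(result["tokens"][:16])
```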