From 28f350e147c4b5c050c0080ef0d924c15ab87635 Mon Sep 17 00:00:00 2001
From: Jakub Smid <90085992+biba10@users.noreply.github.com>
Date: Wed, 3 Sep 2025 12:47:55 +0200
Subject: [PATCH] Support add_generation_prompt in embeddings endpoint with
 chat request (#23931)

Signed-off-by: biba10
---
 vllm/entrypoints/openai/protocol.py          | 8 ++++++++
 vllm/entrypoints/openai/serving_embedding.py | 4 +---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 488102232562..413e1dd8d633 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1342,6 +1342,14 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
     # --8<-- [start:chat-embedding-extra-params]
+    add_generation_prompt: bool = Field(
+        default=False,
+        description=
+        ("If true, the generation prompt will be added to the chat template. "
+         "This is a parameter used by chat template in tokenizer config of the "
+         "model."),
+    )
+
     add_special_tokens: bool = Field(
         default=False,
         description=(
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index 0a0d98db2d0d..c6d3509afda7 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -93,9 +93,7 @@ class EmbeddingMixin(OpenAIServing):
                 or ctx.chat_template,
                 chat_template_content_format=ctx.
                 chat_template_content_format,
-                # In embedding requests, we are not generating tokens,
-                # so there is no need to append extra tokens to the input
-                add_generation_prompt=False,
+                add_generation_prompt=ctx.request.add_generation_prompt,
                 continue_final_message=False,
                 add_special_tokens=ctx.request.add_special_tokens,
             )
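
For context, a minimal sketch of how a client could exercise the new field once this patch is applied. It assumes a locally running vLLM server at http://localhost:8000 serving an embedding model that has a chat template; the model name is a placeholder, and the request shape follows the chat-style embeddings request (EmbeddingChatRequest) touched by this diff.

    # Minimal sketch, not part of the patch. Assumptions: vLLM server at
    # http://localhost:8000, an embedding model with a chat template, and a
    # placeholder model name.
    import requests

    resp = requests.post(
        "http://localhost:8000/v1/embeddings",
        json={
            "model": "my-embedding-model",  # placeholder model name
            "messages": [
                {"role": "user", "content": "What is the capital of France?"},
            ],
            # New field added by this patch; it defaults to False, so the chat
            # template's generation prompt is only appended when requested.
            "add_generation_prompt": True,
        },
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json()["data"][0]["embedding"][:8])

With add_generation_prompt omitted or set to false the behavior matches the previous hard-coded add_generation_prompt=False, so existing clients are unaffected.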