From 28f350e147c4b5c050c0080ef0d924c15ab87635 Mon Sep 17 00:00:00 2001
From: Jakub Smid <90085992+biba10@users.noreply.github.com>
Date: Wed, 3 Sep 2025 12:47:55 +0200
Subject: [PATCH] Support add_generation_prompt in embeddings endpoint with
 chat request (#23931)

Signed-off-by: biba10
---
 vllm/entrypoints/openai/protocol.py          | 8 ++++++++
 vllm/entrypoints/openai/serving_embedding.py | 4 +---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 488102232562..413e1dd8d633 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1342,6 +1342,14 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
     # --8<-- [start:chat-embedding-extra-params]
+    add_generation_prompt: bool = Field(
+        default=False,
+        description=
+        ("If true, the generation prompt will be added to the chat template. "
+         "This is a parameter used by chat template in tokenizer config of the "
+         "model."),
+    )
+
     add_special_tokens: bool = Field(
         default=False,
         description=(
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index 0a0d98db2d0d..c6d3509afda7 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -93,9 +93,7 @@ class EmbeddingMixin(OpenAIServing):
                 or ctx.chat_template,
                 chat_template_content_format=ctx.
                 chat_template_content_format,
-                # In embedding requests, we are not generating tokens,
-                # so there is no need to append extra tokens to the input
-                add_generation_prompt=False,
+                add_generation_prompt=ctx.request.add_generation_prompt,
                 continue_final_message=False,
                 add_special_tokens=ctx.request.add_special_tokens,
             )
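
For context, a minimal sketch of how a client could exercise the new field once this patch is applied. It assumes a locally running vLLM server at http://localhost:8000 serving an embedding model that has a chat template; the model name is a placeholder, and the request shape follows the chat-style embeddings request (EmbeddingChatRequest) touched by this diff.

    # Minimal sketch, not part of the patch. Assumptions: vLLM server at
    # http://localhost:8000, an embedding model with a chat template, and a
    # placeholder model name.
    import requests

    resp = requests.post(
        "http://localhost:8000/v1/embeddings",
        json={
            "model": "my-embedding-model",  # placeholder model name
            "messages": [
                {"role": "user", "content": "What is the capital of France?"},
            ],
            # New field added by this patch; it defaults to False, so the chat
            # template's generation prompt is only appended when requested.
            "add_generation_prompt": True,
        },
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json()["data"][0]["embedding"][:8])

With add_generation_prompt omitted or set to false the behavior matches the previous hard-coded add_generation_prompt=False, so existing clients are unaffected.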