Support add_generation_prompt in embeddings endpoint with chat request (#23931)

Signed-off-by: biba10 <jaksmid@seznam.cz>
Author: Jakub Smid
Date: 2025-09-03 12:47:55 +02:00 (committed by GitHub)
parent 51383bd472
commit 28f350e147
2 changed files with 9 additions and 3 deletions


@@ -1342,6 +1342,14 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
     # --8<-- [start:chat-embedding-extra-params]
+    add_generation_prompt: bool = Field(
+        default=False,
+        description=(
+            "If true, the generation prompt will be added to the chat "
+            "template. This is a parameter used by the chat template in "
+            "the tokenizer config of the model."),
+    )
     add_special_tokens: bool = Field(
         default=False,
         description=(

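The new field is a per-request extra parameter, so clients can opt in on individual calls. Below is a minimal sketch of exercising it against a vLLM OpenAI-compatible server; the base URL and model name are assumptions for illustration, not part of this commit.

import requests

resp = requests.post(
    "http://localhost:8000/v1/embeddings",  # assumed local vLLM server
    json={
        "model": "intfloat/e5-mistral-7b-instruct",  # illustrative model name
        "messages": [
            {"role": "user", "content": "What is the capital of France?"},
        ],
        # New extra parameter added by this commit; defaults to False.
        "add_generation_prompt": True,
    },
)
resp.raise_for_status()
print(len(resp.json()["data"][0]["embedding"]))  # embedding dimensionality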

@@ -93,9 +93,7 @@ class EmbeddingMixin(OpenAIServing):
             or ctx.chat_template,
             chat_template_content_format=ctx.
             chat_template_content_format,
-            # In embedding requests, we are not generating tokens,
-            # so there is no need to append extra tokens to the input
-            add_generation_prompt=False,
+            add_generation_prompt=ctx.request.add_generation_prompt,
             continue_final_message=False,
             add_special_tokens=ctx.request.add_special_tokens,
         )
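The serving path previously hard-coded add_generation_prompt=False; it now forwards the request value. To see what the flag actually changes in the rendered prompt, here is a sketch using Hugging Face transformers' apply_chat_template; the model name is illustrative, and the exact suffix depends on the model's chat template.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
messages = [{"role": "user", "content": "What is the capital of France?"}]

without = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=False)
with_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True)

# With the flag set, the rendered string ends with the tokens that cue
# the model to start an assistant turn (e.g. "<|im_start|>assistant\n"
# for ChatML-style templates).
print(with_prompt[len(without):])

Since the field defaults to False, existing embedding requests are unaffected; only clients that explicitly set add_generation_prompt see the extra template suffix.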