From bec060fd99e371b1adc53f65636061f702fa8e61 Mon Sep 17 00:00:00 2001
From: Andrew Sansom
Date: Wed, 17 Sep 2025 23:25:07 -0500
Subject: [PATCH] Mark prompt logprobs as incompatible with prompt embeds at API level (#25077)

Signed-off-by: Andrew Sansom
---
 .../test_completion_with_prompt_embeds.py     | 17 +++++++++++++++++
 vllm/engine/llm_engine.py                     | 11 +++++++----
 vllm/entrypoints/openai/serving_completion.py |  5 +++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
index dbfb1b024f7c..7b58f851a4d2 100644
--- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
+++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
@@ -228,3 +228,20 @@ async def test_completions_with_logprobs_and_prompt_embeds(
             assert max(logprobs_arg,
                        1) <= len(top_logprobs) <= logprobs_arg + 1
         assert len(logprobs.tokens) == 5
+
+
+@pytest.mark.asyncio
+async def test_prompt_logprobs_raises_error(
+        client_with_prompt_embeds: openai.AsyncOpenAI):
+    with pytest.raises(BadRequestError, match="not compatible"):
+        encoded_embeds = create_dummy_embeds()
+        await client_with_prompt_embeds.completions.create(
+            model=MODEL_NAME,
+            prompt="",
+            max_tokens=5,
+            temperature=0.0,
+            extra_body={
+                "prompt_embeds": encoded_embeds,
+                "prompt_logprobs": True
+            },
+        )
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index c35bd20371d0..34b5dcb58750 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -671,10 +671,13 @@ class LLMEngine:
             arrival_time = time.time()
 
         if (isinstance(prompt, dict)
-                and prompt.get("prompt_embeds", None) is not None
-                and not prompt.get("prompt_token_ids", None)):
-            seq_len = prompt["prompt_embeds"].shape[0]
-            prompt["prompt_token_ids"] = [0] * seq_len
+                and prompt.get("prompt_embeds", None) is not None):
+            if not prompt.get("prompt_token_ids", None):
+                seq_len = prompt["prompt_embeds"].shape[0]
+                prompt["prompt_token_ids"] = [0] * seq_len
+            if params.prompt_logprobs is not None:
+                raise ValueError(
+                    "prompt_logprobs is not compatible with prompt embeds.")
 
         processed_inputs = self.input_preprocessor.preprocess(
             prompt,
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 044f08f32b0d..0c61c48da0bc 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -112,6 +112,11 @@ class OpenAIServingCompletion(OpenAIServing):
             return self.create_error_response(
                 "Echo is unsupported with prompt embeds.")
 
+        if (request.prompt_logprobs is not None
+                and request.prompt_embeds is not None):
+            return self.create_error_response(
+                "prompt_logprobs is not compatible with prompt embeds.")
+
         request_id = (
             f"cmpl-"
             f"{self._base_request_id(raw_request, request.request_id)}")
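
The added test exercises the new rejection end to end through the OpenAI-compatible completions endpoint. As a supplement to the patch, here is a minimal client-side sketch of the same interaction, assuming a vLLM OpenAI-compatible server on http://localhost:8000, a placeholder served model name, a hidden size of 4096 (must match the served model), and that prompt embeds are sent as a base64-encoded torch-saved tensor (the encoding used by the create_dummy_embeds() test helper):

    # Sketch only, not part of the patch: reproduce the new API-level check.
    import base64
    import io

    import openai
    import torch

    client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    # Encode a dummy [seq_len, hidden_size] embedding tensor. hidden_size=4096
    # is an assumption and must match the served model.
    buffer = io.BytesIO()
    torch.save(torch.randn(10, 4096), buffer)
    encoded_embeds = base64.b64encode(buffer.getvalue()).decode("utf-8")

    try:
        client.completions.create(
            model="my-served-model",  # placeholder model name
            prompt="",
            max_tokens=5,
            temperature=0.0,
            extra_body={
                "prompt_embeds": encoded_embeds,
                # Combining prompt_logprobs with prompt_embeds is now rejected
                # before the request reaches the engine.
                "prompt_logprobs": True,
            },
        )
    except openai.BadRequestError as exc:
        # Expected message: "prompt_logprobs is not compatible with prompt embeds."
        print(exc)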