diff --git a/tests/entrypoints/openai/test_truncation.py b/tests/entrypoints/openai/test_truncation.py
index 18ddc493c9283..121c0413e1af7 100644
--- a/tests/entrypoints/openai/test_truncation.py
+++ b/tests/entrypoints/openai/test_truncation.py
@@ -64,6 +64,28 @@ async def test_smaller_truncation_size(client: openai.AsyncOpenAI):
     assert response["usage"]["prompt_tokens"] == truncation_size
 
 
+@pytest.mark.asyncio
+async def test_zero_truncation_size(client: openai.AsyncOpenAI):
+    truncation_size = 0
+    kwargs: dict[str, Any] = {
+        "model": MODEL_NAME,
+        "input": input,
+        "truncate_prompt_tokens": truncation_size
+    }
+
+    with pytest.raises(openai.BadRequestError) as err:
+        await client.post(path="embeddings", cast_to=object, body={**kwargs})
+
+    assert err.value.status_code == 400
+    error_details = err.value.response.json()["error"]
+
+    assert error_details["type"] == "BadRequestError"
+    assert "This model's maximum context length is" in error_details["message"]
+    assert "tokens in the input for embedding generation" in error_details[
+        "message"]
+    assert "Please reduce the length of the input" in error_details["message"]
+
+
 @pytest.mark.asyncio
 async def test_bigger_truncation_size(client: openai.AsyncOpenAI):
     truncation_size = max_model_len + 1