From fd8e055ffba508e094cd1793e49bbdc5e53b7266 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 14 Mar 2025 08:58:34 -0400
Subject: [PATCH] [BugFix]: properly catch templating error when preprocess
 input (#13976)

Signed-off-by: Guillaume Calmettes
---
 vllm/entrypoints/openai/serving_chat.py         | 10 ++++++++++
 vllm/entrypoints/openai/serving_completion.py   | 10 ++++++++++
 vllm/entrypoints/openai/serving_embedding.py    |  3 +++
 vllm/entrypoints/openai/serving_pooling.py      |  7 +++++++
 vllm/entrypoints/openai/serving_tokenization.py |  7 +++++++
 5 files changed, 37 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 1ba33f78cde77..130dfe1841fda 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -7,6 +7,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
 from collections.abc import Sequence as GenericSequence
 from typing import Callable, Final, Optional, Union
 
+import jinja2
 from fastapi import Request
 
 from vllm.config import ModelConfig
@@ -199,6 +200,15 @@ class OpenAIServingChat(OpenAIServing):
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))
+        except TypeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
+        except RuntimeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
+        except jinja2.TemplateError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
 
         request_id = "chatcmpl-" \
             f"{self._base_request_id(raw_request, request.request_id)}"
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 1db91a91e37a9..1067f35ce2402 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -6,6 +6,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
 from collections.abc import Sequence as GenericSequence
 from typing import Optional, Union, cast
 
+import jinja2
 from fastapi import Request
 
 from vllm.config import ModelConfig
@@ -114,6 +115,15 @@ class OpenAIServingCompletion(OpenAIServing):
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))
+        except TypeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
+        except RuntimeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
+        except jinja2.TemplateError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
 
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[RequestOutput, None]] = []
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index 5f6e06e6f79f0..1c2c78aaf8926 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -142,6 +142,9 @@ class OpenAIServingEmbedding(OpenAIServing):
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))
+        except TypeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
 
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py
index 0a3ca2aa7c5bf..894128ee974cd 100644
--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -6,6 +6,7 @@ import time
 from collections.abc import AsyncGenerator
 from typing import Final, Literal, Optional, Union, cast
 
+import jinja2
 import numpy as np
 from fastapi import Request
 from typing_extensions import assert_never
@@ -138,6 +139,12 @@ class OpenAIServingPooling(OpenAIServing):
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))
+        except TypeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
+        except jinja2.TemplateError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
 
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index 4e95ef59e80eb..90c0da2a24d51 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -2,6 +2,7 @@
 
 from typing import Final, Optional, Union
 
+import jinja2
 from fastapi import Request
 
 from vllm.config import ModelConfig
@@ -91,6 +92,12 @@ class OpenAIServingTokenization(OpenAIServing):
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))
+        except TypeError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
+        except jinja2.TemplateError as e:
+            logger.exception("Error in preprocessing prompt inputs")
+            return self.create_error_response(str(e))
 
         input_ids: list[int] = []
         for i, engine_prompt in enumerate(engine_prompts):
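
Note (not part of the patch): the sketch below is a minimal, self-contained
illustration of the failure mode this change guards against. The template
string and the render() helper are hypothetical; only jinja2's public API
(jinja2.Template, jinja2.StrictUndefined, jinja2.TemplateError) is assumed.
Rendering a chat template against malformed input can raise
jinja2.TemplateError (or TypeError/RuntimeError) rather than ValueError, so a
ValueError-only handler lets the exception propagate as an internal server
error instead of a clean error response.

    import jinja2

    # Hypothetical minimal chat template; real templates ship with the model.
    template = jinja2.Template(
        "{% for m in messages %}{{ m['role'] }}: {{ m['content'] }}\n{% endfor %}",
        undefined=jinja2.StrictUndefined,
    )

    def render(messages):
        # Mirrors the patched handlers: convert preprocessing failures into a
        # client-facing error instead of an unhandled exception.
        try:
            return template.render(messages=messages)
        except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
            return f"error: {e}"

    print(render([{"role": "user", "content": "hi"}]))  # renders normally
    print(render([{"role": "user"}]))  # missing key -> jinja2.UndefinedError
                                       # (a TemplateError subclass), caught
    print(render(None))                # iterating None -> TypeError, caught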