[Frontend] Add --log-error-stack to print stack trace for error response (#22960)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang 2025-08-26 21:58:59 -07:00 committed by GitHub
parent 644d57d531
commit 3210264421
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 51 additions and 8 deletions

View File

@ -1749,6 +1749,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs, enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None ) if "generate" in supported_tasks else None
state.openai_serving_chat = OpenAIServingChat( state.openai_serving_chat = OpenAIServingChat(
engine_client, engine_client,
@ -1767,6 +1768,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs, enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None ) if "generate" in supported_tasks else None
state.openai_serving_completion = OpenAIServingCompletion( state.openai_serving_completion = OpenAIServingCompletion(
engine_client, engine_client,
@ -1776,6 +1778,7 @@ async def init_app_state(
return_tokens_as_token_ids=args.return_tokens_as_token_ids, return_tokens_as_token_ids=args.return_tokens_as_token_ids,
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None ) if "generate" in supported_tasks else None
state.openai_serving_pooling = OpenAIServingPooling( state.openai_serving_pooling = OpenAIServingPooling(
engine_client, engine_client,
@ -1784,6 +1787,7 @@ async def init_app_state(
request_logger=request_logger, request_logger=request_logger,
chat_template=resolved_chat_template, chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format, chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) if "encode" in supported_tasks else None ) if "encode" in supported_tasks else None
state.openai_serving_embedding = OpenAIServingEmbedding( state.openai_serving_embedding = OpenAIServingEmbedding(
engine_client, engine_client,
@ -1792,12 +1796,14 @@ async def init_app_state(
request_logger=request_logger, request_logger=request_logger,
chat_template=resolved_chat_template, chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format, chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) if "embed" in supported_tasks else None ) if "embed" in supported_tasks else None
state.openai_serving_classification = ServingClassification( state.openai_serving_classification = ServingClassification(
engine_client, engine_client,
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "classify" in supported_tasks else None ) if "classify" in supported_tasks else None
enable_serving_reranking = ("classify" in supported_tasks and getattr( enable_serving_reranking = ("classify" in supported_tasks and getattr(
@ -1807,6 +1813,7 @@ async def init_app_state(
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if ("embed" in supported_tasks or enable_serving_reranking) else None ) if ("embed" in supported_tasks or enable_serving_reranking) else None
state.openai_serving_tokenization = OpenAIServingTokenization( state.openai_serving_tokenization = OpenAIServingTokenization(
@ -1816,18 +1823,21 @@ async def init_app_state(
request_logger=request_logger, request_logger=request_logger,
chat_template=resolved_chat_template, chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format, chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) )
state.openai_serving_transcription = OpenAIServingTranscription( state.openai_serving_transcription = OpenAIServingTranscription(
engine_client, engine_client,
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "transcription" in supported_tasks else None ) if "transcription" in supported_tasks else None
state.openai_serving_translation = OpenAIServingTranslation( state.openai_serving_translation = OpenAIServingTranslation(
engine_client, engine_client,
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "transcription" in supported_tasks else None ) if "transcription" in supported_tasks else None
state.enable_server_load_tracking = args.enable_server_load_tracking state.enable_server_load_tracking = args.enable_server_load_tracking

View File

@ -180,6 +180,8 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
"""Maximum number of HTTP headers allowed in a request for h11 parser. """Maximum number of HTTP headers allowed in a request for h11 parser.
Helps mitigate header abuse. Default: 256.""" Helps mitigate header abuse. Default: 256."""
log_error_stack: bool = envs.VLLM_SERVER_DEV_MODE
"""If set to True, log the stack trace of error responses"""
@staticmethod @staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

View File

@ -76,13 +76,15 @@ class OpenAIServingChat(OpenAIServing):
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
enable_log_outputs: bool = False, enable_log_outputs: bool = False,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage) enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack)
self.response_role = response_role self.response_role = response_role
self.chat_template = chat_template self.chat_template = chat_template

View File

@ -129,12 +129,14 @@ class ServingClassification(ClassificationMixin):
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=log_error_stack,
) )
async def create_classify( async def create_classify(

View File

@ -59,6 +59,7 @@ class OpenAIServingCompletion(OpenAIServing):
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
log_error_stack: bool = False,
): ):
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
@ -67,6 +68,7 @@ class OpenAIServingCompletion(OpenAIServing):
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage, enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack,
) )
self.enable_prompt_tokens_details = enable_prompt_tokens_details self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.default_sampling_params = ( self.default_sampling_params = (

View File

@ -593,11 +593,13 @@ class OpenAIServingEmbedding(EmbeddingMixin):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
chat_template: Optional[str], chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption, chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format

View File

@ -5,6 +5,7 @@ import io
import json import json
import sys import sys
import time import time
import traceback
from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from http import HTTPStatus from http import HTTPStatus
@ -205,6 +206,7 @@ class OpenAIServing:
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
log_error_stack: bool = False,
): ):
super().__init__() super().__init__()
@ -222,6 +224,7 @@ class OpenAIServing:
self._async_tokenizer_pool: dict[AnyTokenizer, self._async_tokenizer_pool: dict[AnyTokenizer,
AsyncMicrobatchTokenizer] = {} AsyncMicrobatchTokenizer] = {}
self.log_error_stack = log_error_stack
def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer: def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer:
""" """
@ -412,6 +415,12 @@ class OpenAIServing:
message: str, message: str,
err_type: str = "BadRequestError", err_type: str = "BadRequestError",
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse: status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
if self.log_error_stack:
exc_type, _, _ = sys.exc_info()
if exc_type is not None:
traceback.print_exc()
else:
traceback.print_stack()
return ErrorResponse(error=ErrorInfo( return ErrorResponse(error=ErrorInfo(
message=message, type=err_type, code=status_code.value)) message=message, type=err_type, code=status_code.value))

View File

@ -58,11 +58,13 @@ class OpenAIServingPooling(OpenAIServing):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
chat_template: Optional[str], chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption, chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format

View File

@ -88,6 +88,7 @@ class OpenAIServingResponses(OpenAIServing):
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
enable_log_outputs: bool = False, enable_log_outputs: bool = False,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
@ -96,6 +97,7 @@ class OpenAIServingResponses(OpenAIServing):
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage, enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack,
) )
self.chat_template = chat_template self.chat_template = chat_template

View File

@ -47,11 +47,13 @@ class ServingScores(OpenAIServing):
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
async def _embedding_score( async def _embedding_score(
self, self,

View File

@ -39,11 +39,13 @@ class OpenAIServingTokenization(OpenAIServing):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
chat_template: Optional[str], chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption, chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format

View File

@ -32,13 +32,15 @@ class OpenAIServingTranscription(OpenAISpeechToText):
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
log_error_stack: bool = False,
): ):
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
task_type="transcribe") task_type="transcribe",
log_error_stack=log_error_stack)
async def create_transcription( async def create_transcription(
self, audio_data: bytes, request: TranscriptionRequest, self, audio_data: bytes, request: TranscriptionRequest,
@ -88,13 +90,15 @@ class OpenAIServingTranslation(OpenAISpeechToText):
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
log_error_stack: bool = False,
): ):
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
task_type="translate") task_type="translate",
log_error_stack=log_error_stack)
async def create_translation( async def create_translation(
self, audio_data: bytes, request: TranslationRequest, self, audio_data: bytes, request: TranslationRequest,

View File

@ -53,12 +53,14 @@ class OpenAISpeechToText(OpenAIServing):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
task_type: Literal["transcribe", "translate"] = "transcribe", task_type: Literal["transcribe", "translate"] = "transcribe",
log_error_stack: bool = False,
): ):
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids) return_tokens_as_token_ids=return_tokens_as_token_ids,
log_error_stack=log_error_stack)
self.default_sampling_params = ( self.default_sampling_params = (
self.model_config.get_diff_sampling_param()) self.model_config.get_diff_sampling_param())