mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-01 00:37:11 +08:00
[Frontend] Add --log-error-stack to print stack trace for error response (#22960)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
parent
644d57d531
commit
3210264421
@ -1749,6 +1749,7 @@ async def init_app_state(
|
|||||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||||
enable_force_include_usage=args.enable_force_include_usage,
|
enable_force_include_usage=args.enable_force_include_usage,
|
||||||
enable_log_outputs=args.enable_log_outputs,
|
enable_log_outputs=args.enable_log_outputs,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "generate" in supported_tasks else None
|
) if "generate" in supported_tasks else None
|
||||||
state.openai_serving_chat = OpenAIServingChat(
|
state.openai_serving_chat = OpenAIServingChat(
|
||||||
engine_client,
|
engine_client,
|
||||||
@ -1767,6 +1768,7 @@ async def init_app_state(
|
|||||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||||
enable_force_include_usage=args.enable_force_include_usage,
|
enable_force_include_usage=args.enable_force_include_usage,
|
||||||
enable_log_outputs=args.enable_log_outputs,
|
enable_log_outputs=args.enable_log_outputs,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "generate" in supported_tasks else None
|
) if "generate" in supported_tasks else None
|
||||||
state.openai_serving_completion = OpenAIServingCompletion(
|
state.openai_serving_completion = OpenAIServingCompletion(
|
||||||
engine_client,
|
engine_client,
|
||||||
@ -1776,6 +1778,7 @@ async def init_app_state(
|
|||||||
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
|
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
|
||||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||||
enable_force_include_usage=args.enable_force_include_usage,
|
enable_force_include_usage=args.enable_force_include_usage,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "generate" in supported_tasks else None
|
) if "generate" in supported_tasks else None
|
||||||
state.openai_serving_pooling = OpenAIServingPooling(
|
state.openai_serving_pooling = OpenAIServingPooling(
|
||||||
engine_client,
|
engine_client,
|
||||||
@ -1784,6 +1787,7 @@ async def init_app_state(
|
|||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
chat_template=resolved_chat_template,
|
chat_template=resolved_chat_template,
|
||||||
chat_template_content_format=args.chat_template_content_format,
|
chat_template_content_format=args.chat_template_content_format,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "encode" in supported_tasks else None
|
) if "encode" in supported_tasks else None
|
||||||
state.openai_serving_embedding = OpenAIServingEmbedding(
|
state.openai_serving_embedding = OpenAIServingEmbedding(
|
||||||
engine_client,
|
engine_client,
|
||||||
@ -1792,12 +1796,14 @@ async def init_app_state(
|
|||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
chat_template=resolved_chat_template,
|
chat_template=resolved_chat_template,
|
||||||
chat_template_content_format=args.chat_template_content_format,
|
chat_template_content_format=args.chat_template_content_format,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "embed" in supported_tasks else None
|
) if "embed" in supported_tasks else None
|
||||||
state.openai_serving_classification = ServingClassification(
|
state.openai_serving_classification = ServingClassification(
|
||||||
engine_client,
|
engine_client,
|
||||||
model_config,
|
model_config,
|
||||||
state.openai_serving_models,
|
state.openai_serving_models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "classify" in supported_tasks else None
|
) if "classify" in supported_tasks else None
|
||||||
|
|
||||||
enable_serving_reranking = ("classify" in supported_tasks and getattr(
|
enable_serving_reranking = ("classify" in supported_tasks and getattr(
|
||||||
@ -1807,6 +1813,7 @@ async def init_app_state(
|
|||||||
model_config,
|
model_config,
|
||||||
state.openai_serving_models,
|
state.openai_serving_models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if ("embed" in supported_tasks or enable_serving_reranking) else None
|
) if ("embed" in supported_tasks or enable_serving_reranking) else None
|
||||||
|
|
||||||
state.openai_serving_tokenization = OpenAIServingTokenization(
|
state.openai_serving_tokenization = OpenAIServingTokenization(
|
||||||
@ -1816,18 +1823,21 @@ async def init_app_state(
|
|||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
chat_template=resolved_chat_template,
|
chat_template=resolved_chat_template,
|
||||||
chat_template_content_format=args.chat_template_content_format,
|
chat_template_content_format=args.chat_template_content_format,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
)
|
)
|
||||||
state.openai_serving_transcription = OpenAIServingTranscription(
|
state.openai_serving_transcription = OpenAIServingTranscription(
|
||||||
engine_client,
|
engine_client,
|
||||||
model_config,
|
model_config,
|
||||||
state.openai_serving_models,
|
state.openai_serving_models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "transcription" in supported_tasks else None
|
) if "transcription" in supported_tasks else None
|
||||||
state.openai_serving_translation = OpenAIServingTranslation(
|
state.openai_serving_translation = OpenAIServingTranslation(
|
||||||
engine_client,
|
engine_client,
|
||||||
model_config,
|
model_config,
|
||||||
state.openai_serving_models,
|
state.openai_serving_models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
|
log_error_stack=args.log_error_stack,
|
||||||
) if "transcription" in supported_tasks else None
|
) if "transcription" in supported_tasks else None
|
||||||
|
|
||||||
state.enable_server_load_tracking = args.enable_server_load_tracking
|
state.enable_server_load_tracking = args.enable_server_load_tracking
|
||||||
|
|||||||
@ -180,6 +180,8 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
|||||||
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
|
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
|
||||||
"""Maximum number of HTTP headers allowed in a request for h11 parser.
|
"""Maximum number of HTTP headers allowed in a request for h11 parser.
|
||||||
Helps mitigate header abuse. Default: 256."""
|
Helps mitigate header abuse. Default: 256."""
|
||||||
|
log_error_stack: bool = envs.VLLM_SERVER_DEV_MODE
|
||||||
|
"""If set to True, log the stack trace of error responses"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||||
|
|||||||
@ -76,13 +76,15 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
enable_prompt_tokens_details: bool = False,
|
enable_prompt_tokens_details: bool = False,
|
||||||
enable_force_include_usage: bool = False,
|
enable_force_include_usage: bool = False,
|
||||||
enable_log_outputs: bool = False,
|
enable_log_outputs: bool = False,
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
||||||
enable_force_include_usage=enable_force_include_usage)
|
enable_force_include_usage=enable_force_include_usage,
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
self.response_role = response_role
|
self.response_role = response_role
|
||||||
self.chat_template = chat_template
|
self.chat_template = chat_template
|
||||||
|
|||||||
@ -129,12 +129,14 @@ class ServingClassification(ClassificationMixin):
|
|||||||
models: OpenAIServingModels,
|
models: OpenAIServingModels,
|
||||||
*,
|
*,
|
||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
engine_client=engine_client,
|
engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
|
log_error_stack=log_error_stack,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def create_classify(
|
async def create_classify(
|
||||||
|
|||||||
@ -59,6 +59,7 @@ class OpenAIServingCompletion(OpenAIServing):
|
|||||||
return_tokens_as_token_ids: bool = False,
|
return_tokens_as_token_ids: bool = False,
|
||||||
enable_prompt_tokens_details: bool = False,
|
enable_prompt_tokens_details: bool = False,
|
||||||
enable_force_include_usage: bool = False,
|
enable_force_include_usage: bool = False,
|
||||||
|
log_error_stack: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
engine_client=engine_client,
|
engine_client=engine_client,
|
||||||
@ -67,6 +68,7 @@ class OpenAIServingCompletion(OpenAIServing):
|
|||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
||||||
enable_force_include_usage=enable_force_include_usage,
|
enable_force_include_usage=enable_force_include_usage,
|
||||||
|
log_error_stack=log_error_stack,
|
||||||
)
|
)
|
||||||
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
||||||
self.default_sampling_params = (
|
self.default_sampling_params = (
|
||||||
|
|||||||
@ -593,11 +593,13 @@ class OpenAIServingEmbedding(EmbeddingMixin):
|
|||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
chat_template: Optional[str],
|
chat_template: Optional[str],
|
||||||
chat_template_content_format: ChatTemplateContentFormatOption,
|
chat_template_content_format: ChatTemplateContentFormatOption,
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger)
|
request_logger=request_logger,
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
self.chat_template = chat_template
|
self.chat_template = chat_template
|
||||||
self.chat_template_content_format: Final = chat_template_content_format
|
self.chat_template_content_format: Final = chat_template_content_format
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import io
|
|||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
|
from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
@ -205,6 +206,7 @@ class OpenAIServing:
|
|||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
return_tokens_as_token_ids: bool = False,
|
return_tokens_as_token_ids: bool = False,
|
||||||
enable_force_include_usage: bool = False,
|
enable_force_include_usage: bool = False,
|
||||||
|
log_error_stack: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
@ -222,6 +224,7 @@ class OpenAIServing:
|
|||||||
|
|
||||||
self._async_tokenizer_pool: dict[AnyTokenizer,
|
self._async_tokenizer_pool: dict[AnyTokenizer,
|
||||||
AsyncMicrobatchTokenizer] = {}
|
AsyncMicrobatchTokenizer] = {}
|
||||||
|
self.log_error_stack = log_error_stack
|
||||||
|
|
||||||
def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer:
|
def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer:
|
||||||
"""
|
"""
|
||||||
@ -412,6 +415,12 @@ class OpenAIServing:
|
|||||||
message: str,
|
message: str,
|
||||||
err_type: str = "BadRequestError",
|
err_type: str = "BadRequestError",
|
||||||
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
|
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
|
||||||
|
if self.log_error_stack:
|
||||||
|
exc_type, _, _ = sys.exc_info()
|
||||||
|
if exc_type is not None:
|
||||||
|
traceback.print_exc()
|
||||||
|
else:
|
||||||
|
traceback.print_stack()
|
||||||
return ErrorResponse(error=ErrorInfo(
|
return ErrorResponse(error=ErrorInfo(
|
||||||
message=message, type=err_type, code=status_code.value))
|
message=message, type=err_type, code=status_code.value))
|
||||||
|
|
||||||
|
|||||||
@ -58,11 +58,13 @@ class OpenAIServingPooling(OpenAIServing):
|
|||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
chat_template: Optional[str],
|
chat_template: Optional[str],
|
||||||
chat_template_content_format: ChatTemplateContentFormatOption,
|
chat_template_content_format: ChatTemplateContentFormatOption,
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger)
|
request_logger=request_logger,
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
self.chat_template = chat_template
|
self.chat_template = chat_template
|
||||||
self.chat_template_content_format: Final = chat_template_content_format
|
self.chat_template_content_format: Final = chat_template_content_format
|
||||||
|
|||||||
@ -88,6 +88,7 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
enable_prompt_tokens_details: bool = False,
|
enable_prompt_tokens_details: bool = False,
|
||||||
enable_force_include_usage: bool = False,
|
enable_force_include_usage: bool = False,
|
||||||
enable_log_outputs: bool = False,
|
enable_log_outputs: bool = False,
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
engine_client=engine_client,
|
engine_client=engine_client,
|
||||||
@ -96,6 +97,7 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
||||||
enable_force_include_usage=enable_force_include_usage,
|
enable_force_include_usage=enable_force_include_usage,
|
||||||
|
log_error_stack=log_error_stack,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.chat_template = chat_template
|
self.chat_template = chat_template
|
||||||
|
|||||||
@ -47,11 +47,13 @@ class ServingScores(OpenAIServing):
|
|||||||
models: OpenAIServingModels,
|
models: OpenAIServingModels,
|
||||||
*,
|
*,
|
||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger)
|
request_logger=request_logger,
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
async def _embedding_score(
|
async def _embedding_score(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@ -39,11 +39,13 @@ class OpenAIServingTokenization(OpenAIServing):
|
|||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
chat_template: Optional[str],
|
chat_template: Optional[str],
|
||||||
chat_template_content_format: ChatTemplateContentFormatOption,
|
chat_template_content_format: ChatTemplateContentFormatOption,
|
||||||
|
log_error_stack: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger)
|
request_logger=request_logger,
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
self.chat_template = chat_template
|
self.chat_template = chat_template
|
||||||
self.chat_template_content_format: Final = chat_template_content_format
|
self.chat_template_content_format: Final = chat_template_content_format
|
||||||
|
|||||||
@ -32,13 +32,15 @@ class OpenAIServingTranscription(OpenAISpeechToText):
|
|||||||
*,
|
*,
|
||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
return_tokens_as_token_ids: bool = False,
|
return_tokens_as_token_ids: bool = False,
|
||||||
|
log_error_stack: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
||||||
task_type="transcribe")
|
task_type="transcribe",
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
async def create_transcription(
|
async def create_transcription(
|
||||||
self, audio_data: bytes, request: TranscriptionRequest,
|
self, audio_data: bytes, request: TranscriptionRequest,
|
||||||
@ -88,13 +90,15 @@ class OpenAIServingTranslation(OpenAISpeechToText):
|
|||||||
*,
|
*,
|
||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
return_tokens_as_token_ids: bool = False,
|
return_tokens_as_token_ids: bool = False,
|
||||||
|
log_error_stack: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
||||||
task_type="translate")
|
task_type="translate",
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
async def create_translation(
|
async def create_translation(
|
||||||
self, audio_data: bytes, request: TranslationRequest,
|
self, audio_data: bytes, request: TranslationRequest,
|
||||||
|
|||||||
@ -53,12 +53,14 @@ class OpenAISpeechToText(OpenAIServing):
|
|||||||
request_logger: Optional[RequestLogger],
|
request_logger: Optional[RequestLogger],
|
||||||
return_tokens_as_token_ids: bool = False,
|
return_tokens_as_token_ids: bool = False,
|
||||||
task_type: Literal["transcribe", "translate"] = "transcribe",
|
task_type: Literal["transcribe", "translate"] = "transcribe",
|
||||||
|
log_error_stack: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__(engine_client=engine_client,
|
super().__init__(engine_client=engine_client,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
models=models,
|
models=models,
|
||||||
request_logger=request_logger,
|
request_logger=request_logger,
|
||||||
return_tokens_as_token_ids=return_tokens_as_token_ids)
|
return_tokens_as_token_ids=return_tokens_as_token_ids,
|
||||||
|
log_error_stack=log_error_stack)
|
||||||
|
|
||||||
self.default_sampling_params = (
|
self.default_sampling_params = (
|
||||||
self.model_config.get_diff_sampling_param())
|
self.model_config.get_diff_sampling_param())
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user