mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-21 02:36:59 +08:00
[Frontend] Improve StreamingResponse Exception Handling (#11752)
This commit is contained in:
parent
eba17173d3
commit
33fc1e2e86
@@ -301,7 +301,7 @@ class OpenAIServingChat(OpenAIServing):
 ] * num_choices
 else:
 tool_parsers = [None] * num_choices
-except RuntimeError as e:
+except Exception as e:
 logger.exception("Error in tool parser creation.")
 data = self.create_streaming_error_response(str(e))
 yield f"data: {data}\n\n"
@@ -591,7 +591,7 @@ class OpenAIServingChat(OpenAIServing):
 completion_tokens=num_completion_tokens,
 total_tokens=num_prompt_tokens + num_completion_tokens)
 
-except ValueError as e:
+except Exception as e:
 # TODO: Use a vllm-specific Validation Error
 logger.exception("Error in chat completion stream generator.")
 data = self.create_streaming_error_response(str(e))
@@ -371,7 +371,7 @@ class OpenAIServingCompletion(OpenAIServing):
 # report to FastAPI middleware aggregate usage across all choices
 request_metadata.final_usage_info = final_usage_info
 
-except ValueError as e:
+except Exception as e:
 # TODO: Use a vllm-specific Validation Error
 data = self.create_streaming_error_response(str(e))
 yield f"data: {data}\n\n"
Loading…
x
Reference in New Issue
Block a user