mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 13:27:19 +08:00
Add /generate API
This commit is contained in:
parent
69c9a01538
commit
1bff9a59ec
@ -689,6 +689,58 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
|
||||
return StreamingResponse(content=generator, media_type="text/event-stream")
|
||||
|
||||
|
||||
@router.post(
    "/generate",
    dependencies=[Depends(validate_json_request)],
    responses={
        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
        HTTPStatus.NOT_FOUND.value: {"model": ErrorResponse},
        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
    },
)
@with_cancellation
@load_aware_call
async def generate(raw_request: Request):
    """Serve ``POST /generate`` by delegating to the completions handler.

    The JSON body is read directly from ``raw_request`` and translated into
    a ``CompletionRequest`` with ``return_token_ids=True``. ``prompt`` is
    required; ``max_tokens``, ``temperature``, ``top_p``, ``top_k``,
    ``logprobs`` and ``ignore_eos`` are forwarded only when the client
    supplied them.

    Returns:
        The error response built by ``base(raw_request)`` when no completion
        handler is available; a ``JSONResponse`` when the handler yields an
        ``ErrorResponse`` or a ``CompletionResponse``; otherwise a
        ``StreamingResponse`` with media type ``text/event-stream``.

    Raises:
        HTTPException: 400 when the body is not a JSON object, when it has
            no ``prompt`` key, or when the handler raises ``OverflowError``;
            500 for any other exception raised by the handler.
    """
    handler = completion(raw_request)
    if handler is None:
        return base(raw_request).create_error_response(
            message="The model does not support Completions API"
        )

    request_dict = await raw_request.json()

    # Reject malformed bodies up front: indexing a non-dict payload or a
    # missing "prompt" key would otherwise escape as an unhandled
    # TypeError/KeyError and surface to the client as a 500.
    if not isinstance(request_dict, dict) or "prompt" not in request_dict:
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST.value,
            detail='Request body must be a JSON object with a "prompt" field',
        )

    # Forward only the optional fields the client actually sent, so omitted
    # ones fall back to whatever CompletionRequest itself defines instead of
    # raising KeyError. An explicit JSON null is still passed through as None.
    optional_fields = (
        "max_tokens",
        "temperature",
        "top_p",
        "top_k",
        "logprobs",
        "ignore_eos",
    )
    optional_kwargs = {
        field: request_dict[field]
        for field in optional_fields
        if field in request_dict
    }
    completion_request = CompletionRequest(
        prompt=request_dict["prompt"],
        return_token_ids=True,
        **optional_kwargs,
    )

    try:
        generator = await handler.create_completion(completion_request, raw_request)
    except OverflowError as e:
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST.value, detail=str(e)
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value, detail=str(e)
        ) from e

    if isinstance(generator, ErrorResponse):
        return JSONResponse(
            content=generator.model_dump(), status_code=generator.error.code
        )
    elif isinstance(generator, CompletionResponse):
        return JSONResponse(content=generator.model_dump())

    # Anything else is handed to the client as a server-sent event stream.
    return StreamingResponse(content=generator, media_type="text/event-stream")
|
||||
|
||||
|
||||
@router.post(
|
||||
"/v1/embeddings",
|
||||
dependencies=[Depends(validate_json_request)],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user