mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-19 08:26:58 +08:00
Remove /generate API
This commit is contained in:
parent
1bff9a59ec
commit
eda71c2847
@ -689,58 +689,6 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
|
|||||||
return StreamingResponse(content=generator, media_type="text/event-stream")
|
return StreamingResponse(content=generator, media_type="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
@router.post(
    "/generate",
    dependencies=[Depends(validate_json_request)],
    responses={
        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
        HTTPStatus.NOT_FOUND.value: {"model": ErrorResponse},
        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
    },
)
@with_cancellation
@load_aware_call
async def generate(raw_request: Request):
    """Handle ``POST /generate`` by delegating to the completions handler.

    The JSON body must be an object containing ``prompt``, ``max_tokens``,
    ``temperature``, ``top_p``, ``top_k``, ``logprobs`` and ``ignore_eos``.
    These values are copied into a ``CompletionRequest`` (always with
    ``return_token_ids=True``) and passed to the handler's
    ``create_completion``.

    Args:
        raw_request: The incoming request; its JSON body supplies the
            sampling fields listed above.

    Returns:
        An error response built by ``create_error_response`` when no
        completion handler is available; a ``JSONResponse`` when the handler
        yields an ``ErrorResponse`` or a ``CompletionResponse``; otherwise a
        ``StreamingResponse`` with media type ``text/event-stream``.

    Raises:
        HTTPException: 400 when the body is not a JSON object, when a
            required field is missing, or when the handler raises
            ``OverflowError``; 500 for any other exception raised by the
            handler.
    """
    handler = completion(raw_request)
    if handler is None:
        return base(raw_request).create_error_response(
            message="The model does not support Completions API"
        )

    request_dict = await raw_request.json()

    # Validate up front: an absent key or a non-object body is a client
    # error and should be reported as 400 rather than escaping as an
    # unhandled KeyError/TypeError (which would surface as a 500).
    required_fields = (
        "prompt",
        "max_tokens",
        "temperature",
        "top_p",
        "top_k",
        "logprobs",
        "ignore_eos",
    )
    if not isinstance(request_dict, dict):
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST.value,
            detail="Request body must be a JSON object",
        )
    missing = [name for name in required_fields if name not in request_dict]
    if missing:
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST.value,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )

    completion_request = CompletionRequest(
        **{name: request_dict[name] for name in required_fields},
        return_token_ids=True,
    )

    try:
        generator = await handler.create_completion(completion_request, raw_request)
    except OverflowError as e:
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST.value, detail=str(e)
        ) from e
    except Exception as e:
        # Route-level boundary: any other handler failure is reported as 500.
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value, detail=str(e)
        ) from e

    if isinstance(generator, ErrorResponse):
        return JSONResponse(
            content=generator.model_dump(), status_code=generator.error.code
        )
    elif isinstance(generator, CompletionResponse):
        return JSONResponse(content=generator.model_dump())

    # Anything else is treated as a stream of server-sent events.
    return StreamingResponse(content=generator, media_type="text/event-stream")
|
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/v1/embeddings",
|
"/v1/embeddings",
|
||||||
dependencies=[Depends(validate_json_request)],
|
dependencies=[Depends(validate_json_request)],
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user