Align max_tokens behavior with openai (#852)

Commit bbbf86565f (parent 9f6be8692e)
Author: Wen Sun
Date: 2023-09-24 09:10:13 +08:00
Committed by: GitHub
2 changed files with 3 additions and 1 deletion


@@ -130,6 +130,8 @@ async def check_length(
     input_ids = tokenizer(prompt).input_ids
     token_num = len(input_ids)
 
+    if request.max_tokens is None:
+        request.max_tokens = max_model_len - token_num
     if token_num + request.max_tokens > max_model_len:
         return input_ids, create_error_response(
             HTTPStatus.BAD_REQUEST,
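
In OpenAI's chat completions API, omitting max_tokens means "generate up to the model's remaining context", whereas this server previously fell back to a fixed default of 16. The hunk above aligns the two by defaulting the field to None and resolving it to max_model_len - token_num at request time. A minimal standalone sketch of that defaulting rule (the function name, signature, and error text are simplifications for illustration, not the server's actual API):

# Illustrative sketch of the new defaulting rule; resolve_max_tokens is a
# hypothetical helper, not a function from the server module.
from typing import List, Optional, Tuple


def resolve_max_tokens(
    max_tokens: Optional[int],
    prompt_token_ids: List[int],
    max_model_len: int,
) -> Tuple[int, Optional[str]]:
    token_num = len(prompt_token_ids)
    # New in this commit: an omitted max_tokens means "use the rest of
    # the context window" rather than the old fixed default of 16.
    if max_tokens is None:
        max_tokens = max_model_len - token_num
    # Unchanged: reject requests that would overflow the context window.
    if token_num + max_tokens > max_model_len:
        return max_tokens, (
            f"This model's maximum context length is {max_model_len} tokens, "
            f"but {token_num + max_tokens} were requested."
        )
    return max_tokens, None


# With a 4096-token context and a 100-token prompt, an omitted
# max_tokens now resolves to 3996 instead of 16.
resolved, err = resolve_max_tokens(None, [0] * 100, 4096)
assert resolved == 3996 and err is None

# An explicit value that overflows the window is still rejected.
resolved, err = resolve_max_tokens(4000, [0] * 100, 4096)
assert err is not None

An explicit max_tokens is left untouched, so clients that already set the field keep their current behavior.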


@@ -58,7 +58,7 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = 0.7
     top_p: Optional[float] = 1.0
     n: Optional[int] = 1
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = None
     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
     stream: Optional[bool] = False
     presence_penalty: Optional[float] = 0.0
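
On the schema side, only the field default changes. A pared-down Pydantic stand-in for ChatCompletionRequest (fields beyond those visible in the hunk are omitted) shows the client-visible effect:

from typing import List, Optional, Union

from pydantic import BaseModel, Field


class ChatCompletionRequest(BaseModel):
    # Abbreviated stand-in for the real request model; only the fields
    # from the hunk above are reproduced here.
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    max_tokens: Optional[int] = None  # was 16 before this commit
    stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
    stream: Optional[bool] = False
    presence_penalty: Optional[float] = 0.0


# A request that omits max_tokens now parses to None, and the server's
# check_length fills in max_model_len - token_num, matching OpenAI's
# "generate until the context is full" behavior.
req = ChatCompletionRequest()
assert req.max_tokens is None

Clients that implicitly relied on the old cap of 16 now get much longer completions unless they pass max_tokens explicitly.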