Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-13 16:55:48 +08:00)
Align max_tokens behavior with openai (#852)
This commit is contained in:
parent 9f6be8692e
commit bbbf86565f
@@ -130,6 +130,8 @@ async def check_length(
     input_ids = tokenizer(prompt).input_ids
     token_num = len(input_ids)
 
+    if request.max_tokens is None:
+        request.max_tokens = max_model_len - token_num
     if token_num + request.max_tokens > max_model_len:
         return input_ids, create_error_response(
             HTTPStatus.BAD_REQUEST,
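With this hunk, a request that omits max_tokens is given whatever room is left in the model's context window instead of failing the length check against the old fixed default. The sketch below is a minimal, self-contained illustration of that defaulting logic; max_model_len, FakeRequest, and resolve_max_tokens are stand-ins invented for the example, not the server's actual objects.

    from dataclasses import dataclass
    from typing import Optional

    max_model_len = 2048  # assumed context window, for illustration only


    @dataclass
    class FakeRequest:
        # hypothetical stand-in for ChatCompletionRequest
        max_tokens: Optional[int] = None


    def resolve_max_tokens(request: FakeRequest, token_num: int) -> int:
        # Mirrors the added lines: when the client omits max_tokens,
        # default it to the remaining room in the context window.
        if request.max_tokens is None:
            request.max_tokens = max_model_len - token_num
        # Unchanged check: reject requests that would overflow the context.
        if token_num + request.max_tokens > max_model_len:
            raise ValueError("prompt + max_tokens exceeds the model's context length")
        return request.max_tokens


    print(resolve_max_tokens(FakeRequest(), token_num=100))              # 1948
    print(resolve_max_tokens(FakeRequest(max_tokens=16), token_num=100)) # 16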
@@ -58,7 +58,7 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = 0.7
     top_p: Optional[float] = 1.0
     n: Optional[int] = 1
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = None
     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
     stream: Optional[bool] = False
     presence_penalty: Optional[float] = 0.0
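On the protocol side, the field's default moves from 16 to None, so the server can tell an omitted max_tokens apart from an explicit value and fill in the remaining context itself, which is closer to how the OpenAI API treats a missing max_tokens. A trimmed-down sketch of how the field now parses (MiniChatRequest is an illustrative model, not the full ChatCompletionRequest):

    from typing import List, Optional, Union
    from pydantic import BaseModel, Field


    class MiniChatRequest(BaseModel):
        # trimmed-down illustration of the changed field only
        temperature: Optional[float] = 0.7
        max_tokens: Optional[int] = None  # was 16 before this commit
        stop: Optional[Union[str, List[str]]] = Field(default_factory=list)


    print(MiniChatRequest().max_tokens)               # None -> server derives the limit
    print(MiniChatRequest(max_tokens=32).max_tokens)  # 32   -> explicit client value kept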