mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 03:41:47 +08:00
add custom server params (#1868)
This commit is contained in:
parent
0f90effc66
commit
60dc62dc9e
@ -253,8 +253,10 @@ async def create_chat_completion(request: ChatCompletionRequest,
|
||||
n=request.n,
|
||||
presence_penalty=request.presence_penalty,
|
||||
frequency_penalty=request.frequency_penalty,
|
||||
repetition_penalty=request.repetition_penalty,
|
||||
temperature=request.temperature,
|
||||
top_p=request.top_p,
|
||||
min_p=request.min_p,
|
||||
stop=request.stop,
|
||||
stop_token_ids=request.stop_token_ids,
|
||||
max_tokens=request.max_tokens,
|
||||
@ -497,9 +499,11 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
|
||||
best_of=request.best_of,
|
||||
presence_penalty=request.presence_penalty,
|
||||
frequency_penalty=request.frequency_penalty,
|
||||
repetition_penalty=request.repetition_penalty,
|
||||
temperature=request.temperature,
|
||||
top_p=request.top_p,
|
||||
top_k=request.top_k,
|
||||
min_p=request.min_p,
|
||||
stop=request.stop,
|
||||
stop_token_ids=request.stop_token_ids,
|
||||
ignore_eos=request.ignore_eos,
|
||||
|
||||
@ -75,6 +75,8 @@ class ChatCompletionRequest(BaseModel):
|
||||
spaces_between_special_tokens: Optional[bool] = True
|
||||
add_generation_prompt: Optional[bool] = True
|
||||
echo: Optional[bool] = False
|
||||
repetition_penalty: Optional[float] = 1.0
|
||||
min_p: Optional[float] = 0.0
|
||||
|
||||
|
||||
class CompletionRequest(BaseModel):
|
||||
@ -102,6 +104,8 @@ class CompletionRequest(BaseModel):
|
||||
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
|
||||
skip_special_tokens: Optional[bool] = True
|
||||
spaces_between_special_tokens: Optional[bool] = True
|
||||
repetition_penalty: Optional[float] = 1.0
|
||||
min_p: Optional[float] = 0.0
|
||||
|
||||
|
||||
class LogProbs(BaseModel):
|
||||
|
||||
@ -149,6 +149,7 @@ class SamplingParams:
|
||||
# Zero temperature means greedy sampling.
|
||||
self.top_p = 1.0
|
||||
self.top_k = -1
|
||||
self.min_p = 0.0
|
||||
self._verify_greedy_sampling()
|
||||
|
||||
def _verify_args(self) -> None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user