mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-29 12:17:14 +08:00
[Docs] add the parallel sampling usage in LLMEngine and AsyncLLM (#24222)
This commit is contained in:
parent
eaffe4486c
commit
c9ff9e6f0c
@@ -81,7 +81,13 @@ class SamplingParams(
|
||||
"""
|
||||
|
||||
n: int = 1
|
||||
- """Number of output sequences to return for the given prompt."""
|
||||
+ """Number of outputs to return for the given prompt request.
|
||||
|
||||
+ NOTE:
|
||||
+ `AsyncLLM` streams outputs by default. When `n > 1`, all `n` outputs
|
||||
+ are generated and streamed cumulatively per request. To see all `n`
|
||||
+ outputs upon completion, use `output_kind=RequestOutputKind.FINAL_ONLY`
|
||||
+ in `SamplingParams`."""
|
||||
best_of: Optional[int] = None
|
||||
"""Number of output sequences that are generated from the prompt. From
|
||||
these `best_of` sequences, the top `n` sequences are returned. `best_of`
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user