mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-04 17:24:32 +08:00
[Docs] Fix pooling-params doc references in openai_compatible_server.md (#24939)
This commit is contained in:
parent
8ed039d527
commit
eaffe4486c
@ -46,7 +46,6 @@ Engine classes for offline and online inference.
|
|||||||
Inference parameters for vLLM APIs.
|
Inference parameters for vLLM APIs.
|
||||||
|
|
||||||
[](){ #sampling-params }
|
[](){ #sampling-params }
|
||||||
[](){ #pooling-params }
|
|
||||||
|
|
||||||
- [vllm.SamplingParams][]
|
- [vllm.SamplingParams][]
|
||||||
- [vllm.PoolingParams][]
|
- [vllm.PoolingParams][]
|
||||||
|
|||||||
@ -317,10 +317,11 @@ Full example: <gh-file:examples/online_serving/pooling/openai_chat_embedding_cli
|
|||||||
|
|
||||||
#### Extra parameters
|
#### Extra parameters
|
||||||
|
|
||||||
The following [pooling parameters][pooling-params] are supported.
|
The following [pooling parameters][vllm.PoolingParams] are supported.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
--8<-- "vllm/entrypoints/openai/protocol.py:embedding-pooling-params"
|
--8<-- "vllm/pooling_params.py:common-pooling-params"
|
||||||
|
--8<-- "vllm/pooling_params.py:embedding-pooling-params"
|
||||||
```
|
```
|
||||||
|
|
||||||
The following extra parameters are supported by default:
|
The following extra parameters are supported by default:
|
||||||
@ -527,10 +528,11 @@ curl -v "http://127.0.0.1:8000/classify" \
|
|||||||
|
|
||||||
#### Extra parameters
|
#### Extra parameters
|
||||||
|
|
||||||
The following [pooling parameters][pooling-params] are supported.
|
The following [pooling parameters][vllm.PoolingParams] are supported.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
--8<-- "vllm/entrypoints/openai/protocol.py:classification-pooling-params"
|
--8<-- "vllm/pooling_params.py:common-pooling-params"
|
||||||
|
--8<-- "vllm/pooling_params.py:classification-pooling-params"
|
||||||
```
|
```
|
||||||
|
|
||||||
The following extra parameters are supported:
|
The following extra parameters are supported:
|
||||||
@ -733,10 +735,11 @@ Full example: <gh-file:examples/online_serving/openai_cross_encoder_score_for_mu
|
|||||||
|
|
||||||
#### Extra parameters
|
#### Extra parameters
|
||||||
|
|
||||||
The following [pooling parameters][pooling-params] are supported.
|
The following [pooling parameters][vllm.PoolingParams] are supported.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
--8<-- "vllm/entrypoints/openai/protocol.py:score-pooling-params"
|
--8<-- "vllm/pooling_params.py:common-pooling-params"
|
||||||
|
--8<-- "vllm/pooling_params.py:classification-pooling-params"
|
||||||
```
|
```
|
||||||
|
|
||||||
The following extra parameters are supported:
|
The following extra parameters are supported:
|
||||||
@ -815,10 +818,11 @@ Result documents will be sorted by relevance, and the `index` property can be us
|
|||||||
|
|
||||||
#### Extra parameters
|
#### Extra parameters
|
||||||
|
|
||||||
The following [pooling parameters][pooling-params] are supported.
|
The following [pooling parameters][vllm.PoolingParams] are supported.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
--8<-- "vllm/entrypoints/openai/protocol.py:rerank-pooling-params"
|
--8<-- "vllm/pooling_params.py:common-pooling-params"
|
||||||
|
--8<-- "vllm/pooling_params.py:classification-pooling-params"
|
||||||
```
|
```
|
||||||
|
|
||||||
The following extra parameters are supported:
|
The following extra parameters are supported:
|
||||||
|
|||||||
@ -20,25 +20,33 @@ class PoolingParams(
|
|||||||
"""API parameters for pooling models.
|
"""API parameters for pooling models.
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
|
truncate_prompt_tokens: Controls prompt truncation.
|
||||||
|
Set to -1 to use the model's default truncation size.
|
||||||
|
Set to k to keep only the last k tokens (left truncation).
|
||||||
|
Set to None to disable truncation.
|
||||||
normalize: Whether to normalize the embeddings outputs.
|
normalize: Whether to normalize the embeddings outputs.
|
||||||
dimensions: Reduce the dimensions of embeddings
|
dimensions: Reduce the dimensions of embeddings
|
||||||
if model support matryoshka representation.
|
if model support matryoshka representation.
|
||||||
activation: Whether to apply activation function to
|
activation: Whether to apply activation function to
|
||||||
the classification outputs.
|
the classification outputs.
|
||||||
softmax: Whether to apply softmax to the reward outputs.
|
softmax: Whether to apply softmax to the reward outputs.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# --8<-- [start:common-pooling-params]
|
||||||
truncate_prompt_tokens: Optional[Annotated[int,
|
truncate_prompt_tokens: Optional[Annotated[int,
|
||||||
msgspec.Meta(ge=-1)]] = None
|
msgspec.Meta(ge=-1)]] = None
|
||||||
"""If set to -1, will use the truncation size supported by the model. If
|
# --8<-- [end:common-pooling-params]
|
||||||
set to an integer k, will use only the last k tokens from the prompt
|
|
||||||
(i.e., left truncation). If set to `None`, truncation is disabled."""
|
|
||||||
|
|
||||||
## for embeddings models
|
## for embeddings models
|
||||||
|
# --8<-- [start:embedding-pooling-params]
|
||||||
dimensions: Optional[int] = None
|
dimensions: Optional[int] = None
|
||||||
normalize: Optional[bool] = None
|
normalize: Optional[bool] = None
|
||||||
|
# --8<-- [end:embedding-pooling-params]
|
||||||
|
|
||||||
## for classification models
|
## for classification, scoring and rerank
|
||||||
|
# --8<-- [start:classification-pooling-params]
|
||||||
activation: Optional[bool] = None
|
activation: Optional[bool] = None
|
||||||
|
# --8<-- [end:classification-pooling-params]
|
||||||
|
|
||||||
## for reward models
|
## for reward models
|
||||||
softmax: Optional[bool] = None
|
softmax: Optional[bool] = None
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user