[Doc] Add model development API Reference (#11884)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Parent: 1d967acb45
Commit: 65097ca0af
@@ -38,7 +38,7 @@ steps:
   - pip install -r requirements-docs.txt
   - SPHINXOPTS=\"-W\" make html
   # Check API reference (if it fails, you may have missing mock imports)
-  - grep \"sig sig-object py\" build/html/api/params.html
+  - grep \"sig sig-object py\" build/html/api/inference_params.html
 
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
   fast_check: true
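The comment in this hunk points at the usual failure mode: Sphinx autodoc cannot import a module because a heavyweight dependency is missing from the docs environment. A minimal sketch of the usual remedy, assuming the build uses sphinx.ext.autodoc and keeps its configuration in a conf.py along these lines (the package names are placeholders, not taken from this commit):

```python
# Hypothetical excerpt from the Sphinx conf.py used by the docs build.
# autodoc_mock_imports is a standard sphinx.ext.autodoc option: listed
# packages are replaced with mock modules so the vllm code can be imported
# for documentation without installing them.
autodoc_mock_imports = [
    "torch",         # placeholder entries only
    "transformers",
]
```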
@@ -1,6 +1,6 @@
-# Optional Parameters
+# Inference Parameters
 
-Optional parameters for vLLM APIs.
+Inference parameters for vLLM APIs.
 
 (sampling-params)=
 
@@ -19,4 +19,3 @@ Optional parameters for vLLM APIs.
 .. autoclass:: vllm.PoolingParams
     :members:
 ```
-
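The renamed page documents the classes exported as `vllm.SamplingParams` and `vllm.PoolingParams`. A minimal usage sketch of the sampling side; the model name is illustrative and not part of this commit:

```python
from vllm import LLM, SamplingParams

# Any generative model supported by vLLM works here; this one is just small.
llm = LLM(model="facebook/opt-125m")

# SamplingParams carries the per-request inference parameters documented above.
params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=32)

for output in llm.generate(["Hello, my name is"], params):
    print(output.outputs[0].text)
```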
docs/source/api/model/adapters.md (new file, 9 lines)
@@ -0,0 +1,9 @@
+# Model Adapters
+
+## Module Contents
+
+```{eval-rst}
+.. automodule:: vllm.model_executor.models.adapters
+    :members:
+    :member-order: bysource
+```
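A sketch of how this adapters module is typically used, assuming it exposes `as_embedding_model`; the wrapped architecture below is only an example:

```python
from vllm.model_executor.models.adapters import as_embedding_model
from vllm.model_executor.models.llama import LlamaForCausalLM

# Wrap a generative model class so it also exposes the pooling interface
# expected of embedding models (assumption: as_embedding_model returns a
# subclass with a pooler attached).
LlamaEmbeddingModel = as_embedding_model(LlamaForCausalLM)
```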
docs/source/api/model/index.md (new file, 12 lines)
@@ -0,0 +1,12 @@
+# Model Development
+
+## Submodules
+
+```{toctree}
+:maxdepth: 1
+
+interfaces_base
+interfaces
+adapters
+```
+
docs/source/api/model/interfaces.md (new file, 9 lines)
@@ -0,0 +1,9 @@
+# Optional Interfaces
+
+## Module Contents
+
+```{eval-rst}
+.. automodule:: vllm.model_executor.models.interfaces
+    :members:
+    :member-order: bysource
+```
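A sketch of how the optional interfaces documented here are usually queried, assuming the module exposes a `supports_multimodal` helper next to the `SupportsMultiModal` protocol:

```python
from vllm.model_executor.models.interfaces import (SupportsMultiModal,
                                                   supports_multimodal)


def describe(model_cls: type) -> str:
    # supports_multimodal performs a structural check against the
    # SupportsMultiModal protocol for a class or an instance.
    if supports_multimodal(model_cls):
        return f"{model_cls.__name__} accepts multimodal inputs"
    return f"{model_cls.__name__} is text-only"
```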
docs/source/api/model/interfaces_base.md (new file, 9 lines)
@@ -0,0 +1,9 @@
+# Base Model Interfaces
+
+## Module Contents
+
+```{eval-rst}
+.. automodule:: vllm.model_executor.models.interfaces_base
+    :members:
+    :member-order: bysource
+```
@@ -139,8 +139,9 @@ community/sponsors
 
 api/offline_inference/index
 api/engine/index
+api/inference_params
 api/multimodal/index
-api/params
+api/model/index
 ```
 
 % Design Documents: Details about vLLM internals
@@ -38,13 +38,15 @@ class SupportsMultiModal(Protocol):
         to be merged with text embeddings.
 
         The output embeddings must be one of the following formats:
+
         - A list or tuple of 2D tensors, where each tensor corresponds to
           each input multimodal data item (e.g, image).
         - A single 3D tensor, with the batch dimension grouping the 2D tensors.
 
-        NOTE: The returned multimodal embeddings must be in the same order as
-        the appearances of their corresponding multimodal data item in the
-        input prompt.
+        Note:
+            The returned multimodal embeddings must be in the same order as
+            the appearances of their corresponding multimodal data item in the
+            input prompt.
         """
         ...
 
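A small sketch of the two return formats the revised docstring describes, using plain torch tensors with illustrative shapes:

```python
import torch

# Say the prompt contains three images, each mapped to 16 embedding vectors
# of hidden size 1024.
num_items, num_tokens, hidden = 3, 16, 1024

# Format 1: one 2D tensor per multimodal item, kept in prompt order.
as_list = [torch.randn(num_tokens, hidden) for _ in range(num_items)]

# Format 2: a single 3D tensor whose batch dimension groups the 2D tensors.
as_batched = torch.stack(as_list)
assert as_batched.shape == (num_items, num_tokens, hidden)
```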
@@ -59,6 +61,7 @@ class SupportsMultiModal(Protocol):
     ) -> torch.Tensor:
         ...
 
+    @overload
     def get_input_embeddings(
         self,
         input_ids: torch.Tensor,
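The added decorator is `typing.overload`. A self-contained sketch of the pattern with simplified stand-in signatures (the real vLLM overloads take more parameters):

```python
from typing import Optional, overload

import torch


class ExampleModel:
    # The @overload stubs only describe accepted call signatures for type
    # checkers; the undecorated definition below is the one that runs.
    @overload
    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
        ...

    @overload
    def get_input_embeddings(
        self,
        input_ids: torch.Tensor,
        multimodal_embeddings: Optional[torch.Tensor],
    ) -> torch.Tensor:
        ...

    def get_input_embeddings(self, input_ids, multimodal_embeddings=None):
        embeds = torch.randn(input_ids.shape[0], 8)
        if multimodal_embeddings is not None:
            embeds = torch.cat([embeds, multimodal_embeddings], dim=0)
        return embeds
```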
@@ -35,6 +35,7 @@ T_co = TypeVar("T_co", default=torch.Tensor, covariant=True)
 
 @runtime_checkable
 class VllmModel(Protocol[C_co, T_co]):
+    """The interface required for all models in vLLM."""
 
     def __init__(
         self,
@@ -97,6 +98,7 @@ def is_vllm_model(
 
 @runtime_checkable
 class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]):
+    """The interface required for all generative models in vLLM."""
 
     def compute_logits(
         self,
@@ -142,6 +144,7 @@ def is_text_generation_model(
 
 @runtime_checkable
 class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]):
+    """The interface required for all pooling models in vLLM."""
 
     def pooler(
         self,
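These classes are `@runtime_checkable` Protocols, which is what lets the helpers named in the hunk headers (`is_vllm_model`, `is_text_generation_model`) do isinstance-style structural checks. A stripped-down sketch of that pattern, not the actual vLLM definitions:

```python
from typing import Protocol, runtime_checkable

import torch


@runtime_checkable
class TextGenerationLike(Protocol):
    """Structural stand-in for VllmModelForTextGeneration."""

    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
        ...


class MyModel:

    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states @ torch.randn(hidden_states.shape[-1], 10)


# runtime_checkable Protocols only verify that the named methods exist,
# which is the check helpers like is_text_generation_model build on.
assert isinstance(MyModel(), TextGenerationLike)
```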