[Doc] Add model development API Reference (#11884)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-01-09 17:43:40 +08:00 committed by GitHub
parent 1d967acb45
commit 65097ca0af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 54 additions and 9 deletions

View File

@ -38,7 +38,7 @@ steps:
- pip install -r requirements-docs.txt - pip install -r requirements-docs.txt
- SPHINXOPTS=\"-W\" make html - SPHINXOPTS=\"-W\" make html
# Check API reference (if it fails, you may have missing mock imports) # Check API reference (if it fails, you may have missing mock imports)
- grep \"sig sig-object py\" build/html/api/params.html - grep \"sig sig-object py\" build/html/api/inference_params.html
- label: Async Engine, Inputs, Utils, Worker Test # 24min - label: Async Engine, Inputs, Utils, Worker Test # 24min
fast_check: true fast_check: true

View File

@ -1,6 +1,6 @@
# Optional Parameters # Inference Parameters
Optional parameters for vLLM APIs. Inference parameters for vLLM APIs.
(sampling-params)= (sampling-params)=
@ -19,4 +19,3 @@ Optional parameters for vLLM APIs.
.. autoclass:: vllm.PoolingParams .. autoclass:: vllm.PoolingParams
:members: :members:
``` ```

View File

@ -0,0 +1,9 @@
# Model Adapters
## Module Contents
```{eval-rst}
.. automodule:: vllm.model_executor.models.adapters
:members:
:member-order: bysource
```

View File

@ -0,0 +1,12 @@
# Model Development
## Submodules
```{toctree}
:maxdepth: 1
interfaces_base
interfaces
adapters
```

View File

@ -0,0 +1,9 @@
# Optional Interfaces
## Module Contents
```{eval-rst}
.. automodule:: vllm.model_executor.models.interfaces
:members:
:member-order: bysource
```

View File

@ -0,0 +1,9 @@
# Base Model Interfaces
## Module Contents
```{eval-rst}
.. automodule:: vllm.model_executor.models.interfaces_base
:members:
:member-order: bysource
```

View File

@ -139,8 +139,9 @@ community/sponsors
api/offline_inference/index api/offline_inference/index
api/engine/index api/engine/index
api/inference_params
api/multimodal/index api/multimodal/index
api/params api/model/index
``` ```
% Design Documents: Details about vLLM internals % Design Documents: Details about vLLM internals

View File

@ -38,13 +38,15 @@ class SupportsMultiModal(Protocol):
to be merged with text embeddings. to be merged with text embeddings.
The output embeddings must be one of the following formats: The output embeddings must be one of the following formats:
- A list or tuple of 2D tensors, where each tensor corresponds to - A list or tuple of 2D tensors, where each tensor corresponds to
each input multimodal data item (e.g., image).
- A single 3D tensor, with the batch dimension grouping the 2D tensors. - A single 3D tensor, with the batch dimension grouping the 2D tensors.
NOTE: The returned multimodal embeddings must be in the same order as Note:
the appearances of their corresponding multimodal data item in the The returned multimodal embeddings must be in the same order as
input prompt. the appearances of their corresponding multimodal data item in the
input prompt.
""" """
... ...
@ -59,6 +61,7 @@ class SupportsMultiModal(Protocol):
) -> torch.Tensor: ) -> torch.Tensor:
... ...
@overload
def get_input_embeddings( def get_input_embeddings(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor,

View File

@ -35,6 +35,7 @@ T_co = TypeVar("T_co", default=torch.Tensor, covariant=True)
@runtime_checkable @runtime_checkable
class VllmModel(Protocol[C_co, T_co]): class VllmModel(Protocol[C_co, T_co]):
"""The interface required for all models in vLLM."""
def __init__( def __init__(
self, self,
@ -97,6 +98,7 @@ def is_vllm_model(
@runtime_checkable @runtime_checkable
class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]): class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]):
"""The interface required for all generative models in vLLM."""
def compute_logits( def compute_logits(
self, self,
@ -142,6 +144,7 @@ def is_text_generation_model(
@runtime_checkable @runtime_checkable
class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]): class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]):
"""The interface required for all pooling models in vLLM."""
def pooler( def pooler(
self, self,