[Doc] Add model development API Reference (#11884)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-12-15 18:16:46 +08:00 · 2025-01-09 17:43:40 +08:00 · 2025-01-09 17:43:40 +08:00 · 65097ca0af
commit 65097ca0af
parent 1d967acb45
9 changed files with 54 additions and 9 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -38,7 +38,7 @@ steps:
  - pip install -r requirements-docs.txt
  - SPHINXOPTS=\"-W\" make html
  # Check API reference (if it fails, you may have missing mock imports)
-  - grep \"sig sig-object py\" build/html/api/params.html
+  - grep \"sig sig-object py\" build/html/api/inference_params.html
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
  fast_check: true
--- a/docs/source/api/inference_params.md
+++ b/docs/source/api/inference_params.md
@@ -1,6 +1,6 @@
-# Optional Parameters
+# Inference Parameters
-Optional parameters for vLLM APIs.
+Inference parameters for vLLM APIs.
 (sampling-params)=
@@ -19,4 +19,3 @@ Optional parameters for vLLM APIs.
 .. autoclass:: vllm.PoolingParams
    :members:
 ```
--- a/docs/source/api/model/adapters.md
+++ b/docs/source/api/model/adapters.md
@@ -0,0 +1,9 @@
 # Model Adapters
 ## Module Contents
 ```{eval-rst}
 .. automodule:: vllm.model_executor.models.adapters
    :members:
    :member-order: bysource
 ```
--- a/docs/source/api/model/index.md
+++ b/docs/source/api/model/index.md
@@ -0,0 +1,12 @@
 # Model Development
 ## Submodules
 ```{toctree}
 :maxdepth: 1
 interfaces_base
 interfaces
 adapters
 ```
--- a/docs/source/api/model/interfaces.md
+++ b/docs/source/api/model/interfaces.md
@@ -0,0 +1,9 @@
 # Optional Interfaces
 ## Module Contents
 ```{eval-rst}
 .. automodule:: vllm.model_executor.models.interfaces
    :members:
    :member-order: bysource
 ```
--- a/docs/source/api/model/interfaces_base.md
+++ b/docs/source/api/model/interfaces_base.md
@@ -0,0 +1,9 @@
 # Base Model Interfaces
 ## Module Contents
 ```{eval-rst}
 .. automodule:: vllm.model_executor.models.interfaces_base
    :members:
    :member-order: bysource
 ```
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -139,8 +139,9 @@ community/sponsors
 api/offline_inference/index
 api/engine/index
 api/inference_params
 api/multimodal/index
-api/params
+api/model/index
 ```
 % Design Documents: Details about vLLM internals
--- a/vllm/model_executor/models/interfaces.py
+++ b/vllm/model_executor/models/interfaces.py
@@ -38,13 +38,15 @@ class SupportsMultiModal(Protocol):
        to be merged with text embeddings.
        The output embeddings must be one of the following formats:
        - A list or tuple of 2D tensors, where each tensor corresponds to 
-          each input multimodal data item (e.g, image).
+            each input multimodal data item (e.g., image).
        - A single 3D tensor, with the batch dimension grouping the 2D tensors.
-        NOTE: The returned multimodal embeddings must be in the same order as 
+        Note:
-        the appearances of their corresponding multimodal data item in the 
+            The returned multimodal embeddings must be in the same order as 
-        input prompt.
+            the appearances of their corresponding multimodal data item in the 
+            input prompt.
        """
        ...
@@ -59,6 +61,7 @@ class SupportsMultiModal(Protocol):
    ) -> torch.Tensor:
        ...
    @overload
    def get_input_embeddings(
        self,
        input_ids: torch.Tensor,
--- a/vllm/model_executor/models/interfaces_base.py
+++ b/vllm/model_executor/models/interfaces_base.py
@@ -35,6 +35,7 @@ T_co = TypeVar("T_co", default=torch.Tensor, covariant=True)
@runtime_checkable
 class VllmModel(Protocol[C_co, T_co]):
    """The interface required for all models in vLLM."""
    def __init__(
        self,
@@ -97,6 +98,7 @@ def is_vllm_model(
@runtime_checkable
 class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]):
    """The interface required for all generative models in vLLM."""
    def compute_logits(
        self,
@@ -142,6 +144,7 @@ def is_text_generation_model(
@runtime_checkable
 class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]):
    """The interface required for all pooling models in vLLM."""
    def pooler(
        self,