Include private attributes in API documentation (#18614)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
commit d4c2919760
parent 6220f3c6b0
@@ -66,6 +66,7 @@ plugins:
         options:
           show_symbol_type_heading: true
           show_symbol_type_toc: true
+          filters: []
           summary:
             modules: true
           show_if_no_docstring: true
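A note on the one-line change above: mkdocstrings-python's default `filters` patterns exclude underscore-prefixed (private) members from the rendered API pages. Setting `filters: []` clears those patterns, so private attributes are documented as well, which is what the commit title refers to.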
@@ -262,16 +262,16 @@ class RejectionSampler(SpecDecodeStochasticBaseSampler):
             True, then a token can be accepted, else it should be
             rejected.
 
-        Given {math}`q(\hat{x}_{n+1}|x_1, \dots, x_n)`, the probability of
-        {math}`\hat{x}_{n+1}` given context {math}`x_1, \dots, x_n` according
-        to the target model, and {math}`p(\hat{x}_{n+1}|x_1, \dots, x_n)`, the
+        Given $q(\hat{x}_{n+1}|x_1, \dots, x_n)$, the probability of
+        $\hat{x}_{n+1}$ given context $x_1, \dots, x_n$ according
+        to the target model, and $p(\hat{x}_{n+1}|x_1, \dots, x_n)$, the
         same conditional probability according to the draft model, the token
         is accepted with probability:
 
-        :::{math}
+        $$
         \min\left(1, \frac{q(\hat{x}_{n+1}|x_1, \dots, x_n)}
                          {p(\hat{x}_{n+1}|x_1, \dots, x_n)}\right)
-        :::
+        $$
 
         This implementation does not apply causality. When using the output,
         if a token is rejected, subsequent tokens should not be used.
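For readers of this diff: the acceptance rule itself is unchanged here, only its markup moves from MyST `{math}` roles to dollar-sign math. As context, the rule maps directly onto tensor operations. The following is an illustrative sketch, not vLLM's actual `RejectionSampler` code; `sketch_accept_mask` and `uniform_samples` are hypothetical names introduced for this example.

```python
import torch

def sketch_accept_mask(
    target_probs: torch.Tensor,     # [batch_size, k, vocab_size], q in the docstring
    draft_probs: torch.Tensor,      # [batch_size, k, vocab_size], p in the docstring
    draft_token_ids: torch.Tensor,  # [batch_size, k], proposed token ids
) -> torch.Tensor:
    """Accept each draft token with probability min(1, q/p). Sketch only."""
    idx = draft_token_ids.unsqueeze(-1)
    # q(x_hat | ctx) and p(x_hat | ctx) evaluated at the proposed token ids.
    q = torch.gather(target_probs, dim=-1, index=idx).squeeze(-1)
    p = torch.gather(draft_probs, dim=-1, index=idx).squeeze(-1)
    # Draw u ~ U[0, 1) per position and accept where u < min(1, q/p).
    # The real sampler also guards numerics (e.g. p == 0); omitted here.
    uniform_samples = torch.rand_like(q)
    return uniform_samples < torch.clamp(q / p, max=1.0)
```

Per the docstring, this mask is computed independently per position (no causality), so a caller must discard every token after the first rejected one.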
@@ -314,30 +314,31 @@ class RejectionSampler(SpecDecodeStochasticBaseSampler):
         target model is recovered (within hardware numerics).
 
         The probability distribution used in this rejection case is constructed
-        as follows. Given {math}`q(x|x_1, \dots, x_n)`, the probability of
-        {math}`x` given context {math}`x_1, \dots, x_n` according to the target
-        model and {math}`p(x|x_1, \dots, x_n)`, the same conditional probability
+        as follows. Given $q(x|x_1, \dots, x_n)$, the probability of
+        $x$ given context $x_1, \dots, x_n$ according to the target
+        model and $p(x|x_1, \dots, x_n)$, the same conditional probability
         according to the draft model:
 
-        :::{math}
+        $$
         x_{n+1} \sim (q(x|x_1, \dots, x_n) - p(x|x_1, \dots, x_n))_+
-        :::
+        $$
 
-        where {math}`(f(x))_+` is defined as:
+        where $(f(x))_+$ is defined as:
 
-        :::{math}
+        $$
         (f(x))_+ = \frac{\max(0, f(x))}{\sum_x \max(0, f(x))}
-        :::
+        $$
 
         See https://github.com/vllm-project/vllm/pull/2336 for a visualization
         of the draft, target, and recovered probability distributions.
 
         Returns a tensor of shape [batch_size, k, vocab_size].
 
-        Note: This batches operations on GPU and thus constructs the recovered
-        distribution for all tokens, even if they are accepted. This causes
-        division-by-zero errors, so we use self._smallest_positive_value to
-        avoid that. This introduces some drift to the distribution.
+        Note:
+            This batches operations on GPU and thus constructs the recovered
+            distribution for all tokens, even if they are accepted. This causes
+            division-by-zero errors, so we use self._smallest_positive_value to
+            avoid that. This introduces some drift to the distribution.
         """
         _, k, _ = draft_probs.shape
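The recovered distribution $(q - p)_+$ defined above likewise translates almost verbatim into PyTorch. A minimal sketch, with a small positive `eps` standing in for the `self._smallest_positive_value` mentioned in the Note; the function name is hypothetical and this is not the library's API:

```python
import torch

def sketch_recovered_probs(
    target_probs: torch.Tensor,  # [batch_size, k, vocab_size], q
    draft_probs: torch.Tensor,   # [batch_size, k, vocab_size], p
    eps: float = torch.finfo(torch.float32).tiny,
) -> torch.Tensor:
    """Construct (q - p)_+: clamp the difference at zero, then renormalize."""
    diff = torch.clamp(target_probs - draft_probs, min=0.0)
    # Renormalize over the vocabulary. eps guards the all-zero rows that
    # arise because the recovered distribution is built for every position,
    # including accepted ones (see the Note in the docstring above).
    denom = diff.sum(dim=-1, keepdim=True).clamp_min(eps)
    return diff / denom
```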
@@ -93,29 +93,27 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         Evaluates and returns a mask of accepted tokens based on the
         posterior probabilities.
 
-        Parameters:
-        ----------
-        target_probs : torch.Tensor
-            A tensor of shape (batch_size, k, vocab_size) representing
-            the probabilities of each token in the vocabulary for each
-            position in the proposed sequence. This is the distribution
-            generated by the target model.
-        draft_token_ids : torch.Tensor
-            A tensor of shape (batch_size, k) representing the proposed
-            token ids.
+        Args:
+            target_probs (torch.Tensor): A tensor of shape
+                (batch_size, k, vocab_size) representing the probabilities of
+                each token in the vocabulary for each position in the proposed
+                sequence. This is the distribution generated by the target
+                model.
+            draft_token_ids (torch.Tensor): A tensor of shape (batch_size, k)
+                representing the proposed token ids.
 
         A draft token_id x_{n+k} is accepted if it satisfies the
         following condition
 
-        :::{math}
+        $$
         p_{\text{original}}(x_{n+k} | x_1, x_2, \dots, x_{n+k-1}) >
         \min \left( \epsilon, \delta * \exp \left(
             -H(p_{\text{original}}(
             \cdot | x_1, x_2, \ldots, x_{n+k-1})) \right) \right)
-        :::
+        $$
 
-        where {math}`p_{\text{original}}` corresponds to target_probs
-        and {math}`\epsilon` and {math}`\delta` correspond to hyperparameters
+        where $p_{\text{original}}$ corresponds to target_probs
+        and $\epsilon$ and $\delta$ correspond to hyperparameters
         specified using self._posterior_threshold and self._posterior_alpha
 
         This method computes the posterior probabilities for the given
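The typical-acceptance condition above also maps cleanly onto tensor operations. A hedged sketch, where `posterior_threshold` plays the role of $\epsilon$ and `posterior_alpha` the role of $\delta$, mirroring the `self._posterior_threshold` and `self._posterior_alpha` attributes named in the docstring; the `1e-10` log-guard is an assumption of this sketch, not necessarily what vLLM uses:

```python
import torch

def sketch_typical_accept(
    target_probs: torch.Tensor,     # [batch_size, k, vocab_size]
    draft_token_ids: torch.Tensor,  # [batch_size, k]
    posterior_threshold: float,     # epsilon in the formula
    posterior_alpha: float,         # delta in the formula
) -> torch.Tensor:
    """Accept x_{n+k} if p(x_{n+k}) > min(epsilon, delta * exp(-H(p)))."""
    # Posterior probability of each proposed token under the target model.
    candidates_prob = torch.gather(
        target_probs, dim=-1, index=draft_token_ids.unsqueeze(-1)
    ).squeeze(-1)
    # Shannon entropy H(p) of the target distribution at each position;
    # a small additive constant keeps log() finite on zero entries.
    entropy = -(target_probs * torch.log(target_probs + 1e-10)).sum(dim=-1)
    threshold = torch.minimum(
        torch.full_like(entropy, posterior_threshold),
        posterior_alpha * torch.exp(-entropy),
    )
    return candidates_prob > threshold
```

The intuition behind the entropy term: when the target distribution is peaked (low entropy), the threshold stays near $\epsilon$ and only high-probability tokens pass; when it is flat (high entropy), the threshold shrinks and more draft tokens are tolerated.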
@@ -126,13 +124,10 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         returns a boolean mask indicating which tokens can be accepted.
 
         Returns:
-        -------
-        torch.Tensor
-            A boolean tensor of shape (batch_size, k) where each element
-            indicates whether the corresponding draft token has been accepted
-            or rejected. True indicates acceptance and false indicates
-            rejection.
-
+            torch.Tensor: A boolean tensor of shape (batch_size, k) where each
+                element indicates whether the corresponding draft token has
+                been accepted or rejected. True indicates acceptance and false
+                indicates rejection.
         """
         device = target_probs.device
         candidates_prob = torch.gather(
@@ -156,17 +151,14 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         The recovered token ids will fill the first unmatched token
         by the target token.
 
-        Parameters
-        ----------
-        target_probs : torch.Tensor
-            A tensor of shape (batch_size, k, vocab_size) containing
-            the target probability distribution
+        Args:
+            target_probs (torch.Tensor): A tensor of shape
+                (batch_size, k, vocab_size) containing the target probability
+                distribution.
 
-        Returns
-        -------
-        torch.Tensor
-            A tensor of shape (batch_size, k) with the recovered token
-            ids which are selected from target probs.
+        Returns:
+            torch.Tensor: A tensor of shape (batch_size, k) with the recovered
+                token ids which are selected from target probs.
         """
         max_indices = torch.argmax(target_probs, dim=-1)
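The recovery step documented in this hunk reduces to the `torch.argmax` call visible on its last line: the recovered token at each position is simply the highest-probability token under the target model. A tiny usage sketch with made-up shapes:

```python
import torch

# Hypothetical shapes: batch_size=2 sequences, k=3 speculated positions,
# vocab_size=5. softmax turns random logits into valid probabilities.
target_probs = torch.rand(2, 3, 5).softmax(dim=-1)
recovered_token_ids = torch.argmax(target_probs, dim=-1)  # shape [2, 3]
```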