From d4c291976085e4bdfabe22ed7c69534263c4cc7b Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 23 May 2025 15:18:31 +0200
Subject: [PATCH] Include private attributes in API documentation (#18614)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 mkdocs.yaml                              |  1 +
 .../layers/rejection_sampler.py          | 35 ++++++------
 .../layers/typical_acceptance_sampler.py | 54 ++++++++-----------
 3 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/mkdocs.yaml b/mkdocs.yaml
index a1c6319bb0080..b6fabbeed15a5 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -66,6 +66,7 @@ plugins:
           options:
             show_symbol_type_heading: true
             show_symbol_type_toc: true
+            filters: []
             summary:
               modules: true
             show_if_no_docstring: true
diff --git a/vllm/model_executor/layers/rejection_sampler.py b/vllm/model_executor/layers/rejection_sampler.py
index af82b9dc93b70..3db73495827c6 100644
--- a/vllm/model_executor/layers/rejection_sampler.py
+++ b/vllm/model_executor/layers/rejection_sampler.py
@@ -262,16 +262,16 @@ class RejectionSampler(SpecDecodeStochasticBaseSampler):
         True, then a token can be accepted, else it should be rejected.
 
-        Given {math}`q(\hat{x}_{n+1}|x_1, \dots, x_n)`, the probability of
-        {math}`\hat{x}_{n+1}` given context {math}`x_1, \dots, x_n` according
-        to the target model, and {math}`p(\hat{x}_{n+1}|x_1, \dots, x_n)`, the
+        Given $q(\hat{x}_{n+1}|x_1, \dots, x_n)$, the probability of
+        $\hat{x}_{n+1}$ given context $x_1, \dots, x_n$ according
+        to the target model, and $p(\hat{x}_{n+1}|x_1, \dots, x_n)$, the
         same conditional probability according to the draft model, the token
         is accepted with probability:
 
-        :::{math}
+        $$
         \min\left(1, \frac{q(\hat{x}_{n+1}|x_1, \dots, x_n)}
                        {p(\hat{x}_{n+1}|x_1, \dots, x_n)}\right)
-        :::
+        $$
 
         This implementation does not apply causality. When using the output,
         if a token is rejected, subsequent tokens should not be used.
 
@@ -314,30 +314,31 @@ class RejectionSampler(SpecDecodeStochasticBaseSampler):
         target model is recovered (within hardware numerics).
 
         The probability distribution used in this rejection case is constructed
-        as follows. Given {math}`q(x|x_1, \dots, x_n)`, the probability of
-        {math}`x` given context {math}`x_1, \dots, x_n` according to the target
-        model and {math}`p(x|x_1, \dots, x_n)`, the same conditional probability
+        as follows. Given $q(x|x_1, \dots, x_n)$, the probability of
+        $x$ given context $x_1, \dots, x_n$ according to the target
+        model and $p(x|x_1, \dots, x_n)$, the same conditional probability
         according to the draft model:
 
-        :::{math}
+        $$
         x_{n+1} \sim (q(x|x_1, \dots, x_n) - p(x|x_1, \dots, x_n))_+
-        :::
+        $$
 
-        where {math}`(f(x))_+` is defined as:
+        where $(f(x))_+$ is defined as:
 
-        :::{math}
+        $$
         (f(x))_+ = \frac{\max(0, f(x))}{\sum_x \max(0, f(x))}
-        :::
+        $$
 
         See https://github.com/vllm-project/vllm/pull/2336 for a visualization
         of the draft, target, and recovered probability distributions.
 
         Returns a tensor of shape [batch_size, k, vocab_size].
 
-        Note: This batches operations on GPU and thus constructs the recovered
-        distribution for all tokens, even if they are accepted. This causes
-        division-by-zero errors, so we use self._smallest_positive_value to
-        avoid that. This introduces some drift to the distribution.
+        Note:
+            This batches operations on GPU and thus constructs the recovered
+            distribution for all tokens, even if they are accepted. This causes
+            division-by-zero errors, so we use self._smallest_positive_value to
+            avoid that. This introduces some drift to the distribution.
         """
         _, k, _ = draft_probs.shape
diff --git a/vllm/model_executor/layers/typical_acceptance_sampler.py b/vllm/model_executor/layers/typical_acceptance_sampler.py
index 527a301cd8e26..a14c86148e730 100644
--- a/vllm/model_executor/layers/typical_acceptance_sampler.py
+++ b/vllm/model_executor/layers/typical_acceptance_sampler.py
@@ -93,29 +93,27 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         Evaluates and returns a mask of accepted tokens based on the
         posterior probabilities.
 
-        Parameters:
-        ----------
-        target_probs : torch.Tensor
-            A tensor of shape (batch_size, k, vocab_size) representing
-            the probabilities of each token in the vocabulary for each
-            position in the proposed sequence. This is the distribution
-            generated by the target model.
-        draft_token_ids : torch.Tensor
-            A tensor of shape (batch_size, k) representing the proposed
-            token ids.
+        Args:
+            target_probs (torch.Tensor): A tensor of shape
+                (batch_size, k, vocab_size) representing the probabilities of
+                each token in the vocabulary for each position in the proposed
+                sequence. This is the distribution generated by the target
+                model.
+            draft_token_ids (torch.Tensor): A tensor of shape (batch_size, k)
+                representing the proposed token ids.
 
         A draft token_id x_{n+k} is accepted if it satisfies the
         following condition
 
-        :::{math}
+        $$
         p_{\text{original}}(x_{n+k} | x_1, x_2, \dots, x_{n+k-1}) >
         \min \left( \epsilon, \delta * \exp \left(
         -H(p_{\text{original}}(
         \cdot | x_1, x_2, \ldots, x_{n+k-1})) \right) \right)
-        :::
+        $$
 
-        where {math}`p_{\text{original}}` corresponds to target_probs
-        and {math}`\epsilon` and {math}`\delta` correspond to hyperparameters
+        where $p_{\text{original}}$ corresponds to target_probs
+        and $\epsilon$ and $\delta$ correspond to hyperparameters
         specified using self._posterior_threshold and self._posterior_alpha
 
         This method computes the posterior probabilities for the given
@@ -126,13 +124,10 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         returns a boolean mask indicating which tokens can be accepted.
 
         Returns:
-        -------
-        torch.Tensor
-            A boolean tensor of shape (batch_size, k) where each element
-            indicates whether the corresponding draft token has been accepted
-            or rejected. True indicates acceptance and false indicates
-            rejection.
-
+            torch.Tensor: A boolean tensor of shape (batch_size, k) where each
+                element indicates whether the corresponding draft token has
+                been accepted or rejected. True indicates acceptance and false
+                indicates rejection.
         """
         device = target_probs.device
         candidates_prob = torch.gather(
@@ -156,17 +151,14 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         The recovered token ids will fill the first unmatched token by the
         target token.
 
-        Parameters
-        ----------
-        target_probs : torch.Tensor
-            A tensor of shape (batch_size, k, vocab_size) containing
-            the target probability distribution
+        Args:
+            target_probs (torch.Tensor): A tensor of shape
+                (batch_size, k, vocab_size) containing the target probability
+                distribution.
 
-        Returns
-        -------
-        torch.Tensor
-            A tensor of shape (batch_size, k) with the recovered token
-            ids which are selected from target probs.
+        Returns:
+            torch.Tensor: A tensor of shape (batch_size, k) with the recovered
+                token ids which are selected from target probs.
         """
         max_indices = torch.argmax(target_probs, dim=-1)
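
A note on why the one-line mkdocs.yaml change achieves what the patch title says: mkdocstrings' Python handler applies a default member filter that hides underscore-prefixed names, so private attributes such as self._smallest_positive_value, self._posterior_threshold, and self._posterior_alpha, which the docstrings above reference, were omitted from the rendered API reference. Setting filters: [] clears that default filter, letting those private attributes appear in the generated documentation.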
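
The two rejection_sampler.py hunks document the standard speculative-decoding accept/reject rule and the recovered distribution. The following is a minimal sketch of that math, not vLLM's actual RejectionSampler: the function name and variable names are illustrative, and torch.finfo(...).tiny stands in for self._smallest_positive_value.

import torch


def rejection_sample_sketch(
    target_probs: torch.Tensor,     # [batch_size, k, vocab_size]; q in the docstring
    draft_probs: torch.Tensor,      # [batch_size, k, vocab_size]; p in the docstring
    draft_token_ids: torch.Tensor,  # [batch_size, k]
) -> tuple[torch.Tensor, torch.Tensor]:
    # Smallest positive value representable in this dtype; stands in for
    # vLLM's self._smallest_positive_value.
    tiny = torch.finfo(target_probs.dtype).tiny

    # q(x_hat | ctx) and p(x_hat | ctx) for each proposed token.
    idx = draft_token_ids.unsqueeze(-1)
    q = torch.gather(target_probs, -1, idx).squeeze(-1)
    p = torch.gather(draft_probs, -1, idx).squeeze(-1).clamp(min=tiny)

    # Accept with probability min(1, q/p). No causality is applied here:
    # the caller must discard every token after the first rejection.
    accepted = torch.rand_like(q) < (q / p).clamp(max=1.0)

    # Recovered distribution (q - p)_+ for the rejection case, built for all
    # positions at once as the Note describes. Clamping by `tiny` keeps the
    # normalizing sum nonzero, at the cost of slight drift.
    plus = (target_probs - draft_probs).clamp(min=tiny)
    recovered_probs = plus / plus.sum(dim=-1, keepdim=True)
    return accepted, recovered_probs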
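
Likewise, the acceptance condition documented in typical_acceptance_sampler.py compares the target model's posterior for each proposed token against an entropy-scaled threshold. A self-contained sketch of that test follows; the epsilon and delta defaults below are placeholders, not vLLM's configured values.

import torch


def typical_acceptance_mask_sketch(
    target_probs: torch.Tensor,     # [batch_size, k, vocab_size]
    draft_token_ids: torch.Tensor,  # [batch_size, k]
    posterior_threshold: float = 0.09,  # epsilon; placeholder default
    posterior_alpha: float = 0.3,       # delta; placeholder default
) -> torch.Tensor:
    # p_original(x_{n+k} | x_1, ..., x_{n+k-1}) for each proposed token.
    candidate_prob = torch.gather(
        target_probs, -1, draft_token_ids.unsqueeze(-1)).squeeze(-1)

    # H(p_original(. | ctx)): torch.special.entr computes -x * log(x)
    # elementwise (0 at x = 0), so summing over the vocab gives the entropy.
    entropy = torch.special.entr(target_probs).sum(dim=-1)

    # Accept when the posterior clears min(epsilon, delta * exp(-H)).
    threshold = torch.minimum(
        torch.full_like(entropy, posterior_threshold),
        posterior_alpha * torch.exp(-entropy))
    return candidate_prob > threshold

Because the bar scales with exp(-H), positions where the target distribution is high-entropy accept draft tokens more readily, while near-deterministic positions demand close agreement with the target model's top choice.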