From d4c291976085e4bdfabe22ed7c69534263c4cc7b Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 23 May 2025 15:18:31 +0200
Subject: [PATCH] Include private attributes in API documentation (#18614)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 mkdocs.yaml                              |  1 +
 .../layers/rejection_sampler.py          | 35 ++++++------
 .../layers/typical_acceptance_sampler.py | 54 ++++++++-----------
 3 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/mkdocs.yaml b/mkdocs.yaml
index a1c6319bb0080..b6fabbeed15a5 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -66,6 +66,7 @@ plugins:
           options:
             show_symbol_type_heading: true
             show_symbol_type_toc: true
+            filters: []
             summary:
               modules: true
             show_if_no_docstring: true
diff --git a/vllm/model_executor/layers/rejection_sampler.py b/vllm/model_executor/layers/rejection_sampler.py
index af82b9dc93b70..3db73495827c6 100644
--- a/vllm/model_executor/layers/rejection_sampler.py
+++ b/vllm/model_executor/layers/rejection_sampler.py
@@ -262,16 +262,16 @@ class RejectionSampler(SpecDecodeStochasticBaseSampler):
         True, then a token can be accepted, else it should be rejected.
 
-        Given {math}`q(\hat{x}_{n+1}|x_1, \dots, x_n)`, the probability of
-        {math}`\hat{x}_{n+1}` given context {math}`x_1, \dots, x_n` according
-        to the target model, and {math}`p(\hat{x}_{n+1}|x_1, \dots, x_n)`, the
+        Given $q(\hat{x}_{n+1}|x_1, \dots, x_n)$, the probability of
+        $\hat{x}_{n+1}$ given context $x_1, \dots, x_n$ according
+        to the target model, and $p(\hat{x}_{n+1}|x_1, \dots, x_n)$, the
         same conditional probability according to the draft model, the token
         is accepted with probability:
 
-        :::{math}
+        $$
         \min\left(1, \frac{q(\hat{x}_{n+1}|x_1, \dots, x_n)}
                        {p(\hat{x}_{n+1}|x_1, \dots, x_n)}\right)
-        :::
+        $$
 
         This implementation does not apply causality. When using the output,
         if a token is rejected, subsequent tokens should not be used.
 
@@ -314,30 +314,31 @@ class RejectionSampler(SpecDecodeStochasticBaseSampler):
         target model is recovered (within hardware numerics).
 
         The probability distribution used in this rejection case is constructed
-        as follows. Given {math}`q(x|x_1, \dots, x_n)`, the probability of
-        {math}`x` given context {math}`x_1, \dots, x_n` according to the target
-        model and {math}`p(x|x_1, \dots, x_n)`, the same conditional probability
+        as follows. Given $q(x|x_1, \dots, x_n)$, the probability of
+        $x$ given context $x_1, \dots, x_n$ according to the target
+        model and $p(x|x_1, \dots, x_n)$, the same conditional probability
         according to the draft model:
 
-        :::{math}
+        $$
         x_{n+1} \sim (q(x|x_1, \dots, x_n) - p(x|x_1, \dots, x_n))_+
-        :::
+        $$
 
-        where {math}`(f(x))_+` is defined as:
+        where $(f(x))_+$ is defined as:
 
-        :::{math}
+        $$
         (f(x))_+ = \frac{\max(0, f(x))}{\sum_x \max(0, f(x))}
-        :::
+        $$
 
         See https://github.com/vllm-project/vllm/pull/2336 for a visualization
         of the draft, target, and recovered probability distributions.
 
         Returns a tensor of shape [batch_size, k, vocab_size].
 
-        Note: This batches operations on GPU and thus constructs the recovered
-        distribution for all tokens, even if they are accepted. This causes
-        division-by-zero errors, so we use self._smallest_positive_value to
-        avoid that. This introduces some drift to the distribution.
+        Note:
+            This batches operations on GPU and thus constructs the recovered
+            distribution for all tokens, even if they are accepted. This causes
+            division-by-zero errors, so we use self._smallest_positive_value to
+            avoid that. This introduces some drift to the distribution.
         """
         _, k, _ = draft_probs.shape
diff --git a/vllm/model_executor/layers/typical_acceptance_sampler.py b/vllm/model_executor/layers/typical_acceptance_sampler.py
index 527a301cd8e26..a14c86148e730 100644
--- a/vllm/model_executor/layers/typical_acceptance_sampler.py
+++ b/vllm/model_executor/layers/typical_acceptance_sampler.py
@@ -93,29 +93,27 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         Evaluates and returns a mask of accepted tokens based on the
         posterior probabilities.
 
-        Parameters:
-        ----------
-        target_probs : torch.Tensor
-            A tensor of shape (batch_size, k, vocab_size) representing
-            the probabilities of each token in the vocabulary for each
-            position in the proposed sequence. This is the distribution
-            generated by the target model.
-        draft_token_ids : torch.Tensor
-            A tensor of shape (batch_size, k) representing the proposed
-            token ids.
+        Args:
+            target_probs (torch.Tensor): A tensor of shape
+                (batch_size, k, vocab_size) representing the probabilities of
+                each token in the vocabulary for each position in the proposed
+                sequence. This is the distribution generated by the target
+                model.
+            draft_token_ids (torch.Tensor): A tensor of shape (batch_size, k)
+                representing the proposed token ids.
 
         A draft token_id x_{n+k} is accepted if it satisfies the
         following condition
 
-        :::{math}
+        $$
         p_{\text{original}}(x_{n+k} | x_1, x_2, \dots, x_{n+k-1}) >
         \min \left( \epsilon, \delta * \exp \left(
         -H(p_{\text{original}}(
         \cdot | x_1, x_2, \ldots, x_{n+k-1})) \right) \right)
-        :::
+        $$
 
-        where {math}`p_{\text{original}}` corresponds to target_probs
-        and {math}`\epsilon` and {math}`\delta` correspond to hyperparameters
+        where $p_{\text{original}}$ corresponds to target_probs
+        and $\epsilon$ and $\delta$ correspond to hyperparameters
         specified using self._posterior_threshold and self._posterior_alpha
 
         This method computes the posterior probabilities for the given
@@ -126,13 +124,10 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         returns a boolean mask indicating which tokens can be accepted.
 
         Returns:
-        -------
-        torch.Tensor
-            A boolean tensor of shape (batch_size, k) where each element
-            indicates whether the corresponding draft token has been accepted
-            or rejected. True indicates acceptance and false indicates
-            rejection.
-
+            torch.Tensor: A boolean tensor of shape (batch_size, k) where each
+                element indicates whether the corresponding draft token has
+                been accepted or rejected. True indicates acceptance and false
+                indicates rejection.
         """
         device = target_probs.device
         candidates_prob = torch.gather(
@@ -156,17 +151,14 @@ class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
         The recovered token ids will fill the first unmatched token by the
         target token.
 
-        Parameters
-        ----------
-        target_probs : torch.Tensor
-            A tensor of shape (batch_size, k, vocab_size) containing
-            the target probability distribution
+        Args:
+            target_probs (torch.Tensor): A tensor of shape
+                (batch_size, k, vocab_size) containing the target probability
+                distribution.
 
-        Returns
-        -------
-        torch.Tensor
-            A tensor of shape (batch_size, k) with the recovered token
-            ids which are selected from target probs.
+        Returns:
+            torch.Tensor: A tensor of shape (batch_size, k) with the recovered
+                token ids which are selected from target probs.
         """
         max_indices = torch.argmax(target_probs, dim=-1)
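
A note on why the one-line mkdocs.yaml change achieves what the patch title says: mkdocstrings' Python handler applies a default member filter that hides underscore-prefixed names, so private attributes such as self._smallest_positive_value, self._posterior_threshold, and self._posterior_alpha, which the docstrings above reference, were omitted from the rendered API reference. Setting filters: [] clears that default filter, letting those private attributes appear in the generated documentation.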
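
The two rejection_sampler.py hunks document the standard speculative-decoding accept/reject rule and the recovered distribution. The following is a minimal sketch of that math, not vLLM's actual RejectionSampler: the function name and variable names are illustrative, and torch.finfo(...).tiny stands in for self._smallest_positive_value.

import torch


def rejection_sample_sketch(
    target_probs: torch.Tensor,     # [batch_size, k, vocab_size]; q in the docstring
    draft_probs: torch.Tensor,      # [batch_size, k, vocab_size]; p in the docstring
    draft_token_ids: torch.Tensor,  # [batch_size, k]
) -> tuple[torch.Tensor, torch.Tensor]:
    # Smallest positive value representable in this dtype; stands in for
    # vLLM's self._smallest_positive_value.
    tiny = torch.finfo(target_probs.dtype).tiny

    # q(x_hat | ctx) and p(x_hat | ctx) for each proposed token.
    idx = draft_token_ids.unsqueeze(-1)
    q = torch.gather(target_probs, -1, idx).squeeze(-1)
    p = torch.gather(draft_probs, -1, idx).squeeze(-1).clamp(min=tiny)

    # Accept with probability min(1, q/p). No causality is applied here:
    # the caller must discard every token after the first rejection.
    accepted = torch.rand_like(q) < (q / p).clamp(max=1.0)

    # Recovered distribution (q - p)_+ for the rejection case, built for all
    # positions at once as the Note describes. Clamping by `tiny` keeps the
    # normalizing sum nonzero, at the cost of slight drift.
    plus = (target_probs - draft_probs).clamp(min=tiny)
    recovered_probs = plus / plus.sum(dim=-1, keepdim=True)
    return accepted, recovered_probs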
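
Likewise, the acceptance condition documented in typical_acceptance_sampler.py compares the target model's posterior for each proposed token against an entropy-scaled threshold. A self-contained sketch of that test follows; the epsilon and delta defaults below are placeholders, not vLLM's configured values.

import torch


def typical_acceptance_mask_sketch(
    target_probs: torch.Tensor,     # [batch_size, k, vocab_size]
    draft_token_ids: torch.Tensor,  # [batch_size, k]
    posterior_threshold: float = 0.09,  # epsilon; placeholder default
    posterior_alpha: float = 0.3,       # delta; placeholder default
) -> torch.Tensor:
    # p_original(x_{n+k} | x_1, ..., x_{n+k-1}) for each proposed token.
    candidate_prob = torch.gather(
        target_probs, -1, draft_token_ids.unsqueeze(-1)).squeeze(-1)

    # H(p_original(. | ctx)): torch.special.entr computes -x * log(x)
    # elementwise (0 at x = 0), so summing over the vocab gives the entropy.
    entropy = torch.special.entr(target_probs).sum(dim=-1)

    # Accept when the posterior clears min(epsilon, delta * exp(-H)).
    threshold = torch.minimum(
        torch.full_like(entropy, posterior_threshold),
        posterior_alpha * torch.exp(-entropy))
    return candidate_prob > threshold

Because the bar scales with exp(-H), positions where the target distribution is high-entropy accept draft tokens more readily, while near-deterministic positions demand close agreement with the target model's top choice.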