mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 11:35:41 +08:00
[V1][Spec Decoding] Include bonus tokens in mean acceptance length (#17908)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
parent
ea2236bf95
commit
7e3571134f
@ -118,8 +118,8 @@ def main():
|
|||||||
acceptance_counts[step] += count
|
acceptance_counts[step] += count
|
||||||
|
|
||||||
print("-" * 50)
|
print("-" * 50)
|
||||||
print(f"mean acceptance length: \
|
print(f"mean acceptance length (including bonus tokens): \
|
||||||
{sum(acceptance_counts) / acceptance_counts[0]:.2f}")
|
{1 + (sum(acceptance_counts) / acceptance_counts[0]):.2f}")
|
||||||
print("-" * 50)
|
print("-" * 50)
|
||||||
|
|
||||||
# print acceptance at each token position
|
# print acceptance at each token position
|
||||||
|
|||||||
@ -73,7 +73,9 @@ class SpecDecodingLogging:
|
|||||||
|
|
||||||
draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
|
draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
|
||||||
100 if num_draft_tokens > 0 else float("nan"))
|
100 if num_draft_tokens > 0 else float("nan"))
|
||||||
mean_acceptance_length = (num_accepted_tokens / num_drafts)
|
|
||||||
|
# Conventionally, mean acceptance length includes the bonus token
|
||||||
|
mean_acceptance_length = 1 + (num_accepted_tokens / num_drafts)
|
||||||
|
|
||||||
pos_matrix = np.array(self.accepted_tokens_per_pos_lists)
|
pos_matrix = np.array(self.accepted_tokens_per_pos_lists)
|
||||||
acceptance_rates = np.sum(pos_matrix, axis=0) / num_drafts
|
acceptance_rates = np.sum(pos_matrix, axis=0) / num_drafts
|
||||||
@ -103,10 +105,12 @@ class SpecDecodingProm:
|
|||||||
rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
|
rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
|
||||||
rate(vllm:spec_decode_num_draft_tokens_total[$interval])
|
rate(vllm:spec_decode_num_draft_tokens_total[$interval])
|
||||||
|
|
||||||
The mean acceptance length can be calculated using:
|
The mean acceptance length (conventionally including bonus tokens)
|
||||||
|
can be calculated using:
|
||||||
|
|
||||||
|
1 + (
|
||||||
rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
|
rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
|
||||||
rate(vllm:spec_decode_num_drafts[$interval])
|
rate(vllm:spec_decode_num_drafts[$interval]))
|
||||||
|
|
||||||
A per-position acceptance rate vector can be computed using
|
A per-position acceptance rate vector can be computed using
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user