From 7e3571134fa8385f4795c7a1c2a40f3b2859a22c Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 9 May 2025 21:32:36 +0100 Subject: [PATCH] [V1][Spec Decoding] Include bonus tokens in mean acceptance length (#17908) Signed-off-by: Mark McLoughlin --- examples/offline_inference/eagle.py | 4 ++-- vllm/v1/spec_decode/metrics.py | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/offline_inference/eagle.py b/examples/offline_inference/eagle.py index 91e2f68ecffb..020521611f33 100644 --- a/examples/offline_inference/eagle.py +++ b/examples/offline_inference/eagle.py @@ -118,8 +118,8 @@ def main(): acceptance_counts[step] += count print("-" * 50) - print(f"mean acceptance length: \ - {sum(acceptance_counts) / acceptance_counts[0]:.2f}") + print(f"mean acceptance length (including bonus tokens): \ + {1 + (sum(acceptance_counts) / acceptance_counts[0]):.2f}") print("-" * 50) # print acceptance at each token position diff --git a/vllm/v1/spec_decode/metrics.py b/vllm/v1/spec_decode/metrics.py index 33ce98284e20..eb550a6c4697 100644 --- a/vllm/v1/spec_decode/metrics.py +++ b/vllm/v1/spec_decode/metrics.py @@ -73,7 +73,9 @@ class SpecDecodingLogging: draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens * 100 if num_draft_tokens > 0 else float("nan")) - mean_acceptance_length = (num_accepted_tokens / num_drafts) + + # Conventionally, mean acceptance length includes the bonus token + mean_acceptance_length = 1 + (num_accepted_tokens / num_drafts) pos_matrix = np.array(self.accepted_tokens_per_pos_lists) acceptance_rates = np.sum(pos_matrix, axis=0) / num_drafts @@ -103,10 +105,12 @@ class SpecDecodingProm: rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) / rate(vllm:spec_decode_num_draft_tokens_total[$interval]) - The mean acceptance length can be calculated using: + The mean acceptance length (conventionally including bonus tokens) + can be calculated using: + 1 + ( rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) / - rate(vllm:spec_decode_num_drafts[$interval]) + rate(vllm:spec_decode_num_drafts[$interval])) A per-position acceptance rate vector can be computed using