From 7e3571134fa8385f4795c7a1c2a40f3b2859a22c Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Fri, 9 May 2025 21:32:36 +0100
Subject: [PATCH] [V1][Spec Decoding] Include bonus tokens in mean acceptance
 length (#17908)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 examples/offline_inference/eagle.py |  4 ++--
 vllm/v1/spec_decode/metrics.py      | 10 +++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/examples/offline_inference/eagle.py b/examples/offline_inference/eagle.py
index 91e2f68ecffb..020521611f33 100644
--- a/examples/offline_inference/eagle.py
+++ b/examples/offline_inference/eagle.py
@@ -118,8 +118,8 @@ def main():
             acceptance_counts[step] += count
 
     print("-" * 50)
-    print(f"mean acceptance length: \
-        {sum(acceptance_counts) / acceptance_counts[0]:.2f}")
+    print(f"mean acceptance length (including bonus tokens): \
+        {1 + (sum(acceptance_counts) / acceptance_counts[0]):.2f}")
     print("-" * 50)
 
     # print acceptance at each token position
diff --git a/vllm/v1/spec_decode/metrics.py b/vllm/v1/spec_decode/metrics.py
index 33ce98284e20..eb550a6c4697 100644
--- a/vllm/v1/spec_decode/metrics.py
+++ b/vllm/v1/spec_decode/metrics.py
@@ -73,7 +73,9 @@ class SpecDecodingLogging:
 
         draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
                                  100 if num_draft_tokens > 0 else float("nan"))
-        mean_acceptance_length = (num_accepted_tokens / num_drafts)
+
+        # Conventionally, mean acceptance length includes the bonus token
+        mean_acceptance_length = 1 + (num_accepted_tokens / num_drafts)
 
         pos_matrix = np.array(self.accepted_tokens_per_pos_lists)
         acceptance_rates = np.sum(pos_matrix, axis=0) / num_drafts
@@ -103,10 +105,12 @@ class SpecDecodingProm:
       rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
       rate(vllm:spec_decode_num_draft_tokens_total[$interval])
 
-    The mean acceptance length can be calculated using:
+    The mean acceptance length (conventionally including bonus tokens)
+    can be calculated using:
 
+      1 + (
       rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
-      rate(vllm:spec_decode_num_drafts[$interval])
+      rate(vllm:spec_decode_num_drafts[$interval]))
 
     A per-position acceptance rate vector can be computed using