From 39a22dcaac707ebc6c79bfbfc12d6375a2094f38 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 1 Sep 2025 08:54:01 -0700 Subject: [PATCH] [Misc] Minor code simplification for spec decode (#24053) Signed-off-by: Woosuk Kwon --- vllm/v1/core/sched/scheduler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index d4391b1c2137a..e07d53ff84d37 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -873,19 +873,19 @@ class Scheduler(SchedulerInterface): scheduled_spec_token_ids = ( scheduler_output.scheduled_spec_decode_tokens.get(req_id)) if scheduled_spec_token_ids: + num_draft_tokens = len(scheduled_spec_token_ids) + num_accepted = len(generated_token_ids) - 1 + num_rejected = num_draft_tokens - num_accepted # num_computed_tokens represents the number of tokens # processed in the current step, considering scheduled # tokens and rejections. If some tokens are rejected, # num_computed_tokens is decreased by the number of rejected - # tokens, where is given by: - # len(scheduled_spec_token_ids) + 1 - len(generated_token_ids). - num_tokens_rejected = (len(scheduled_spec_token_ids) + 1 - - len(generated_token_ids)) - request.num_computed_tokens -= num_tokens_rejected + # tokens. + request.num_computed_tokens -= num_rejected spec_decoding_stats = self.make_spec_decoding_stats( spec_decoding_stats, - num_draft_tokens=len(scheduled_spec_token_ids), - num_accepted_tokens=len(generated_token_ids) - 1) + num_draft_tokens=num_draft_tokens, + num_accepted_tokens=num_accepted) stopped = False new_logprobs = None