[BugFix][Spec Decode] No in-place update to draft probs (#16952)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-12-11 15:15:32 +08:00 · 2025-04-21 19:54:19 -07:00 · 2025-04-21 19:54:19 -07:00 · 1311913f55
commit 1311913f55
parent 29f395c97c
1 changed file with 3 additions and 1 deletion
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -264,7 +264,9 @@ def compute_probs_and_sample_next_token(
    # TODO(woosuk): Consider seeds.
    q = torch.empty_like(probs)
    q.exponential_()
-    next_token_ids = probs.div_(q).argmax(dim=-1).view(-1)
+    # NOTE(woosuk): We shouldn't use `probs.div_(q)` because the draft_probs
+    # will be used later for rejection sampling.
+    next_token_ids = probs.div(q).argmax(dim=-1).view(-1)
    if not sampling_metadata.all_random:
        greedy_token_ids = probs.argmax(dim=-1)
        next_token_ids = torch.where(