mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 15:15:32 +08:00
[BugFix][Spec Decode] No in-place update to draft probs (#16952)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
29f395c97c
commit
1311913f55
@@ -264,7 +264,9 @@ def compute_probs_and_sample_next_token(
|
|||||||
# TODO(woosuk): Consider seeds.
|
# TODO(woosuk): Consider seeds.
|
||||||
q = torch.empty_like(probs)
|
q = torch.empty_like(probs)
|
||||||
q.exponential_()
|
q.exponential_()
|
||||||
next_token_ids = probs.div_(q).argmax(dim=-1).view(-1)
|
# NOTE(woosuk): We shouldn't use `probs.div_(q)` because the draft_probs
|
||||||
|
# will be used later for rejection sampling.
|
||||||
|
next_token_ids = probs.div(q).argmax(dim=-1).view(-1)
|
||||||
if not sampling_metadata.all_random:
|
if not sampling_metadata.all_random:
|
||||||
greedy_token_ids = probs.argmax(dim=-1)
|
greedy_token_ids = probs.argmax(dim=-1)
|
||||||
next_token_ids = torch.where(
|
next_token_ids = torch.where(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user