fix

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2026-07-08 15:07:12 +08:00 · 2025-09-18 16:32:00 -07:00 · 2025-09-18 16:32:00 -07:00 · 86dade710d
commit 86dade710d
parent efda08481b
1 changed files with 3 additions and 4 deletions
--- a/vllm/v1/worker/gpu/sampler.py
+++ b/vllm/v1/worker/gpu/sampler.py
@@ -246,16 +246,15 @@ def compute_logprobs(
        logprob_token_ids = torch.cat(
            (sampled_token_ids.unsqueeze(-1), topk_indices), dim=1)
    # NOTE(woosuk): Here, to save GPU memory, we do not materialize the full
    # logprobs tensor. Instead, we only compute and return the logprobs of
    # the topk + 1 tokens.
    logprobs = torch.empty(
        batch_size,
        num_logprobs + 1,
        dtype=torch.float32,
        device=logits.device,
    )
    # NOTE(woosuk): Here, to save GPU memory, we do not materialize the full
    # logprobs tensor. Instead, we only compute and return the logprobs of
    # the topk + 1 tokens.
    BLOCK_SIZE = 1024
    _topk_logprobs_kernel[(batch_size, )](
        logprobs,