From 86dade710d9a5f6dbe3d72d9b45f90aa493f4258 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 18 Sep 2025 16:32:00 -0700 Subject: [PATCH] Move memory-saving NOTE comment above the logprobs allocation it describes Signed-off-by: Woosuk Kwon --- vllm/v1/worker/gpu/sampler.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/v1/worker/gpu/sampler.py b/vllm/v1/worker/gpu/sampler.py index d9f74b6f09024..eaf43d4335843 100644 --- a/vllm/v1/worker/gpu/sampler.py +++ b/vllm/v1/worker/gpu/sampler.py @@ -246,16 +246,15 @@ def compute_logprobs( logprob_token_ids = torch.cat( (sampled_token_ids.unsqueeze(-1), topk_indices), dim=1) + # NOTE(woosuk): Here, to save GPU memory, we do not materialize the full + # logprobs tensor. Instead, we only compute and return the logprobs of + # the topk + 1 tokens. logprobs = torch.empty( batch_size, num_logprobs + 1, dtype=torch.float32, device=logits.device, ) - - # NOTE(woosuk): Here, to save GPU memory, we do not materialize the full - # logprobs tensor. Instead, we only compute and return the logprobs of - # the topk + 1 tokens. BLOCK_SIZE = 1024 _topk_logprobs_kernel[(batch_size, )]( logprobs,