From 8ea0c2753a273e24957ab4587c200a3254ebe970 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Sun, 17 Aug 2025 18:16:03 -0700
Subject: [PATCH] [Misc] Minor code cleanup for _get_prompt_logprobs_dict
 (#23064)

Signed-off-by: Woosuk Kwon
---
 vllm/v1/worker/gpu_model_runner.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index adaa1306f6ca4..fc320be1c3bda 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1722,7 +1722,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         # Compute prompt logprobs if needed.
         prompt_logprobs_dict = self._get_prompt_logprobs_dict(
             hidden_states[:num_scheduled_tokens],
-            scheduler_output,
+            scheduler_output.num_scheduled_tokens,
         )
 
         # Get the valid generated tokens.
@@ -2064,7 +2064,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
     def _get_prompt_logprobs_dict(
         self,
         hidden_states: torch.Tensor,
-        scheduler_output: "SchedulerOutput",
+        num_scheduled_tokens: dict[str, int],
     ) -> dict[str, Optional[LogprobsTensors]]:
         num_prompt_logprobs_dict = self.input_batch.num_prompt_logprobs
         if not num_prompt_logprobs_dict:
             return {}
@@ -2077,8 +2077,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         # maintainable loop over optimal performance.
         completed_prefill_reqs = []
         for req_id, num_prompt_logprobs in num_prompt_logprobs_dict.items():
-
-            num_tokens = scheduler_output.num_scheduled_tokens[req_id]
+            num_tokens = num_scheduled_tokens[req_id]
 
             # Get metadata for this request.
             request = self.requests[req_id]
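
For readers outside the vLLM codebase, the cleanup above follows a common "narrow the interface" pattern: `_get_prompt_logprobs_dict` only ever read `scheduler_output.num_scheduled_tokens`, so the patch changes it to accept just that dict, and the caller passes the field directly. A side benefit visible in the diff is that the helper no longer needs the string-quoted forward reference `"SchedulerOutput"` in its annotation. The sketch below is a minimal illustration of the pattern, not vLLM code: `SchedulerOutput` here is a stand-in dataclass, and `summarize_before`/`summarize_after` are hypothetical helpers.

    # Illustrative sketch of the "pass only what you read" refactor.
    # None of these names are the real vLLM classes or functions.
    from dataclasses import dataclass, field


    @dataclass
    class SchedulerOutput:
        # Per-request token counts keyed by request id, mirroring the
        # scheduler_output.num_scheduled_tokens field the patch passes through.
        num_scheduled_tokens: dict[str, int] = field(default_factory=dict)
        # ... many other fields the helper never touches ...


    # Before: the helper depends on the whole SchedulerOutput, even though
    # it only reads one field.
    def summarize_before(scheduler_output: SchedulerOutput) -> int:
        return sum(scheduler_output.num_scheduled_tokens.values())


    # After: the signature names exactly what is consumed, so the true
    # inputs are visible at the call site and the function is easier to
    # test with a plain dict.
    def summarize_after(num_scheduled_tokens: dict[str, int]) -> int:
        return sum(num_scheduled_tokens.values())


    if __name__ == "__main__":
        out = SchedulerOutput(num_scheduled_tokens={"req-0": 7, "req-1": 3})
        assert summarize_before(out) == summarize_after(out.num_scheduled_tokens) == 10

As in the patch, the narrowed helper can be exercised in isolation with a literal dict, while the caller stays a one-line change from passing the container to passing the field.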