From 829b9a62d0a89872883397ae4b5184048836589f Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Thu, 14 Aug 2025 08:28:09 -0400 Subject: [PATCH] [Perf] Don't create unnecessary pooling params (#22876) Signed-off-by: Lucas Wilkinson --- vllm/v1/worker/gpu_model_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index a03e860a91c71..8fb9641844fb5 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -341,13 +341,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): model_kwargs = dict[str, Any]() num_reqs = self.input_batch.num_reqs - pooling_params = self.input_batch.pooling_metadata.pooling_params - - num_pooling_reqs = len(pooling_params) + num_pooling_reqs = len(self.input_batch.pooling_params) if num_pooling_reqs == 0: return model_kwargs + pooling_params = self.input_batch.pooling_metadata.pooling_params + assert num_pooling_reqs == num_reqs token_type_id_requests = dict[int, Any]()