diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index a03e860a91c71..8fb9641844fb5 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -341,13 +341,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): model_kwargs = dict[str, Any]() num_reqs = self.input_batch.num_reqs - pooling_params = self.input_batch.pooling_metadata.pooling_params - - num_pooling_reqs = len(pooling_params) + num_pooling_reqs = len(self.input_batch.pooling_params) if num_pooling_reqs == 0: return model_kwargs + pooling_params = self.input_batch.pooling_metadata.pooling_params + assert num_pooling_reqs == num_reqs token_type_id_requests = dict[int, Any]()