From add4b0ca448e0b053a76b7db215aee0e797786d3 Mon Sep 17 00:00:00 2001 From: Vensen Date: Sun, 14 Dec 2025 22:57:15 +0800 Subject: [PATCH] [Bugfix][benchmarks] Fix input token calculation for rerank benchmark metrics (#30596) Signed-off-by: vensen --- vllm/benchmarks/serve.py | 4 +++- vllm/entrypoints/pooling/score/protocol.py | 1 + vllm/entrypoints/pooling/score/serving.py | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 254e4d35e5350..f5d8ea5a975a9 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -235,7 +235,9 @@ async def get_request( def calculate_metrics_for_embeddings( - outputs: list[RequestFuncOutput], dur_s: float, selected_percentiles: list[float] + outputs: list[RequestFuncOutput], + dur_s: float, + selected_percentiles: list[float], ) -> EmbedBenchmarkMetrics: """Calculate the metrics for the embedding requests. diff --git a/vllm/entrypoints/pooling/score/protocol.py b/vllm/entrypoints/pooling/score/protocol.py index a22219707c357..e81bda2eec3d7 100644 --- a/vllm/entrypoints/pooling/score/protocol.py +++ b/vllm/entrypoints/pooling/score/protocol.py @@ -120,6 +120,7 @@ class RerankResult(BaseModel): class RerankUsage(BaseModel): + prompt_tokens: int total_tokens: int diff --git a/vllm/entrypoints/pooling/score/serving.py b/vllm/entrypoints/pooling/score/serving.py index f574d8bcebb40..edbfcd03ac92c 100644 --- a/vllm/entrypoints/pooling/score/serving.py +++ b/vllm/entrypoints/pooling/score/serving.py @@ -502,5 +502,7 @@ class ServingScores(OpenAIServing): id=request_id, model=model_name, results=results, - usage=RerankUsage(total_tokens=num_prompt_tokens), + usage=RerankUsage( + total_tokens=num_prompt_tokens, prompt_tokens=num_prompt_tokens + ), )