From 0d4f48fa103e8407ae94eb87fd87d2380fbb1eb7 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Wed, 8 Oct 2025 13:52:19 +0800
Subject: [PATCH] [Bugfix] Incorrect MM data format in `vllm bench throughput`
 (#26395)

Signed-off-by: DarkLight1337
---
 vllm/benchmarks/throughput.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
index 181a3e196586..04bc29b07aac 100644
--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -201,16 +201,16 @@ async def run_vllm_async(
         sampling_params: list[SamplingParams] = []
         lora_requests: list[Optional[LoRARequest]] = []
         for request in requests:
-            prompts.append(
-                TokensPrompt(
-                    prompt_token_ids=request.prompt["prompt_token_ids"],
-                    multi_modal_data=request.multi_modal_data,
-                )
+            prompt = (
+                TokensPrompt(prompt_token_ids=request.prompt["prompt_token_ids"])
                 if "prompt_token_ids" in request.prompt
-                else TextPrompt(
-                    prompt=request.prompt, multi_modal_data=request.multi_modal_data
-                )
+                else TextPrompt(prompt=request.prompt)
             )
+
+            if request.multi_modal_data:
+                assert isinstance(request.multi_modal_data, dict)
+                prompt["multi_modal_data"] = request.multi_modal_data
+
             sampling_params.append(
                 SamplingParams(
                     n=n,
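
The substance of the fix: `TextPrompt` and `TokensPrompt` are TypedDicts, and
the old code passed `multi_modal_data=request.multi_modal_data` into their
constructors unconditionally, so every prompt carried the key even when the
request had no multi-modal data. The patched loop attaches the key only when
data is actually present, after asserting that it is a dict. Below is a
minimal sketch of the resulting construction logic, not part of the commit
(the helper name `build_prompt` is hypothetical; `TextPrompt`/`TokensPrompt`
are assumed importable from `vllm.inputs`):

    # Sketch only: mirrors the patched prompt construction outside the
    # benchmark loop. `build_prompt` is a hypothetical helper.
    from typing import Any, Optional, Union

    from vllm.inputs import TextPrompt, TokensPrompt

    def build_prompt(
        raw_prompt: Union[str, dict[str, Any]],
        multi_modal_data: Optional[dict[str, Any]] = None,
    ) -> Union[TextPrompt, TokensPrompt]:
        # Pre-tokenized requests carry their IDs under "prompt_token_ids";
        # anything else is treated as plain text.
        prompt: Union[TextPrompt, TokensPrompt] = (
            TokensPrompt(prompt_token_ids=raw_prompt["prompt_token_ids"])
            if isinstance(raw_prompt, dict) and "prompt_token_ids" in raw_prompt
            else TextPrompt(prompt=raw_prompt)
        )

        # Attach the optional key only when real data exists; writing
        # multi_modal_data=None into the TypedDict would leave a
        # present-but-empty key for downstream consumers.
        if multi_modal_data:
            assert isinstance(multi_modal_data, dict)
            prompt["multi_modal_data"] = multi_modal_data

        return prompt

For example, `build_prompt("Describe the image.", {"image": image})` yields a
prompt dict with both keys set, while a request without attachments yields a
prompt dict with no `multi_modal_data` key at all.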