Fix(benchmarks): allow multiple mm contents in OpenAI Chat Completion Benchmarks (#22534)

Signed-off-by: breno.skuk <breno.skuk@hcompany.ai>
This commit is contained in:
Breno Baldas Skuk 2025-08-10 18:03:15 +02:00 committed by GitHub
parent b76753f0b5
commit 65a7917be4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 49 additions and 10 deletions

View File

@@ -31,7 +31,7 @@ class RequestFuncInput:
     model_name: Optional[str] = None
     logprobs: Optional[int] = None
     extra_body: Optional[dict] = None
-    multi_modal_content: Optional[dict] = None
+    multi_modal_content: Optional[dict | list[dict]] = None
     ignore_eos: bool = False
     language: Optional[str] = None
@@ -364,7 +364,15 @@ async def async_request_openai_chat_completions(
     ) as session:
         content = [{"type": "text", "text": request_func_input.prompt}]
         if request_func_input.multi_modal_content:
-            content.append(request_func_input.multi_modal_content)
+            mm_content = request_func_input.multi_modal_content
+            if isinstance(mm_content, list):
+                content.extend(mm_content)
+            elif isinstance(mm_content, dict):
+                content.append(mm_content)
+            else:
+                raise TypeError(
+                    "multi_modal_content must be a dict or list[dict] for openai-chat"
+                )
         payload = {
             "model": request_func_input.model_name
             if request_func_input.model_name
@@ -491,7 +499,10 @@ async def async_request_openai_audio(
             buffer.seek(0)
             return buffer
-        with to_bytes(*request_func_input.multi_modal_content["audio"]) as f:
+        mm_audio = request_func_input.multi_modal_content
+        if not isinstance(mm_audio, dict) or "audio" not in mm_audio:
+            raise TypeError("multi_modal_content must be a dict containing 'audio'")
+        with to_bytes(*mm_audio["audio"]) as f:
             form = aiohttp.FormData()
             form.add_field("file", f, content_type="audio/wav")
             for key, value in payload.items():

View File

@@ -52,7 +52,7 @@ class SampleRequest:
     prompt: Union[str, Any]
     prompt_len: int
     expected_output_len: int
-    multi_modal_data: Optional[Union[MultiModalDataDict, dict]] = None
+    multi_modal_data: Optional[Union[MultiModalDataDict, dict, list[dict]]] = None
     lora_request: Optional[LoRARequest] = None

View File

@@ -263,7 +263,14 @@ async def benchmark(
         input_requests[0].multi_modal_data,
     )
-    assert test_mm_content is None or isinstance(test_mm_content, dict)
+    assert (
+        test_mm_content is None
+        or isinstance(test_mm_content, dict)
+        or (
+            isinstance(test_mm_content, list)
+            and all(isinstance(item, dict) for item in test_mm_content)
+        )
+    ), "multi_modal_data must be a dict or list[dict]"
     test_input = RequestFuncInput(
         model=model_id,
         model_name=model_name,

View File

@@ -71,7 +71,9 @@ class SampleRequest:
     prompt: Union[str, Any]
     prompt_len: int
     expected_output_len: int
-    multi_modal_data: Optional[Union[MultiModalDataDict, dict]] = None
+    multi_modal_data: Optional[
+        Union[MultiModalDataDict, dict, list[dict]]
+    ] = None
     lora_request: Optional[LoRARequest] = None

View File

@@ -28,7 +28,7 @@ class RequestFuncInput:
     model_name: Optional[str] = None
     logprobs: Optional[int] = None
     extra_body: Optional[dict] = None
-    multi_modal_content: Optional[dict] = None
+    multi_modal_content: Optional[dict | list[dict]] = None
     ignore_eos: bool = False
     language: Optional[str] = None
@@ -172,7 +172,16 @@ async def async_request_openai_chat_completions(
         content = [{"type": "text", "text": request_func_input.prompt}]
         if request_func_input.multi_modal_content:
-            content.append(request_func_input.multi_modal_content)
+            mm_content = request_func_input.multi_modal_content
+            if isinstance(mm_content, list):
+                content.extend(mm_content)
+            elif isinstance(mm_content, dict):
+                content.append(mm_content)
+            else:
+                raise TypeError(
+                    "multi_modal_content must be a dict or list[dict] "
+                    "for openai-chat"
+                )
         payload = {
             "model":
                 request_func_input.model_name
@@ -310,7 +319,10 @@ async def async_request_openai_audio(
             buffer.seek(0)
             return buffer
-        with to_bytes(*request_func_input.multi_modal_content["audio"]) as f:
+        mm_audio = request_func_input.multi_modal_content
+        if not isinstance(mm_audio, dict) or "audio" not in mm_audio:
+            raise TypeError("multi_modal_content must be a dict containing 'audio'")
+        with to_bytes(*mm_audio["audio"]) as f:
             form = aiohttp.FormData()
             form.add_field("file", f, content_type="audio/wav")
             for key, value in payload.items():

View File

@@ -365,7 +365,14 @@ async def benchmark(
         input_requests[0].multi_modal_data,
     )
-    assert test_mm_content is None or isinstance(test_mm_content, dict)
+    assert (
+        test_mm_content is None
+        or isinstance(test_mm_content, dict)
+        or (
+            isinstance(test_mm_content, list)
+            and all(isinstance(item, dict) for item in test_mm_content)
+        )
+    ), "multi_modal_data must be a dict or list[dict]"
     test_input = RequestFuncInput(
         model=model_id,
         model_name=model_name,