diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index a97fa280f37c..53bd3247afbb 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -396,20 +396,6 @@ async def benchmark(
         tasks.append(asyncio.create_task(task))
     outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)

-    if profile:
-        print("Stopping profiler...")
-        profile_input = RequestFuncInput(
-            model=model_id,
-            prompt=test_prompt,
-            api_url=base_url + "/stop_profile",
-            prompt_len=test_prompt_len,
-            output_len=test_output_len,
-            logprobs=logprobs,
-        )
-        profile_output = await request_func(request_func_input=profile_input)
-        if profile_output.success:
-            print("Profiler stopped")
-
     if pbar is not None:
         pbar.close()

@@ -518,6 +504,20 @@ async def benchmark(

     print("=" * 50)

+    if profile:
+        print("Stopping profiler...")
+        profile_input = RequestFuncInput(
+            model=model_id,
+            prompt=test_prompt,
+            api_url=base_url + "/stop_profile",
+            prompt_len=test_prompt_len,
+            output_len=test_output_len,
+            logprobs=logprobs,
+        )
+        profile_output = await request_func(request_func_input=profile_input)
+        if profile_output.success:
+            print("Profiler stopped")
+
     return result
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index e23a5a9e2233..d535cd5d7e1a 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -538,20 +538,6 @@ async def benchmark(
         )
     outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)

-    if profile:
-        print("Stopping profiler...")
-        profile_input = RequestFuncInput(
-            model=model_id,
-            prompt=test_request.prompt,
-            api_url=base_url + "/stop_profile",
-            prompt_len=test_request.prompt_len,
-            output_len=test_request.expected_output_len,
-            extra_body={test_request.structure_type: test_request.schema},
-        )
-        profile_output = await request_func(request_func_input=profile_input)
-        if profile_output.success:
-            print("Profiler stopped")
-
     if pbar is not None:
         pbar.close()

@@ -666,6 +652,20 @@ async def benchmark(

     print("=" * 50)

+    if profile:
+        print("Stopping profiler...")
+        profile_input = RequestFuncInput(
+            model=model_id,
+            prompt=test_request.prompt,
+            api_url=base_url + "/stop_profile",
+            prompt_len=test_request.prompt_len,
+            output_len=test_request.expected_output_len,
+            extra_body={test_request.structure_type: test_request.schema},
+        )
+        profile_output = await request_func(request_func_input=profile_input)
+        if profile_output.success:
+            print("Profiler stopped")
+
     return result, ret
diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index f4506c9ce6f4..635363440c08 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -470,20 +470,6 @@ async def benchmark(
                                      pbar=pbar)))
     outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)

-    if profile:
-        print("Stopping profiler...")
-        profile_input = RequestFuncInput(
-            model=model_id,
-            prompt=test_prompt,
-            api_url=base_url + "/stop_profile",
-            prompt_len=test_prompt_len,
-            output_len=test_output_len,
-            logprobs=logprobs,
-        )
-        profile_output = await request_func(request_func_input=profile_input)
-        if profile_output.success:
-            print("Profiler stopped")
-
     if pbar is not None:
         pbar.close()

@@ -576,6 +562,19 @@ async def benchmark(

     print("=" * 50)

+    if profile:
+        print("Stopping profiler...")
+        profile_input = RequestFuncInput(
+            model=model_id,
+            prompt=test_prompt,
+            api_url=base_url + "/stop_profile",
+            prompt_len=test_prompt_len,
+            output_len=test_output_len,
+            logprobs=logprobs,
+        )
+        profile_output = await request_func(request_func_input=profile_input)
+        if profile_output.success:
+            print("Profiler stopped")
     return result
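
The change is the same in all three files: the block that POSTs to `base_url + "/stop_profile"` moves from immediately after `asyncio.gather(*tasks)` returns to just before `benchmark()` returns, i.e. after the metrics summary has been printed. A minimal sketch of the reordering, using placeholder helpers (`run_requests`, `stop_profiler`) that stand in for the real request machinery, which is not reproduced here:

```python
import asyncio


async def benchmark(profile: bool = False) -> dict:
    async def run_requests() -> list[str]:
        # Placeholder for the per-request tasks the scripts gather
        # with asyncio.gather(*tasks).
        await asyncio.sleep(0)
        return ["output-1", "output-2"]

    async def stop_profiler() -> None:
        # Placeholder for the RequestFuncInput request sent to
        # base_url + "/stop_profile".
        print("Stopping profiler...")
        print("Profiler stopped")

    outputs = await run_requests()
    # Before this change, `if profile: await stop_profiler()` ran here,
    # ahead of any metric computation or output.

    result = {"completed": len(outputs)}
    print("=" * 50)  # metrics summary is printed first

    # After this change, the profiler is stopped last, so a slow
    # /stop_profile round-trip no longer delays the printed results.
    if profile:
        await stop_profiler()

    return result


if __name__ == "__main__":
    asyncio.run(benchmark(profile=True))
```

The diff itself only reorders existing statements; the `RequestFuncInput` payload sent to `/stop_profile` is unchanged in each file.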