Fix some issues with benchmark data output (#13641)
Signed-off-by: Huy Do <huydhn@gmail.com>
commit e7ef74e26e
parent cbae7af552
@@ -84,8 +84,13 @@ if __name__ == "__main__":
             # this result is generated via `benchmark_serving.py`

             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)

             # update the test name of this result
@@ -99,8 +104,13 @@ if __name__ == "__main__":
             # this result is generated via `benchmark_latency.py`

             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)

             # update the test name of this result
@@ -121,8 +131,13 @@ if __name__ == "__main__":
             # this result is generated via `benchmark_throughput.py`

             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)

             # update the test name of this result
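The three hunks above apply the same guard for the three result types. As a minimal standalone sketch of the pattern (not vLLM code; the helper name is hypothetical), each `<result>.json` may have a `<result>.commands` sidecar, and an unreadable sidecar now skips that result instead of aborting the whole summary:

# Illustrative sketch only; merge_command_sidecar is a hypothetical name.
import json
from pathlib import Path

def merge_command_sidecar(test_file: Path, raw_result: dict) -> bool:
    """Merge <result>.commands into raw_result; return False if the sidecar is unreadable."""
    try:
        # Path("x.json").with_suffix(".commands") -> Path("x.commands")
        with open(test_file.with_suffix(".commands")) as f:
            raw_result.update(json.loads(f.read()))
        return True
    except OSError as e:
        print(e)
        return False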
@@ -309,11 +309,14 @@ run_serving_tests() {

      new_test_name=$test_name"_qps_"$qps

+      # pass the tensor parallel size to the client so that it can be displayed
+      # on the benchmark dashboard
      client_command="python3 benchmark_serving.py \
        --save-result \
        --result-dir $RESULTS_FOLDER \
        --result-filename ${new_test_name}.json \
        --request-rate $qps \
+        --metadata "tensor_parallel_size=$tp" \
        $client_args"

      echo "Running test case $test_name with qps $qps"
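For context, the `--metadata` flag is assumed here to take `key=value` pairs that get merged into the saved result JSON, which is how `tensor_parallel_size` later surfaces in `extra_info` for the dashboard. A rough sketch of that assumed handling (the helper name is hypothetical, not the benchmark script's API):

# Hedged sketch of the assumed --metadata handling; apply_metadata is hypothetical.
from typing import Dict, List

def apply_metadata(result_json: Dict[str, str], metadata: List[str]) -> Dict[str, str]:
    for item in metadata:  # e.g. ["tensor_parallel_size=4"]
        key, _, value = item.partition("=")
        result_json[key] = value
    return result_json

print(apply_metadata({}, ["tensor_parallel_size=4"]))
# {'tensor_parallel_size': '4'}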
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional

 import numpy as np
 import torch
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm import tqdm

 from vllm import LLM, SamplingParams
@@ -30,8 +30,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
                     for k in ["avg_latency", "percentiles"]})
     if pt_records:
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)


 def main(args: argparse.Namespace):
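Replacing the raw `json.dump` with `write_to_json` (added to `benchmark_utils` in the last hunk below) matters presumably because some benchmark metrics can come out as `inf`; the stdlib encoder then emits the non-standard `Infinity` token, which strict JSON consumers reject. A quick demonstration, not vLLM code:

# Demonstration of the stdlib behavior being worked around.
import json

record = {"p99_latency": float("inf")}
print(json.dumps(record))  # '{"p99_latency": Infinity}' -- "Infinity" is not valid strict JSON
# json.dumps(record, allow_nan=False) would raise ValueError instead of emitting it.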
@@ -56,7 +56,7 @@ try:
 except ImportError:
     from argparse import ArgumentParser as FlexibleArgumentParser

-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json

 MILLISECONDS_TO_SECONDS_CONVERSION = 1000

@@ -841,8 +841,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)


 def main(args: argparse.Namespace):
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple

 import torch
 import uvloop
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from PIL import Image
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -366,8 +366,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)


 def main(args: argparse.Namespace):
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0

 import argparse
+import json
+import math
 import os
 from typing import Any, Dict, List

@@ -34,6 +36,34 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
                 "extra_info": extra_info,
             },
         }
+
+        tp = record["benchmark"]["extra_info"]["args"].get(
+            "tensor_parallel_size")
+        # Save tensor_parallel_size parameter if it's part of the metadata
+        if not tp and "tensor_parallel_size" in extra_info:
+            record["benchmark"]["extra_info"]["args"][
+                "tensor_parallel_size"] = extra_info["tensor_parallel_size"]
+
         records.append(record)

     return records
+
+
+class InfEncoder(json.JSONEncoder):
+
+    def clear_inf(self, o: Any):
+        if isinstance(o, dict):
+            return {k: self.clear_inf(v) for k, v in o.items()}
+        elif isinstance(o, list):
+            return [self.clear_inf(v) for v in o]
+        elif isinstance(o, float) and math.isinf(o):
+            return "inf"
+        return o
+
+    def iterencode(self, o: Any, *args, **kwargs) -> Any:
+        return super().iterencode(self.clear_inf(o), *args, **kwargs)
+
+
+def write_to_json(filename: str, records: List) -> None:
+    with open(filename, "w") as f:
+        json.dump(records, f, cls=InfEncoder)
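Putting the new helper together, a minimal usage sketch; the record contents and file name are made up for illustration:

# Minimal usage sketch of the new helper; the record values are illustrative.
from benchmark_utils import write_to_json

records = [{"name": "demo", "p99_latency": float("inf"), "throughput": 42.0}]
write_to_json("demo.pytorch.json", records)
# demo.pytorch.json now contains:
#   [{"name": "demo", "p99_latency": "inf", "throughput": 42.0}]
# i.e. infinite values are written as the string "inf" instead of the invalid token Infinity.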