Convert .buildkite to ruff format (#17656)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor 2025-05-13 10:28:31 +01:00 committed by GitHub
parent 23b3134eb5
commit 98fcba1575
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 182 additions and 114 deletions

View File

@ -8,12 +8,12 @@ import zipfile
# Note that we have 400 MiB quota, please use it wisely. # Note that we have 400 MiB quota, please use it wisely.
# See https://github.com/pypi/support/issues/3792 . # See https://github.com/pypi/support/issues/3792 .
# Please also sync the value with the one in Dockerfile. # Please also sync the value with the one in Dockerfile.
VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400)) VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
def print_top_10_largest_files(zip_file): def print_top_10_largest_files(zip_file):
"""Print the top 10 largest files in the given zip file.""" """Print the top 10 largest files in the given zip file."""
with zipfile.ZipFile(zip_file, 'r') as z: with zipfile.ZipFile(zip_file, "r") as z:
file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()] file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()]
file_sizes.sort(key=lambda x: x[1], reverse=True) file_sizes.sort(key=lambda x: x[1], reverse=True)
for f, size in file_sizes[:10]: for f, size in file_sizes[:10]:
@ -28,14 +28,18 @@ def check_wheel_size(directory):
wheel_path = os.path.join(root, file_name) wheel_path = os.path.join(root, file_name)
wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024) wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
if wheel_size_mb > VLLM_MAX_SIZE_MB: if wheel_size_mb > VLLM_MAX_SIZE_MB:
print(f"Not allowed: Wheel {wheel_path} is larger " print(
f"({wheel_size_mb:.2f} MB) than the limit " f"Not allowed: Wheel {wheel_path} is larger "
f"({VLLM_MAX_SIZE_MB} MB).") f"({wheel_size_mb:.2f} MB) than the limit "
f"({VLLM_MAX_SIZE_MB} MB)."
)
print_top_10_largest_files(wheel_path) print_top_10_largest_files(wheel_path)
return 1 return 1
else: else:
print(f"Wheel {wheel_path} is within the allowed size " print(
f"({wheel_size_mb:.2f} MB).") f"Wheel {wheel_path} is within the allowed size "
f"({wheel_size_mb:.2f} MB)."
)
return 0 return 0

View File

@ -22,5 +22,5 @@ with open("index.html", "w") as f:
print(f"Generated index.html for {args.wheel}") print(f"Generated index.html for {args.wheel}")
# cloudfront requires escaping the '+' character # cloudfront requires escaping the '+' character
f.write( f.write(
template.format(wheel=filename, template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
wheel_html_escaped=filename.replace("+", "%2B"))) )

View File

@ -8,11 +8,14 @@ def pytest_addoption(parser):
parser.addoption( parser.addoption(
"--config-list-file", "--config-list-file",
action="store", action="store",
help="Path to the file listing model config YAMLs (one per line)") help="Path to the file listing model config YAMLs (one per line)",
parser.addoption("--tp-size", )
action="store", parser.addoption(
default="1", "--tp-size",
help="Tensor parallel size to use for evaluation") action="store",
default="1",
help="Tensor parallel size to use for evaluation",
)
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
@ -33,7 +36,8 @@ def pytest_generate_tests(metafunc):
config_dir = config_list_file.parent config_dir = config_list_file.parent
with open(config_list_file, encoding="utf-8") as f: with open(config_list_file, encoding="utf-8") as f:
configs = [ configs = [
config_dir / line.strip() for line in f config_dir / line.strip()
for line in f
if line.strip() and not line.startswith("#") if line.strip() and not line.startswith("#")
] ]
metafunc.parametrize("config_filename", configs) metafunc.parametrize("config_filename", configs)

View File

@ -16,19 +16,22 @@ RTOL = 0.08
def launch_lm_eval(eval_config, tp_size): def launch_lm_eval(eval_config, tp_size):
trust_remote_code = eval_config.get('trust_remote_code', False) trust_remote_code = eval_config.get("trust_remote_code", False)
model_args = f"pretrained={eval_config['model_name']}," \ model_args = (
f"tensor_parallel_size={tp_size}," \ f"pretrained={eval_config['model_name']},"
f"enforce_eager=true," \ f"tensor_parallel_size={tp_size},"
f"add_bos_token=true," \ f"enforce_eager=true,"
f"trust_remote_code={trust_remote_code}" f"add_bos_token=true,"
f"trust_remote_code={trust_remote_code}"
)
results = lm_eval.simple_evaluate( results = lm_eval.simple_evaluate(
model="vllm", model="vllm",
model_args=model_args, model_args=model_args,
tasks=[task["name"] for task in eval_config["tasks"]], tasks=[task["name"] for task in eval_config["tasks"]],
num_fewshot=eval_config["num_fewshot"], num_fewshot=eval_config["num_fewshot"],
limit=eval_config["limit"], limit=eval_config["limit"],
batch_size="auto") batch_size="auto",
)
return results return results
@ -42,9 +45,10 @@ def test_lm_eval_correctness_param(config_filename, tp_size):
for metric in task["metrics"]: for metric in task["metrics"]:
ground_truth = metric["value"] ground_truth = metric["value"]
measured_value = results["results"][task["name"]][metric["name"]] measured_value = results["results"][task["name"]][metric["name"]]
print(f'{task["name"]} | {metric["name"]}: ' print(
f'ground_truth={ground_truth} | measured={measured_value}') f"{task['name']} | {metric['name']}: "
success = success and np.isclose( f"ground_truth={ground_truth} | measured={measured_value}"
ground_truth, measured_value, rtol=RTOL) )
success = success and np.isclose(ground_truth, measured_value, rtol=RTOL)
assert success assert success

View File

@ -65,18 +65,18 @@ def read_markdown(file):
def results_to_json(latency, throughput, serving): def results_to_json(latency, throughput, serving):
return json.dumps({ return json.dumps(
'latency': latency.to_dict(), {
'throughput': throughput.to_dict(), "latency": latency.to_dict(),
'serving': serving.to_dict() "throughput": throughput.to_dict(),
}) "serving": serving.to_dict(),
}
)
if __name__ == "__main__": if __name__ == "__main__":
# collect results # collect results
for test_file in results_folder.glob("*.json"): for test_file in results_folder.glob("*.json"):
with open(test_file) as f: with open(test_file) as f:
raw_result = json.loads(f.read()) raw_result = json.loads(f.read())
@ -120,7 +120,8 @@ if __name__ == "__main__":
for perc in [10, 25, 50, 75, 90, 99]: for perc in [10, 25, 50, 75, 90, 99]:
# Multiply 1000 to convert the time unit from s to ms # Multiply 1000 to convert the time unit from s to ms
raw_result.update( raw_result.update(
{f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}) {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
)
raw_result["avg_latency"] = raw_result["avg_latency"] * 1000 raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
# add the result to raw_result # add the result to raw_result
@ -153,26 +154,27 @@ if __name__ == "__main__":
serving_results = pd.DataFrame.from_dict(serving_results) serving_results = pd.DataFrame.from_dict(serving_results)
throughput_results = pd.DataFrame.from_dict(throughput_results) throughput_results = pd.DataFrame.from_dict(throughput_results)
raw_results_json = results_to_json(latency_results, throughput_results, raw_results_json = results_to_json(
serving_results) latency_results, throughput_results, serving_results
)
# remapping the key, for visualization purpose # remapping the key, for visualization purpose
if not latency_results.empty: if not latency_results.empty:
latency_results = latency_results[list( latency_results = latency_results[list(latency_column_mapping.keys())].rename(
latency_column_mapping.keys())].rename( columns=latency_column_mapping
columns=latency_column_mapping) )
if not serving_results.empty: if not serving_results.empty:
serving_results = serving_results[list( serving_results = serving_results[list(serving_column_mapping.keys())].rename(
serving_column_mapping.keys())].rename( columns=serving_column_mapping
columns=serving_column_mapping) )
if not throughput_results.empty: if not throughput_results.empty:
throughput_results = throughput_results[list( throughput_results = throughput_results[
throughput_results_column_mapping.keys())].rename( list(throughput_results_column_mapping.keys())
columns=throughput_results_column_mapping) ].rename(columns=throughput_results_column_mapping)
processed_results_json = results_to_json(latency_results, processed_results_json = results_to_json(
throughput_results, latency_results, throughput_results, serving_results
serving_results) )
for df in [latency_results, serving_results, throughput_results]: for df in [latency_results, serving_results, throughput_results]:
if df.empty: if df.empty:
@ -184,38 +186,39 @@ if __name__ == "__main__":
# The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...", # The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
# we want to turn it into "8xGPUTYPE" # we want to turn it into "8xGPUTYPE"
df["GPU"] = df["GPU"].apply( df["GPU"] = df["GPU"].apply(
lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}") lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"
)
# get markdown tables # get markdown tables
latency_md_table = tabulate(latency_results, latency_md_table = tabulate(
headers='keys', latency_results, headers="keys", tablefmt="pipe", showindex=False
tablefmt='pipe', )
showindex=False) serving_md_table = tabulate(
serving_md_table = tabulate(serving_results, serving_results, headers="keys", tablefmt="pipe", showindex=False
headers='keys', )
tablefmt='pipe', throughput_md_table = tabulate(
showindex=False) throughput_results, headers="keys", tablefmt="pipe", showindex=False
throughput_md_table = tabulate(throughput_results, )
headers='keys',
tablefmt='pipe',
showindex=False)
# document the result # document the result
with open(results_folder / "benchmark_results.md", "w") as f: with open(results_folder / "benchmark_results.md", "w") as f:
results = read_markdown(
results = read_markdown("../.buildkite/nightly-benchmarks/" + "../.buildkite/nightly-benchmarks/"
"performance-benchmarks-descriptions.md") + "performance-benchmarks-descriptions.md"
)
results = results.format( results = results.format(
latency_tests_markdown_table=latency_md_table, latency_tests_markdown_table=latency_md_table,
throughput_tests_markdown_table=throughput_md_table, throughput_tests_markdown_table=throughput_md_table,
serving_tests_markdown_table=serving_md_table, serving_tests_markdown_table=serving_md_table,
benchmarking_results_in_json_string=processed_results_json) benchmarking_results_in_json_string=processed_results_json,
)
f.write(results) f.write(results)
# document benchmarking results in json # document benchmarking results in json
with open(results_folder / "benchmark_results.json", "w") as f: with open(results_folder / "benchmark_results.json", "w") as f:
results = (
results = latency_results.to_dict( latency_results.to_dict(orient="records")
orient='records') + throughput_results.to_dict( + throughput_results.to_dict(orient="records")
orient='records') + serving_results.to_dict(orient='records') + serving_results.to_dict(orient="records")
)
f.write(json.dumps(results)) f.write(json.dumps(results))

View File

@ -14,15 +14,12 @@ def main(model, cachedir):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Download and save Hugging Face tokenizer") description="Download and save Hugging Face tokenizer"
parser.add_argument("--model", )
type=str, parser.add_argument("--model", type=str, required=True, help="Name of the model")
required=True, parser.add_argument(
help="Name of the model") "--cachedir", type=str, required=True, help="Directory to save the tokenizer"
parser.add_argument("--cachedir", )
type=str,
required=True,
help="Directory to save the tokenizer")
args = parser.parse_args() args = parser.parse_args()
main(args.model, args.cachedir) main(args.model, args.cachedir)

View File

@ -11,33 +11,33 @@ from tabulate import tabulate
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description= description="Parse command line arguments for summary-nightly-results script."
'Parse command line arguments for summary-nightly-results script.') )
parser.add_argument('--results-folder', parser.add_argument(
type=str, "--results-folder",
required=True, type=str,
help='The folder where the results are stored.') required=True,
parser.add_argument('--description', help="The folder where the results are stored.",
type=str, )
required=True, parser.add_argument(
help='Description of the results.') "--description", type=str, required=True, help="Description of the results."
)
args = parser.parse_args() args = parser.parse_args()
return args return args
def get_perf(df, method, model, metric): def get_perf(df, method, model, metric):
means = [] means = []
for qps in [2, 4, 8, 16, "inf"]: for qps in [2, 4, 8, 16, "inf"]:
target = df['Test name'].str.contains(model) target = df["Test name"].str.contains(model)
target = target & df['Engine'].str.contains(method) target = target & df["Engine"].str.contains(method)
target = target & df['Test name'].str.contains("qps_" + str(qps)) target = target & df["Test name"].str.contains("qps_" + str(qps))
filtered_df = df[target] filtered_df = df[target]
if filtered_df.empty: if filtered_df.empty:
means.append(0.) means.append(0.0)
else: else:
means.append(filtered_df[metric].values[0]) means.append(filtered_df[metric].values[0])
@ -45,7 +45,6 @@ def get_perf(df, method, model, metric):
def get_perf_w_std(df, method, model, metric): def get_perf_w_std(df, method, model, metric):
if metric in ["TTFT", "ITL"]: if metric in ["TTFT", "ITL"]:
mean = get_perf(df, method, model, "Mean " + metric + " (ms)") mean = get_perf(df, method, model, "Mean " + metric + " (ms)")
mean = mean.tolist() mean = mean.tolist()
@ -60,7 +59,8 @@ def get_perf_w_std(df, method, model, metric):
else: else:
assert metric == "Tput" assert metric == "Tput"
mean = get_perf(df, method, model, "Input Tput (tok/s)") + get_perf( mean = get_perf(df, method, model, "Input Tput (tok/s)") + get_perf(
df, method, model, "Output Tput (tok/s)") df, method, model, "Output Tput (tok/s)"
)
mean = mean.tolist() mean = mean.tolist()
std = None std = None
@ -80,18 +80,17 @@ def main(args):
# generate markdown table # generate markdown table
df = pd.DataFrame.from_dict(results) df = pd.DataFrame.from_dict(results)
md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False) md_table = tabulate(df, headers="keys", tablefmt="pipe", showindex=False)
with open(args.description) as f: with open(args.description) as f:
description = f.read() description = f.read()
description = description.format( description = description.format(nightly_results_benchmarking_table=md_table)
nightly_results_benchmarking_table=md_table)
with open("nightly_results.md", "w") as f: with open("nightly_results.md", "w") as f:
f.write(description) f.write(description)
if __name__ == '__main__': if __name__ == "__main__":
args = parse_arguments() args = parse_arguments()
main(args) main(args)

View File

@ -34,10 +34,8 @@ serving_column_mapping = {
} }
if __name__ == "__main__": if __name__ == "__main__":
# collect results # collect results
for test_file in results_folder.glob("*.json"): for test_file in results_folder.glob("*.json"):
with open(test_file) as f: with open(test_file) as f:
raw_result = json.loads(f.read()) raw_result = json.loads(f.read())
@ -56,17 +54,16 @@ if __name__ == "__main__":
serving_results = pd.DataFrame.from_dict(serving_results) serving_results = pd.DataFrame.from_dict(serving_results)
if not serving_results.empty: if not serving_results.empty:
serving_results = serving_results[list( serving_results = serving_results[list(serving_column_mapping.keys())].rename(
serving_column_mapping.keys())].rename( columns=serving_column_mapping
columns=serving_column_mapping) )
serving_md_table_with_headers = tabulate(serving_results, serving_md_table_with_headers = tabulate(
headers='keys', serving_results, headers="keys", tablefmt="pipe", showindex=False
tablefmt='pipe', )
showindex=False)
# remove the first line of header # remove the first line of header
serving_md_table_lines = serving_md_table_with_headers.split('\n') serving_md_table_lines = serving_md_table_with_headers.split("\n")
serving_md_table_without_header = '\n'.join(serving_md_table_lines[2:]) serving_md_table_without_header = "\n".join(serving_md_table_lines[2:])
prefix = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") prefix = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
prefix = prefix + "_" + os.environ.get("CURRENT_LLM_SERVING_ENGINE") prefix = prefix + "_" + os.environ.get("CURRENT_LLM_SERVING_ENGINE")
@ -76,10 +73,9 @@ if __name__ == "__main__":
# document results with header. # document results with header.
# for those who wants to reproduce our benchmark. # for those who wants to reproduce our benchmark.
f.write(serving_md_table_with_headers) f.write(serving_md_table_with_headers)
f.write('\n') f.write("\n")
# document benchmarking results in json # document benchmarking results in json
with open(results_folder / f"{prefix}_nightly_results.json", "w") as f: with open(results_folder / f"{prefix}_nightly_results.json", "w") as f:
results = serving_results.to_dict(orient="records")
results = serving_results.to_dict(orient='records')
f.write(json.dumps(results)) f.write(json.dumps(results))

55
.buildkite/pyproject.toml Normal file
View File

@ -0,0 +1,55 @@
# This local pyproject file is part of the migration from yapf to ruff format.
# It uses the same core rules as the main pyproject.toml file, but with the
# following differences:
# - isort profile is set to black
# - ruff line length is overridden to 88
# - deprecated typing ignores (UP006, UP035) have been removed
[tool.isort]
profile = "black"
[tool.ruff]
line-length = 88
exclude = [
# External file, leaving license intact
"examples/other/fp8/quantizer/quantize.py",
"vllm/vllm_flash_attn/flash_attn_interface.pyi"
]
[tool.ruff.lint.per-file-ignores]
"vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
# flake8-logging-format
"G",
]
ignore = [
# star imports
"F405", "F403",
# lambda expression assignment
"E731",
# Loop control variable not used within loop body
"B007",
# f-string format
"UP032",
# Can remove once 3.10+ is the minimum Python version
"UP007",
]
[tool.ruff.format]
docstring-code-format = true

View File

@ -16,6 +16,8 @@ repos:
hooks: hooks:
- id: ruff - id: ruff
args: [--output-format, github, --fix] args: [--output-format, github, --fix]
- id: ruff-format
files: ^(.buildkite).*
- repo: https://github.com/codespell-project/codespell - repo: https://github.com/codespell-project/codespell
rev: v2.4.1 rev: v2.4.1
hooks: hooks:
@ -26,6 +28,8 @@ repos:
rev: 6.0.1 rev: 6.0.1
hooks: hooks:
- id: isort - id: isort
# necessary during the transition from yapf to ruff format
args: [--resolve-all-configs, --config-root, .]
- repo: https://github.com/pre-commit/mirrors-clang-format - repo: https://github.com/pre-commit/mirrors-clang-format
rev: v20.1.3 rev: v20.1.3
hooks: hooks:

View File

@ -53,6 +53,7 @@ include = ["vllm*"]
[tool.yapfignore] [tool.yapfignore]
ignore_patterns = [ ignore_patterns = [
".buildkite/**",
"build/**", "build/**",
] ]
@ -107,6 +108,7 @@ select = [
"SIM", "SIM",
# isort # isort
# "I", # "I",
# flake8-logging-format
"G", "G",
] ]
ignore = [ ignore = [