code refactor to improve readability

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
This commit is contained in:
Tsai, Louie 2025-12-19 23:42:19 -08:00
parent efa495545c
commit 63ebc2336d

View File

@ -1,26 +1,51 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import argparse import argparse
import html as _html
import json import json
import os import os
from dataclasses import dataclass
from importlib import util from importlib import util
from typing import List, Tuple
import pandas as pd import pandas as pd
pd.options.display.float_format = "{:.2f}".format pd.options.display.float_format = "{:.2f}".format
plotly_found = util.find_spec("plotly.express") is not None plotly_found = util.find_spec("plotly.express") is not None
DEFAULT_INFO_COLS = [
"Model",
"Dataset Name",
"Input Len",
"Output Len",
"TP Size",
"PP Size",
"# of max concurrency.",
"qps",
]
# -----------------------------
# Core data compare
# -----------------------------
def compare_data_columns( def compare_data_columns(
files, name_column, data_column, info_cols, drop_column, debug=False files: List[str],
name_column: str,
data_column: str,
info_cols: List[str],
drop_column: str,
debug: bool = False,
): ):
""" """
Align concatenation by keys derived from info_cols instead of row order. Align concatenation by keys derived from info_cols instead of row order.
- Pick one canonical key list: subset of info_cols present in ALL files. - Pick one canonical key list: subset of info_cols present in ALL files.
- For each file: set index to those keys, aggregate duplicates - For each file: set index to those keys, aggregate duplicates
- (mean for metric, first for names). (mean for metric, first for names).
- Concat along axis=1 (indexes align), then reset_index so callers can - Concat along axis=1 (indexes align), then reset_index so callers can
- group by columns. group by columns.
- If --debug, add a <file_label>_name column per file. - If --debug, add a <file_label>_name column per file.
""" """
print("\ncompare_data_column:", data_column) print("\ncompare_data_column:", data_column)
@ -94,7 +119,7 @@ def compare_data_columns(
frames.append(meta) frames.append(meta)
meta_added = True meta_added = True
# (NEW) debug: aligned test-name column per file # debug: aligned test-name column per file
if debug and name_column in df_idx.columns: if debug and name_column in df_idx.columns:
name_s = df_idx[name_column] name_s = df_idx[name_column]
if not name_s.index.is_unique: if not name_s.index.is_unique:
@ -106,24 +131,22 @@ def compare_data_columns(
raw_data_cols.append(file_label) raw_data_cols.append(file_label)
compare_frames.append(s) compare_frames.append(s)
# Generalize ratio: for any file N>=2, add ratio (fileN / file1) # ratio columns: fileN / file1 (throughput) or file1 / fileN (latency)
if len(compare_frames) >= 2: if len(compare_frames) >= 2:
base = compare_frames[0] base = compare_frames[0]
current = compare_frames[-1] current = compare_frames[-1]
if "P99" in data_column or "Median" in data_column: if "P99" in data_column or "Median" in data_column:
ratio = base / current # for latency ratio = base / current # for latency: larger means better
else: else:
ratio = current / base ratio = current / base # for throughput: larger means better
ratio = ratio.mask(base == 0) # avoid inf when baseline is 0 ratio = ratio.mask(base == 0)
ratio.name = f"Ratio 1 vs {len(compare_frames)}" ratio.name = f"Ratio 1 vs {len(compare_frames)}"
frames.append(ratio) frames.append(ratio)
# 4) concat on columns with aligned MultiIndex;
# then reset_index to return keys as columns
concat_df = pd.concat(frames, axis=1) concat_df = pd.concat(frames, axis=1)
concat_df = concat_df.reset_index(drop=True).reset_index()
if "index" in concat_df.columns: # NOTE: meta already contains key columns as normal columns, so we can drop the index cleanly.
concat_df = concat_df.drop(columns=["index"]) concat_df = concat_df.reset_index(drop=True)
# Ensure key/info columns appear first (in your info_cols order) # Ensure key/info columns appear first (in your info_cols order)
front = [c for c in info_cols if c in concat_df.columns] front = [c for c in info_cols if c in concat_df.columns]
@ -134,16 +157,18 @@ def compare_data_columns(
return concat_df, raw_data_cols return concat_df, raw_data_cols
# -----------------------------
# Split helper (restored)
# -----------------------------
def split_json_by_tp_pp( def split_json_by_tp_pp(
input_file: str = "benchmark_results.json", output_root: str = "." input_file: str = "benchmark_results.json", output_root: str = "."
) -> list[str]: ) -> List[str]:
""" """
Split a benchmark JSON into separate folders by (TP Size, PP Size). Split a benchmark JSON into separate folders by (TP Size, PP Size).
Creates: <output_root>/tp{TP}_pp{PP}/benchmark_results.json Creates: <output_root>/tp{TP}_pp{PP}/benchmark_results.json
Returns: list of file paths written. Returns: list of file paths written.
""" """
# Load JSON data into DataFrame
with open(input_file, encoding="utf-8") as f: with open(input_file, encoding="utf-8") as f:
data = json.load(f) data = json.load(f)
@ -161,9 +186,7 @@ def split_json_by_tp_pp(
(c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None (c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None
) )
if name_col: if name_col:
df = df[ df = df[df[name_col].astype(str).str.contains(r"serving", case=False, na=False)].copy()
df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
].copy()
# Handle alias column names # Handle alias column names
rename_map = { rename_map = {
@ -172,9 +195,7 @@ def split_json_by_tp_pp(
"pp_size": "PP Size", "pp_size": "PP Size",
"pipeline_parallel_size": "PP Size", "pipeline_parallel_size": "PP Size",
} }
df.rename( df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True
)
# Ensure TP/PP columns exist (default to 1 if missing) # Ensure TP/PP columns exist (default to 1 if missing)
if "TP Size" not in df.columns: if "TP Size" not in df.columns:
@ -182,16 +203,10 @@ def split_json_by_tp_pp(
if "PP Size" not in df.columns: if "PP Size" not in df.columns:
df["PP Size"] = 1 df["PP Size"] = 1
# make sure TP/PP are numeric ints with no NaN df["TP Size"] = pd.to_numeric(df["TP Size"], errors="coerce").fillna(1).astype(int)
df["TP Size"] = ( df["PP Size"] = pd.to_numeric(df["PP Size"], errors="coerce").fillna(1).astype(int)
pd.to_numeric(df.get("TP Size", 1), errors="coerce").fillna(1).astype(int)
)
df["PP Size"] = (
pd.to_numeric(df.get("PP Size", 1), errors="coerce").fillna(1).astype(int)
)
# Split into separate folders saved_paths: List[str] = []
saved_paths: list[str] = []
for (tp, pp), group_df in df.groupby(["TP Size", "PP Size"], dropna=False): for (tp, pp), group_df in df.groupby(["TP Size", "PP Size"], dropna=False):
folder_name = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}") folder_name = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}")
os.makedirs(folder_name, exist_ok=True) os.makedirs(folder_name, exist_ok=True)
@ -203,32 +218,9 @@ def split_json_by_tp_pp(
return saved_paths return saved_paths
def _add_limit_line(fig, y_value, label): # -----------------------------
# Visible dashed line + annotation # Styling helpers
fig.add_hline( # -----------------------------
y=y_value,
line_dash="dash",
line_color="red" if "ttft" in label.lower() else "blue",
annotation_text=f"{label}: {y_value} ms",
annotation_position="top left",
)
# Optional: add a legend item (as a transparent helper trace)
if plot and plotly_found:
import plotly.graph_objects as go
fig.add_trace(
go.Scatter(
x=[None],
y=[None],
mode="lines",
line=dict(
dash="dash", color="red" if "ttft" in label.lower() else "blue"
),
name=f"{label}",
)
)
def _find_concurrency_col(df: pd.DataFrame) -> str: def _find_concurrency_col(df: pd.DataFrame) -> str:
for c in [ for c in [
"# of max concurrency.", "# of max concurrency.",
@ -239,26 +231,17 @@ def _find_concurrency_col(df: pd.DataFrame) -> str:
]: ]:
if c in df.columns: if c in df.columns:
return c return c
# Fallback: guess an integer-like column (harmless if unused)
for c in df.columns: for c in df.columns:
if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1: if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1:
return c return c
return "# of max concurrency." return "# of max concurrency."
def _highlight_threshold( def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler":
df: pd.DataFrame, threshold: float
) -> "pd.io.formats.style.Styler":
"""Highlight numeric per-configuration columns with value <= threshold.""" """Highlight numeric per-configuration columns with value <= threshold."""
conc_col = _find_concurrency_col(df) conc_col = _find_concurrency_col(df)
key_cols = [ key_cols = [c for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col] if c in df.columns]
c conf_cols = [c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")]
for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col]
if c in df.columns
]
conf_cols = [
c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")
]
conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])] conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]
return df.style.map( return df.style.map(
lambda v: "background-color:#e6ffe6;font-weight:bold;" lambda v: "background-color:#e6ffe6;font-weight:bold;"
@ -267,45 +250,71 @@ def _highlight_threshold(
subset=conf_cols, subset=conf_cols,
) )
def highlight_ratio_columns(styler):
ratio_cols = [
c for c in styler.data.columns
if "ratio" in str(c).lower()
]
def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"):
"""Highlight entire columns whose header contains 'Ratio'."""
ratio_cols = [c for c in styler.data.columns if "ratio" in str(c).lower()]
if not ratio_cols: if not ratio_cols:
return styler return styler
# Highlight entire column (cells) # highlight cells
styler = styler.apply( styler = styler.apply(
lambda _: ["background-color: #fff3b0"] * len(styler.data), lambda _: ["background-color: #fff3b0"] * len(styler.data),
subset=ratio_cols, subset=ratio_cols,
axis=0, axis=0,
) )
# Highlight column headers # highlight headers
styler = styler.set_table_styles( styler = styler.set_table_styles(
[ [
{ {"selector": f"th.col_heading.level0.col{i}", "props": [("background-color", "#fff3b0")]}
"selector": f"th.col_heading.level0.col{i}",
"props": [("background-color", "#fff3b0")],
}
for i, col in enumerate(styler.data.columns) for i, col in enumerate(styler.data.columns)
if col in ratio_cols if col in ratio_cols
], ],
overwrite=False, overwrite=False,
) )
return styler return styler
if __name__ == "__main__":
# -----------------------------
# Plot helper
# -----------------------------
def _add_limit_line(fig, y_value: float, label: str):
fig.add_hline(
y=y_value,
line_dash="dash",
line_color="red" if "ttft" in label.lower() else "blue",
annotation_text=f"{label}: {y_value} ms",
annotation_position="top left",
)
# If plotly is available, add a legend entry
if plotly_found:
import plotly.graph_objects as go
fig.add_trace(
go.Scatter(
x=[None],
y=[None],
mode="lines",
line=dict(dash="dash", color="red" if "ttft" in label.lower() else "blue"),
name=label,
)
)
# -----------------------------
# Refactored "main"
# -----------------------------
@dataclass(frozen=True)
class MetricPlan:
data_cols: List[str]
drop_column: str
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument("-f", "--file", action="append", type=str, help="input file name")
"-f", "--file", action="append", type=str, help="input file name" parser.add_argument("--debug", action="store_true", help="show all information for debugging")
)
parser.add_argument(
"--debug", action="store_true", help="show all information for debugging"
)
parser.add_argument( parser.add_argument(
"--plot", "--plot",
action=argparse.BooleanOptionalAction, action=argparse.BooleanOptionalAction,
@ -326,188 +335,187 @@ if __name__ == "__main__":
default="p99", default="p99",
help="take median|p99 for latency like TTFT/TPOT", help="take median|p99 for latency like TTFT/TPOT",
) )
parser.add_argument( parser.add_argument("--ttft-max-ms", type=float, default=3000.0, help="Reference limit for TTFT plots (ms)")
"--ttft-max-ms", parser.add_argument("--tpot-max-ms", type=float, default=100.0, help="Reference limit for TPOT plots (ms)")
type=float, return parser
default=3000.0,
help="Reference limit for TTFT plots (ms)",
)
parser.add_argument(
"--tpot-max-ms",
type=float,
default=100.0,
help="Reference limit for TPOT plots (ms)",
)
args = parser.parse_args()
def choose_metrics(latency: str) -> MetricPlan:
latency = (latency or "").lower()
drop_column = "P99" drop_column = "P99"
name_column = "Test name" if "median" in latency:
info_cols = [ return MetricPlan(
"Model", data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"],
"Dataset Name", drop_column=drop_column,
"Input Len", )
"Output Len", return MetricPlan(
"TP Size", data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"],
"PP Size", drop_column=drop_column,
"# of max concurrency.", )
"qps",
]
if "median" in args.latency:
data_cols_to_compare = ["Output Tput (tok/s)", "Median TTFT (ms)", "Median"]
html_msgs_for_data_cols = [
"Compare Output Tokens /n",
"Median TTFT /n",
"Median TPOT /n",
]
drop_column = "P99"
elif "p99" in args.latency:
data_cols_to_compare = ["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"]
html_msgs_for_data_cols = [
"Compare Output Tokens /n",
"P99 TTFT /n",
"P99 TPOT /n",
]
def prepare_input_files(args, info_cols: List[str]) -> Tuple[List[str], List[str]]:
if not args.file:
raise ValueError("No input files provided. Use -f/--file.")
if len(args.file) == 1: if len(args.file) == 1:
files = split_json_by_tp_pp(args.file[0], output_root="splits") files = split_json_by_tp_pp(args.file[0], output_root="splits")
info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")] info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")]
else: else:
files = args.file files = args.file
return files, info_cols
def get_y_axis_col(info_cols: List[str], xaxis: str) -> str:
y_axis_index = info_cols.index(xaxis) if xaxis in info_cols else 6
return info_cols[y_axis_index]
def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
filtered_info_cols = info_cols[:4]
group_cols = [c for c in filtered_info_cols if c in output_df.columns]
if not group_cols:
raise ValueError(
f"No valid group-by columns. Expected subset: {filtered_info_cols}, "
f"but DataFrame has: {list(output_df.columns)}"
)
return group_cols
def group_suffix(group_cols: List[str], name) -> str:
name_vals = name if isinstance(name, tuple) else (name,)
return " , ".join(f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals))
def group_filename(name, prefix: str = "perf_comparison_") -> str:
name_vals = name if isinstance(name, tuple) else (name,)
safe = ",".join(map(str, name_vals)).replace(",", "_").replace("/", "-")
return f"{prefix}{safe}.html"
def render_metric_table_html(display_group: pd.DataFrame, metric_label: str, suffix: str, args) -> str:
title = (
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
f'{_html.escape(metric_label)}'
f'{_html.escape(suffix)}'
f"</div>\n"
)
metric_name = metric_label.lower()
if "ttft" in metric_name:
styler = _highlight_threshold(display_group, args.ttft_max_ms)
elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
styler = _highlight_threshold(display_group, args.tpot_max_ms)
else:
styler = display_group.style
# format numbers + highlight ratios
styler = styler.format(
{c: "{:.2f}" for c in display_group.select_dtypes("number").columns},
na_rep="",
)
styler = highlight_ratio_columns(styler)
return title + styler.to_html(table_attributes='border="1" class="dataframe"')
def maybe_write_plot(
main_fh,
sub_fh,
group_df: pd.DataFrame,
raw_data_cols: List[str],
metric_label: str,
y_axis_col: str,
args,
):
if not (args.plot and plotly_found):
return
import plotly.express as px
df = group_df[raw_data_cols].sort_values(by=y_axis_col)
df_melted = df.melt(
id_vars=y_axis_col,
var_name="Configuration",
value_name=metric_label,
)
fig = px.line(
df_melted,
x=y_axis_col,
y=metric_label,
color="Configuration",
title=f"{metric_label} vs {y_axis_col}",
markers=True,
)
metric_name = metric_label.lower()
if "ttft" in metric_name:
_add_limit_line(fig, args.ttft_max_ms, "TTFT limit")
elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
_add_limit_line(fig, args.tpot_max_ms, "TPOT limit")
html = fig.to_html(full_html=True, include_plotlyjs="cdn")
main_fh.write(html)
sub_fh.write(html)
def write_report(files: List[str], info_cols: List[str], plan: MetricPlan, args):
name_column = "Test name"
y_axis_col = get_y_axis_col(info_cols, args.xaxis)
print("comparing : " + ", ".join(files)) print("comparing : " + ", ".join(files))
debug = args.debug
plot = args.plot with open("perf_comparison.html", "w") as main_fh:
# For Plot feature, assign y axis from one of info_cols for metric_label in plan.data_cols:
y_axis_index = info_cols.index(args.xaxis) if args.xaxis in info_cols else 6
with open("perf_comparison.html", "w") as text_file:
for i in range(len(data_cols_to_compare)):
output_df, raw_data_cols = compare_data_columns( output_df, raw_data_cols = compare_data_columns(
files, files,
name_column, name_column,
data_cols_to_compare[i], metric_label,
info_cols, info_cols,
drop_column, plan.drop_column,
debug=debug, debug=args.debug,
) )
# For Plot feature, insert y axis from one of info_cols raw_data_cols = list(raw_data_cols)
raw_data_cols.insert(0, info_cols[y_axis_index]) raw_data_cols.insert(0, y_axis_col)
group_cols = get_group_cols(output_df, info_cols)
filtered_info_cols = info_cols[:4]
existing_group_cols = [
c for c in filtered_info_cols if c in output_df.columns
]
if not existing_group_cols:
raise ValueError(
f"No valid group-by columns "
f"Expected subset: {filtered_info_cols}, "
f"but DataFrame has: {list(output_df.columns)}"
)
# output_df_sorted = output_df.sort_values(by=existing_group_cols)
output_df_sorted = output_df.sort_values(by=args.xaxis) output_df_sorted = output_df.sort_values(by=args.xaxis)
output_groups = output_df_sorted.groupby(existing_group_cols, dropna=False) for name, group_df in output_df_sorted.groupby(group_cols, dropna=False):
for name, group in output_groups: suffix = group_suffix(group_cols, name)
group_name = ( sub_path = group_filename(name)
",".join(map(str, name)).replace(",", "_").replace("/", "-")
)
group_html_name = "perf_comparison_" + group_name + ".html"
import html as _html
name_vals = name if isinstance(name, tuple) else (name,)
group_title_suffix = " , ".join(
f"{col} : [ {val} ] " for col, val in zip(existing_group_cols, name_vals)
)
# --------------------------------------------- # drop group columns from display only
# DROP group columns from DISPLAY ONLY display_group = group_df.drop(columns=group_cols, errors="ignore")
# ---------------------------------------------
display_group = group.drop(columns=existing_group_cols, errors="ignore")
metric_name = str(data_cols_to_compare[i]).lower() html = render_metric_table_html(display_group, metric_label, suffix, args)
if "tok/s" in metric_name:
styler = display_group.style main_fh.write(html)
styler = highlight_ratio_columns(styler) with open(sub_path, "a+") as sub_fh:
html = ( sub_fh.write(html)
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">' maybe_write_plot(
f'{_html.escape(data_cols_to_compare[i])}' main_fh,
f'{_html.escape(group_title_suffix)}' sub_fh,
f'</div>\n' group_df=group_df,
+ styler.to_html(table_attributes='border="1" class="dataframe"') raw_data_cols=raw_data_cols,
) metric_label=metric_label,
elif "ttft" in metric_name: y_axis_col=y_axis_col,
styler = _highlight_threshold(display_group, args.ttft_max_ms).format( args=args,
{c: "{:.2f}" for c in display_group.select_dtypes("number").columns},
na_rep="",
)
styler = highlight_ratio_columns(styler)
html = (
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
f'{_html.escape(data_cols_to_compare[i])}'
f'{_html.escape(group_title_suffix)}'
f'</div>\n'
+ styler.to_html(table_attributes='border="1" class="dataframe"')
) )
elif (
"tpot" in metric_name
or "median" in metric_name
or "p99" in metric_name
):
styler = _highlight_threshold(display_group, args.tpot_max_ms).format(
{c: "{:.2f}" for c in display_group.select_dtypes("number").columns},
na_rep="",
)
styler = highlight_ratio_columns(styler)
html = (
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
f'{_html.escape(data_cols_to_compare[i])}'
f'{_html.escape(group_title_suffix)}'
f'</div>\n'
+ styler.to_html(table_attributes='border="1" class="dataframe"')
)
text_file.write(html)
with open(group_html_name, "a+") as sub_text_file:
sub_text_file.write(html)
if plot and plotly_found: def main():
import plotly.express as px args = build_parser().parse_args()
df = group[raw_data_cols] info_cols = list(DEFAULT_INFO_COLS)
df_sorted = df.sort_values(by=info_cols[y_axis_index]) plan = choose_metrics(args.latency)
# Melt DataFrame for plotting
df_melted = df_sorted.melt(
id_vars=info_cols[y_axis_index],
var_name="Configuration",
value_name=data_cols_to_compare[i],
)
title = (
data_cols_to_compare[i] + " vs " + info_cols[y_axis_index]
)
# Create Plotly line chart
fig = px.line(
df_melted,
x=info_cols[y_axis_index],
y=data_cols_to_compare[i],
color="Configuration",
title=title,
markers=True,
)
# ---- Add threshold lines based on metric name ---- files, info_cols = prepare_input_files(args, info_cols)
if "ttft" in metric_name: write_report(files, info_cols, plan, args)
_add_limit_line(fig, args.ttft_max_ms, "TTFT limit")
elif (
"tpot" in metric_name if __name__ == "__main__":
or "median" in metric_name main()
or "p99" in metric_name
):
_add_limit_line(fig, args.tpot_max_ms, "TPOT limit")
# Export to HTML
text_file.write(
fig.to_html(full_html=True, include_plotlyjs="cdn")
)
sub_text_file.write(
fig.to_html(full_html=True, include_plotlyjs="cdn")
)