group-first report instead of data-column-first

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
This commit is contained in:
Tsai, Louie 2025-12-19 23:52:46 -08:00
parent 63ebc2336d
commit 0e01150cb4

View File

@ -9,7 +9,7 @@ import json
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from importlib import util from importlib import util
from typing import List, Tuple from typing import Dict, List, Tuple
import pandas as pd import pandas as pd
@ -51,11 +51,11 @@ def compare_data_columns(
print("\ncompare_data_column:", data_column) print("\ncompare_data_column:", data_column)
frames = [] frames = []
raw_data_cols = [] raw_data_cols: List[str] = []
compare_frames = [] compare_frames = []
# 1) choose a canonical key list from info_cols that exists in ALL files # 1) choose a canonical key list from info_cols that exists in ALL files
cols_per_file = [] cols_per_file: List[set] = []
for f in files: for f in files:
try: try:
df_tmp = pd.read_json(f, orient="records") df_tmp = pd.read_json(f, orient="records")
@ -143,10 +143,7 @@ def compare_data_columns(
ratio.name = f"Ratio 1 vs {len(compare_frames)}" ratio.name = f"Ratio 1 vs {len(compare_frames)}"
frames.append(ratio) frames.append(ratio)
concat_df = pd.concat(frames, axis=1) concat_df = pd.concat(frames, axis=1).reset_index(drop=True)
# NOTE: meta already contains key columns as normal columns, so we can drop the index cleanly.
concat_df = concat_df.reset_index(drop=True)
# Ensure key/info columns appear first (in your info_cols order) # Ensure key/info columns appear first (in your info_cols order)
front = [c for c in info_cols if c in concat_df.columns] front = [c for c in info_cols if c in concat_df.columns]
@ -158,7 +155,7 @@ def compare_data_columns(
# ----------------------------- # -----------------------------
# Split helper (restored) # Split helper
# ----------------------------- # -----------------------------
def split_json_by_tp_pp( def split_json_by_tp_pp(
input_file: str = "benchmark_results.json", output_root: str = "." input_file: str = "benchmark_results.json", output_root: str = "."
@ -231,6 +228,7 @@ def _find_concurrency_col(df: pd.DataFrame) -> str:
]: ]:
if c in df.columns: if c in df.columns:
return c return c
# Fallback: guess an integer-like column (harmless if unused)
for c in df.columns: for c in df.columns:
if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1: if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1:
return c return c
@ -240,9 +238,16 @@ def _find_concurrency_col(df: pd.DataFrame) -> str:
def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler": def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler":
"""Highlight numeric per-configuration columns with value <= threshold.""" """Highlight numeric per-configuration columns with value <= threshold."""
conc_col = _find_concurrency_col(df) conc_col = _find_concurrency_col(df)
key_cols = [c for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col] if c in df.columns] key_cols = [
conf_cols = [c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")] c
for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col]
if c in df.columns
]
conf_cols = [
c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")
]
conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])] conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]
return df.style.map( return df.style.map(
lambda v: "background-color:#e6ffe6;font-weight:bold;" lambda v: "background-color:#e6ffe6;font-weight:bold;"
if pd.notna(v) and v <= threshold if pd.notna(v) and v <= threshold
@ -257,17 +262,20 @@ def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"):
if not ratio_cols: if not ratio_cols:
return styler return styler
# highlight cells # Highlight entire column (cells)
styler = styler.apply( styler = styler.apply(
lambda _: ["background-color: #fff3b0"] * len(styler.data), lambda _: ["background-color: #fff3b0"] * len(styler.data),
subset=ratio_cols, subset=ratio_cols,
axis=0, axis=0,
) )
# highlight headers # Highlight column headers
styler = styler.set_table_styles( styler = styler.set_table_styles(
[ [
{"selector": f"th.col_heading.level0.col{i}", "props": [("background-color", "#fff3b0")]} {
"selector": f"th.col_heading.level0.col{i}",
"props": [("background-color", "#fff3b0")],
}
for i, col in enumerate(styler.data.columns) for i, col in enumerate(styler.data.columns)
if col in ratio_cols if col in ratio_cols
], ],
@ -296,14 +304,17 @@ def _add_limit_line(fig, y_value: float, label: str):
x=[None], x=[None],
y=[None], y=[None],
mode="lines", mode="lines",
line=dict(dash="dash", color="red" if "ttft" in label.lower() else "blue"), line=dict(
dash="dash",
color="red" if "ttft" in label.lower() else "blue",
),
name=label, name=label,
) )
) )
# ----------------------------- # -----------------------------
# Refactored "main" # Refactored main + group-first report
# ----------------------------- # -----------------------------
@dataclass(frozen=True) @dataclass(frozen=True)
class MetricPlan: class MetricPlan:
@ -343,11 +354,14 @@ def build_parser() -> argparse.ArgumentParser:
def choose_metrics(latency: str) -> MetricPlan: def choose_metrics(latency: str) -> MetricPlan:
latency = (latency or "").lower() latency = (latency or "").lower()
drop_column = "P99" drop_column = "P99"
if "median" in latency: if "median" in latency:
return MetricPlan( return MetricPlan(
data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"], data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"],
drop_column=drop_column, drop_column=drop_column,
) )
# default: p99
return MetricPlan( return MetricPlan(
data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"], data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"],
drop_column=drop_column, drop_column=drop_column,
@ -357,11 +371,13 @@ def choose_metrics(latency: str) -> MetricPlan:
def prepare_input_files(args, info_cols: List[str]) -> Tuple[List[str], List[str]]:
    """Resolve the list of result files to compare and the info columns to use.

    Args:
        args: Parsed CLI arguments; only ``args.file`` is read here.
        info_cols: Candidate key/info column names.

    Returns:
        A ``(files, info_cols)`` tuple. With exactly one input file, ``files``
        is whatever ``split_json_by_tp_pp`` returns for it (written under
        ``splits``) and ``"TP Size"`` / ``"PP Size"`` are removed from
        ``info_cols``. With several input files, both values are passed
        through unchanged.

    Raises:
        ValueError: If ``args.file`` is empty or ``None``.
    """
    if not args.file:
        raise ValueError("No input files provided. Use -f/--file.")

    if len(args.file) == 1:
        files = split_json_by_tp_pp(args.file[0], output_root="splits")
        # NOTE(review): presumably TP/PP are dropped because each split file
        # already corresponds to one TP/PP combination -- confirm against
        # split_json_by_tp_pp.
        info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")]
    else:
        files = args.file

    return files, info_cols
@ -371,6 +387,7 @@ def get_y_axis_col(info_cols: List[str], xaxis: str) -> str:
def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]: def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
# Your current grouping rule: first 4 info columns
filtered_info_cols = info_cols[:4] filtered_info_cols = info_cols[:4]
group_cols = [c for c in filtered_info_cols if c in output_df.columns] group_cols = [c for c in filtered_info_cols if c in output_df.columns]
if not group_cols: if not group_cols:
@ -381,27 +398,38 @@ def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
return group_cols return group_cols
def normalize_group_key(name):
    """Normalize a pandas group key to a tuple.

    A pandas group key can be a scalar (one grouping column) or a tuple
    (several grouping columns).

    Args:
        name: The group key, scalar or tuple.

    Returns:
        ``name`` itself when it is already a tuple, otherwise the
        one-element tuple ``(name,)``.
    """
    return name if isinstance(name, tuple) else (name,)
def group_filename(name, prefix: str = "perf_comparison_") -> str:
    """Build the per-group HTML file name for a group key.

    Args:
        name: Group key, scalar or tuple; normalized with
            ``normalize_group_key``.
        prefix: Text placed in front of the sanitized key.

    Returns:
        ``"<prefix><safe>.html"``, where ``<safe>`` is the stringified key
        values joined together with every ``,`` turned into ``_`` and every
        ``/`` turned into ``-``.
    """
    name_vals = normalize_group_key(name)
    # Values are joined with "," and then all commas (separators and any
    # inside a value) become "_"; "/" is replaced so a value cannot
    # introduce a path separator into the file name.
    safe = ",".join(map(str, name_vals)).replace(",", "_").replace("/", "-")
    return f"{prefix}{safe}.html"
def build_group_suffix(group_cols: List[str], name) -> str:
    """Format a group key as a human-readable ``col : [ value ]`` label.

    Args:
        group_cols: Names of the grouping columns, in grouping order.
        name: Group key, scalar or tuple; normalized with
            ``normalize_group_key``.

    Returns:
        One ``"<col> : [ <val> ] "`` fragment per (column, value) pair,
        joined with ``" , "``. Pairing uses ``zip``, so extra columns or
        extra key values beyond the shorter sequence are ignored.
    """
    name_vals = normalize_group_key(name)
    return " , ".join(
        f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals)
    )
def render_metric_table_html(
display_group: pd.DataFrame,
metric_label: str,
group_suffix: str,
args,
) -> str:
title = ( title = (
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">' f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
f'{_html.escape(metric_label)}' f'{_html.escape(metric_label)}'
f'{_html.escape(suffix)}' f'{_html.escape(group_suffix)}'
f"</div>\n" f"</div>\n"
) )
metric_name = metric_label.lower() metric_name = metric_label.lower()
if "ttft" in metric_name: if "ttft" in metric_name:
styler = _highlight_threshold(display_group, args.ttft_max_ms) styler = _highlight_threshold(display_group, args.ttft_max_ms)
elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name): elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
@ -409,7 +437,6 @@ def render_metric_table_html(display_group: pd.DataFrame, metric_label: str, suf
else: else:
styler = display_group.style styler = display_group.style
# format numbers + highlight ratios
styler = styler.format( styler = styler.format(
{c: "{:.2f}" for c in display_group.select_dtypes("number").columns}, {c: "{:.2f}" for c in display_group.select_dtypes("number").columns},
na_rep="", na_rep="",
@ -460,41 +487,106 @@ def maybe_write_plot(
sub_fh.write(html) sub_fh.write(html)
def write_report(files: List[str], info_cols: List[str], plan: MetricPlan, args): def build_group_keys(df: pd.DataFrame, group_cols: List[str], sort_cols: List[str] | None = None):
"""Return a stable list of group keys from df."""
if sort_cols:
df = df.sort_values(by=sort_cols)
gb = df.groupby(group_cols, dropna=False)
return [k for k, _ in gb]
def write_report_group_first(files: List[str], info_cols: List[str], plan: MetricPlan, args):
"""
Group-first layout:
For each group, emit tok/s then TTFT then TPOT (or Median variants) together.
"""
name_column = "Test name" name_column = "Test name"
y_axis_col = get_y_axis_col(info_cols, args.xaxis) y_axis_col = get_y_axis_col(info_cols, args.xaxis)
print("comparing : " + ", ".join(files)) print("comparing : " + ", ".join(files))
# Precompute per-metric dataframes once
metric_cache: Dict[str, Tuple[pd.DataFrame, List[str]]] = {}
group_cols_canonical: List[str] | None = None
for metric_label in plan.data_cols:
output_df, raw_data_cols = compare_data_columns(
files,
name_column,
metric_label,
info_cols,
plan.drop_column,
debug=args.debug,
)
# plot expects y-axis column at the front
raw_data_cols = list(raw_data_cols)
raw_data_cols.insert(0, y_axis_col)
group_cols = get_group_cols(output_df, info_cols)
if group_cols_canonical is None:
group_cols_canonical = group_cols
else:
# keep intersection (stable order)
group_cols_canonical = [c for c in group_cols_canonical if c in group_cols]
metric_cache[metric_label] = (output_df.sort_values(by=args.xaxis), raw_data_cols)
if not group_cols_canonical:
raise ValueError("No canonical group columns found across metrics.")
# Canonical group keys from first metric (typically tok/s)
first_metric = plan.data_cols[0]
first_df_sorted, _ = metric_cache[first_metric]
group_keys = build_group_keys(first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis])
# Pre-build groupby objects per metric
metric_groupbys = {
metric_label: df.groupby(group_cols_canonical, dropna=False)
for metric_label, (df, _) in metric_cache.items()
}
with open("perf_comparison.html", "w") as main_fh: with open("perf_comparison.html", "w") as main_fh:
for metric_label in plan.data_cols: for gkey in group_keys:
output_df, raw_data_cols = compare_data_columns( gkey_tuple = normalize_group_key(gkey)
files, suffix = build_group_suffix(group_cols_canonical, gkey_tuple)
name_column, sub_path = group_filename(gkey_tuple)
metric_label,
info_cols, # Optional group header (separates each group visually)
plan.drop_column, group_header = (
debug=args.debug, f'<div style="font-size: 1.4em; font-weight: 700; margin: 18px 0 10px 0;">'
f'{_html.escape(suffix)}'
f"</div>\n"
) )
raw_data_cols = list(raw_data_cols) main_fh.write(group_header)
raw_data_cols.insert(0, y_axis_col) with open(sub_path, "w") as sub_fh:
sub_fh.write(group_header)
group_cols = get_group_cols(output_df, info_cols) for metric_label in plan.data_cols:
gb = metric_groupbys[metric_label]
df_sorted, raw_data_cols = metric_cache[metric_label]
output_df_sorted = output_df.sort_values(by=args.xaxis) try:
for name, group_df in output_df_sorted.groupby(group_cols, dropna=False): group_df = gb.get_group(gkey)
suffix = group_suffix(group_cols, name) except KeyError:
sub_path = group_filename(name) missing = (
f'<div style="font-size: 1.1em; font-weight: 600; margin: 10px 0;">'
f'{_html.escape(metric_label)} — missing for this group'
f"</div>\n"
)
main_fh.write(missing)
sub_fh.write(missing)
continue
# drop group columns from display only # Display-only: drop group columns
display_group = group_df.drop(columns=group_cols, errors="ignore") display_group = group_df.drop(columns=group_cols_canonical, errors="ignore")
html = render_metric_table_html(display_group, metric_label, suffix, args) html = render_metric_table_html(display_group, metric_label, suffix, args)
main_fh.write(html) main_fh.write(html)
with open(sub_path, "a+") as sub_fh:
sub_fh.write(html) sub_fh.write(html)
maybe_write_plot( maybe_write_plot(
main_fh, main_fh,
sub_fh, sub_fh,
@ -513,7 +605,9 @@ def main():
plan = choose_metrics(args.latency) plan = choose_metrics(args.latency)
files, info_cols = prepare_input_files(args, info_cols) files, info_cols = prepare_input_files(args, info_cols)
write_report(files, info_cols, plan, args)
# Group-first report layout
write_report_group_first(files, info_cols, plan, args)
if __name__ == "__main__": if __name__ == "__main__":