mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-10 18:50:15 +08:00
group-first report instead of data-column-first
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
This commit is contained in:
parent
63ebc2336d
commit
0e01150cb4
@ -9,7 +9,7 @@ import json
|
|||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from importlib import util
|
from importlib import util
|
||||||
from typing import List, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
@ -51,11 +51,11 @@ def compare_data_columns(
|
|||||||
print("\ncompare_data_column:", data_column)
|
print("\ncompare_data_column:", data_column)
|
||||||
|
|
||||||
frames = []
|
frames = []
|
||||||
raw_data_cols = []
|
raw_data_cols: List[str] = []
|
||||||
compare_frames = []
|
compare_frames = []
|
||||||
|
|
||||||
# 1) choose a canonical key list from info_cols that exists in ALL files
|
# 1) choose a canonical key list from info_cols that exists in ALL files
|
||||||
cols_per_file = []
|
cols_per_file: List[set] = []
|
||||||
for f in files:
|
for f in files:
|
||||||
try:
|
try:
|
||||||
df_tmp = pd.read_json(f, orient="records")
|
df_tmp = pd.read_json(f, orient="records")
|
||||||
@ -143,10 +143,7 @@ def compare_data_columns(
|
|||||||
ratio.name = f"Ratio 1 vs {len(compare_frames)}"
|
ratio.name = f"Ratio 1 vs {len(compare_frames)}"
|
||||||
frames.append(ratio)
|
frames.append(ratio)
|
||||||
|
|
||||||
concat_df = pd.concat(frames, axis=1)
|
concat_df = pd.concat(frames, axis=1).reset_index(drop=True)
|
||||||
|
|
||||||
# NOTE: meta already contains key columns as normal columns, so we can drop the index cleanly.
|
|
||||||
concat_df = concat_df.reset_index(drop=True)
|
|
||||||
|
|
||||||
# Ensure key/info columns appear first (in your info_cols order)
|
# Ensure key/info columns appear first (in your info_cols order)
|
||||||
front = [c for c in info_cols if c in concat_df.columns]
|
front = [c for c in info_cols if c in concat_df.columns]
|
||||||
@ -158,7 +155,7 @@ def compare_data_columns(
|
|||||||
|
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Split helper (restored)
|
# Split helper
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
def split_json_by_tp_pp(
|
def split_json_by_tp_pp(
|
||||||
input_file: str = "benchmark_results.json", output_root: str = "."
|
input_file: str = "benchmark_results.json", output_root: str = "."
|
||||||
@ -231,6 +228,7 @@ def _find_concurrency_col(df: pd.DataFrame) -> str:
|
|||||||
]:
|
]:
|
||||||
if c in df.columns:
|
if c in df.columns:
|
||||||
return c
|
return c
|
||||||
|
# Fallback: guess an integer-like column (harmless if unused)
|
||||||
for c in df.columns:
|
for c in df.columns:
|
||||||
if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1:
|
if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1:
|
||||||
return c
|
return c
|
||||||
@ -240,9 +238,16 @@ def _find_concurrency_col(df: pd.DataFrame) -> str:
|
|||||||
def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler":
|
def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler":
|
||||||
"""Highlight numeric per-configuration columns with value <= threshold."""
|
"""Highlight numeric per-configuration columns with value <= threshold."""
|
||||||
conc_col = _find_concurrency_col(df)
|
conc_col = _find_concurrency_col(df)
|
||||||
key_cols = [c for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col] if c in df.columns]
|
key_cols = [
|
||||||
conf_cols = [c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")]
|
c
|
||||||
|
for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col]
|
||||||
|
if c in df.columns
|
||||||
|
]
|
||||||
|
conf_cols = [
|
||||||
|
c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")
|
||||||
|
]
|
||||||
conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]
|
conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]
|
||||||
|
|
||||||
return df.style.map(
|
return df.style.map(
|
||||||
lambda v: "background-color:#e6ffe6;font-weight:bold;"
|
lambda v: "background-color:#e6ffe6;font-weight:bold;"
|
||||||
if pd.notna(v) and v <= threshold
|
if pd.notna(v) and v <= threshold
|
||||||
@ -257,17 +262,20 @@ def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"):
|
|||||||
if not ratio_cols:
|
if not ratio_cols:
|
||||||
return styler
|
return styler
|
||||||
|
|
||||||
# highlight cells
|
# Highlight entire column (cells)
|
||||||
styler = styler.apply(
|
styler = styler.apply(
|
||||||
lambda _: ["background-color: #fff3b0"] * len(styler.data),
|
lambda _: ["background-color: #fff3b0"] * len(styler.data),
|
||||||
subset=ratio_cols,
|
subset=ratio_cols,
|
||||||
axis=0,
|
axis=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# highlight headers
|
# Highlight column headers
|
||||||
styler = styler.set_table_styles(
|
styler = styler.set_table_styles(
|
||||||
[
|
[
|
||||||
{"selector": f"th.col_heading.level0.col{i}", "props": [("background-color", "#fff3b0")]}
|
{
|
||||||
|
"selector": f"th.col_heading.level0.col{i}",
|
||||||
|
"props": [("background-color", "#fff3b0")],
|
||||||
|
}
|
||||||
for i, col in enumerate(styler.data.columns)
|
for i, col in enumerate(styler.data.columns)
|
||||||
if col in ratio_cols
|
if col in ratio_cols
|
||||||
],
|
],
|
||||||
@ -296,14 +304,17 @@ def _add_limit_line(fig, y_value: float, label: str):
|
|||||||
x=[None],
|
x=[None],
|
||||||
y=[None],
|
y=[None],
|
||||||
mode="lines",
|
mode="lines",
|
||||||
line=dict(dash="dash", color="red" if "ttft" in label.lower() else "blue"),
|
line=dict(
|
||||||
|
dash="dash",
|
||||||
|
color="red" if "ttft" in label.lower() else "blue",
|
||||||
|
),
|
||||||
name=label,
|
name=label,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Refactored "main"
|
# Refactored main + group-first report
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class MetricPlan:
|
class MetricPlan:
|
||||||
@ -343,11 +354,14 @@ def build_parser() -> argparse.ArgumentParser:
|
|||||||
def choose_metrics(latency: str) -> MetricPlan:
|
def choose_metrics(latency: str) -> MetricPlan:
|
||||||
latency = (latency or "").lower()
|
latency = (latency or "").lower()
|
||||||
drop_column = "P99"
|
drop_column = "P99"
|
||||||
|
|
||||||
if "median" in latency:
|
if "median" in latency:
|
||||||
return MetricPlan(
|
return MetricPlan(
|
||||||
data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"],
|
data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"],
|
||||||
drop_column=drop_column,
|
drop_column=drop_column,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# default: p99
|
||||||
return MetricPlan(
|
return MetricPlan(
|
||||||
data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"],
|
data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"],
|
||||||
drop_column=drop_column,
|
drop_column=drop_column,
|
||||||
@ -357,11 +371,13 @@ def choose_metrics(latency: str) -> MetricPlan:
|
|||||||
def prepare_input_files(args, info_cols: List[str]) -> Tuple[List[str], List[str]]:
|
def prepare_input_files(args, info_cols: List[str]) -> Tuple[List[str], List[str]]:
|
||||||
if not args.file:
|
if not args.file:
|
||||||
raise ValueError("No input files provided. Use -f/--file.")
|
raise ValueError("No input files provided. Use -f/--file.")
|
||||||
|
|
||||||
if len(args.file) == 1:
|
if len(args.file) == 1:
|
||||||
files = split_json_by_tp_pp(args.file[0], output_root="splits")
|
files = split_json_by_tp_pp(args.file[0], output_root="splits")
|
||||||
info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")]
|
info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")]
|
||||||
else:
|
else:
|
||||||
files = args.file
|
files = args.file
|
||||||
|
|
||||||
return files, info_cols
|
return files, info_cols
|
||||||
|
|
||||||
|
|
||||||
@ -371,6 +387,7 @@ def get_y_axis_col(info_cols: List[str], xaxis: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
|
def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
|
||||||
|
# Your current grouping rule: first 4 info columns
|
||||||
filtered_info_cols = info_cols[:4]
|
filtered_info_cols = info_cols[:4]
|
||||||
group_cols = [c for c in filtered_info_cols if c in output_df.columns]
|
group_cols = [c for c in filtered_info_cols if c in output_df.columns]
|
||||||
if not group_cols:
|
if not group_cols:
|
||||||
@ -381,27 +398,38 @@ def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
|
|||||||
return group_cols
|
return group_cols
|
||||||
|
|
||||||
|
|
||||||
def group_suffix(group_cols: List[str], name) -> str:
|
def normalize_group_key(name):
|
||||||
name_vals = name if isinstance(name, tuple) else (name,)
|
"""Pandas group key can be scalar (1 col) or tuple (N cols). Normalize to tuple."""
|
||||||
return " , ".join(f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals))
|
return name if isinstance(name, tuple) else (name,)
|
||||||
|
|
||||||
|
|
||||||
def group_filename(name, prefix: str = "perf_comparison_") -> str:
|
def group_filename(name, prefix: str = "perf_comparison_") -> str:
|
||||||
name_vals = name if isinstance(name, tuple) else (name,)
|
name_vals = normalize_group_key(name)
|
||||||
safe = ",".join(map(str, name_vals)).replace(",", "_").replace("/", "-")
|
safe = ",".join(map(str, name_vals)).replace(",", "_").replace("/", "-")
|
||||||
return f"{prefix}{safe}.html"
|
return f"{prefix}{safe}.html"
|
||||||
|
|
||||||
|
|
||||||
def render_metric_table_html(display_group: pd.DataFrame, metric_label: str, suffix: str, args) -> str:
|
def build_group_suffix(group_cols: List[str], name) -> str:
|
||||||
|
name_vals = normalize_group_key(name)
|
||||||
|
return " , ".join(
|
||||||
|
f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def render_metric_table_html(
|
||||||
|
display_group: pd.DataFrame,
|
||||||
|
metric_label: str,
|
||||||
|
group_suffix: str,
|
||||||
|
args,
|
||||||
|
) -> str:
|
||||||
title = (
|
title = (
|
||||||
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
|
f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
|
||||||
f'{_html.escape(metric_label)}'
|
f'{_html.escape(metric_label)}'
|
||||||
f' — {_html.escape(suffix)}'
|
f' — {_html.escape(group_suffix)}'
|
||||||
f"</div>\n"
|
f"</div>\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
metric_name = metric_label.lower()
|
metric_name = metric_label.lower()
|
||||||
|
|
||||||
if "ttft" in metric_name:
|
if "ttft" in metric_name:
|
||||||
styler = _highlight_threshold(display_group, args.ttft_max_ms)
|
styler = _highlight_threshold(display_group, args.ttft_max_ms)
|
||||||
elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
|
elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
|
||||||
@ -409,7 +437,6 @@ def render_metric_table_html(display_group: pd.DataFrame, metric_label: str, suf
|
|||||||
else:
|
else:
|
||||||
styler = display_group.style
|
styler = display_group.style
|
||||||
|
|
||||||
# format numbers + highlight ratios
|
|
||||||
styler = styler.format(
|
styler = styler.format(
|
||||||
{c: "{:.2f}" for c in display_group.select_dtypes("number").columns},
|
{c: "{:.2f}" for c in display_group.select_dtypes("number").columns},
|
||||||
na_rep="—",
|
na_rep="—",
|
||||||
@ -460,41 +487,106 @@ def maybe_write_plot(
|
|||||||
sub_fh.write(html)
|
sub_fh.write(html)
|
||||||
|
|
||||||
|
|
||||||
def write_report(files: List[str], info_cols: List[str], plan: MetricPlan, args):
|
def build_group_keys(df: pd.DataFrame, group_cols: List[str], sort_cols: List[str] | None = None):
|
||||||
|
"""Return a stable list of group keys from df."""
|
||||||
|
if sort_cols:
|
||||||
|
df = df.sort_values(by=sort_cols)
|
||||||
|
gb = df.groupby(group_cols, dropna=False)
|
||||||
|
return [k for k, _ in gb]
|
||||||
|
|
||||||
|
|
||||||
|
def write_report_group_first(files: List[str], info_cols: List[str], plan: MetricPlan, args):
|
||||||
|
"""
|
||||||
|
Group-first layout:
|
||||||
|
For each group, emit tok/s then TTFT then TPOT (or Median variants) together.
|
||||||
|
"""
|
||||||
name_column = "Test name"
|
name_column = "Test name"
|
||||||
y_axis_col = get_y_axis_col(info_cols, args.xaxis)
|
y_axis_col = get_y_axis_col(info_cols, args.xaxis)
|
||||||
|
|
||||||
print("comparing : " + ", ".join(files))
|
print("comparing : " + ", ".join(files))
|
||||||
|
|
||||||
|
# Precompute per-metric dataframes once
|
||||||
|
metric_cache: Dict[str, Tuple[pd.DataFrame, List[str]]] = {}
|
||||||
|
group_cols_canonical: List[str] | None = None
|
||||||
|
|
||||||
|
for metric_label in plan.data_cols:
|
||||||
|
output_df, raw_data_cols = compare_data_columns(
|
||||||
|
files,
|
||||||
|
name_column,
|
||||||
|
metric_label,
|
||||||
|
info_cols,
|
||||||
|
plan.drop_column,
|
||||||
|
debug=args.debug,
|
||||||
|
)
|
||||||
|
|
||||||
|
# plot expects y-axis column at the front
|
||||||
|
raw_data_cols = list(raw_data_cols)
|
||||||
|
raw_data_cols.insert(0, y_axis_col)
|
||||||
|
|
||||||
|
group_cols = get_group_cols(output_df, info_cols)
|
||||||
|
if group_cols_canonical is None:
|
||||||
|
group_cols_canonical = group_cols
|
||||||
|
else:
|
||||||
|
# keep intersection (stable order)
|
||||||
|
group_cols_canonical = [c for c in group_cols_canonical if c in group_cols]
|
||||||
|
|
||||||
|
metric_cache[metric_label] = (output_df.sort_values(by=args.xaxis), raw_data_cols)
|
||||||
|
|
||||||
|
if not group_cols_canonical:
|
||||||
|
raise ValueError("No canonical group columns found across metrics.")
|
||||||
|
|
||||||
|
# Canonical group keys from first metric (typically tok/s)
|
||||||
|
first_metric = plan.data_cols[0]
|
||||||
|
first_df_sorted, _ = metric_cache[first_metric]
|
||||||
|
group_keys = build_group_keys(first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis])
|
||||||
|
|
||||||
|
# Pre-build groupby objects per metric
|
||||||
|
metric_groupbys = {
|
||||||
|
metric_label: df.groupby(group_cols_canonical, dropna=False)
|
||||||
|
for metric_label, (df, _) in metric_cache.items()
|
||||||
|
}
|
||||||
|
|
||||||
with open("perf_comparison.html", "w") as main_fh:
|
with open("perf_comparison.html", "w") as main_fh:
|
||||||
for metric_label in plan.data_cols:
|
for gkey in group_keys:
|
||||||
output_df, raw_data_cols = compare_data_columns(
|
gkey_tuple = normalize_group_key(gkey)
|
||||||
files,
|
suffix = build_group_suffix(group_cols_canonical, gkey_tuple)
|
||||||
name_column,
|
sub_path = group_filename(gkey_tuple)
|
||||||
metric_label,
|
|
||||||
info_cols,
|
# Optional group header (separates each group visually)
|
||||||
plan.drop_column,
|
group_header = (
|
||||||
debug=args.debug,
|
f'<div style="font-size: 1.4em; font-weight: 700; margin: 18px 0 10px 0;">'
|
||||||
|
f'{_html.escape(suffix)}'
|
||||||
|
f"</div>\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
raw_data_cols = list(raw_data_cols)
|
main_fh.write(group_header)
|
||||||
raw_data_cols.insert(0, y_axis_col)
|
with open(sub_path, "w") as sub_fh:
|
||||||
|
sub_fh.write(group_header)
|
||||||
|
|
||||||
group_cols = get_group_cols(output_df, info_cols)
|
for metric_label in plan.data_cols:
|
||||||
|
gb = metric_groupbys[metric_label]
|
||||||
|
df_sorted, raw_data_cols = metric_cache[metric_label]
|
||||||
|
|
||||||
output_df_sorted = output_df.sort_values(by=args.xaxis)
|
try:
|
||||||
for name, group_df in output_df_sorted.groupby(group_cols, dropna=False):
|
group_df = gb.get_group(gkey)
|
||||||
suffix = group_suffix(group_cols, name)
|
except KeyError:
|
||||||
sub_path = group_filename(name)
|
missing = (
|
||||||
|
f'<div style="font-size: 1.1em; font-weight: 600; margin: 10px 0;">'
|
||||||
|
f'{_html.escape(metric_label)} — missing for this group'
|
||||||
|
f"</div>\n"
|
||||||
|
)
|
||||||
|
main_fh.write(missing)
|
||||||
|
sub_fh.write(missing)
|
||||||
|
continue
|
||||||
|
|
||||||
# drop group columns from display only
|
# Display-only: drop group columns
|
||||||
display_group = group_df.drop(columns=group_cols, errors="ignore")
|
display_group = group_df.drop(columns=group_cols_canonical, errors="ignore")
|
||||||
|
|
||||||
html = render_metric_table_html(display_group, metric_label, suffix, args)
|
html = render_metric_table_html(display_group, metric_label, suffix, args)
|
||||||
|
|
||||||
main_fh.write(html)
|
main_fh.write(html)
|
||||||
with open(sub_path, "a+") as sub_fh:
|
|
||||||
sub_fh.write(html)
|
sub_fh.write(html)
|
||||||
|
|
||||||
maybe_write_plot(
|
maybe_write_plot(
|
||||||
main_fh,
|
main_fh,
|
||||||
sub_fh,
|
sub_fh,
|
||||||
@ -513,7 +605,9 @@ def main():
|
|||||||
plan = choose_metrics(args.latency)
|
plan = choose_metrics(args.latency)
|
||||||
|
|
||||||
files, info_cols = prepare_input_files(args, info_cols)
|
files, info_cols = prepare_input_files(args, info_cols)
|
||||||
write_report(files, info_cols, plan, args)
|
|
||||||
|
# Group-first report layout
|
||||||
|
write_report_group_first(files, info_cols, plan, args)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user