diff --git a/.buildkite/performance-benchmarks/scripts/compare-json-results.py b/.buildkite/performance-benchmarks/scripts/compare-json-results.py index 9d2b212d8b3b0..7ad92c2db40d4 100644 --- a/.buildkite/performance-benchmarks/scripts/compare-json-results.py +++ b/.buildkite/performance-benchmarks/scripts/compare-json-results.py @@ -9,7 +9,6 @@ import json import os from dataclasses import dataclass from importlib import util -from typing import Dict, List, Tuple import pandas as pd @@ -36,10 +35,10 @@ pd.set_option("display.float_format", lambda x: f"{x:.2f}") # Core data compare # ----------------------------- def compare_data_columns( - files: List[str], + files: list[str], name_column: str, data_column: str, - info_cols: List[str], + info_cols: list[str], drop_column: str, debug: bool = False, ): @@ -55,10 +54,10 @@ def compare_data_columns( print("\ncompare_data_column:", data_column) frames = [] - raw_data_cols: List[str] = [] + raw_data_cols: list[str] = [] compare_frames = [] - cols_per_file: List[set] = [] + cols_per_file: list[set] = [] for f in files: try: df_tmp = pd.read_json(f, orient="records") @@ -150,7 +149,7 @@ def compare_data_columns( # ----------------------------- def split_json_by_tp_pp( input_file: str = "benchmark_results.json", output_root: str = "." -) -> List[str]: +) -> list[str]: with open(input_file, encoding="utf-8") as f: data = json.load(f) @@ -166,7 +165,9 @@ def split_json_by_tp_pp( (c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None ) if name_col: - df = df[df[name_col].astype(str).str.contains(r"serving", case=False, na=False)].copy() + df = df[ + df[name_col].astype(str).str.contains(r"serving", case=False, na=False) + ].copy() rename_map = { "tp_size": "TP Size", @@ -174,7 +175,9 @@ def split_json_by_tp_pp( "pp_size": "PP Size", "pipeline_parallel_size": "PP Size", } - df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True) + df.rename( + columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True + ) if "TP Size" not in df.columns: df["TP Size"] = 1 @@ -184,7 +187,7 @@ def split_json_by_tp_pp( df["TP Size"] = pd.to_numeric(df["TP Size"], errors="coerce").fillna(1).astype(int) df["PP Size"] = pd.to_numeric(df["PP Size"], errors="coerce").fillna(1).astype(int) - saved_paths: List[str] = [] + saved_paths: list[str] = [] for (tp, pp), group_df in df.groupby(["TP Size", "PP Size"], dropna=False): folder_name = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}") os.makedirs(folder_name, exist_ok=True) @@ -215,7 +218,9 @@ def _find_concurrency_col(df: pd.DataFrame) -> str: return "# of max concurrency." -def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler": +def _highlight_threshold( + df: pd.DataFrame, threshold: float +) -> pd.io.formats.style.Styler: conc_col = _find_concurrency_col(df) key_cols = [ c @@ -235,7 +240,7 @@ def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.s ) -def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"): +def highlight_ratio_columns(styler: pd.io.formats.style.Styler): ratio_cols = [c for c in styler.data.columns if "ratio" in str(c).lower()] if not ratio_cols: return styler @@ -260,7 +265,9 @@ def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"): return styler -def _apply_two_decimals(styler: "pd.io.formats.style.Styler") -> "pd.io.formats.style.Styler": +def _apply_two_decimals( + styler: pd.io.formats.style.Styler, +) -> pd.io.formats.style.Styler: df = styler.data num_cols = df.select_dtypes("number").columns if len(num_cols) == 0: @@ -271,11 +278,15 @@ def _apply_two_decimals(styler: "pd.io.formats.style.Styler") -> "pd.io.formats. # ----------------------------- # Valid max concurrency summary helpers # ----------------------------- -def _config_value_columns(df: pd.DataFrame, conc_col: str) -> List[str]: - key_cols = [c for c in ["Model", "Dataset Name", "Input Len", "Output Len"] if c in df.columns] +def _config_value_columns(df: pd.DataFrame, conc_col: str) -> list[str]: + key_cols = [ + c + for c in ["Model", "Dataset Name", "Input Len", "Output Len"] + if c in df.columns + ] exclude = set(key_cols + [conc_col, "qps", "QPS"]) - cols: List[str] = [] + cols: list[str] = [] for c in df.columns: if c in exclude: continue @@ -289,7 +300,9 @@ def _config_value_columns(df: pd.DataFrame, conc_col: str) -> List[str]: return cols -def _max_concurrency_ok(df: pd.DataFrame, conc_col: str, cfg_col: str, threshold: float): +def _max_concurrency_ok( + df: pd.DataFrame, conc_col: str, cfg_col: str, threshold: float +): if df is None or conc_col not in df.columns or cfg_col not in df.columns: return pd.NA @@ -309,7 +322,12 @@ def _max_concurrency_ok(df: pd.DataFrame, conc_col: str, cfg_col: str, threshold def _value_at_concurrency(df: pd.DataFrame, conc_col: str, cfg_col: str, conc_value): - if df is None or conc_col not in df.columns or cfg_col not in df.columns or pd.isna(conc_value): + if ( + df is None + or conc_col not in df.columns + or cfg_col not in df.columns + or pd.isna(conc_value) + ): return pd.NA d = df[[conc_col, cfg_col]].copy() @@ -336,9 +354,21 @@ def build_valid_max_concurrency_summary_html( if ttft_group_df is None and tpot_group_df is None: return "" - ttft_cols = _config_value_columns(ttft_group_df, conc_col) if ttft_group_df is not None else [] - tpot_cols = _config_value_columns(tpot_group_df, conc_col) if tpot_group_df is not None else [] - tput_cols = _config_value_columns(tput_group_df, conc_col) if tput_group_df is not None else [] + ttft_cols = ( + _config_value_columns(ttft_group_df, conc_col) + if ttft_group_df is not None + else [] + ) + tpot_cols = ( + _config_value_columns(tpot_group_df, conc_col) + if tpot_group_df is not None + else [] + ) + tput_cols = ( + _config_value_columns(tput_group_df, conc_col) + if tput_group_df is not None + else [] + ) if ttft_group_df is not None and tpot_group_df is not None: cfg_cols = [c for c in ttft_cols if c in tpot_cols] @@ -352,13 +382,37 @@ def build_valid_max_concurrency_summary_html( rows = [] for cfg in cfg_cols: - ttft_max = _max_concurrency_ok(ttft_group_df, conc_col, cfg, args.ttft_max_ms) if ttft_group_df is not None else pd.NA - tpot_max = _max_concurrency_ok(tpot_group_df, conc_col, cfg, args.tpot_max_ms) if tpot_group_df is not None else pd.NA - both = pd.NA if (pd.isna(ttft_max) or pd.isna(tpot_max)) else min(ttft_max, tpot_max) + ttft_max = ( + _max_concurrency_ok(ttft_group_df, conc_col, cfg, args.ttft_max_ms) + if ttft_group_df is not None + else pd.NA + ) + tpot_max = ( + _max_concurrency_ok(tpot_group_df, conc_col, cfg, args.tpot_max_ms) + if tpot_group_df is not None + else pd.NA + ) + both = ( + pd.NA + if (pd.isna(ttft_max) or pd.isna(tpot_max)) + else min(ttft_max, tpot_max) + ) - tput_at_both = _value_at_concurrency(tput_group_df, conc_col, cfg, both) if tput_group_df is not None else pd.NA - ttft_at_both = _value_at_concurrency(ttft_group_df, conc_col, cfg, both) if ttft_group_df is not None else pd.NA - tpot_at_both = _value_at_concurrency(tpot_group_df, conc_col, cfg, both) if tpot_group_df is not None else pd.NA + tput_at_both = ( + _value_at_concurrency(tput_group_df, conc_col, cfg, both) + if tput_group_df is not None + else pd.NA + ) + ttft_at_both = ( + _value_at_concurrency(ttft_group_df, conc_col, cfg, both) + if ttft_group_df is not None + else pd.NA + ) + tpot_at_both = ( + _value_at_concurrency(tpot_group_df, conc_col, cfg, both) + if tpot_group_df is not None + else pd.NA + ) rows.append( { @@ -388,7 +442,7 @@ def build_valid_max_concurrency_summary_html( if c == "Configuration": continue # default argument binds per-column formatter correctly - formatters[c] = (lambda v: "—" if pd.isna(v) else f"{float(v):.2f}") + formatters[c] = lambda v: "—" if pd.isna(v) else f"{float(v):.2f}" styler = summary_df.style.format(formatters) @@ -399,9 +453,9 @@ def build_valid_max_concurrency_summary_html( styler = styler.map(_green, subset=[both_col]) title = ( - f'