diff --git a/.buildkite/performance-benchmarks/scripts/compare-json-results.py b/.buildkite/performance-benchmarks/scripts/compare-json-results.py
index 9d2b212d8b3b0..7ad92c2db40d4 100644
--- a/.buildkite/performance-benchmarks/scripts/compare-json-results.py
+++ b/.buildkite/performance-benchmarks/scripts/compare-json-results.py
@@ -9,7 +9,6 @@ import json
 import os
 from dataclasses import dataclass
 from importlib import util
-from typing import Dict, List, Tuple
 
 import pandas as pd
 
@@ -36,10 +35,10 @@ pd.set_option("display.float_format", lambda x: f"{x:.2f}")
 # Core data compare
 # -----------------------------
 def compare_data_columns(
-    files: List[str],
+    files: list[str],
     name_column: str,
     data_column: str,
-    info_cols: List[str],
+    info_cols: list[str],
     drop_column: str,
     debug: bool = False,
 ):
@@ -55,10 +54,10 @@ def compare_data_columns(
     print("\ncompare_data_column:", data_column)
 
     frames = []
-    raw_data_cols: List[str] = []
+    raw_data_cols: list[str] = []
     compare_frames = []
 
-    cols_per_file: List[set] = []
+    cols_per_file: list[set] = []
     for f in files:
         try:
             df_tmp = pd.read_json(f, orient="records")
@@ -150,7 +149,7 @@ def compare_data_columns(
 # -----------------------------
 def split_json_by_tp_pp(
     input_file: str = "benchmark_results.json", output_root: str = "."
-) -> List[str]:
+) -> list[str]:
     with open(input_file, encoding="utf-8") as f:
         data = json.load(f)
 
@@ -166,7 +165,9 @@ def split_json_by_tp_pp(
         (c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None
     )
     if name_col:
-        df = df[df[name_col].astype(str).str.contains(r"serving", case=False, na=False)].copy()
+        df = df[
+            df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
+        ].copy()
 
     rename_map = {
         "tp_size": "TP Size",
@@ -174,7 +175,9 @@ def split_json_by_tp_pp(
         "pp_size": "PP Size",
         "pipeline_parallel_size": "PP Size",
     }
-    df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
+    df.rename(
+        columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True
+    )
 
     if "TP Size" not in df.columns:
         df["TP Size"] = 1
@@ -184,7 +187,7 @@ def split_json_by_tp_pp(
     df["TP Size"] = pd.to_numeric(df["TP Size"], errors="coerce").fillna(1).astype(int)
     df["PP Size"] = pd.to_numeric(df["PP Size"], errors="coerce").fillna(1).astype(int)
 
-    saved_paths: List[str] = []
+    saved_paths: list[str] = []
     for (tp, pp), group_df in df.groupby(["TP Size", "PP Size"], dropna=False):
         folder_name = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}")
         os.makedirs(folder_name, exist_ok=True)
@@ -215,7 +218,9 @@ def _find_concurrency_col(df: pd.DataFrame) -> str:
     return "# of max concurrency."
 
 
-def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.style.Styler":
+def _highlight_threshold(
+    df: pd.DataFrame, threshold: float
+) -> pd.io.formats.style.Styler:
     conc_col = _find_concurrency_col(df)
     key_cols = [
         c
@@ -235,7 +240,7 @@ def _highlight_threshold(df: pd.DataFrame, threshold: float) -> "pd.io.formats.s
     )
 
 
-def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"):
+def highlight_ratio_columns(styler: pd.io.formats.style.Styler):
     ratio_cols = [c for c in styler.data.columns if "ratio" in str(c).lower()]
     if not ratio_cols:
         return styler
@@ -260,7 +265,9 @@ def highlight_ratio_columns(styler: "pd.io.formats.style.Styler"):
     return styler
 
 
-def _apply_two_decimals(styler: "pd.io.formats.style.Styler") -> "pd.io.formats.style.Styler":
+def _apply_two_decimals(
+    styler: pd.io.formats.style.Styler,
+) -> pd.io.formats.style.Styler:
     df = styler.data
     num_cols = df.select_dtypes("number").columns
     if len(num_cols) == 0:
@@ -271,11 +278,15 @@ def _apply_two_decimals(styler: "pd.io.formats.style.Styler") -> "pd.io.formats.
 # -----------------------------
 # Valid max concurrency summary helpers
 # -----------------------------
-def _config_value_columns(df: pd.DataFrame, conc_col: str) -> List[str]:
-    key_cols = [c for c in ["Model", "Dataset Name", "Input Len", "Output Len"] if c in df.columns]
+def _config_value_columns(df: pd.DataFrame, conc_col: str) -> list[str]:
+    key_cols = [
+        c
+        for c in ["Model", "Dataset Name", "Input Len", "Output Len"]
+        if c in df.columns
+    ]
     exclude = set(key_cols + [conc_col, "qps", "QPS"])
 
-    cols: List[str] = []
+    cols: list[str] = []
     for c in df.columns:
         if c in exclude:
             continue
@@ -289,7 +300,9 @@ def _config_value_columns(df: pd.DataFrame, conc_col: str) -> List[str]:
     return cols
 
 
-def _max_concurrency_ok(df: pd.DataFrame, conc_col: str, cfg_col: str, threshold: float):
+def _max_concurrency_ok(
+    df: pd.DataFrame, conc_col: str, cfg_col: str, threshold: float
+):
     if df is None or conc_col not in df.columns or cfg_col not in df.columns:
         return pd.NA
 
@@ -309,7 +322,12 @@ def _max_concurrency_ok(df: pd.DataFrame, conc_col: str, cfg_col: str, threshold
 
 
 def _value_at_concurrency(df: pd.DataFrame, conc_col: str, cfg_col: str, conc_value):
-    if df is None or conc_col not in df.columns or cfg_col not in df.columns or pd.isna(conc_value):
+    if (
+        df is None
+        or conc_col not in df.columns
+        or cfg_col not in df.columns
+        or pd.isna(conc_value)
+    ):
         return pd.NA
 
     d = df[[conc_col, cfg_col]].copy()
@@ -336,9 +354,21 @@ def build_valid_max_concurrency_summary_html(
     if ttft_group_df is None and tpot_group_df is None:
         return ""
 
-    ttft_cols = _config_value_columns(ttft_group_df, conc_col) if ttft_group_df is not None else []
-    tpot_cols = _config_value_columns(tpot_group_df, conc_col) if tpot_group_df is not None else []
-    tput_cols = _config_value_columns(tput_group_df, conc_col) if tput_group_df is not None else []
+    ttft_cols = (
+        _config_value_columns(ttft_group_df, conc_col)
+        if ttft_group_df is not None
+        else []
+    )
+    tpot_cols = (
+        _config_value_columns(tpot_group_df, conc_col)
+        if tpot_group_df is not None
+        else []
+    )
+    tput_cols = (
+        _config_value_columns(tput_group_df, conc_col)
+        if tput_group_df is not None
+        else []
+    )
 
     if ttft_group_df is not None and tpot_group_df is not None:
         cfg_cols = [c for c in ttft_cols if c in tpot_cols]
@@ -352,13 +382,37 @@ def build_valid_max_concurrency_summary_html(
 
     rows = []
     for cfg in cfg_cols:
-        ttft_max = _max_concurrency_ok(ttft_group_df, conc_col, cfg, args.ttft_max_ms) if ttft_group_df is not None else pd.NA
-        tpot_max = _max_concurrency_ok(tpot_group_df, conc_col, cfg, args.tpot_max_ms) if tpot_group_df is not None else pd.NA
-        both = pd.NA if (pd.isna(ttft_max) or pd.isna(tpot_max)) else min(ttft_max, tpot_max)
+        ttft_max = (
+            _max_concurrency_ok(ttft_group_df, conc_col, cfg, args.ttft_max_ms)
+            if ttft_group_df is not None
+            else pd.NA
+        )
+        tpot_max = (
+            _max_concurrency_ok(tpot_group_df, conc_col, cfg, args.tpot_max_ms)
+            if tpot_group_df is not None
+            else pd.NA
+        )
+        both = (
+            pd.NA
+            if (pd.isna(ttft_max) or pd.isna(tpot_max))
+            else min(ttft_max, tpot_max)
+        )
 
-        tput_at_both = _value_at_concurrency(tput_group_df, conc_col, cfg, both) if tput_group_df is not None else pd.NA
-        ttft_at_both = _value_at_concurrency(ttft_group_df, conc_col, cfg, both) if ttft_group_df is not None else pd.NA
-        tpot_at_both = _value_at_concurrency(tpot_group_df, conc_col, cfg, both) if tpot_group_df is not None else pd.NA
+        tput_at_both = (
+            _value_at_concurrency(tput_group_df, conc_col, cfg, both)
+            if tput_group_df is not None
+            else pd.NA
+        )
+        ttft_at_both = (
+            _value_at_concurrency(ttft_group_df, conc_col, cfg, both)
+            if ttft_group_df is not None
+            else pd.NA
+        )
+        tpot_at_both = (
+            _value_at_concurrency(tpot_group_df, conc_col, cfg, both)
+            if tpot_group_df is not None
+            else pd.NA
+        )
 
         rows.append(
             {
@@ -388,7 +442,7 @@ def build_valid_max_concurrency_summary_html(
         if c == "Configuration":
             continue
-        # default argument binds per-column formatter correctly
-        formatters[c] = (lambda v: "—" if pd.isna(v) else f"{float(v):.2f}")
+        # the formatter captures no loop variable, so it needs no default-arg binding
+        formatters[c] = lambda v: "—" if pd.isna(v) else f"{float(v):.2f}"
 
     styler = summary_df.style.format(formatters)
 
@@ -399,9 +453,9 @@ def build_valid_max_concurrency_summary_html(
         styler = styler.map(_green, subset=[both_col])
 
     title = (
-        f'<div style="font-size: 1.15em; font-weight: 700; margin: 12px 0 6px 0;">'
-        f'Valid Max Concurrency Summary'
-        f"</div>\n"
+        '<div style="font-size: 1.15em; font-weight: 700; margin: 12px 0 6px 0;">'
+        "Valid Max Concurrency Summary"
+        "</div>\n"
     )
     return title + styler.to_html(table_attributes='border="1" class="dataframe"')
 
@@ -439,14 +493,18 @@ def _add_limit_line(fig, y_value: float, label: str):
 # -----------------------------
 @dataclass(frozen=True)
 class MetricPlan:
-    data_cols: List[str]
+    data_cols: list[str]
     drop_column: str
 
 
 def build_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
-    parser.add_argument("-f", "--file", action="append", type=str, help="input file name")
-    parser.add_argument("--debug", action="store_true", help="show all information for debugging")
+    parser.add_argument(
+        "-f", "--file", action="append", type=str, help="input file name"
+    )
+    parser.add_argument(
+        "--debug", action="store_true", help="show all information for debugging"
+    )
     parser.add_argument(
         "--plot",
         action=argparse.BooleanOptionalAction,
@@ -467,8 +525,18 @@ def build_parser() -> argparse.ArgumentParser:
         default="p99",
         help="take median|p99 for latency like TTFT/TPOT",
     )
-    parser.add_argument("--ttft-max-ms", type=float, default=3000.0, help="Reference limit for TTFT plots (ms)")
-    parser.add_argument("--tpot-max-ms", type=float, default=100.0, help="Reference limit for TPOT plots (ms)")
+    parser.add_argument(
+        "--ttft-max-ms",
+        type=float,
+        default=3000.0,
+        help="Reference limit for TTFT plots (ms)",
+    )
+    parser.add_argument(
+        "--tpot-max-ms",
+        type=float,
+        default=100.0,
+        help="Reference limit for TPOT plots (ms)",
+    )
     return parser
 
 
@@ -488,7 +556,7 @@ def choose_metrics(latency: str) -> MetricPlan:
     )
 
 
-def prepare_input_files(args, info_cols: List[str]) -> Tuple[List[str], List[str]]:
+def prepare_input_files(args, info_cols: list[str]) -> tuple[list[str], list[str]]:
     if not args.file:
         raise ValueError("No input files provided. Use -f/--file.")
 
@@ -501,12 +569,12 @@ def prepare_input_files(args, info_cols: List[str]) -> Tuple[List[str], List[str
     return files, info_cols
 
 
-def get_y_axis_col(info_cols: List[str], xaxis: str) -> str:
+def get_y_axis_col(info_cols: list[str], xaxis: str) -> str:
     y_axis_index = info_cols.index(xaxis) if xaxis in info_cols else 6
     return info_cols[y_axis_index]
 
 
-def get_group_cols(output_df: pd.DataFrame, info_cols: List[str]) -> List[str]:
+def get_group_cols(output_df: pd.DataFrame, info_cols: list[str]) -> list[str]:
     filtered_info_cols = info_cols[:4]
     group_cols = [c for c in filtered_info_cols if c in output_df.columns]
     if not group_cols:
@@ -527,11 +595,9 @@ def group_filename(name, prefix: str = "perf_comparison_") -> str:
     return f"{prefix}{safe}.html"
 
 
-def build_group_suffix(group_cols: List[str], name) -> str:
+def build_group_suffix(group_cols: list[str], name) -> str:
     name_vals = normalize_group_key(name)
-    return " , ".join(
-        f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals)
-    )
+    return " , ".join(f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals))
 
 
 def render_metric_table_html(
@@ -542,8 +608,8 @@ def render_metric_table_html(
 ) -> str:
     title = (
         f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
-        f'{_html.escape(metric_label)}'
-        f' — {_html.escape(group_suffix)}'
+        f"{_html.escape(metric_label)}"
+        f" — {_html.escape(group_suffix)}"
         f"</div>\n"
     )
 
@@ -565,7 +631,7 @@ def maybe_write_plot(
     main_fh,
     sub_fh,
     group_df: pd.DataFrame,
-    raw_data_cols: List[str],
+    raw_data_cols: list[str],
     metric_label: str,
     y_axis_col: str,
     args,
@@ -606,21 +672,25 @@ def maybe_write_plot(
     sub_fh.write(html)
 
 
-def build_group_keys(df: pd.DataFrame, group_cols: List[str], sort_cols: List[str] | None = None):
+def build_group_keys(
+    df: pd.DataFrame, group_cols: list[str], sort_cols: list[str] | None = None
+):
     if sort_cols:
         df = df.sort_values(by=sort_cols)
     gb = df.groupby(group_cols, dropna=False)
     return [k for k, _ in gb]
 
 
-def write_report_group_first(files: List[str], info_cols: List[str], plan: MetricPlan, args):
+def write_report_group_first(
+    files: list[str], info_cols: list[str], plan: MetricPlan, args
+):
     name_column = "Test name"
     y_axis_col = get_y_axis_col(info_cols, args.xaxis)
 
     print("comparing : " + ", ".join(files))
 
-    metric_cache: Dict[str, Tuple[pd.DataFrame, List[str]]] = {}
-    group_cols_canonical: List[str] | None = None
+    metric_cache: dict[str, tuple[pd.DataFrame, list[str]]] = {}
+    group_cols_canonical: list[str] | None = None
 
     for metric_label in plan.data_cols:
         output_df, raw_data_cols = compare_data_columns(
@@ -641,14 +711,19 @@ def write_report_group_first(files: List[str], info_cols: List[str], plan: Metri
         else:
             group_cols_canonical = [c for c in group_cols_canonical if c in group_cols]
 
-        metric_cache[metric_label] = (output_df.sort_values(by=args.xaxis), raw_data_cols)
+        metric_cache[metric_label] = (
+            output_df.sort_values(by=args.xaxis),
+            raw_data_cols,
+        )
 
     if not group_cols_canonical:
         raise ValueError("No canonical group columns found across metrics.")
 
     first_metric = plan.data_cols[0]
     first_df_sorted, _ = metric_cache[first_metric]
-    group_keys = build_group_keys(first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis])
+    group_keys = build_group_keys(
+        first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis]
+    )
 
     metric_groupbys = {
         metric_label: df.groupby(group_cols_canonical, dropna=False)
@@ -660,11 +735,11 @@ def write_report_group_first(files: List[str], info_cols: List[str], plan: Metri
             gkey_tuple = normalize_group_key(gkey)
             suffix = build_group_suffix(group_cols_canonical, gkey_tuple)
             sub_path = group_filename(gkey_tuple)
-
             group_header = (
-                f'<div style="font-size: 1.4em; font-weight: 700; margin: 18px 0 10px 0;">'
-                f'{_html.escape(suffix)}'
-                f"</div>\n"
+                '<div style="font-size: 1.4em; font-weight: 700; '
+                'margin: 18px 0 10px 0;">'
+                f"{_html.escape(suffix)}"
+                "</div>\n"
             )
 
             main_fh.write(group_header)
@@ -684,10 +759,12 @@ def write_report_group_first(files: List[str], info_cols: List[str], plan: Metri
                         group_df = gb.get_group(gkey)
                     except KeyError:
                         missing = (
-                            f'<div style="font-size: 1.1em; font-weight: 600; margin: 10px 0;">'
-                            f'{_html.escape(metric_label)} — missing for this group'
-                            f"</div>\n"
+                            '<div style="font-size: 1.1em; font-weight: 600; '
+                            'margin: 10px 0;">'
+                            f"{_html.escape(metric_label)} — missing for this group"
+                            "</div>\n"
                         )
+
                         main_fh.write(missing)
                         sub_fh.write(missing)
                         continue
@@ -703,9 +780,13 @@ def write_report_group_first(files: List[str], info_cols: List[str], plan: Metri
                     elif mn in ("p99", "median") or "tpot" in mn:
                         tpot_group_df = group_df
 
-                    display_group = group_df.drop(columns=group_cols_canonical, errors="ignore")
+                    display_group = group_df.drop(
+                        columns=group_cols_canonical, errors="ignore"
+                    )
 
-                    html = render_metric_table_html(display_group, metric_label, suffix, args)
+                    html = render_metric_table_html(
+                        display_group, metric_label, suffix, args
+                    )
                     main_fh.write(html)
                     sub_fh.write(html)
 
@@ -741,4 +822,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-