vllm/tools/compare_intermediate.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Script to compare intermediate logging outputs from two different runs.

This script compares the tensor outputs from two different intermediate logging
directories and generates a report of the differences.

Usage:
    python compare_intermediate.py --dir1 /path/to/first/log/dir --dir2 /path/to/second/log/dir [options]

Options:
    --dir1 DIR           First intermediate logging directory
    --dir2 DIR           Second intermediate logging directory
    --output FILE        Output file for the report (default: stdout)
    --format {md,json}   Output format (default: md)
    --rtol FLOAT         Relative tolerance for tensor comparison (default: 1e-5)
    --atol FLOAT         Absolute tolerance for tensor comparison (default: 1e-8)
    --steps STEPS        Comma-separated list of steps to compare (default: all)
    --modules MODULES    Comma-separated list of module name patterns to compare (default: all)
    --verbose            Include detailed information about each tensor
"""

import argparse
import json
import re
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional

import torch


def load_tensor(path: Path) -> torch.Tensor:
    """Load a tensor from a .pt file."""
    try:
        return torch.load(path, map_location="cpu")
    except Exception as e:
        print(f"Error loading tensor from {path}: {e}")
        return None


def load_json(path: Path) -> Dict:
    """Load a JSON file."""
    try:
        with open(path, "r") as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading JSON from {path}: {e}")
        return {}


def extract_diff_metatada(exception_str: str) -> Dict:
    try:
        num_diff_elements = int(
            re.search(r"Mismatched elements: (\d+) /", exception_str).group(1)
        )
        total_elements = int(
            re.search(r"Mismatched elements: \d+ / (\d+)", exception_str).group(1)
        )
        max_abs_diff = float(
            re.search(
                r"Greatest absolute difference: ([\d\.e-]+)", exception_str
            ).group(1)
        )
        max_rel_diff = float(
            re.search(
                r"Greatest relative difference: ([\d\.e-]+)", exception_str
            ).group(1)
        )
        return {
            "num_diff_elements": num_diff_elements,
            "total_elements": total_elements,
            "max_abs_diff": max_abs_diff,
            "max_rel_diff": max_rel_diff,
        }
    except Exception:
        return {"error": exception_str}


def compare_tensors(
    tensor1: torch.Tensor, tensor2: torch.Tensor, rtol: float, atol: float
) -> Dict:
    """Compare two tensors and return a dictionary with comparison results."""
    if tensor1 is None or tensor2 is None:
        return {"match": False, "error": "One or both tensors are None"}

    if tensor1.shape != tensor2.shape:
        return {
            "match": False,
            "error": f"Shape mismatch: {tensor1.shape} vs {tensor2.shape}",
        }

    if tensor1.dtype != tensor2.dtype:
        return {
            "match": False,
            "error": f"Dtype mismatch: {tensor1.dtype} vs {tensor2.dtype}",
        }

    # Check if tensors are close using PyTorch's assert_close
    try:
        torch.testing.assert_close(tensor1, tensor2, rtol=rtol, atol=atol)
    except Exception as e:
        return {"match": False, **extract_diff_metatada(str(e))}
    return {"match": True}


def compare_json_values(value1: Any, value2: Any) -> Dict:
    """Compare two JSON values and return a dictionary with comparison results."""
    if type(value1) is not type(value2):
        return {
            "match": False,
            "error": f"Type mismatch: {type(value1).__name__} vs {type(value2).__name__}",
        }

    if isinstance(value1, dict):
        # Compare dictionaries
        all_keys = set(value1.keys()) | set(value2.keys())
        mismatches = {}

        for key in all_keys:
            if key not in value1:
                mismatches[key] = {"error": "Missing in first dict"}
            elif key not in value2:
                mismatches[key] = {"error": "Missing in second dict"}
            else:
                comparison = compare_json_values(value1[key], value2[key])
                if not comparison["match"]:
                    mismatches[key] = comparison

        if mismatches:
            return {"match": False, "mismatches": mismatches}
        return {"match": True}

    elif isinstance(value1, list):
        # Compare lists
        if len(value1) != len(value2):
            return {
                "match": False,
                "error": f"Length mismatch: {len(value1)} vs {len(value2)}",
            }

        mismatches = {}
        for i, (item1, item2) in enumerate(zip(value1, value2)):
            comparison = compare_json_values(item1, item2)
            if not comparison["match"]:
                mismatches[i] = comparison

        if mismatches:
            return {"match": False, "mismatches": mismatches}
        return {"match": True}

    else:
        # Compare primitive values
        if value1 == value2:
            return {"match": True}
        else:
            return {"match": False, "value1": value1, "value2": value2}


def find_tensor_files(directory: Path) -> Dict[str, Dict[str, Dict[str, List[Path]]]]:
    """
    Find all tensor files in the given directory.

    Returns a dictionary with the structure:
    {
        "step_0": {
            "module_name_123456": {
                "inputs": [Path("inputs_0_cuda_0.pt"), ...],
                "outputs": [Path("output_cuda_0.pt"), ...]
            },
            ...
        },
        ...
    }
    """
    result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    # Find all step directories
    step_dirs = [d for d in directory.glob("step_*") if d.is_dir()]

    for step_dir in step_dirs:
        step_name = step_dir.name

        # Find all module directories
        module_dirs = [d for d in step_dir.glob("*") if d.is_dir()]

        for module_dir in module_dirs:
            module_name = module_dir.name

            # Find input tensor files
            input_tensors = list(module_dir.glob("inputs_*.pt"))
            if input_tensors:
                result[step_name][module_name]["inputs"] = input_tensors

            # Find output tensor files
            output_tensors = list(module_dir.glob("output*.pt"))
            if output_tensors:
                result[step_name][module_name]["outputs"] = output_tensors

            # Find JSON metadata files
            inputs_json = module_dir / "inputs.json"
            if inputs_json.exists():
                result[step_name][module_name]["inputs_json"] = [inputs_json]

            outputs_json = module_dir / "outputs.json"
            if outputs_json.exists():
                result[step_name][module_name]["outputs_json"] = [outputs_json]

    return result


def filter_steps_and_modules(
    tensor_files: Dict[str, Dict[str, Dict[str, List[Path]]]],
    steps: Optional[List[str]] = None,
    module_patterns: Optional[List[str]] = None,
) -> Dict[str, Dict[str, Dict[str, List[Path]]]]:
    """Filter tensor files by steps and module patterns."""
    result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    # Filter steps
    if steps:
        step_names = [f"step_{step}" for step in steps]
        steps_to_include = {step: True for step in step_names}
    else:
        steps_to_include = {step: True for step in tensor_files.keys()}

    # Compile module patterns
    if module_patterns:
        compiled_patterns = [re.compile(pattern) for pattern in module_patterns]
    else:
        compiled_patterns = None

    for step_name, modules in tensor_files.items():
        if step_name not in steps_to_include:
            continue

        for module_name, file_types in modules.items():
            # Check if module matches any pattern
            if compiled_patterns:
                if not any(
                    pattern.search(module_name) for pattern in compiled_patterns
                ):
                    continue

            result[step_name][module_name] = file_types

    return result


def compare_directories(
    dir1: Path,
    dir2: Path,
    rtol: Optional[float] = None,
    atol: Optional[float] = None,
    steps: Optional[List[str]] = None,
    module_patterns: Optional[List[str]] = None,
) -> Dict:
    """Compare two intermediate logging directories and return a report."""
    # Find tensor files in both directories
    tensor_files1 = find_tensor_files(dir1)
    tensor_files2 = find_tensor_files(dir2)

    # Filter by steps and modules
    if steps or module_patterns:
        tensor_files1 = filter_steps_and_modules(tensor_files1, steps, module_patterns)
        tensor_files2 = filter_steps_and_modules(tensor_files2, steps, module_patterns)

    # Get all steps and modules from both directories
    all_steps = set(tensor_files1.keys()) | set(tensor_files2.keys())

    report = {
        "dir1": str(dir1),
        "dir2": str(dir2),
        "rtol": rtol,
        "atol": atol,
        "steps": {},
    }

    # Compare each step
    for step in sorted(all_steps):
        step_report = {
            "modules": {},
            "summary": {
                "total_modules": 0,
                "matching_modules": 0,
                "mismatched_modules": 0,
                "missing_modules": 0,
            },
        }

        # Get all modules from both directories for this step
        modules1 = tensor_files1.get(step, {})
        modules2 = tensor_files2.get(step, {})
        # TODO: read from module calls.txt to get the full module list
        # TODO: check if module calls txt exsits
        dir1_module_call_file = dir1 / step / "module_calls.txt"
        if dir1_module_call_file.exists():
            with open(dir1 / step / "module_calls.txt", "r") as f:
                all_modules = f.read().splitlines()
        else:
            print(
                "Warnings: the module call orders are missed, ordering using module alphbetics"
            )
            all_modules = sorted(set(modules1.keys()) | set(modules2.keys()))
        step_report["module_call_list"] = []
        for module in all_modules:
            module_report = {
                "inputs": {},
                "outputs": {},
                "summary": {
                    "total_tensors": 0,
                    "matching_tensors": 0,
                    "mismatched_tensors": 0,
                    "missing_tensors": 0,
                },
            }

            # Check if module exists in both directories
            if module not in modules1:
                module_report["error"] = f"Module missing in {dir1}"
                step_report["summary"]["missing_modules"] += 1
                step_report["modules"][module] = module_report
                continue

            if module not in modules2:
                module_report["error"] = f"Module missing in {dir2}"
                step_report["summary"]["missing_modules"] += 1
                step_report["modules"][module] = module_report
                continue

            # Compare JSON metadata
            for json_type in ["inputs_json", "outputs_json"]:
                json_files1 = modules1[module].get(json_type, [])
                json_files2 = modules2[module].get(json_type, [])

                if json_files1 and json_files2:
                    json1 = load_json(json_files1[0])
                    json2 = load_json(json_files2[0])

                    json_comparison = compare_json_values(json1, json2)
                    json_name = json_type.replace("_json", "")
                    module_report[f"{json_name}_metadata"] = json_comparison

                    # Add file paths for manual checking when there's a mismatch
                    if not json_comparison.get("match", True):
                        module_report[f"{json_name}_metadata"]["file1"] = str(
                            json_files1[0]
                        )
                        module_report[f"{json_name}_metadata"]["file2"] = str(
                            json_files2[0]
                        )

            # Compare input tensors
            input_tensors1 = {p.name: p for p in modules1[module].get("inputs", [])}
            input_tensors2 = {p.name: p for p in modules2[module].get("inputs", [])}
            all_input_names = set(input_tensors1.keys()) | set(input_tensors2.keys())

            for tensor_name in sorted(all_input_names):
                if tensor_name not in input_tensors1:
                    module_report["inputs"][tensor_name] = {
                        "match": False,
                        "error": f"Tensor missing in {dir1}",
                    }
                    module_report["summary"]["missing_tensors"] += 1
                elif tensor_name not in input_tensors2:
                    module_report["inputs"][tensor_name] = {
                        "match": False,
                        "error": f"Tensor missing in {dir2}",
                    }
                    module_report["summary"]["missing_tensors"] += 1
                else:
                    tensor1 = load_tensor(input_tensors1[tensor_name])
                    tensor2 = load_tensor(input_tensors2[tensor_name])

                    comparison = compare_tensors(tensor1, tensor2, rtol, atol)
                    # Add file paths for manual checking when there's a mismatch
                    if not comparison.get("match", False):
                        comparison["file1"] = str(input_tensors1[tensor_name])
                        comparison["file2"] = str(input_tensors2[tensor_name])

                    module_report["inputs"][tensor_name] = comparison

                    if comparison.get("match", False):
                        module_report["summary"]["matching_tensors"] += 1
                    else:
                        module_report["summary"]["mismatched_tensors"] += 1

                module_report["summary"]["total_tensors"] += 1

            # Compare output tensors
            output_tensors1 = {p.name: p for p in modules1[module].get("outputs", [])}
            output_tensors2 = {p.name: p for p in modules2[module].get("outputs", [])}
            all_output_names = set(output_tensors1.keys()) | set(output_tensors2.keys())

            for tensor_name in sorted(all_output_names):
                if tensor_name not in output_tensors1:
                    module_report["outputs"][tensor_name] = {
                        "match": False,
                        "error": f"Tensor missing in {dir1}",
                    }
                    module_report["summary"]["missing_tensors"] += 1
                elif tensor_name not in output_tensors2:
                    module_report["outputs"][tensor_name] = {
                        "match": False,
                        "error": f"Tensor missing in {dir2}",
                    }
                    module_report["summary"]["missing_tensors"] += 1
                else:
                    tensor1 = load_tensor(output_tensors1[tensor_name])
                    tensor2 = load_tensor(output_tensors2[tensor_name])

                    comparison = compare_tensors(tensor1, tensor2, rtol, atol)
                    # Add file paths for manual checking when there's a mismatch
                    if not comparison.get("match", False):
                        comparison["file1"] = str(output_tensors1[tensor_name])
                        comparison["file2"] = str(output_tensors2[tensor_name])

                    module_report["outputs"][tensor_name] = comparison

                    if comparison.get("match", False):
                        module_report["summary"]["matching_tensors"] += 1
                    else:
                        module_report["summary"]["mismatched_tensors"] += 1

                module_report["summary"]["total_tensors"] += 1

            # Update module status
            if module_report["summary"]["mismatched_tensors"] > 0:
                step_report["summary"]["mismatched_modules"] += 1
            else:
                step_report["summary"]["matching_modules"] += 1

            step_report["summary"]["total_modules"] += 1
            step_report["modules"][module] = module_report
            step_report["module_call_list"].append(module)

        report["steps"][step] = step_report

    # Add overall summary
    report["summary"] = {
        "total_steps": len(all_steps),
        "total_modules": sum(
            step_report["summary"]["total_modules"]
            for step_report in report["steps"].values()
        ),
        "matching_modules": sum(
            step_report["summary"]["matching_modules"]
            for step_report in report["steps"].values()
        ),
        "mismatched_modules": sum(
            step_report["summary"]["mismatched_modules"]
            for step_report in report["steps"].values()
        ),
        "missing_modules": sum(
            step_report["summary"]["missing_modules"]
            for step_report in report["steps"].values()
        ),
        "total_tensors": sum(
            module_report["summary"]["total_tensors"]
            for step_report in report["steps"].values()
            for module_name, module_report in step_report["modules"].items()
            if "summary" in module_report
        ),
        "matching_tensors": sum(
            module_report["summary"]["matching_tensors"]
            for step_report in report["steps"].values()
            for module_name, module_report in step_report["modules"].items()
            if "summary" in module_report
        ),
        "mismatched_tensors": sum(
            module_report["summary"]["mismatched_tensors"]
            for step_report in report["steps"].values()
            for module_name, module_report in step_report["modules"].items()
            if "summary" in module_report
        ),
        "missing_tensors": sum(
            module_report["summary"]["missing_tensors"]
            for step_report in report["steps"].values()
            for module_name, module_report in step_report["modules"].items()
            if "summary" in module_report
        ),
    }

    return report


def generate_markdown_report(report: Dict, verbose: bool = False) -> str:
    """Generate a markdown report from the comparison results."""
    lines = []

    # Add header
    lines.append("# Intermediate Logging Comparison Report")
    lines.append("")
    lines.append("Comparing intermediate logging outputs between:")
    lines.append(f"- **Directory 1**: `{report['dir1']}`")
    lines.append(f"- **Directory 2**: `{report['dir2']}`")
    lines.append("")
    lines.append(f"Comparison parameters:")
    lines.append(f"- Relative tolerance (rtol): {report['rtol']}")
    lines.append(f"- Absolute tolerance (atol): {report['atol']}")
    lines.append("")

    # Add overall summary
    summary = report["summary"]
    lines.append("## Overall Summary")
    lines.append("")
    lines.append("| Category | Total | Matching | Mismatched | Missing |")
    lines.append("|----------|-------|----------|------------|---------|")
    lines.append(f"| Steps | {summary['total_steps']} | - | - | - |")
    lines.append(
        f"| Modules | {summary['total_modules']} | {summary['matching_modules']} | {summary['mismatched_modules']} | {summary['missing_modules']} |"
    )
    lines.append(
        f"| Tensors | {summary['total_tensors']} | {summary['matching_tensors']} | {summary['mismatched_tensors']} | {summary['missing_tensors']} |"
    )
    lines.append("")

    # Add step details
    for step_name, step_report in sorted(report["steps"].items()):
        step_summary = step_report["summary"]

        lines.append(f"## {step_name}")
        lines.append("")
        lines.append(
            f"**Summary**: {step_summary['matching_modules']} matching modules, {step_summary['mismatched_modules']} mismatched modules, {step_summary['missing_modules']} missing modules"
        )
        lines.append("")

        # Add module details
        for module_name in step_report["module_call_list"]:
            module_report = step_report["modules"][module_name]
            if "error" in module_report:
                lines.append(f"### ❌ {module_name}")
                lines.append("")
                lines.append(f"**Error**: {module_report['error']}")
                lines.append("")
                continue

            module_summary = module_report["summary"]

            # Determine module status
            if module_summary["mismatched_tensors"] > 0:
                status = "❌"
            else:
                status = "✅"

            lines.append(f"### {status} {module_name}")
            lines.append("")
            lines.append(
                f"**Summary**: {module_summary['matching_tensors']} matching tensors, {module_summary['mismatched_tensors']} mismatched tensors, {module_summary['missing_tensors']} missing tensors"
            )
            lines.append("")

            # Add metadata comparison results if available
            for metadata_type in ["inputs_metadata", "outputs_metadata"]:
                if metadata_type in module_report:
                    metadata_comparison = module_report[metadata_type]
                    if not metadata_comparison.get("match", True):
                        file_paths = ""
                        if (
                            "file1" in metadata_comparison
                            and "file2" in metadata_comparison
                        ):
                            file_paths = f" - Files: `{metadata_comparison['file1']}` vs `{metadata_comparison['file2']}`"

                        lines.append(
                            f"**{metadata_type.capitalize()}**: Mismatch detected{file_paths}"
                        )
                        if verbose and "mismatches" in metadata_comparison:
                            lines.append("```json")
                            lines.append(
                                json.dumps(metadata_comparison["mismatches"], indent=2)
                            )
                            lines.append("```")
                        lines.append("")

            # Add tensor comparison details
            if module_summary["mismatched_tensors"] > 0 or verbose:
                # Add input tensor details
                if module_report["inputs"]:
                    lines.append("#### Input Tensors")
                    lines.append("")
                    lines.append("| Tensor | Status | Details |")
                    lines.append("|--------|--------|---------|")

                    for tensor_name, comparison in sorted(
                        module_report["inputs"].items()
                    ):
                        if comparison.get("match", False):
                            status = "✅"
                            details = "Tensors match"
                        elif "error" in comparison:
                            status = "❌"
                            details = comparison["error"]
                        else:
                            status = "❌"
                            details = f"Max abs diff: {comparison.get('max_abs_diff', 'N/A'):.2e}, "
                            details = f"Max relative diff: {comparison.get('max_rel_diff', 'N/A'):.2e}, "
                            details += f"Diff elements: {comparison.get('num_diff_elements', 'N/A')}/{comparison.get('total_elements', 'N/A')}"
                            if "file1" in comparison and "file2" in comparison:
                                details += f"<br>Files: `{comparison['file1']}` vs `{comparison['file2']}`"

                        lines.append(f"| {tensor_name} | {status} | {details} |")

                    lines.append("")

                # Add output tensor details
                if module_report["outputs"]:
                    lines.append("#### Output Tensors")
                    lines.append("")
                    lines.append("| Tensor | Status | Details |")
                    lines.append("|--------|--------|---------|")

                    for tensor_name, comparison in sorted(
                        module_report["outputs"].items()
                    ):
                        if comparison.get("match", False):
                            status = "✅"
                            details = "Tensors match"
                        elif "error" in comparison:
                            status = "❌"
                            details = comparison["error"]
                        else:
                            status = "❌"
                            details = f"Max abs diff: {comparison.get('max_abs_diff', 'N/A')}, "
                            details = f"Max relative diff: {comparison.get('max_rel_diff', 'N/A')}, "
                            details += f"Diff elements: {comparison.get('num_diff_elements', 'N/A')}/{comparison.get('total_elements', 'N/A')}"

                        lines.append(f"| {tensor_name} | {status} | {details} |")

                    lines.append("")

    return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(
        description="Compare intermediate logging outputs from two different runs."
    )
    parser.add_argument(
        "--dir1", required=True, help="First intermediate logging directory"
    )
    parser.add_argument(
        "--dir2", required=True, help="Second intermediate logging directory"
    )
    parser.add_argument("--output", help="Output file for the report (default: stdout)")
    parser.add_argument(
        "--rtol",
        type=float,
        default=None,
        help="Relative tolerance for tensor comparison (default: 1e-5)",
    )
    parser.add_argument(
        "--atol",
        type=float,
        default=None,
        help="Absolute tolerance for tensor comparison (default: 1e-8)",
    )
    parser.add_argument(
        "--steps", help="Comma-separated list of steps to compare (default: all)"
    )
    parser.add_argument(
        "--modules",
        help="Comma-separated list of module name patterns to compare (default: all)",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Include detailed information about each tensor",
    )

    args = parser.parse_args()

    # Parse steps and modules
    steps = args.steps.split(",") if args.steps else None
    module_patterns = args.modules.split(",") if args.modules else None

    # Compare directories
    report = compare_directories(
        Path(args.dir1),
        Path(args.dir2),
        rtol=args.rtol,
        atol=args.atol,
        steps=steps,
        module_patterns=module_patterns,
    )

    # Generate report
    output = generate_markdown_report(report, verbose=args.verbose)

    # Write report
    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
            print(f"Report written to {args.output}")
    else:
        print(output)


if __name__ == "__main__":
    main()


def invoke_main() -> None:
    main()