remove feature for il_tool_compare

Signed-off-by: Lucia Fang <fanglu@fb.com>
2026-05-21 13:27:06 +08:00 · 2025-08-05 09:47:16 -07:00 · 2025-08-05 09:47:16 -07:00 · ba90794ff1
commit ba90794ff1
parent ab4ab0fd28
2 changed files with 1 additions and 697 deletions
--- a/docs/contributing/intermediate_logging.md
+++ b/docs/contributing/intermediate_logging.md
@@ -12,8 +12,6 @@ The intermediate tensor logging feature enables you to:
 - Filter tensors by device
 - Filter whole model fwd step id

-This is manily useful for debugging model accucacy gaps with 2 runs
-
 ## Usage

 ### Enabling via parameters or config file
@@ -80,56 +78,4 @@ When you enable intermediate logging, the system creates a timestamped directory
        └── ...
 ```

-Each tensor is saved in two formats:
-1. `.json` files containing metadata and small tensor values
-2. `.pt` files containing the full PyTorch tensors (can be loaded with `torch.load()`)
-
-## Comparing Intermediate Logging Results
-
-vLLM provides a tool called `compare_intermediate.py` to compare intermediate tensors between two different runs. This is particularly useful for debugging accuracy differences or verifying that code changes don't affect model outputs.
-
-### Usage
-
-```bash
-python tools/compare_intermediate.py --dir1 /path/to/first/log/dir --dir2 /path/to/second/log/dir [options]
-```
-
-### Options
-
-| Option | Description | Default |
-|--------|-------------|---------|
-| `--dir1` | First intermediate logging directory | (required) |
-| `--dir2` | Second intermediate logging directory | (required) |
-| `--output` | Output file for the report | stdout |
-| `--rtol` | Relative tolerance for tensor comparison | 1e-5 |
-| `--atol` | Absolute tolerance for tensor comparison | 1e-8 |
-| `--steps` | Comma-separated list of steps to compare | all |
-| `--modules` | Comma-separated list of module name patterns to compare | all |
-| `--verbose` | Include detailed information about each tensor | false |
-
-### Example
-
-```bash
-# Compare all tensors from two different runs
-python tools/compare_intermediate.py --dir1 /tmp/vllm_intermediates/run1 --dir2 /tmp/vllm_intermediates/run2
-
-# Compare only specific modules and steps with custom tolerance
-python tools/compare_intermediate.py \
-  --dir1 /tmp/vllm_intermediates/run1 \
-  --dir2 /tmp/vllm_intermediates/run2 \
-  --steps 0,1 \
-  --modules ".*attention.*,.*mlp.*" \
-  --rtol 1e-4 \
-  --atol 1e-7 \
-  --output comparison_report.md
-```
-
-### Output
-
-The tool generates a detailed markdown report that includes:
-
-- Overall summary of matching and mismatched tensors
-- Per-module comparison results
-- Detailed tensor differences (when using `--verbose`)
-
-This makes it easy to identify which specific tensors differ between runs and by how much.
+Each tensor is saved as a `.pt` file containing the full PyTorch tensor, which can be loaded with `torch.load()`.
--- a/tools/compare_intermediate.py
+++ b/tools/compare_intermediate.py
@@ -1,642 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-Script to compare intermediate logging outputs from two different runs.
-
-This script compares the tensor outputs from two different intermediate logging
-directories and generates a report of the differences.
-
-Usage:
-    python compare_intermediate.py --dir1 /path/to/first/log/dir \
-        --dir2 /path/to/second/log/dir [options]
-
-Options:
-    --dir1 DIR           First intermediate logging directory
-    --dir2 DIR           Second intermediate logging directory
-    --output FILE        Output file for the report (default: stdout)
-    --rtol FLOAT         Relative tolerance for tensor comparison 
-                         (default: 1e-5)
-    --atol FLOAT         Absolute tolerance for tensor comparison 
-                         (default: 1e-8)
-    --steps STEPS        Comma-separated list of steps to compare (default: all)
-    --modules MODULES    Comma-separated list of module name patterns to compare
-                         (default: all)
-    --verbose            Include detailed information about each tensor
-"""
-
-import argparse
-import json
-from collections import defaultdict
-from pathlib import Path
-from typing import Optional
-
-import regex as re
-import torch
-
-
-def load_tensor(path: Path) -> torch.Tensor:
-    """Load a tensor from a .pt file."""
-    try:
-        return torch.load(path, map_location="cpu")
-    except Exception as e:
-        print(f"Error loading tensor from {path}: {e}")
-        return None
-
-
-def extract_diff_metatada(exception_str: str) -> dict:
-    try:
-        num_diff_elements = int(
-            re.search(r"Mismatched elements: (\d+) /", exception_str).group(1))
-        total_elements = int(
-            re.search(r"Mismatched elements: \d+ / (\d+)",
-                      exception_str).group(1))
-        max_abs_diff = float(
-            re.search(r"Greatest absolute difference: ([\d\.e-]+)",
-                      exception_str).group(1))
-        max_rel_diff = float(
-            re.search(r"Greatest relative difference: ([\d\.e-]+)",
-                      exception_str).group(1))
-        return {
-            "num_diff_elements": num_diff_elements,
-            "total_elements": total_elements,
-            "max_abs_diff": max_abs_diff,
-            "max_rel_diff": max_rel_diff,
-        }
-    except Exception:
-        return {"error": exception_str}
-
-
-def compare_tensors(tensor1: torch.Tensor, tensor2: torch.Tensor, rtol: float,
-                    atol: float) -> dict:
-    """Compare two tensors and return a dictionary with comparison results."""
-    if tensor1 is None or tensor2 is None:
-        return {"match": False, "error": "One or both tensors are None"}
-
-    if tensor1.shape != tensor2.shape:
-        return {
-            "match": False,
-            "error": f"Shape mismatch: {tensor1.shape} vs {tensor2.shape}",
-        }
-
-    if tensor1.dtype != tensor2.dtype:
-        return {
-            "match": False,
-            "error": f"Dtype mismatch: {tensor1.dtype} vs {tensor2.dtype}",
-        }
-
-    # Check if tensors are close using PyTorch's assert_close
-    try:
-        torch.testing.assert_close(tensor1, tensor2, rtol=rtol, atol=atol)
-    except Exception as e:
-        return {"match": False, **extract_diff_metatada(str(e))}
-    return {"match": True}
-
-
-def find_tensor_files(
-        directory: Path) -> dict[str, dict[str, dict[str, list[Path]]]]:
-    """
-    Find all tensor files in the given directory.
-
-    Returns a dictionary with the structure:
-    {
-        "step_0": {
-            "module_name_123456": {
-                "inputs": [Path("inputs_0_cuda_0.pt"), ...],
-                "outputs": [Path("output_cuda_0.pt"), ...]
-            },
-            ...
-        },
-        ...
-    }
-    """
-    result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
-
-    # Find all step directories
-    step_dirs = [d for d in directory.glob("step_*") if d.is_dir()]
-
-    for step_dir in step_dirs:
-        step_name = step_dir.name
-
-        # Find all module directories
-        module_dirs = [d for d in step_dir.glob("*") if d.is_dir()]
-
-        for module_dir in module_dirs:
-            module_name = module_dir.name
-
-            # Find input tensor files
-            input_tensors = list(module_dir.glob("inputs_*.pt"))
-            if input_tensors:
-                result[step_name][module_name]["inputs"] = input_tensors
-
-            # Find output tensor files
-            output_tensors = list(module_dir.glob("output*.pt"))
-            if output_tensors:
-                result[step_name][module_name]["outputs"] = output_tensors
-
-    return result
-
-
-def filter_steps_and_modules(
-    tensor_files: dict[str, dict[str, dict[str, list[Path]]]],
-    steps: Optional[list[str]] = None,
-    module_patterns: Optional[list[str]] = None,
-) -> dict[str, dict[str, dict[str, list[Path]]]]:
-    """Filter tensor files by steps and module patterns."""
-    result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
-
-    # Filter steps
-    if steps:
-        step_names = [f"step_{step}" for step in steps]
-        steps_to_include = {step: True for step in step_names}
-    else:
-        steps_to_include = {step: True for step in tensor_files}
-
-    # Compile module patterns
-    if module_patterns:
-        compiled_patterns = [
-            re.compile(pattern) for pattern in module_patterns
-        ]
-    else:
-        compiled_patterns = None
-
-    for step_name, modules in tensor_files.items():
-        if step_name not in steps_to_include:
-            continue
-
-        for module_name, file_types in modules.items():
-            # Check if module matches any pattern
-            if compiled_patterns and not any(
-                    pattern.search(module_name)
-                    for pattern in compiled_patterns):
-                continue
-
-            result[step_name][module_name] = file_types
-
-    return result
-
-
-def compare_directories(
-    dir1: Path,
-    dir2: Path,
-    rtol: Optional[float] = None,
-    atol: Optional[float] = None,
-    steps: Optional[list[str]] = None,
-    module_patterns: Optional[list[str]] = None,
-) -> dict:
-    """Compare two intermediate logging directories and return a report."""
-    # Find tensor files in both directories
-    tensor_files1 = find_tensor_files(dir1)
-    tensor_files2 = find_tensor_files(dir2)
-
-    # Filter by steps and modules
-    if steps or module_patterns:
-        tensor_files1 = filter_steps_and_modules(tensor_files1, steps,
-                                                 module_patterns)
-        tensor_files2 = filter_steps_and_modules(tensor_files2, steps,
-                                                 module_patterns)
-
-    # Get all steps and modules from both directories
-    all_steps = set(tensor_files1.keys()) | set(tensor_files2.keys())
-
-    report = {
-        "dir1": str(dir1),
-        "dir2": str(dir2),
-        "rtol": rtol,
-        "atol": atol,
-        "steps": {},
-    }
-
-    # Compare each step
-    for step in sorted(all_steps):
-        step_report = {
-            "modules": {},
-            "summary": {
-                "total_modules": 0,
-                "matching_modules": 0,
-                "mismatched_modules": 0,
-                "missing_modules": 0,
-            },
-        }
-
-        # Get all modules from both directories for this step
-        modules1 = tensor_files1.get(step, {})
-        modules2 = tensor_files2.get(step, {})
-        # TODO: read from module calls.txt to get the full module list
-        # TODO: check if module calls txt exsits
-        dir1_module_call_file = dir1 / step / "module_calls.txt"
-        if dir1_module_call_file.exists():
-            with open(dir1 / step / "module_calls.txt") as f:
-                all_modules = f.read().splitlines()
-        else:
-            print(
-                "Warnings: the module call orders are missed, ordering using "
-                "module alphbetics")
-            all_modules = sorted(set(modules1.keys()) | set(modules2.keys()))
-        step_report["module_call_list"] = []
-        for module in all_modules:
-            module_report = {
-                "inputs": {},
-                "outputs": {},
-                "summary": {
-                    "total_tensors": 0,
-                    "matching_tensors": 0,
-                    "mismatched_tensors": 0,
-                    "missing_tensors": 0,
-                },
-            }
-
-            # Check if module exists in both directories
-            if module not in modules1:
-                module_report["error"] = f"Module missing in {dir1}"
-                step_report["summary"]["missing_modules"] += 1
-                step_report["modules"][module] = module_report
-                continue
-
-            if module not in modules2:
-                module_report["error"] = f"Module missing in {dir2}"
-                step_report["summary"]["missing_modules"] += 1
-                step_report["modules"][module] = module_report
-                continue
-
-            # Compare input tensors
-            input_tensors1 = {
-                p.name: p
-                for p in modules1[module].get("inputs", [])
-            }
-            input_tensors2 = {
-                p.name: p
-                for p in modules2[module].get("inputs", [])
-            }
-            all_input_names = set(input_tensors1.keys()) | set(
-                input_tensors2.keys())
-
-            for tensor_name in sorted(all_input_names):
-                if tensor_name not in input_tensors1:
-                    module_report["inputs"][tensor_name] = {
-                        "match": False,
-                        "error": f"Tensor missing in {dir1}",
-                    }
-                    module_report["summary"]["missing_tensors"] += 1
-                elif tensor_name not in input_tensors2:
-                    module_report["inputs"][tensor_name] = {
-                        "match": False,
-                        "error": f"Tensor missing in {dir2}",
-                    }
-                    module_report["summary"]["missing_tensors"] += 1
-                else:
-                    tensor1 = load_tensor(input_tensors1[tensor_name])
-                    tensor2 = load_tensor(input_tensors2[tensor_name])
-
-                    comparison = compare_tensors(tensor1, tensor2, rtol, atol)
-                    # Add file paths for manual checking when there's a mismatch
-                    if not comparison.get("match", False):
-                        comparison["file1"] = str(input_tensors1[tensor_name])
-                        comparison["file2"] = str(input_tensors2[tensor_name])
-
-                    module_report["inputs"][tensor_name] = comparison
-
-                    if comparison.get("match", False):
-                        module_report["summary"]["matching_tensors"] += 1
-                    else:
-                        module_report["summary"]["mismatched_tensors"] += 1
-
-                module_report["summary"]["total_tensors"] += 1
-
-            # Compare output tensors
-            output_tensors1 = {
-                p.name: p
-                for p in modules1[module].get("outputs", [])
-            }
-            output_tensors2 = {
-                p.name: p
-                for p in modules2[module].get("outputs", [])
-            }
-            all_output_names = set(output_tensors1.keys()) | set(
-                output_tensors2.keys())
-
-            for tensor_name in sorted(all_output_names):
-                if tensor_name not in output_tensors1:
-                    module_report["outputs"][tensor_name] = {
-                        "match": False,
-                        "error": f"Tensor missing in {dir1}",
-                    }
-                    module_report["summary"]["missing_tensors"] += 1
-                elif tensor_name not in output_tensors2:
-                    module_report["outputs"][tensor_name] = {
-                        "match": False,
-                        "error": f"Tensor missing in {dir2}",
-                    }
-                    module_report["summary"]["missing_tensors"] += 1
-                else:
-                    tensor1 = load_tensor(output_tensors1[tensor_name])
-                    tensor2 = load_tensor(output_tensors2[tensor_name])
-
-                    comparison = compare_tensors(tensor1, tensor2, rtol, atol)
-                    # Add file paths for manual checking when there's a mismatch
-                    if not comparison.get("match", False):
-                        comparison["file1"] = str(output_tensors1[tensor_name])
-                        comparison["file2"] = str(output_tensors2[tensor_name])
-
-                    module_report["outputs"][tensor_name] = comparison
-
-                    if comparison.get("match", False):
-                        module_report["summary"]["matching_tensors"] += 1
-                    else:
-                        module_report["summary"]["mismatched_tensors"] += 1
-
-                module_report["summary"]["total_tensors"] += 1
-
-            # Update module status
-            if module_report["summary"]["mismatched_tensors"] > 0:
-                step_report["summary"]["mismatched_modules"] += 1
-            else:
-                step_report["summary"]["matching_modules"] += 1
-
-            step_report["summary"]["total_modules"] += 1
-            step_report["modules"][module] = module_report
-            step_report["module_call_list"].append(module)
-
-        report["steps"][step] = step_report
-
-    # Add overall summary
-    report["summary"] = {
-        "total_steps":
-        len(all_steps),
-        "total_modules":
-        sum(step_report["summary"]["total_modules"]
-            for step_report in report["steps"].values()),
-        "matching_modules":
-        sum(step_report["summary"]["matching_modules"]
-            for step_report in report["steps"].values()),
-        "mismatched_modules":
-        sum(step_report["summary"]["mismatched_modules"]
-            for step_report in report["steps"].values()),
-        "missing_modules":
-        sum(step_report["summary"]["missing_modules"]
-            for step_report in report["steps"].values()),
-        "total_tensors":
-        sum(module_report["summary"]["total_tensors"]
-            for step_report in report["steps"].values()
-            for module_name, module_report in step_report["modules"].items()
-            if "summary" in module_report),
-        "matching_tensors":
-        sum(module_report["summary"]["matching_tensors"]
-            for step_report in report["steps"].values()
-            for module_name, module_report in step_report["modules"].items()
-            if "summary" in module_report),
-        "mismatched_tensors":
-        sum(module_report["summary"]["mismatched_tensors"]
-            for step_report in report["steps"].values()
-            for module_name, module_report in step_report["modules"].items()
-            if "summary" in module_report),
-        "missing_tensors":
-        sum(module_report["summary"]["missing_tensors"]
-            for step_report in report["steps"].values()
-            for module_name, module_report in step_report["modules"].items()
-            if "summary" in module_report),
-    }
-
-    return report
-
-
-def generate_markdown_report(report: dict, verbose: bool = False) -> str:
-    """Generate a markdown report from the comparison results."""
-    lines = []
-
-    # Add header
-    lines.append("# Intermediate Logging Comparison Report")
-    lines.append("")
-    lines.append("Comparing intermediate logging outputs "
-                 "between:")
-    lines.append(f"- **Directory 1**: `{report['dir1']}`")
-    lines.append(f"- **Directory 2**: `{report['dir2']}`")
-    lines.append("")
-    lines.append("Comparison parameters:")
-    lines.append(f"- Relative tolerance (rtol): {report['rtol']}")
-    lines.append(f"- Absolute tolerance (atol): {report['atol']}")
-    lines.append("")
-
-    # Add overall summary
-    summary = report["summary"]
-    lines.append("## Overall Summary")
-    lines.append("")
-    lines.append("| Category | Total | Matching | Mismatched | Missing |")
-    lines.append("|----------|-------|----------|------------|---------|")
-    lines.append(f"| Steps | {summary['total_steps']} | - | - | - |")
-    lines.append(
-        f"| Modules | {summary['total_modules']} | "
-        f"{summary['matching_modules']} | {summary['mismatched_modules']} | "
-        f"{summary['missing_modules']} |")
-    lines.append(
-        f"| Tensors | {summary['total_tensors']} | "
-        f"{summary['matching_tensors']} | {summary['mismatched_tensors']} | "
-        f"{summary['missing_tensors']} |")
-    lines.append("")
-
-    # Add step details
-    for step_name, step_report in sorted(report["steps"].items()):
-        step_summary = step_report["summary"]
-
-        lines.append(f"## {step_name}")
-        lines.append("")
-        lines.append(
-            f"**Summary**: {step_summary['matching_modules']} matching "
-            f"modules, {step_summary['mismatched_modules']} mismatched "
-            f"modules, {step_summary['missing_modules']} missing modules")
-        lines.append("")
-
-        # Add module details
-        for module_name in step_report["module_call_list"]:
-            module_report = step_report["modules"][module_name]
-            if "error" in module_report:
-                lines.append(f"### ❌ {module_name}")
-                lines.append("")
-                lines.append(f"**Error**: {module_report['error']}")
-                lines.append("")
-                continue
-
-            module_summary = module_report["summary"]
-
-            # Determine module status
-            status = "❌" if module_summary["mismatched_tensors"] > 0 else "✅"
-
-            lines.append(f"### {status} {module_name}")
-            lines.append("")
-            lines.append(
-                f"**Summary**: {module_summary['matching_tensors']} matching "
-                f"tensors, {module_summary['mismatched_tensors']} mismatched "
-                f"tensors, {module_summary['missing_tensors']} missing tensors"
-            )
-            lines.append("")
-
-            # Add metadata comparison results if available
-            for metadata_type in ["inputs_metadata", "outputs_metadata"]:
-                if metadata_type in module_report:
-                    metadata_comparison = module_report[metadata_type]
-                    if not metadata_comparison.get("match", True):
-                        file_paths = ""
-                        if ("file1" in metadata_comparison
-                                and "file2" in metadata_comparison):
-                            file_paths = (
-                                f" - Files: "
-                                f"`{metadata_comparison['file1']}` "
-                                f"vs `{metadata_comparison['file2']}`")
-
-                        lines.append(
-                            f"**{metadata_type.capitalize()}**: Mismatch "
-                            f"detected{file_paths}")
-                        if verbose and "mismatches" in metadata_comparison:
-                            lines.append("```json")
-                            lines.append(
-                                json.dumps(metadata_comparison["mismatches"],
-                                           indent=2))
-                            lines.append("```")
-                        lines.append("")
-
-            # Add tensor comparison details
-            if module_summary["mismatched_tensors"] > 0 or verbose:
-                # Add input tensor details
-                if module_report["inputs"]:
-                    lines.append("#### Input Tensors")
-                    lines.append("")
-                    lines.append("| Tensor | Status | Details |")
-                    lines.append("|--------|--------|---------|")
-
-                    for tensor_name, comparison in sorted(
-                            module_report["inputs"].items()):
-                        if comparison.get("match", False):
-                            status = "✅"
-                            details = "Tensors match"
-                        elif "error" in comparison:
-                            status = "❌"
-                            details = comparison["error"]
-                        else:
-                            status = "❌"
-                            details = (
-                                f"Max abs diff: "
-                                f"{comparison.get('max_abs_diff', 'N/A')}, ")
-                            details += (
-                                f"Max relative diff: "
-                                f"{comparison.get('max_rel_diff', 'N/A')}, ")
-                            details += (
-                                f"Diff elements: "
-                                f"{comparison.get('num_diff_elements', 'N/A')}/"
-                                f"{comparison.get('total_elements', 'N/A')}")
-                            if "file1" in comparison and "file2" in comparison:
-                                details += (
-                                    f"<br>Files: `{comparison['file1']}` vs "
-                                    f"`{comparison['file2']}`")
-
-                        lines.append(
-                            f"| {tensor_name} | {status} | {details} |")
-
-                    lines.append("")
-
-                # Add output tensor details
-                if module_report["outputs"]:
-                    lines.append("#### Output Tensors")
-                    lines.append("")
-                    lines.append("| Tensor | Status | Details |")
-                    lines.append("|--------|--------|---------|")
-
-                    for tensor_name, comparison in sorted(
-                            module_report["outputs"].items()):
-                        if comparison.get("match", False):
-                            status = "✅"
-                            details = "Tensors match"
-                        elif "error" in comparison:
-                            status = "❌"
-                            details = comparison["error"]
-                        else:
-                            status = "❌"
-                            details = (
-                                f"Max abs diff: "
-                                f"{comparison.get('max_abs_diff', 'N/A')}, ")
-                            details += (
-                                f"Max relative diff: "
-                                f"{comparison.get('max_rel_diff', 'N/A')}, ")
-                            details += (
-                                f"Diff elements: "
-                                f"{comparison.get('num_diff_elements', 'N/A')}/"
-                                f"{comparison.get('total_elements', 'N/A')}")
-
-                        lines.append(
-                            f"| {tensor_name} | {status} | {details} |")
-
-                    lines.append("")
-
-    return "\n".join(lines)
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description=
-        "Compare intermediate logging outputs from two different runs.")
-    parser.add_argument("--dir1",
-                        required=True,
-                        help="First intermediate logging directory")
-    parser.add_argument("--dir2",
-                        required=True,
-                        help="Second intermediate logging directory")
-    parser.add_argument("--output",
-                        help="Output file for the report (default: stdout)")
-    parser.add_argument(
-        "--rtol",
-        type=float,
-        default=None,
-        help="Relative tolerance for tensor comparison (default: 1e-5)",
-    )
-    parser.add_argument(
-        "--atol",
-        type=float,
-        default=None,
-        help="Absolute tolerance for tensor comparison (default: 1e-8)",
-    )
-    parser.add_argument(
-        "--steps",
-        help="Comma-separated list of steps to compare (default: all)")
-    parser.add_argument(
-        "--modules",
-        help="Comma-separated list of module name patterns to compare "
-        "(default: all)",
-    )
-    parser.add_argument(
-        "--verbose",
-        action="store_true",
-        help="Include detailed information about each tensor",
-    )
-
-    args = parser.parse_args()
-
-    # Parse steps and modules
-    steps = args.steps.split(",") if args.steps else None
-    module_patterns = args.modules.split(",") if args.modules else None
-
-    # Compare directories
-    report = compare_directories(
-        Path(args.dir1),
-        Path(args.dir2),
-        rtol=args.rtol,
-        atol=args.atol,
-        steps=steps,
-        module_patterns=module_patterns,
-    )
-
-    # Generate report
-    output = generate_markdown_report(report, verbose=args.verbose)
-
-    # Write report
-    if args.output:
-        with open(args.output, "w") as f:
-            f.write(output)
-            print(f"Report written to {args.output}")
-    else:
-        print(output)
-
-
-if __name__ == "__main__":
-    main()
-
-
-def invoke_main() -> None:
-    main()