remove feature for il_tool_compare

Signed-off-by: Lucia Fang <fanglu@fb.com>
This commit is contained in:
Lucia Fang 2025-08-05 09:47:16 -07:00
parent ab4ab0fd28
commit ba90794ff1
2 changed files with 1 additions and 697 deletions

View File

@ -12,8 +12,6 @@ The intermediate tensor logging feature enables you to:
- Filter tensors by device
- Filter whole model fwd step id
This is manily useful for debugging model accucacy gaps with 2 runs
## Usage
### Enabling via parameters or config file
@ -80,56 +78,4 @@ When you enable intermediate logging, the system creates a timestamped directory
└── ...
```
Each tensor is saved in two formats:
1. `.json` files containing metadata and small tensor values
2. `.pt` files containing the full PyTorch tensors (can be loaded with `torch.load()`)
## Comparing Intermediate Logging Results
vLLM provides a tool called `compare_intermediate.py` to compare intermediate tensors between two different runs. This is particularly useful for debugging accuracy differences or verifying that code changes don't affect model outputs.
### Usage
```bash
python tools/compare_intermediate.py --dir1 /path/to/first/log/dir --dir2 /path/to/second/log/dir [options]
```
### Options
| Option | Description | Default |
|--------|-------------|---------|
| `--dir1` | First intermediate logging directory | (required) |
| `--dir2` | Second intermediate logging directory | (required) |
| `--output` | Output file for the report | stdout |
| `--rtol` | Relative tolerance for tensor comparison | 1e-5 |
| `--atol` | Absolute tolerance for tensor comparison | 1e-8 |
| `--steps` | Comma-separated list of steps to compare | all |
| `--modules` | Comma-separated list of module name patterns to compare | all |
| `--verbose` | Include detailed information about each tensor | false |
### Example
```bash
# Compare all tensors from two different runs
python tools/compare_intermediate.py --dir1 /tmp/vllm_intermediates/run1 --dir2 /tmp/vllm_intermediates/run2
# Compare only specific modules and steps with custom tolerance
python tools/compare_intermediate.py \
--dir1 /tmp/vllm_intermediates/run1 \
--dir2 /tmp/vllm_intermediates/run2 \
--steps 0,1 \
--modules ".*attention.*,.*mlp.*" \
--rtol 1e-4 \
--atol 1e-7 \
--output comparison_report.md
```
### Output
The tool generates a detailed markdown report that includes:
- Overall summary of matching and mismatched tensors
- Per-module comparison results
- Detailed tensor differences (when using `--verbose`)
This makes it easy to identify which specific tensors differ between runs and by how much.
Each tensor is saved in a `.pt` file containing the full PyTorch tensors (can be loaded with `torch.load()`)

View File

@ -1,642 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Script to compare intermediate logging outputs from two different runs.
This script compares the tensor outputs from two different intermediate logging
directories and generates a report of the differences.
Usage:
python compare_intermediate.py --dir1 /path/to/first/log/dir \
--dir2 /path/to/second/log/dir [options]
Options:
--dir1 DIR First intermediate logging directory
--dir2 DIR Second intermediate logging directory
--output FILE Output file for the report (default: stdout)
--rtol FLOAT Relative tolerance for tensor comparison
(default: 1e-5)
--atol FLOAT Absolute tolerance for tensor comparison
(default: 1e-8)
--steps STEPS Comma-separated list of steps to compare (default: all)
--modules MODULES Comma-separated list of module name patterns to compare
(default: all)
--verbose Include detailed information about each tensor
"""
import argparse
import json
from collections import defaultdict
from pathlib import Path
from typing import Optional
import regex as re
import torch
def load_tensor(path: Path) -> torch.Tensor:
"""Load a tensor from a .pt file."""
try:
return torch.load(path, map_location="cpu")
except Exception as e:
print(f"Error loading tensor from {path}: {e}")
return None
def extract_diff_metatada(exception_str: str) -> dict:
try:
num_diff_elements = int(
re.search(r"Mismatched elements: (\d+) /", exception_str).group(1))
total_elements = int(
re.search(r"Mismatched elements: \d+ / (\d+)",
exception_str).group(1))
max_abs_diff = float(
re.search(r"Greatest absolute difference: ([\d\.e-]+)",
exception_str).group(1))
max_rel_diff = float(
re.search(r"Greatest relative difference: ([\d\.e-]+)",
exception_str).group(1))
return {
"num_diff_elements": num_diff_elements,
"total_elements": total_elements,
"max_abs_diff": max_abs_diff,
"max_rel_diff": max_rel_diff,
}
except Exception:
return {"error": exception_str}
def compare_tensors(tensor1: torch.Tensor, tensor2: torch.Tensor, rtol: float,
atol: float) -> dict:
"""Compare two tensors and return a dictionary with comparison results."""
if tensor1 is None or tensor2 is None:
return {"match": False, "error": "One or both tensors are None"}
if tensor1.shape != tensor2.shape:
return {
"match": False,
"error": f"Shape mismatch: {tensor1.shape} vs {tensor2.shape}",
}
if tensor1.dtype != tensor2.dtype:
return {
"match": False,
"error": f"Dtype mismatch: {tensor1.dtype} vs {tensor2.dtype}",
}
# Check if tensors are close using PyTorch's assert_close
try:
torch.testing.assert_close(tensor1, tensor2, rtol=rtol, atol=atol)
except Exception as e:
return {"match": False, **extract_diff_metatada(str(e))}
return {"match": True}
def find_tensor_files(
directory: Path) -> dict[str, dict[str, dict[str, list[Path]]]]:
"""
Find all tensor files in the given directory.
Returns a dictionary with the structure:
{
"step_0": {
"module_name_123456": {
"inputs": [Path("inputs_0_cuda_0.pt"), ...],
"outputs": [Path("output_cuda_0.pt"), ...]
},
...
},
...
}
"""
result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# Find all step directories
step_dirs = [d for d in directory.glob("step_*") if d.is_dir()]
for step_dir in step_dirs:
step_name = step_dir.name
# Find all module directories
module_dirs = [d for d in step_dir.glob("*") if d.is_dir()]
for module_dir in module_dirs:
module_name = module_dir.name
# Find input tensor files
input_tensors = list(module_dir.glob("inputs_*.pt"))
if input_tensors:
result[step_name][module_name]["inputs"] = input_tensors
# Find output tensor files
output_tensors = list(module_dir.glob("output*.pt"))
if output_tensors:
result[step_name][module_name]["outputs"] = output_tensors
return result
def filter_steps_and_modules(
tensor_files: dict[str, dict[str, dict[str, list[Path]]]],
steps: Optional[list[str]] = None,
module_patterns: Optional[list[str]] = None,
) -> dict[str, dict[str, dict[str, list[Path]]]]:
"""Filter tensor files by steps and module patterns."""
result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# Filter steps
if steps:
step_names = [f"step_{step}" for step in steps]
steps_to_include = {step: True for step in step_names}
else:
steps_to_include = {step: True for step in tensor_files}
# Compile module patterns
if module_patterns:
compiled_patterns = [
re.compile(pattern) for pattern in module_patterns
]
else:
compiled_patterns = None
for step_name, modules in tensor_files.items():
if step_name not in steps_to_include:
continue
for module_name, file_types in modules.items():
# Check if module matches any pattern
if compiled_patterns and not any(
pattern.search(module_name)
for pattern in compiled_patterns):
continue
result[step_name][module_name] = file_types
return result
def compare_directories(
dir1: Path,
dir2: Path,
rtol: Optional[float] = None,
atol: Optional[float] = None,
steps: Optional[list[str]] = None,
module_patterns: Optional[list[str]] = None,
) -> dict:
"""Compare two intermediate logging directories and return a report."""
# Find tensor files in both directories
tensor_files1 = find_tensor_files(dir1)
tensor_files2 = find_tensor_files(dir2)
# Filter by steps and modules
if steps or module_patterns:
tensor_files1 = filter_steps_and_modules(tensor_files1, steps,
module_patterns)
tensor_files2 = filter_steps_and_modules(tensor_files2, steps,
module_patterns)
# Get all steps and modules from both directories
all_steps = set(tensor_files1.keys()) | set(tensor_files2.keys())
report = {
"dir1": str(dir1),
"dir2": str(dir2),
"rtol": rtol,
"atol": atol,
"steps": {},
}
# Compare each step
for step in sorted(all_steps):
step_report = {
"modules": {},
"summary": {
"total_modules": 0,
"matching_modules": 0,
"mismatched_modules": 0,
"missing_modules": 0,
},
}
# Get all modules from both directories for this step
modules1 = tensor_files1.get(step, {})
modules2 = tensor_files2.get(step, {})
# TODO: read from module calls.txt to get the full module list
# TODO: check if module calls txt exsits
dir1_module_call_file = dir1 / step / "module_calls.txt"
if dir1_module_call_file.exists():
with open(dir1 / step / "module_calls.txt") as f:
all_modules = f.read().splitlines()
else:
print(
"Warnings: the module call orders are missed, ordering using "
"module alphbetics")
all_modules = sorted(set(modules1.keys()) | set(modules2.keys()))
step_report["module_call_list"] = []
for module in all_modules:
module_report = {
"inputs": {},
"outputs": {},
"summary": {
"total_tensors": 0,
"matching_tensors": 0,
"mismatched_tensors": 0,
"missing_tensors": 0,
},
}
# Check if module exists in both directories
if module not in modules1:
module_report["error"] = f"Module missing in {dir1}"
step_report["summary"]["missing_modules"] += 1
step_report["modules"][module] = module_report
continue
if module not in modules2:
module_report["error"] = f"Module missing in {dir2}"
step_report["summary"]["missing_modules"] += 1
step_report["modules"][module] = module_report
continue
# Compare input tensors
input_tensors1 = {
p.name: p
for p in modules1[module].get("inputs", [])
}
input_tensors2 = {
p.name: p
for p in modules2[module].get("inputs", [])
}
all_input_names = set(input_tensors1.keys()) | set(
input_tensors2.keys())
for tensor_name in sorted(all_input_names):
if tensor_name not in input_tensors1:
module_report["inputs"][tensor_name] = {
"match": False,
"error": f"Tensor missing in {dir1}",
}
module_report["summary"]["missing_tensors"] += 1
elif tensor_name not in input_tensors2:
module_report["inputs"][tensor_name] = {
"match": False,
"error": f"Tensor missing in {dir2}",
}
module_report["summary"]["missing_tensors"] += 1
else:
tensor1 = load_tensor(input_tensors1[tensor_name])
tensor2 = load_tensor(input_tensors2[tensor_name])
comparison = compare_tensors(tensor1, tensor2, rtol, atol)
# Add file paths for manual checking when there's a mismatch
if not comparison.get("match", False):
comparison["file1"] = str(input_tensors1[tensor_name])
comparison["file2"] = str(input_tensors2[tensor_name])
module_report["inputs"][tensor_name] = comparison
if comparison.get("match", False):
module_report["summary"]["matching_tensors"] += 1
else:
module_report["summary"]["mismatched_tensors"] += 1
module_report["summary"]["total_tensors"] += 1
# Compare output tensors
output_tensors1 = {
p.name: p
for p in modules1[module].get("outputs", [])
}
output_tensors2 = {
p.name: p
for p in modules2[module].get("outputs", [])
}
all_output_names = set(output_tensors1.keys()) | set(
output_tensors2.keys())
for tensor_name in sorted(all_output_names):
if tensor_name not in output_tensors1:
module_report["outputs"][tensor_name] = {
"match": False,
"error": f"Tensor missing in {dir1}",
}
module_report["summary"]["missing_tensors"] += 1
elif tensor_name not in output_tensors2:
module_report["outputs"][tensor_name] = {
"match": False,
"error": f"Tensor missing in {dir2}",
}
module_report["summary"]["missing_tensors"] += 1
else:
tensor1 = load_tensor(output_tensors1[tensor_name])
tensor2 = load_tensor(output_tensors2[tensor_name])
comparison = compare_tensors(tensor1, tensor2, rtol, atol)
# Add file paths for manual checking when there's a mismatch
if not comparison.get("match", False):
comparison["file1"] = str(output_tensors1[tensor_name])
comparison["file2"] = str(output_tensors2[tensor_name])
module_report["outputs"][tensor_name] = comparison
if comparison.get("match", False):
module_report["summary"]["matching_tensors"] += 1
else:
module_report["summary"]["mismatched_tensors"] += 1
module_report["summary"]["total_tensors"] += 1
# Update module status
if module_report["summary"]["mismatched_tensors"] > 0:
step_report["summary"]["mismatched_modules"] += 1
else:
step_report["summary"]["matching_modules"] += 1
step_report["summary"]["total_modules"] += 1
step_report["modules"][module] = module_report
step_report["module_call_list"].append(module)
report["steps"][step] = step_report
# Add overall summary
report["summary"] = {
"total_steps":
len(all_steps),
"total_modules":
sum(step_report["summary"]["total_modules"]
for step_report in report["steps"].values()),
"matching_modules":
sum(step_report["summary"]["matching_modules"]
for step_report in report["steps"].values()),
"mismatched_modules":
sum(step_report["summary"]["mismatched_modules"]
for step_report in report["steps"].values()),
"missing_modules":
sum(step_report["summary"]["missing_modules"]
for step_report in report["steps"].values()),
"total_tensors":
sum(module_report["summary"]["total_tensors"]
for step_report in report["steps"].values()
for module_name, module_report in step_report["modules"].items()
if "summary" in module_report),
"matching_tensors":
sum(module_report["summary"]["matching_tensors"]
for step_report in report["steps"].values()
for module_name, module_report in step_report["modules"].items()
if "summary" in module_report),
"mismatched_tensors":
sum(module_report["summary"]["mismatched_tensors"]
for step_report in report["steps"].values()
for module_name, module_report in step_report["modules"].items()
if "summary" in module_report),
"missing_tensors":
sum(module_report["summary"]["missing_tensors"]
for step_report in report["steps"].values()
for module_name, module_report in step_report["modules"].items()
if "summary" in module_report),
}
return report
def generate_markdown_report(report: dict, verbose: bool = False) -> str:
"""Generate a markdown report from the comparison results."""
lines = []
# Add header
lines.append("# Intermediate Logging Comparison Report")
lines.append("")
lines.append("Comparing intermediate logging outputs "
"between:")
lines.append(f"- **Directory 1**: `{report['dir1']}`")
lines.append(f"- **Directory 2**: `{report['dir2']}`")
lines.append("")
lines.append("Comparison parameters:")
lines.append(f"- Relative tolerance (rtol): {report['rtol']}")
lines.append(f"- Absolute tolerance (atol): {report['atol']}")
lines.append("")
# Add overall summary
summary = report["summary"]
lines.append("## Overall Summary")
lines.append("")
lines.append("| Category | Total | Matching | Mismatched | Missing |")
lines.append("|----------|-------|----------|------------|---------|")
lines.append(f"| Steps | {summary['total_steps']} | - | - | - |")
lines.append(
f"| Modules | {summary['total_modules']} | "
f"{summary['matching_modules']} | {summary['mismatched_modules']} | "
f"{summary['missing_modules']} |")
lines.append(
f"| Tensors | {summary['total_tensors']} | "
f"{summary['matching_tensors']} | {summary['mismatched_tensors']} | "
f"{summary['missing_tensors']} |")
lines.append("")
# Add step details
for step_name, step_report in sorted(report["steps"].items()):
step_summary = step_report["summary"]
lines.append(f"## {step_name}")
lines.append("")
lines.append(
f"**Summary**: {step_summary['matching_modules']} matching "
f"modules, {step_summary['mismatched_modules']} mismatched "
f"modules, {step_summary['missing_modules']} missing modules")
lines.append("")
# Add module details
for module_name in step_report["module_call_list"]:
module_report = step_report["modules"][module_name]
if "error" in module_report:
lines.append(f"### ❌ {module_name}")
lines.append("")
lines.append(f"**Error**: {module_report['error']}")
lines.append("")
continue
module_summary = module_report["summary"]
# Determine module status
status = "" if module_summary["mismatched_tensors"] > 0 else ""
lines.append(f"### {status} {module_name}")
lines.append("")
lines.append(
f"**Summary**: {module_summary['matching_tensors']} matching "
f"tensors, {module_summary['mismatched_tensors']} mismatched "
f"tensors, {module_summary['missing_tensors']} missing tensors"
)
lines.append("")
# Add metadata comparison results if available
for metadata_type in ["inputs_metadata", "outputs_metadata"]:
if metadata_type in module_report:
metadata_comparison = module_report[metadata_type]
if not metadata_comparison.get("match", True):
file_paths = ""
if ("file1" in metadata_comparison
and "file2" in metadata_comparison):
file_paths = (
f" - Files: "
f"`{metadata_comparison['file1']}` "
f"vs `{metadata_comparison['file2']}`")
lines.append(
f"**{metadata_type.capitalize()}**: Mismatch "
f"detected{file_paths}")
if verbose and "mismatches" in metadata_comparison:
lines.append("```json")
lines.append(
json.dumps(metadata_comparison["mismatches"],
indent=2))
lines.append("```")
lines.append("")
# Add tensor comparison details
if module_summary["mismatched_tensors"] > 0 or verbose:
# Add input tensor details
if module_report["inputs"]:
lines.append("#### Input Tensors")
lines.append("")
lines.append("| Tensor | Status | Details |")
lines.append("|--------|--------|---------|")
for tensor_name, comparison in sorted(
module_report["inputs"].items()):
if comparison.get("match", False):
status = ""
details = "Tensors match"
elif "error" in comparison:
status = ""
details = comparison["error"]
else:
status = ""
details = (
f"Max abs diff: "
f"{comparison.get('max_abs_diff', 'N/A')}, ")
details += (
f"Max relative diff: "
f"{comparison.get('max_rel_diff', 'N/A')}, ")
details += (
f"Diff elements: "
f"{comparison.get('num_diff_elements', 'N/A')}/"
f"{comparison.get('total_elements', 'N/A')}")
if "file1" in comparison and "file2" in comparison:
details += (
f"<br>Files: `{comparison['file1']}` vs "
f"`{comparison['file2']}`")
lines.append(
f"| {tensor_name} | {status} | {details} |")
lines.append("")
# Add output tensor details
if module_report["outputs"]:
lines.append("#### Output Tensors")
lines.append("")
lines.append("| Tensor | Status | Details |")
lines.append("|--------|--------|---------|")
for tensor_name, comparison in sorted(
module_report["outputs"].items()):
if comparison.get("match", False):
status = ""
details = "Tensors match"
elif "error" in comparison:
status = ""
details = comparison["error"]
else:
status = ""
details = (
f"Max abs diff: "
f"{comparison.get('max_abs_diff', 'N/A')}, ")
details += (
f"Max relative diff: "
f"{comparison.get('max_rel_diff', 'N/A')}, ")
details += (
f"Diff elements: "
f"{comparison.get('num_diff_elements', 'N/A')}/"
f"{comparison.get('total_elements', 'N/A')}")
lines.append(
f"| {tensor_name} | {status} | {details} |")
lines.append("")
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(
description=
"Compare intermediate logging outputs from two different runs.")
parser.add_argument("--dir1",
required=True,
help="First intermediate logging directory")
parser.add_argument("--dir2",
required=True,
help="Second intermediate logging directory")
parser.add_argument("--output",
help="Output file for the report (default: stdout)")
parser.add_argument(
"--rtol",
type=float,
default=None,
help="Relative tolerance for tensor comparison (default: 1e-5)",
)
parser.add_argument(
"--atol",
type=float,
default=None,
help="Absolute tolerance for tensor comparison (default: 1e-8)",
)
parser.add_argument(
"--steps",
help="Comma-separated list of steps to compare (default: all)")
parser.add_argument(
"--modules",
help="Comma-separated list of module name patterns to compare "
"(default: all)",
)
parser.add_argument(
"--verbose",
action="store_true",
help="Include detailed information about each tensor",
)
args = parser.parse_args()
# Parse steps and modules
steps = args.steps.split(",") if args.steps else None
module_patterns = args.modules.split(",") if args.modules else None
# Compare directories
report = compare_directories(
Path(args.dir1),
Path(args.dir2),
rtol=args.rtol,
atol=args.atol,
steps=steps,
module_patterns=module_patterns,
)
# Generate report
output = generate_markdown_report(report, verbose=args.verbose)
# Write report
if args.output:
with open(args.output, "w") as f:
f.write(output)
print(f"Report written to {args.output}")
else:
print(output)
if __name__ == "__main__":
main()
def invoke_main() -> None:
main()