From bc3b20f81f91d2fd447d06c9cae8f523983b9767 Mon Sep 17 00:00:00 2001 From: LiuXiaoxuanPKU Date: Sun, 3 Aug 2025 20:06:15 -0700 Subject: [PATCH] accepted length code --- benchmarks/run.sh | 95 +++++++++++++-- benchmarks/visualize/common.py | 63 ++++++++++ benchmarks/visualize/vis_acc.py | 143 +++++++++++++++-------- benchmarks/visualize/vis_acc_diff.py | 4 +- benchmarks/visualize/vis_prob_entropy.py | 38 ++++++ 5 files changed, 286 insertions(+), 57 deletions(-) create mode 100644 benchmarks/visualize/common.py create mode 100644 benchmarks/visualize/vis_prob_entropy.py diff --git a/benchmarks/run.sh b/benchmarks/run.sh index 8b35a237807a7..1deed20d1d26e 100644 --- a/benchmarks/run.sh +++ b/benchmarks/run.sh @@ -66,6 +66,15 @@ # --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}' +python benchmarks/benchmark_throughput.py \ + --model meta-llama/Meta-Llama-3.1-8B-Instruct\ + --dataset-name hf \ + --dataset-path philschmid/mt-bench \ + --prefix-len 0 \ + --output-len 512 \ + --num-prompts 200 \ + --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}' + # python benchmarks/benchmark_throughput.py \ # --model meta-llama/Meta-Llama-3.1-8B-Instruct \ @@ -119,18 +128,86 @@ # --dataset-name hf \ # --dataset-path AI-MO/aimo-validation-aime \ # --prefix-len 0 \ -# --output-len 5120 \ +# --output-len 1024 \ # --num-prompts 90 \ # --speculative_config '{"method": "eagle3", "num_speculative_tokens": 20, "model": "yuhuili/EAGLE3-DeepSeek-R1-Distill-LLaMA-8B"}' -python benchmarks/benchmark_throughput.py \ - --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ - --dataset-name hf \ - --dataset-path AI-MO/aimo-validation-aime \ - --prefix-len 0 \ - --output-len 5120 \ - --num-prompts 90 \ - --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}' +# python 
benchmarks/benchmark_throughput.py \ +# --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ +# --dataset-name hf \ +# --dataset-path AI-MO/aimo-validation-aime \ +# --prefix-len 0 \ +# --output-len 1024 \ +# --num-prompts 90 \ +# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}' + + +# python benchmarks/benchmark_throughput.py \ +# --model meta-llama/Meta-Llama-3.1-8B-Instruct \ +# --dataset-name sharegpt \ +# --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json \ +# --prefix-len 0 \ +# --output-len 512 \ +# --num-prompts 200 \ +# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}' + + +# python benchmarks/benchmark_throughput.py \ +# --model meta-llama/Meta-Llama-3.1-8B-Instruct \ +# --dataset-name hf \ +# --dataset-path philschmid/mt-bench \ +# --prefix-len 0 \ +# --output-len 512 \ +# --num-prompts 200 \ +# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}' + +# python benchmarks/benchmark_throughput.py \ +# --model meta-llama/Meta-Llama-3.1-8B-Instruct \ +# --dataset-name hf \ +# --dataset-path philschmid/mt-bench \ +# --prefix-len 0 \ +# --output-len 512 \ +# --num-prompts 200 \ +# --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}' + + +# python benchmarks/benchmark_throughput.py \ +# --model meta-llama/Meta-Llama-3.1-8B-Instruct \ +# --dataset-name hf \ +# --dataset-path abisee/cnn_dailymail \ +# --prefix-len 0 \ +# --output-len 512 \ +# --num-prompts 200 \ +# --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}' + +# python benchmarks/benchmark_throughput.py \ +# --model meta-llama/Meta-Llama-3.1-8B-Instruct \ +# --dataset-name hf \ +# --dataset-path abisee/cnn_dailymail \ +# --prefix-len 0 \ +# --output-len 
import json
from dataclasses import dataclass
from typing import Optional

# Short model aliases used by the visualization scripts, mapped to the
# Hugging Face model ids that are passed to the tokenizer loader.
MODEL_TO_NAMES = {
    "r1-distill-llama-8B": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "llama3-8B": "meta-llama/Meta-Llama-3-8B-Instruct",
    "llama3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "llama3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
}


@dataclass
class AccStats:
    """Per-request acceptance statistics read from one JSONL record.

    Attributes:
        lens: Accepted length per step. The first entry (the prefill step)
            is dropped on construction.
        probs: Optional per-step probability entries, aligned with ``lens``
            on input. The last entry is dropped on construction.
            NOTE(review): consumers slice each entry, so each entry is
            presumably a list of per-token floats -- confirm with the writer.
        entropies: Optional per-step entropy entries, treated like ``probs``.

    Raises:
        ValueError: If ``probs`` or ``entropies`` is given and its length
            differs from ``lens``.
    """

    lens: list
    probs: Optional[list] = None
    entropies: Optional[list] = None

    def __post_init__(self):
        # Validate with real exceptions: `assert` is stripped under `python -O`.
        if self.probs is not None and len(self.lens) != len(self.probs):
            raise ValueError("Length of lens and probs must match")
        if self.entropies is not None and len(self.lens) != len(self.entropies):
            raise ValueError("Length of lens and entropies must match")

        # Remove the prefill accepted len.
        self.lens = self.lens[1:]

        # Remove the last proposed tokens so the optional series keep the
        # same length as the trimmed `lens`.
        if self.probs is not None:
            self.probs = self.probs[:-1]
        if self.entropies is not None:
            self.entropies = self.entropies[:-1]

    @property
    def length(self):
        """Number of steps kept after trimming the prefill entry."""
        return len(self.lens)


def load_data(datapath, tokenizer, verbose=False):
    """Load acceptance statistics from a JSONL file.

    Each non-blank line must be a JSON object with an ``acc`` mapping that
    holds ``acc_len`` and, optionally, ``acc_prob`` and ``acc_entropy``.

    Args:
        datapath: Path of the JSONL file to read.
        tokenizer: Object with a ``decode`` method. Only used when
            ``verbose`` is true, so it may be ``None`` otherwise.
        verbose: When true, print the decoded prompt and output of every
            record (read from ``prompt_token_ids`` / ``generated_token_ids``).

    Returns:
        A list with one ``AccStats`` per record, in file order. The list is
        empty when the file has no records.
    """
    acceptance_stats = []
    with open(datapath, "r", encoding="utf-8") as f:
        # Stream the file instead of materialising every line up front.
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data = json.loads(line)
            acc = data['acc']
            acceptance_stats.append(
                AccStats(
                    lens=acc['acc_len'],
                    probs=acc.get('acc_prob'),
                    entropies=acc.get('acc_entropy'),
                )
            )
            if verbose:
                print("Input:", tokenizer.decode(data['prompt_token_ids']))
                print("Output:", tokenizer.decode(data['generated_token_ids']))
                print("=============================================")

    # `default=0` keeps an empty file from raising ValueError in max().
    max_length = max((stats.length for stats in acceptance_stats), default=0)

    print(f"Load {len(acceptance_stats)} with max length {max_length}")
    return acceptance_stats
# Value used to pad shorter requests so the heatmap matrix is rectangular
# (same padding value the previous, script-style implementation used).
_PAD_VALUE = -2


class AcceptanceStatsClient:
    """Client for fetching and processing acceptance statistics data.

    NOTE(review): this module is executed as a script, but the file imports
    its helpers with ``from .common import ...``; a relative import fails
    with ImportError when there is no parent package. The sibling script
    uses ``from common import ...`` -- the two should agree.
    """

    def __init__(self, model_name, method, dataset, data_path=None):
        """Initialize the client with model and dataset info.

        Args:
            model_name: Key into ``MODEL_TO_NAMES``; a missing key raises
                ``KeyError``.
            method: Speculation method name, used in paths and titles.
            dataset: Dataset name, used in paths and titles.
            data_path: JSONL file to read. When ``None`` a default path is
                built from ``model_name``, ``method`` and ``dataset``.
        """
        self.model_name = model_name
        self.method = method
        self.dataset = dataset

        if data_path is None:
            self.data_path = f"/data/lily/batch-sd/data/{model_name}/{method}_{dataset}_acceptance_stats.jsonl"
        else:
            self.data_path = data_path

        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_TO_NAMES[model_name], use_fast=False)
        self.acceptance_stats = None

    def load_data(self):
        """Load the acceptance statistics from file and cache them."""
        self.acceptance_stats = load_data(self.data_path, self.tokenizer)
        return self.acceptance_stats

    def _accepted_lens(self):
        """Return one list of accepted lengths per request.

        Loads the data on first use. Entries exposing a ``lens`` attribute
        are unwrapped; plain sequences are used as they are.
        """
        if self.acceptance_stats is None:
            self.load_data()
        return [list(getattr(stat, "lens", stat)) for stat in self.acceptance_stats]

    def plot_heatmap(self, output_dir="figures"):
        """Plot the acceptance statistics as a heatmap and save it as PDF.

        Args:
            output_dir: Root directory for figures; the file is written to
                ``<output_dir>/<model_name>/``.

        Returns:
            The matplotlib figure that was saved.

        Raises:
            ValueError: If there are no acceptance statistics to plot.
        """
        rows = self._accepted_lens()
        if not rows:
            raise ValueError("No acceptance statistics to plot")

        # The heatmap needs a rectangular numeric matrix, so pad every
        # request to the longest one.
        width = max(len(row) for row in rows)
        matrix = [row + [_PAD_VALUE] * (width - len(row)) for row in rows]

        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(matrix, cmap="YlGnBu", ax=ax)
        ax.set_xlabel("Position")
        ax.set_ylabel("Request ID")

        # Add Y-axis labels on the right
        ax2 = ax.twinx()
        ax2.set_ylim(ax.get_ylim())
        ax2.set_yticks([])
        ax2.set_ylabel("# of Accepted Tokens", labelpad=10)

        ax.set_title(f"Acceptance Statistics: {self.model_name} - {self.method} - {self.dataset}")
        fig.tight_layout()

        # Create output directory if it doesn't exist
        output_path = Path(output_dir) / self.model_name
        output_path.mkdir(parents=True, exist_ok=True)

        output_file = output_path / f"{self.method}_{self.dataset}_acceptance_stats.pdf"
        fig.savefig(output_file)
        print(f"Saved heatmap to {output_file}")
        return fig

    def get_summary_stats(self):
        """Get summary statistics about the acceptance data.

        Returns:
            A dict with ``total_requests``, ``max_position``,
            ``avg_acceptance_rate``, ``avg_by_position`` and
            ``avg_by_request``. Position averages only count the requests
            that reach that position; requests with no steps are left out
            of ``avg_by_request``. ``avg_acceptance_rate`` is ``0.0`` when
            there is nothing to average.
        """
        rows = self._accepted_lens()

        # Requests have different lengths, so accumulate per-position sums
        # and counts instead of zipping (which would truncate to the
        # shortest request).
        sums = []
        counts = []
        for row in rows:
            for pos, value in enumerate(row):
                if pos == len(sums):
                    sums.append(0)
                    counts.append(0)
                sums[pos] += value
                counts[pos] += 1
        avg_by_position = [total / count for total, count in zip(sums, counts)]

        avg_by_request = [sum(row) / len(row) for row in rows if row]
        overall = sum(avg_by_request) / len(avg_by_request) if avg_by_request else 0.0

        return {
            "total_requests": len(rows),
            "max_position": len(avg_by_position),
            "avg_acceptance_rate": overall,
            "avg_by_position": avg_by_position,
            "avg_by_request": avg_by_request,
        }


# Example model configuration.
# Other combinations used so far: model "r1-distill-llama-8B";
# method "ngram"; dataset "aime" / "cnndailymail".
model = "llama3.1-8B"
method = "eagle3"
dataset = "mtbench"
datapath = "acceptance_stats.jsonl"


if __name__ == "__main__":
    # The client loads its own tokenizer, so no module-level tokenizer is
    # created here (it was never used and doubled the load time).
    client = AcceptanceStatsClient(model, method, dataset, datapath)
    client.load_data()

    # Get summary statistics
    summary = client.get_summary_stats()
    print("Summary Statistics:")
    print(f"Total Requests: {summary['total_requests']}")
    print(f"Max Position: {summary['max_position']}")
    print(f"Average Acceptance Rate: {summary['avg_acceptance_rate']:.2f}")

    # Create heatmap visualization
    plot_heatmap = False
    if plot_heatmap:
        client.plot_heatmap()
def split_accepted_rejected_probs(acceptance_stats):
    """Split proposed-token probabilities into accepted and rejected.

    For every step, the first ``acc_len - 1`` probabilities are counted as
    accepted and the remaining ones as rejected.
    NOTE(review): this presumes ``acc_len`` includes one token beyond the
    accepted proposals -- confirm against the code that writes the stats.

    Args:
        acceptance_stats: Iterable of objects with ``lens`` and ``probs``
            attributes. Objects whose ``probs`` is ``None`` are skipped.

    Returns:
        A ``(accepted, rejected)`` tuple of flat lists of probabilities.
    """
    accepted = []
    rejected = []
    for stat in acceptance_stats:
        if stat.probs is None:
            # This record carries no probabilities; nothing to classify.
            continue
        for acc_len, step_probs in zip(stat.lens, stat.probs):
            # Clamp at zero: a plain `acc_len - 1` of -1 would slice off the
            # last element and count the rest as accepted.
            n_accepted = max(acc_len - 1, 0)
            accepted.extend(step_probs[:n_accepted])
            rejected.extend(step_probs[n_accepted:])
    return accepted, rejected


def plot_prob_entropy(acceptance_stats,
                      output_path):
    """Plot histograms of accepted vs. rejected probabilities.

    Args:
        acceptance_stats: Iterable of per-request statistics with ``lens``
            and ``probs`` attributes.
        output_path: Destination passed to ``Figure.savefig``.
    """
    # Imported here so the module can be imported without a plotting backend.
    import matplotlib.pyplot as plt

    acc_probs, rej_probs = split_accepted_rejected_probs(acceptance_stats)

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.hist(acc_probs, bins=100, alpha=0.5,
            label='Accepted Probabilities', color='green')
    ax.hist(rej_probs, bins=100, alpha=0.5,
            label='Rejected Probabilities', color='red')
    ax.set_xlabel('Probability')
    ax.set_ylabel('Frequency')
    ax.set_title('Distribution of Accepted and Rejected Probabilities')
    ax.legend()
    fig.tight_layout()
    fig.savefig(output_path)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


if __name__ == "__main__":
    from transformers import AutoTokenizer
    from common import MODEL_TO_NAMES, load_data

    datapath = "/data/lily/sd-benchmark-paper/batch-sd/acceptance_stats.jsonl"
    model = "llama3.1-8B"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_TO_NAMES[model],
                                              use_fast=False)
    acceptance_stats = load_data(datapath, tokenizer)
    plot_prob_entropy(acceptance_stats, output_path="prob_entropy_figures")