accepted length code

2026-06-06 22:35:47 +08:00 · 2025-08-03 20:06:15 -07:00 · 2025-08-03 20:06:15 -07:00 · bc3b20f81f
commit bc3b20f81f
parent 54be44ee74
5 changed files with 286 additions and 57 deletions
--- a/benchmarks/run.sh
+++ b/benchmarks/run.sh
@ -66,6 +66,15 @@
 #     --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
 python benchmarks/benchmark_throughput.py \
    --model meta-llama/Meta-Llama-3.1-8B-Instruct\
    --dataset-name hf \
    --dataset-path philschmid/mt-bench  \
    --prefix-len 0 \
    --output-len 512 \
    --num-prompts 200 \
    --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
@ -119,18 +128,86 @@
 #     --dataset-name hf \
 #     --dataset-path AI-MO/aimo-validation-aime \
 #     --prefix-len 0 \
-#     --output-len 5120 \
+#     --output-len 1024 \
 #     --num-prompts 90 \
 #     --speculative_config '{"method": "eagle3", "num_speculative_tokens": 20, "model": "yuhuili/EAGLE3-DeepSeek-R1-Distill-LLaMA-8B"}'
-python benchmarks/benchmark_throughput.py \
+# python benchmarks/benchmark_throughput.py \
-    --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
+#     --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
-    --dataset-name hf \
+#     --dataset-name hf \
-    --dataset-path AI-MO/aimo-validation-aime \
+#     --dataset-path AI-MO/aimo-validation-aime \
-    --prefix-len 0 \
+#     --prefix-len 0 \
-    --output-len 5120 \
+#     --output-len 1024 \
-    --num-prompts 90 \
+#     --num-prompts 90 \
-    --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
+#     --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name sharegpt \
 #     --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json  \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 200 \
 #     --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name hf \
 #     --dataset-path philschmid/mt-bench \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 200 \
 #     --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name hf \
 #     --dataset-path philschmid/mt-bench \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 200 \
 #     --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name hf \
 #     --dataset-path abisee/cnn_dailymail \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 200 \
 #     --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name hf \
 #     --dataset-path abisee/cnn_dailymail \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 200 \
 #     --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name hf \
 #     --dataset-path philschmid/mt-bench \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 10 \
 #     --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
 # python benchmarks/benchmark_throughput.py \
 #     --model meta-llama/Meta-Llama-3.1-8B-Instruct \
 #     --dataset-name hf \
 #     --dataset-path abisee/cnn_dailymail \
 #     --prefix-len 0 \
 #     --output-len 512 \
 #     --num-prompts 200 \
 #     --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
--- a/benchmarks/visualize/common.py
+++ b/benchmarks/visualize/common.py
@ -0,0 +1,63 @@
 import json
 from dataclasses import dataclass
 from typing import Optional
 MODEL_TO_NAMES = {
    "r1-distill-llama-8B" : "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "llama3-8B" : "meta-llama/Meta-Llama-3-8B-Instruct",
    "llama3.1-8B" : "meta-llama/Llama-3.1-8B-Instruct",
    "llama3.1-70B" : "meta-llama/Llama-3.1-70B-Instruct",
 }
@dataclass
class AccStats:
    """Per-request acceptance statistics recorded step by step.

    Attributes:
        lens: Accepted length recorded at each step.
        probs: Optional per-step probabilities; when given it must have
            the same number of entries as ``lens``.
        entropies: Optional per-step entropies; same length rule as
            ``probs``.

    After construction the first ``lens`` entry and the last entry of
    ``probs`` / ``entropies`` have been dropped, so all stored sequences
    are one element shorter than what was passed in.

    Raises:
        ValueError: If ``probs`` or ``entropies`` is provided with a
            length different from ``lens``.
    """

    lens: list[int]
    # NOTE(review): the probability plotting script slices each ``probs``
    # entry, so entries may be per-step lists rather than floats - confirm.
    probs: Optional[list[float]] = None
    entropies: Optional[list[float]] = None

    def __post_init__(self):
        # Explicit raises instead of ``assert`` so the checks survive ``-O``.
        if self.probs is not None and len(self.lens) != len(self.probs):
            raise ValueError("Length of lens and probs must match")
        if (self.entropies is not None
                and len(self.lens) != len(self.entropies)):
            raise ValueError("Length of lens and entropies must match")
        # remove the prefill accepted lens
        self.lens = self.lens[1:]
        # remove the last proposed tokens
        if self.probs is not None:
            self.probs = self.probs[:-1]
        if self.entropies is not None:
            self.entropies = self.entropies[:-1]

    @property
    def length(self):
        """Number of steps kept after trimming."""
        return len(self.lens)
 # def cleanup(acc_stats: AccStats) -> 
 #     # Remove the prefill phase
 #     data = data[1:]
 #     # Cap the maximum value to 10
 #     data = [min(x, 10) for x in data]
 #     return data
 def load_data(datapath, tokenizer, verbose=False):
     """Load acceptance statistics from a JSON-lines file.

     Each non-blank line is parsed as a JSON object whose ``'acc'`` entry
     provides ``'acc_len'`` and, optionally, ``'acc_prob'`` and
     ``'acc_entropy'``; these are wrapped in an ``AccStats``.

     Args:
         datapath: Path of the ``.jsonl`` file to read.
         tokenizer: Object with a ``decode`` method; only used when
             ``verbose`` is true.
         verbose: When true, print the decoded prompt and generated
             tokens of every record.

     Returns:
         A list with one ``AccStats`` per record, in file order.
     """
     acceptance_stats = []
     with open(datapath, "r", encoding="utf-8") as f:
         # Stream the file instead of materializing every line first.
         for line in f:
             if not line.strip():
                 # Tolerate blank lines (e.g. a trailing empty line).
                 continue
             data = json.loads(line)
             acc = data['acc']
             acceptance_stats.append(
                 AccStats(
                     lens=acc['acc_len'],
                     probs=acc.get('acc_prob'),
                     entropies=acc.get('acc_entropy'),
                 )
             )
             if verbose:
                 print("Input:", tokenizer.decode(data['prompt_token_ids']))
                 print("Output:", tokenizer.decode(data['generated_token_ids']))
                 print("=============================================")
     # ``default=0`` keeps an empty file from raising in ``max``.
     max_length = max((stats.length for stats in acceptance_stats), default=0)
     print(f"Load {len(acceptance_stats)} with max length {max_length}")
     return acceptance_stats
--- a/benchmarks/visualize/vis_acc.py
+++ b/benchmarks/visualize/vis_acc.py
@ -2,56 +2,107 @@ import json
 import seaborn as sns
 import matplotlib.pyplot as plt
 from transformers import AutoTokenizer
 from .common import MODEL_TO_NAMES, load_data
 import requests
 import os
 from pathlib import Path
 class AcceptanceStatsClient:
     """Client for fetching and processing acceptance statistics data."""

     # Sentinel used to pad ragged rows for the heatmap; matches the value
     # the module-level padding code in this file uses.
     _PAD_VALUE = -2

     def __init__(self, model_name, method, dataset, data_path=None):
         """Initialize the client with model and dataset info.

         Args:
             model_name: Key into ``MODEL_TO_NAMES`` used to load the
                 tokenizer; also used in the default data path and in
                 output paths.
             method: Method name used in paths and the plot title.
             dataset: Dataset name used in paths and the plot title.
             data_path: Optional explicit path to the stats file; when
                 omitted a default path is derived from the other
                 arguments.
         """
         self.model_name = model_name
         self.method = method
         self.dataset = dataset
         if data_path is None:
             self.data_path = f"/data/lily/batch-sd/data/{model_name}/{method}_{dataset}_acceptance_stats.jsonl"
         else:
             self.data_path = data_path
         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_TO_NAMES[model_name], use_fast=False)
         self.acceptance_stats = None

     def load_data(self):
         """Load the acceptance statistics from file and cache them."""
         self.acceptance_stats = load_data(self.data_path, self.tokenizer)
         return self.acceptance_stats

     def _accepted_len_rows(self):
         """Return one plain list of accepted lengths per request.

         The loader yields objects exposing ``lens``; plain sequences are
         accepted as well so already-flattened data keeps working.
         """
         return [list(getattr(stat, "lens", stat))
                 for stat in self.acceptance_stats]

     def plot_heatmap(self, output_dir="figures"):
         """Plot the acceptance statistics as a heatmap and save it as PDF.

         Args:
             output_dir: Base directory; the figure is written to
                 ``<output_dir>/<model_name>/``.

         Returns:
             The matplotlib figure that was saved.
         """
         if self.acceptance_stats is None:
             self.load_data()

         # Requests have different numbers of steps; pad to a rectangle
         # because the heatmap needs a 2D dataset.
         rows = self._accepted_len_rows()
         width = max((len(row) for row in rows), default=0)
         matrix = [row + [self._PAD_VALUE] * (width - len(row)) for row in rows]

         fig, ax = plt.subplots(figsize=(12, 8))
         sns.heatmap(matrix, cmap="YlGnBu", ax=ax)
         ax.set_xlabel("Position")
         ax.set_ylabel("Request ID")
         # Add Y-axis labels on the right
         ax2 = ax.twinx()
         ax2.set_ylim(ax.get_ylim())
         ax2.set_yticks([])
         ax2.set_ylabel("# of Accepted Tokens", labelpad=10)
         ax.set_title(f"Acceptance Statistics: {self.model_name} - {self.method} - {self.dataset}")
         fig.tight_layout()

         # Create output directory if it doesn't exist
         output_path = Path(output_dir) / self.model_name
         output_path.mkdir(parents=True, exist_ok=True)
         output_file = output_path / f"{self.method}_{self.dataset}_acceptance_stats.pdf"
         fig.savefig(output_file)
         print(f"Saved heatmap to {output_file}")
         return fig

     def get_summary_stats(self):
         """Get summary statistics about the acceptance data.

         Negative values are treated as padding and excluded from every
         average. Positions are considered up to the longest request, and
         each positional average only covers requests that reach that
         position.

         Returns:
             A dict with ``total_requests``, ``max_position``,
             ``avg_acceptance_rate`` (mean of the per-request averages,
             ``0.0`` when there are no requests), ``avg_by_position`` and
             ``avg_by_request`` (``0.0`` for a request without values).
         """
         if self.acceptance_stats is None:
             self.load_data()

         rows = self._accepted_len_rows()

         # Average per position over the requests that have a valid value.
         avg_by_position = []
         longest = max((len(row) for row in rows), default=0)
         for pos in range(longest):
             valid = [row[pos] for row in rows
                      if pos < len(row) and row[pos] >= 0]
             if valid:
                 avg_by_position.append(sum(valid) / len(valid))

         # Average per request; empty requests contribute 0.0 so the list
         # stays aligned with the request order.
         avg_by_request = []
         for row in rows:
             valid = [value for value in row if value >= 0]
             avg_by_request.append(sum(valid) / len(valid) if valid else 0.0)

         overall = (sum(avg_by_request) / len(avg_by_request)
                    if avg_by_request else 0.0)
         return {
             "total_requests": len(rows),
             "max_position": len(avg_by_position),
             "avg_acceptance_rate": overall,
             "avg_by_position": avg_by_position,
             "avg_by_request": avg_by_request
         }
-model = "r1-distill-llama-8B"
+# Example model configuration
-MODEL_TO_NAMES = {
+model = "llama3.1-8B"
-    "r1-distill-llama-8B" : "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+# model = "r1-distill-llama-8B"
-}
+method = "eagle3"
-method = "ngram"
+dataset = "mtbench"
-dataset = "aime"
+# dataset = "aime"
-datapath = f"/data/lily/batch-sd/data/{model}/{method}_{dataset}_acceptance_stats.jsonl"
+# method = "ngram"
 # dataset = "cnndailymail"
 # datapath = f"/data/lily/batch-sd/data/{model}/{method}_{dataset}_acceptance_stats.jsonl"
 datapath = "acceptance_stats.jsonl"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_TO_NAMES[model], use_fast=False)
 def cleanup(data, cap=10):
     """Drop the first entry of ``data`` and clamp the rest to ``cap``.

     Args:
         data: Sequence of per-step values; the first entry (the prefill
             phase) is discarded.
         cap: Upper bound applied to every remaining value. Defaults to
             10, the previously hard-coded limit.

     Returns:
         A new list containing ``min(x, cap)`` for every entry after the
         first.
     """
     return [min(x, cap) for x in data[1:]]
-def load_data(datapath):
+if __name__ == "__main__":
-    acceptance_stats = []
+    # Use the client instead of directly loading data
-    with open(datapath, "r") as f:
+    client = AcceptanceStatsClient(model, method, dataset, datapath)
-        lines = f.readlines()
+    acceptance_stats = client.load_data()
-        for line in lines:  
+    
-            data = json.loads(line)
+    # Get summary statistics
-            acceptance_stats.append(cleanup(data['acc']))
+    summary = client.get_summary_stats()
-            print("Input:", tokenizer.decode(data['prompt_token_ids']))
+    print("Summary Statistics:")
-            print("Output:", tokenizer.decode(data['generated_token_ids']))
+    print(f"Total Requests: {summary['total_requests']}")
-            print("=============================================")
+    print(f"Max Position: {summary['max_position']}")
-            
+    print(f"Average Acceptance Rate: {summary['avg_acceptance_rate']:.2f}")
    # Pad the acceptance stats to the same length
    max_length = max(len(stats) for stats in acceptance_stats)
    for i in range(len(acceptance_stats)):
        acceptance_stats[i] += [-2] * (max_length - len(acceptance_stats[i]))
    print(f"Load {len(acceptance_stats)} with max length {max_length}")
    return acceptance_stats
-acceptance_stats = load_data(datapath)
+    # Create heatmap visualization
    plot_heatmap = False
    if plot_heatmap:
        client.plot_heatmap()
 fig, ax = plt.subplots()
 sns.heatmap(acceptance_stats, cmap="YlGnBu")
 plt.xlabel("Position")
 plt.ylabel("Request ID")
 # Add Y-axis labels on the right
 ax2 = ax.twinx()
 ax2.set_ylim(ax.get_ylim())              # Match y-axis range
 ax2.set_yticks([])                       # Remove right tick marks if undesired
 ax2.set_ylabel("# of Accepted Tokens", labelpad=10)         # Set right y-axis label
 plt.tight_layout()
 plt.savefig(f"figures/{model}/{method}_{dataset}_acceptance_stats.png")
--- a/benchmarks/visualize/vis_acc_diff.py
+++ b/benchmarks/visualize/vis_acc_diff.py
@ -5,7 +5,7 @@ from matplotlib.colors import LinearSegmentedColormap
 model = "llama3.1-8B"
 dataset = "instructcode"
-method1 = "eagle"
+method1 = "ngram"
 method2 = "eagle3"
 def get_datapath(method):
@ -66,4 +66,4 @@ ax2.set_ylabel("# of Accepted Tokens", labelpad=10)         # Set right y-axis l
 plt.title(f"Diff between {method2} - {method1} acceptance stats for {dataset}")
 plt.tight_layout()
-plt.savefig(f"figures/{model}/diff_{method2}_{method1}_{dataset}_acceptance_stats.png")
+plt.savefig(f"figures/{model}/diff_{method2}_{method1}_{dataset}_acceptance_stats.pdf")
--- a/benchmarks/visualize/vis_prob_entropy.py
+++ b/benchmarks/visualize/vis_prob_entropy.py
@ -0,0 +1,38 @@
 from transformers import AutoTokenizer
 from common import MODEL_TO_NAMES, load_data
 import matplotlib.pyplot as plt
 def _split_probs(acceptance_stats):
     """Split per-step probabilities into accepted and rejected lists."""
     acc_probs = []
     rej_probs = []
     for stat in acceptance_stats:
         if stat.probs is None:
             # Records without probabilities have nothing to contribute.
             continue
         for acc_len, step_probs in zip(stat.lens, stat.probs):
             # The first ``acc_len - 1`` entries of a step count as accepted
             # (presumably acc_len includes one extra token - confirm).
             # Clamp at 0 so ``acc_len == 0`` cannot turn into a negative
             # slice bound that would mark all but the last entry accepted.
             n_accepted = max(acc_len - 1, 0)
             acc_probs.extend(step_probs[:n_accepted])
             rej_probs.extend(step_probs[n_accepted:])
     return acc_probs, rej_probs

 def plot_prob_entropy(acceptance_stats,
                     output_path):
     """Plot overlaid histograms of accepted and rejected probabilities.

     Args:
         acceptance_stats: Iterable of objects exposing ``lens`` and
             ``probs``; entries whose ``probs`` is ``None`` are skipped.
         output_path: Destination passed to ``savefig``.
     """
     acc_probs, rej_probs = _split_probs(acceptance_stats)

     fig, ax = plt.subplots(figsize=(12, 8))
     ax.hist(acc_probs, bins=100, alpha=0.5,
             label='Accepted Probabilities', color='green')
     ax.hist(rej_probs, bins=100, alpha=0.5,
             label='Rejected Probabilities', color='red')
     ax.set_xlabel('Probability')
     ax.set_ylabel('Frequency')
     ax.set_title('Distribution of Accepted and Rejected Probabilities')
     ax.legend()
     fig.tight_layout()
     fig.savefig(output_path)
     # Release the figure so repeated calls do not accumulate open figures.
     plt.close(fig)
 if __name__ == "__main__":
     # Script entry point: load the recorded acceptance statistics and
     # write the accepted/rejected probability histogram.
     model = "llama3.1-8B"
     datapath = "/data/lily/sd-benchmark-paper/batch-sd/acceptance_stats.jsonl"
     hf_model_name = MODEL_TO_NAMES[model]
     tokenizer = AutoTokenizer.from_pretrained(hf_model_name, use_fast=False)
     acceptance_stats = load_data(datapath, tokenizer)
     plot_prob_entropy(acceptance_stats, output_path="prob_entropy_figures")