mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-06 22:35:47 +08:00
accepted length code
This commit is contained in:
parent
54be44ee74
commit
bc3b20f81f
@ -66,6 +66,15 @@
|
|||||||
# --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
|
# --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
|
||||||
|
|
||||||
|
|
||||||
|
python benchmarks/benchmark_throughput.py \
|
||||||
|
--model meta-llama/Meta-Llama-3.1-8B-Instruct\
|
||||||
|
--dataset-name hf \
|
||||||
|
--dataset-path philschmid/mt-bench \
|
||||||
|
--prefix-len 0 \
|
||||||
|
--output-len 512 \
|
||||||
|
--num-prompts 200 \
|
||||||
|
--speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
|
||||||
|
|
||||||
|
|
||||||
# python benchmarks/benchmark_throughput.py \
|
# python benchmarks/benchmark_throughput.py \
|
||||||
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
@ -119,18 +128,86 @@
|
|||||||
# --dataset-name hf \
|
# --dataset-name hf \
|
||||||
# --dataset-path AI-MO/aimo-validation-aime \
|
# --dataset-path AI-MO/aimo-validation-aime \
|
||||||
# --prefix-len 0 \
|
# --prefix-len 0 \
|
||||||
# --output-len 5120 \
|
# --output-len 1024 \
|
||||||
# --num-prompts 90 \
|
# --num-prompts 90 \
|
||||||
# --speculative_config '{"method": "eagle3", "num_speculative_tokens": 20, "model": "yuhuili/EAGLE3-DeepSeek-R1-Distill-LLaMA-8B"}'
|
# --speculative_config '{"method": "eagle3", "num_speculative_tokens": 20, "model": "yuhuili/EAGLE3-DeepSeek-R1-Distill-LLaMA-8B"}'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
python benchmarks/benchmark_throughput.py \
|
# python benchmarks/benchmark_throughput.py \
|
||||||
--model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
|
# --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
|
||||||
--dataset-name hf \
|
# --dataset-name hf \
|
||||||
--dataset-path AI-MO/aimo-validation-aime \
|
# --dataset-path AI-MO/aimo-validation-aime \
|
||||||
--prefix-len 0 \
|
# --prefix-len 0 \
|
||||||
--output-len 5120 \
|
# --output-len 1024 \
|
||||||
--num-prompts 90 \
|
# --num-prompts 90 \
|
||||||
--speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
|
# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name sharegpt \
|
||||||
|
# --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 200 \
|
||||||
|
# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
|
||||||
|
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name hf \
|
||||||
|
# --dataset-path philschmid/mt-bench \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 200 \
|
||||||
|
# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name hf \
|
||||||
|
# --dataset-path philschmid/mt-bench \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 200 \
|
||||||
|
# --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'
|
||||||
|
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name hf \
|
||||||
|
# --dataset-path abisee/cnn_dailymail \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 200 \
|
||||||
|
# --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name hf \
|
||||||
|
# --dataset-path abisee/cnn_dailymail \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 200 \
|
||||||
|
# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
|
||||||
|
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name hf \
|
||||||
|
# --dataset-path philschmid/mt-bench \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 10 \
|
||||||
|
# --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'
|
||||||
|
|
||||||
|
|
||||||
|
# python benchmarks/benchmark_throughput.py \
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||||
|
# --dataset-name hf \
|
||||||
|
# --dataset-path abisee/cnn_dailymail \
|
||||||
|
# --prefix-len 0 \
|
||||||
|
# --output-len 512 \
|
||||||
|
# --num-prompts 200 \
|
||||||
|
# --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'
|
||||||
|
|||||||
63
benchmarks/visualize/common.py
Normal file
63
benchmarks/visualize/common.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
import json
from dataclasses import dataclass
from typing import Optional
|
||||||
|
|
||||||
|
# Short model aliases used by the visualisation scripts, mapped to the
# Hugging Face repository ids that are passed to AutoTokenizer.from_pretrained.
MODEL_TO_NAMES = {
    "r1-distill-llama-8B": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "llama3-8B": "meta-llama/Meta-Llama-3-8B-Instruct",
    "llama3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "llama3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
}
|
||||||
|
|
||||||
|
@dataclass
class AccStats:
    """Acceptance statistics recorded for a single request.

    On construction the optional per-step lists are checked to have the same
    length as ``lens``.  Afterwards the first entry of ``lens`` (the prefill
    step) and the last entry of ``probs`` / ``entropies`` (the final proposal)
    are dropped, so all stored lists end up one element shorter than the
    input and stay the same length as each other.

    Raises:
        ValueError: if ``probs`` or ``entropies`` is given and its length
            differs from ``len(lens)``.
    """

    # Accepted length per step, as read from the ``acc_len`` record.
    lens: list[int]
    # Per-step proposal probabilities.  Other code in this project indexes
    # each entry as a sequence (``probs[i][:k]``), hence list-of-lists.
    probs: Optional[list[list[float]]] = None
    # Per-step entropies; presumably shaped like ``probs`` -- TODO confirm.
    entropies: Optional[list] = None

    def __post_init__(self):
        # Explicit raises rather than ``assert`` so the validation still
        # happens when Python runs with -O.
        if self.probs is not None and len(self.lens) != len(self.probs):
            raise ValueError("Length of lens and probs must match")
        if self.entropies is not None and len(self.lens) != len(
                self.entropies):
            raise ValueError("Length of lens and entropies must match")

        # Remove the prefill accepted lens.
        self.lens = self.lens[1:]

        # Remove the last proposed tokens.
        if self.probs is not None:
            self.probs = self.probs[:-1]
        if self.entropies is not None:
            self.entropies = self.entropies[:-1]

    @property
    def length(self):
        """Number of steps kept after the prefill entry was removed."""
        return len(self.lens)
|
||||||
|
|
||||||
|
# def cleanup(acc_stats: AccStats) ->
|
||||||
|
# # Remove the prefill phase
|
||||||
|
# data = data[1:]
|
||||||
|
# # Cap the maximum value to 10
|
||||||
|
# data = [min(x, 10) for x in data]
|
||||||
|
# return data
|
||||||
|
|
||||||
|
def load_data(datapath, tokenizer, verbose=False):
    """Load per-request acceptance statistics from a JSON-lines file.

    Each non-blank line must be a JSON object with an ``acc`` mapping that
    holds ``acc_len`` and, optionally, ``acc_prob`` and ``acc_entropy``.

    Args:
        datapath: Path of the ``.jsonl`` file to read.
        tokenizer: Object with a ``decode`` method; only used when
            ``verbose`` is true, to print the prompt and generated text.
        verbose: When true, print the decoded ``prompt_token_ids`` and
            ``generated_token_ids`` of every record.

    Returns:
        A list with one ``AccStats`` per record, in file order.
    """
    acceptance_stats = []
    with open(datapath, "r", encoding="utf-8") as f:
        # Stream the file instead of materialising every line up front.
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            data = json.loads(line)
            acc = data['acc']
            acceptance_stats.append(
                AccStats(
                    lens=acc['acc_len'],
                    probs=acc.get('acc_prob'),
                    entropies=acc.get('acc_entropy'),
                ))
            if verbose:
                print("Input:", tokenizer.decode(data['prompt_token_ids']))
                print("Output:",
                      tokenizer.decode(data['generated_token_ids']))
                print("=============================================")

    # ``default=0`` keeps an empty file from raising ValueError in max().
    max_length = max((stats.length for stats in acceptance_stats), default=0)

    print(f"Load {len(acceptance_stats)} with max length {max_length}")
    return acceptance_stats
|
||||||
@ -2,56 +2,107 @@ import json
|
|||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
|
from .common import MODEL_TO_NAMES, load_data
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class AcceptanceStatsClient:
    """Client for loading, summarising and plotting acceptance statistics."""

    # Filler written into heatmap cells past the end of a shorter request,
    # so every row has the same width.
    _PAD_VALUE = -2

    def __init__(self, model_name, method, dataset, data_path=None):
        """Store the run identifiers and create the tokenizer.

        Args:
            model_name: Key into ``MODEL_TO_NAMES``; also used in the default
                data path, the plot title and the output directory.
            method: Speculative method name, used in paths and the title.
            dataset: Dataset name, used in paths and the title.
            data_path: Explicit ``.jsonl`` path.  When ``None`` a default
                path is built from the three identifiers above.

        Raises:
            KeyError: if ``model_name`` is not a key of ``MODEL_TO_NAMES``.
        """
        self.model_name = model_name
        self.method = method
        self.dataset = dataset

        if data_path is None:
            self.data_path = f"/data/lily/batch-sd/data/{model_name}/{method}_{dataset}_acceptance_stats.jsonl"
        else:
            self.data_path = data_path

        self.tokenizer = AutoTokenizer.from_pretrained(
            MODEL_TO_NAMES[model_name], use_fast=False)
        self.acceptance_stats = None

    def load_data(self):
        """Load the acceptance statistics from ``self.data_path``."""
        self.acceptance_stats = load_data(self.data_path, self.tokenizer)
        return self.acceptance_stats

    @staticmethod
    def _accepted_lens(stat):
        """Return the accepted lengths of one entry as a plain list.

        Entries exposing a ``lens`` attribute contribute that attribute;
        anything else is treated as an already-flat sequence of numbers.
        """
        return list(getattr(stat, "lens", stat))

    def _rows(self):
        """Return one list of accepted lengths per request, loading lazily."""
        if self.acceptance_stats is None:
            self.load_data()
        return [self._accepted_lens(stat) for stat in self.acceptance_stats]

    def plot_heatmap(self, output_dir="figures"):
        """Plot the accepted lengths as a request-by-position heatmap.

        The figure is written to
        ``<output_dir>/<model_name>/<method>_<dataset>_acceptance_stats.pdf``.

        Returns:
            The matplotlib figure that was saved.

        Raises:
            ValueError: if there is nothing to plot.
        """
        rows = self._rows()
        width = max((len(row) for row in rows), default=0)
        if width == 0:
            raise ValueError("No acceptance statistics to plot")

        # Requests have different numbers of steps; the heatmap needs a
        # rectangular matrix, so pad the shorter rows.
        matrix = [row + [self._PAD_VALUE] * (width - len(row)) for row in rows]

        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(matrix, cmap="YlGnBu", ax=ax)
        ax.set_xlabel("Position")
        ax.set_ylabel("Request ID")

        # Add Y-axis labels on the right
        ax2 = ax.twinx()
        ax2.set_ylim(ax.get_ylim())
        ax2.set_yticks([])
        ax2.set_ylabel("# of Accepted Tokens", labelpad=10)

        ax.set_title(
            f"Acceptance Statistics: {self.model_name} - {self.method} - {self.dataset}"
        )
        fig.tight_layout()

        # Create output directory if it doesn't exist
        output_path = Path(output_dir) / self.model_name
        output_path.mkdir(parents=True, exist_ok=True)

        output_file = output_path / f"{self.method}_{self.dataset}_acceptance_stats.pdf"
        fig.savefig(output_file)
        print(f"Saved heatmap to {output_file}")
        return fig

    def get_summary_stats(self):
        """Compute summary statistics over the loaded requests.

        Returns:
            A dict with:
              * ``total_requests``: number of requests.
              * ``max_position``: number of positions that have an average.
              * ``avg_acceptance_rate``: mean of ``avg_by_request``
                (``0.0`` when there are no requests).
              * ``avg_by_position``: per-position mean over the requests that
                have a non-negative value at that position; positions with no
                such value are skipped.
              * ``avg_by_request``: per-request mean of its values
                (``0.0`` for a request with no steps).
        """
        rows = self._rows()
        width = max((len(row) for row in rows), default=0)

        # Calculate the average for each position.  Rows are ragged, so only
        # requests that actually reach a position contribute to it; negative
        # values are treated as padding and ignored.
        avg_by_position = []
        for pos in range(width):
            column = [row[pos] for row in rows
                      if pos < len(row) and row[pos] >= 0]
            if column:
                avg_by_position.append(sum(column) / len(column))

        # Calculate the average for each request.
        avg_by_request = [sum(row) / len(row) if row else 0.0 for row in rows]

        if avg_by_request:
            overall = sum(avg_by_request) / len(avg_by_request)
        else:
            overall = 0.0

        return {
            "total_requests": len(rows),
            "max_position": len(avg_by_position),
            "avg_acceptance_rate": overall,
            "avg_by_position": avg_by_position,
            "avg_by_request": avg_by_request,
        }
|
||||||
|
|
||||||
model = "r1-distill-llama-8B"
|
# Example model configuration
|
||||||
MODEL_TO_NAMES = {
|
model = "llama3.1-8B"
|
||||||
"r1-distill-llama-8B" : "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
|
# model = "r1-distill-llama-8B"
|
||||||
}
|
method = "eagle3"
|
||||||
method = "ngram"
|
dataset = "mtbench"
|
||||||
dataset = "aime"
|
# dataset = "aime"
|
||||||
datapath = f"/data/lily/batch-sd/data/{model}/{method}_{dataset}_acceptance_stats.jsonl"
|
# method = "ngram"
|
||||||
|
# dataset = "cnndailymail"
|
||||||
|
# datapath = f"/data/lily/batch-sd/data/{model}/{method}_{dataset}_acceptance_stats.jsonl"
|
||||||
|
datapath = "acceptance_stats.jsonl"
|
||||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_TO_NAMES[model], use_fast=False)
|
tokenizer = AutoTokenizer.from_pretrained(MODEL_TO_NAMES[model], use_fast=False)
|
||||||
|
|
||||||
def cleanup(data):
|
|
||||||
# Remove the prefill phase
|
|
||||||
data = data[1:]
|
|
||||||
# Cap the maximum value to 10
|
|
||||||
data = [min(x, 10) for x in data]
|
|
||||||
return data
|
|
||||||
|
|
||||||
def load_data(datapath):
|
if __name__ == "__main__":
|
||||||
acceptance_stats = []
|
# Use the client instead of directly loading data
|
||||||
with open(datapath, "r") as f:
|
client = AcceptanceStatsClient(model, method, dataset, datapath)
|
||||||
lines = f.readlines()
|
acceptance_stats = client.load_data()
|
||||||
for line in lines:
|
|
||||||
data = json.loads(line)
|
# Get summary statistics
|
||||||
acceptance_stats.append(cleanup(data['acc']))
|
summary = client.get_summary_stats()
|
||||||
print("Input:", tokenizer.decode(data['prompt_token_ids']))
|
print("Summary Statistics:")
|
||||||
print("Output:", tokenizer.decode(data['generated_token_ids']))
|
print(f"Total Requests: {summary['total_requests']}")
|
||||||
print("=============================================")
|
print(f"Max Position: {summary['max_position']}")
|
||||||
|
print(f"Average Acceptance Rate: {summary['avg_acceptance_rate']:.2f}")
|
||||||
# Pad the acceptance stats to the same length
|
|
||||||
max_length = max(len(stats) for stats in acceptance_stats)
|
|
||||||
for i in range(len(acceptance_stats)):
|
|
||||||
acceptance_stats[i] += [-2] * (max_length - len(acceptance_stats[i]))
|
|
||||||
|
|
||||||
print(f"Load {len(acceptance_stats)} with max length {max_length}")
|
|
||||||
return acceptance_stats
|
|
||||||
|
|
||||||
acceptance_stats = load_data(datapath)
|
# Create heatmap visualization
|
||||||
|
plot_heatmap = False
|
||||||
|
if plot_heatmap:
|
||||||
|
client.plot_heatmap()
|
||||||
|
|
||||||
|
|
||||||
fig, ax = plt.subplots()
|
|
||||||
sns.heatmap(acceptance_stats, cmap="YlGnBu")
|
|
||||||
plt.xlabel("Position")
|
|
||||||
plt.ylabel("Request ID")
|
|
||||||
# Add Y-axis labels on the right
|
|
||||||
ax2 = ax.twinx()
|
|
||||||
ax2.set_ylim(ax.get_ylim()) # Match y-axis range
|
|
||||||
ax2.set_yticks([]) # Remove right tick marks if undesired
|
|
||||||
ax2.set_ylabel("# of Accepted Tokens", labelpad=10) # Set right y-axis label
|
|
||||||
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f"figures/{model}/{method}_{dataset}_acceptance_stats.png")
|
|
||||||
|
|||||||
@ -5,7 +5,7 @@ from matplotlib.colors import LinearSegmentedColormap
|
|||||||
|
|
||||||
model = "llama3.1-8B"
|
model = "llama3.1-8B"
|
||||||
dataset = "instructcode"
|
dataset = "instructcode"
|
||||||
method1 = "eagle"
|
method1 = "ngram"
|
||||||
method2 = "eagle3"
|
method2 = "eagle3"
|
||||||
|
|
||||||
def get_datapath(method):
|
def get_datapath(method):
|
||||||
@ -66,4 +66,4 @@ ax2.set_ylabel("# of Accepted Tokens", labelpad=10) # Set right y-axis l
|
|||||||
plt.title(f"Diff between {method2} - {method1} acceptance stats for {dataset}")
|
plt.title(f"Diff between {method2} - {method1} acceptance stats for {dataset}")
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(f"figures/{model}/diff_{method2}_{method1}_{dataset}_acceptance_stats.png")
|
plt.savefig(f"figures/{model}/diff_{method2}_{method1}_{dataset}_acceptance_stats.pdf")
|
||||||
|
|||||||
38
benchmarks/visualize/vis_prob_entropy.py
Normal file
38
benchmarks/visualize/vis_prob_entropy.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from transformers import AutoTokenizer
|
||||||
|
from common import MODEL_TO_NAMES, load_data
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
def plot_prob_entropy(acceptance_stats,
                      output_path):
    """Plot histograms of accepted vs. rejected proposal probabilities.

    For every step of every request, the first ``acc_len - 1`` entries of
    that step's probabilities are counted as accepted and the remaining
    entries as rejected.  Both distributions are drawn as overlaid
    histograms and the figure is saved to ``output_path``.

    Args:
        acceptance_stats: Iterable of objects with ``lens`` and ``probs``
            attributes of matching length.  Entries whose ``probs`` is
            ``None`` or empty are skipped.
        output_path: Destination passed to ``savefig``.
    """
    acc_probs = []
    rej_probs = []
    for stat in acceptance_stats:
        # Probabilities are optional in the recorded data; nothing to plot
        # for requests that do not carry them.
        if not stat.probs:
            continue
        for step_probs, acc_len in zip(stat.probs, stat.lens):
            # NOTE(review): the -1 suggests acc_len counts one token beyond
            # the accepted proposals -- confirm against the producer.
            # Clamp at 0 so acc_len == 0 does not turn into a [:-1] slice
            # that would count almost every proposal as accepted.
            n_accepted = max(acc_len - 1, 0)
            acc_probs.extend(step_probs[:n_accepted])
            rej_probs.extend(step_probs[n_accepted:])

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.hist(acc_probs, bins=100, alpha=0.5,
            label='Accepted Probabilities', color='green')
    ax.hist(rej_probs, bins=100, alpha=0.5,
            label='Rejected Probabilities', color='red')
    ax.set_xlabel('Probability')
    ax.set_ylabel('Frequency')
    ax.set_title('Distribution of Accepted and Rejected Probabilities')
    ax.legend()
    fig.tight_layout()
    fig.savefig(output_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: load one recorded run and plot its accepted vs.
    # rejected probability histograms.
    model = "llama3.1-8B"
    datapath = "/data/lily/sd-benchmark-paper/batch-sd/acceptance_stats.jsonl"
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_TO_NAMES[model],
        use_fast=False,
    )
    acceptance_stats = load_data(datapath, tokenizer)
    plot_prob_entropy(acceptance_stats, output_path="prob_entropy_figures")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user