mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 05:07:03 +08:00
39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
from transformers import AutoTokenizer
|
|
from common import MODEL_TO_NAMES, load_data
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
def plot_prob_entropy(acceptance_stats,
                      output_path):
    """Plot overlaid histograms of accepted vs. rejected probabilities.

    For every entry ``acc_len`` of ``stat.lens`` the matching sequence
    ``stat.probs[i]`` is split at index ``acc_len - 1``: values before the
    split are collected as "accepted", values from the split onward as
    "rejected". Both groups are drawn as 100-bin, semi-transparent
    histograms on a single 12x8 figure, which is then saved.

    Despite the function name, only probabilities are plotted here; no
    entropy is computed.

    Args:
        acceptance_stats: Iterable of objects exposing ``lens`` (iterable of
            integers) and ``probs`` (indexable in step with ``lens``, each
            item sliceable and holding numeric values).
        output_path: Destination passed to ``Figure.savefig``.
    """
    acc_probs = []
    rej_probs = []
    for stat in acceptance_stats:
        for i, acc_len in enumerate(stat.lens):
            # Clamp the split point: a non-positive acc_len would otherwise
            # become a negative slice index and silently file most of the
            # sequence under "accepted".
            # NOTE(review): presumably acc_len counts one extra token beyond
            # the accepted ones (hence the -1) -- confirm against the loader.
            split = max(acc_len - 1, 0)
            probs = stat.probs[i]
            acc_probs.extend(probs[:split])
            rej_probs.extend(probs[split:])

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        # Draw on the explicit Axes instead of pyplot's implicit "current"
        # figure so the output does not depend on global pyplot state.
        ax.hist(acc_probs, bins=100, alpha=0.5,
                label='Accepted Probabilities', color='green')
        ax.hist(rej_probs, bins=100, alpha=0.5,
                label='Rejected Probabilities', color='red')
        ax.set_xlabel('Probability')
        ax.set_ylabel('Frequency')
        ax.set_title('Distribution of Accepted and Rejected Probabilities')
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the recorded acceptance statistics for one
    # model and write the accepted/rejected probability histogram.
    model = "llama3.1-8B"
    datapath = "/data/lily/sd-benchmark-paper/batch-sd/acceptance_stats.jsonl"

    # Resolve the model key through MODEL_TO_NAMES and load its slow
    # (use_fast=False) tokenizer, which load_data takes alongside the path.
    pretrained_name = MODEL_TO_NAMES[model]
    tokenizer = AutoTokenizer.from_pretrained(pretrained_name, use_fast=False)

    acceptance_stats = load_data(datapath, tokenizer)
    plot_prob_entropy(acceptance_stats, output_path="prob_entropy_figures")
|
|
|
|
|
|
|