#!/usr/bin/env bash
#
# Throughput benchmark invocations for speculative-decoding configurations.
#
# Every command runs benchmarks/benchmark_throughput.py with a target model,
# a dataset, and a --speculative_config JSON selecting the "ngram", "eagle",
# or "eagle3" method. Only the LAST command in this file is live; the
# commented-out commands above it are kept as a record of the other
# model/dataset/method combinations. To run a different combination, comment
# out the live command and uncomment the one you want.
#
# NOTE: each trailing backslash must be the very last character on its line.
# Anything after it (including a "# comment") breaks the line continuation.
#
# NOTE(review): the /data/lily/... dataset paths are machine-specific; adjust
# them for your environment before uncommenting those commands.

# --- meta-llama/Meta-Llama-3-8B-Instruct, method: ngram ---------------------

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --dataset-name sonnet \
#   --dataset-path /data/lily/batch-sd/benchmarks/sonnet.txt \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --dataset-name sharegpt \
#   --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --dataset-name hf \
#   --dataset-path likaixin/InstructCoder \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'

# --- meta-llama/Meta-Llama-3-8B-Instruct, method: eagle ---------------------

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --dataset-name sonnet \
#   --dataset-path /data/lily/batch-sd/benchmarks/sonnet.txt \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --dataset-name sharegpt \
#   --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --dataset-name hf \
#   --dataset-path likaixin/InstructCoder \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3-Instruct-8B", "num_speculative_tokens": 20}'

# --- meta-llama/Meta-Llama-3.1-8B-Instruct, method: eagle3 ------------------

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3.1-8B-Instruct \
#   --dataset-name hf \
#   --dataset-path likaixin/InstructCoder \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3.1-8B-Instruct \
#   --dataset-name sharegpt \
#   --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3.1-8B-Instruct \
#   --dataset-name sonnet \
#   --dataset-path /data/lily/batch-sd/benchmarks/sonnet.txt \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle3", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'

# --- meta-llama/Meta-Llama-3.1-8B-Instruct, method: eagle -------------------

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3.1-8B-Instruct \
#   --dataset-name hf \
#   --dataset-path likaixin/InstructCoder \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3.1-8B-Instruct \
#   --dataset-name sharegpt \
#   --dataset-path /data/lily/ShareGPT_V3_unfiltered_cleaned_split.json \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "eagle", "model": "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", "num_speculative_tokens": 20}'

# --- meta-llama/Meta-Llama-3.1-8B-Instruct, method: ngram -------------------

# python benchmarks/benchmark_throughput.py \
#   --model meta-llama/Meta-Llama-3.1-8B-Instruct \
#   --dataset-name hf \
#   --dataset-path likaixin/InstructCoder \
#   --prefix-len 0 \
#   --output-len 512 \
#   --num-prompts 200 \
#   --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'

# --- deepseek-ai/DeepSeek-R1-Distill-Llama-8B, method: eagle3 ---------------

# python benchmarks/benchmark_throughput.py \
#   --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
#   --dataset-name hf \
#   --dataset-path AI-MO/aimo-validation-aime \
#   --prefix-len 0 \
#   --output-len 5120 \
#   --num-prompts 90 \
#   --speculative_config '{"method": "eagle3", "num_speculative_tokens": 20, "model": "yuhuili/EAGLE3-DeepSeek-R1-Distill-LLaMA-8B"}'

# --- ACTIVE RUN: deepseek-ai/DeepSeek-R1-Distill-Llama-8B, method: ngram ----
# Same model, dataset, output length, and prompt count as the eagle3 command
# directly above; only the speculative method differs (ngram, with lookup
# window bounds 2..5 and 20 speculative tokens).
python benchmarks/benchmark_throughput.py \
  --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --dataset-name hf \
  --dataset-path AI-MO/aimo-validation-aime \
  --prefix-len 0 \
  --output-len 5120 \
  --num-prompts 90 \
  --speculative_config '{"method": "ngram", "num_speculative_tokens": 20, "prompt_lookup_min": 2, "prompt_lookup_max": 5}'