From 6acb7a62855a037a9cb50344e692ca23ed8782ea Mon Sep 17 00:00:00 2001 From: Rabi Mishra Date: Fri, 30 May 2025 13:28:04 +0530 Subject: [PATCH] [Misc]Fix benchmarks/README.md for speculative decoding (#18897) Signed-off-by: rabi --- benchmarks/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index ecab570bb31c..cbf2f281bdde 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -146,9 +146,9 @@ python3 vllm/benchmarks/benchmark_serving.py \ ``` bash VLLM_USE_V1=1 vllm serve meta-llama/Meta-Llama-3-8B-Instruct \ - --ngram_prompt_lookup_min 2 \ - --ngram-prompt-lookup-max 5 \ - --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5} + --speculative-config $'{"method": "ngram", + "num_speculative_tokens": 5, "prompt_lookup_max": 5, + "prompt_lookup_min": 2}' ``` ``` bash @@ -273,9 +273,9 @@ python3 vllm/benchmarks/benchmark_throughput.py \ --output-len=100 \ --num-prompts=2048 \ --async-engine \ - --ngram_prompt_lookup_min=2 \ - --ngram-prompt-lookup-max=5 \ - --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5} + --speculative-config $'{"method": "ngram", + "num_speculative_tokens": 5, "prompt_lookup_max": 5, + "prompt_lookup_min": 2}' ``` ```