Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-13 22:04:58 +08:00)
[Doc] cleanup deprecated flag for doc (#18715)
Signed-off-by: calvin chen <120380290@qq.com>
commit 4693a3438c
parent bbd9a84dc5
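In short, the deprecated standalone speculative-decoding flags are folded into the single JSON-valued `--speculative_config` argument. A minimal before/after sketch, using only the flags and values that appear in the two hunks below:

``` bash
# Deprecated flags removed by this commit:
#   --speculative-model "[ngram]" \
#   --num_speculative_tokens 5
# Replacement used in the updated docs: one JSON-valued argument.
VLLM_USE_V1=1 vllm serve meta-llama/Meta-Llama-3-8B-Instruct \
    --ngram_prompt_lookup_min 2 \
    --ngram-prompt-lookup-max 5 \
    --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}'
```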
@@ -146,10 +146,9 @@ python3 vllm/benchmarks/benchmark_serving.py \
 
 ``` bash
 VLLM_USE_V1=1 vllm serve meta-llama/Meta-Llama-3-8B-Instruct \
-    --speculative-model "[ngram]" \
     --ngram_prompt_lookup_min 2 \
     --ngram-prompt-lookup-max 5 \
-    --num_speculative_tokens 5
+    --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}'
 ```
 
 ``` bash
@@ -274,10 +273,9 @@ python3 vllm/benchmarks/benchmark_throughput.py \
     --output-len=100 \
     --num-prompts=2048 \
     --async-engine \
-    --speculative-model="[ngram]" \
     --ngram_prompt_lookup_min=2 \
     --ngram-prompt-lookup-max=5 \
-    --num_speculative_tokens=5
+    --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}'
 ```
 
 ```