mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-02 20:04:37 +08:00
[Misc] Fix spec decode example (#20296)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
e28533a16f
commit
7151f92241
@@ -79,9 +79,7 @@ def main():
|
|||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
tensor_parallel_size=args.tp,
|
tensor_parallel_size=args.tp,
|
||||||
enable_chunked_prefill=args.enable_chunked_prefill,
|
enable_chunked_prefill=args.enable_chunked_prefill,
|
||||||
max_num_batched_tokens=args.max_num_batched_tokens,
|
|
||||||
enforce_eager=args.enforce_eager,
|
enforce_eager=args.enforce_eager,
|
||||||
max_num_seqs=args.max_num_seqs,
|
|
||||||
gpu_memory_utilization=0.8,
|
gpu_memory_utilization=0.8,
|
||||||
speculative_config=speculative_config,
|
speculative_config=speculative_config,
|
||||||
disable_log_stats=False,
|
disable_log_stats=False,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user