mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 05:44:59 +08:00
Attempt to fix GPU OOM in a spec-decoding test (#29419)
Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com>
This commit is contained in:
parent
b07555d26f
commit
c32a18cbe7
@@ -133,7 +133,7 @@ def main(args):
|
|||||||
tensor_parallel_size=args.tp,
|
tensor_parallel_size=args.tp,
|
||||||
enable_chunked_prefill=args.enable_chunked_prefill,
|
enable_chunked_prefill=args.enable_chunked_prefill,
|
||||||
enforce_eager=args.enforce_eager,
|
enforce_eager=args.enforce_eager,
|
||||||
gpu_memory_utilization=0.8,
|
gpu_memory_utilization=0.9,
|
||||||
speculative_config=speculative_config,
|
speculative_config=speculative_config,
|
||||||
disable_log_stats=False,
|
disable_log_stats=False,
|
||||||
max_model_len=args.max_model_len,
|
max_model_len=args.max_model_len,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user