Attempt to fix GPU OOM in a spec-decoding test (#29419)

Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com>
This commit is contained in:
Eldar Kurtić 2025-11-25 20:23:36 +01:00 committed by GitHub
parent b07555d26f
commit c32a18cbe7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -133,7 +133,7 @@ def main(args):
tensor_parallel_size=args.tp,
enable_chunked_prefill=args.enable_chunked_prefill,
enforce_eager=args.enforce_eager,
gpu_memory_utilization=0.8,
gpu_memory_utilization=0.9,
speculative_config=speculative_config,
disable_log_stats=False,
max_model_len=args.max_model_len,