diff --git a/examples/offline_inference/spec_decode.py b/examples/offline_inference/spec_decode.py index 90d103e5cb05d..3f38aa9fcaa60 100644 --- a/examples/offline_inference/spec_decode.py +++ b/examples/offline_inference/spec_decode.py @@ -79,9 +79,7 @@ def main(): trust_remote_code=True, tensor_parallel_size=args.tp, enable_chunked_prefill=args.enable_chunked_prefill, - max_num_batched_tokens=args.max_num_batched_tokens, enforce_eager=args.enforce_eager, - max_num_seqs=args.max_num_seqs, gpu_memory_utilization=0.8, speculative_config=speculative_config, disable_log_stats=False,