diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index 6df1e1d628e6c..79d7e9adc4515 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -335,7 +335,7 @@ if __name__ == "__main__": "--device", type=str, default="cuda", - choices=["cuda", "cpu"], + choices=["cuda", "cpu", "tpu"], help='device type for vLLM execution, supporting CUDA and CPU.') parser.add_argument( "--enable-prefix-caching",