From 70e132244a61425a6b88c0b8345e496dc5bdfecd Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Fri, 28 Mar 2025 09:30:08 -0700
Subject: [PATCH] [Minor] Remove TGI launching script (#15646)

Signed-off-by: Woosuk Kwon
---
 benchmarks/benchmark_serving.py                 |  3 ---
 .../benchmark_serving_structured_output.py      |  3 ---
 benchmarks/launch_tgi_server.sh                 | 16 ----------------
 3 files changed, 22 deletions(-)
 delete mode 100755 benchmarks/launch_tgi_server.sh

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 82c6b426b9a2b..e2f712dfc6f49 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -7,9 +7,6 @@ On the server side, run one of the following commands:
         --swap-space 16 \
         --disable-log-requests
 
-    (TGI backend)
-    ./launch_tgi_server.sh <your_model> <max_batch_total_tokens>
-
 On the client side, run:
     python benchmarks/benchmark_serving.py \
         --backend <backend> \
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index c79a93faff197..71cb420a52c46 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -5,9 +5,6 @@ On the server side, run one of the following commands:
     (vLLM OpenAI API server)
     vllm serve <your_model> --disable-log-requests
 
-    (TGI backend)
-    ./launch_tgi_server.sh <your_model> <max_batch_total_tokens>
-
 On the client side, run:
     python benchmarks/benchmark_serving_structured_output.py \
         --backend <backend> \
diff --git a/benchmarks/launch_tgi_server.sh b/benchmarks/launch_tgi_server.sh
deleted file mode 100755
index ba7383d88dc49..0000000000000
--- a/benchmarks/launch_tgi_server.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-PORT=8000
-MODEL=$1
-TOKENS=$2
-
-docker run -e "HF_TOKEN=$HF_TOKEN" --gpus all --shm-size 1g -p $PORT:80 \
-           -v "$PWD/data:/data" \
-           ghcr.io/huggingface/text-generation-inference:2.2.0 \
-           --model-id "$MODEL" \
-           --sharded false \
-           --max-input-length 1024 \
-           --max-total-tokens 2048 \
-           --max-best-of 5 \
-           --max-concurrent-requests 5000 \
-           --max-batch-total-tokens "$TOKENS"
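
Note: with benchmarks/launch_tgi_server.sh removed, a TGI server for the
benchmarks has to be started by hand. Below is a minimal sketch of the
equivalent invocation, reconstructed from the deleted script; the model id
and token budget are assumed example values standing in for the script's
former positional arguments $1 and $2.

    #!/bin/bash
    # Launch text-generation-inference with the same settings the deleted
    # script used. PORT, MODEL, and TOKENS mirror its former variables.
    PORT=8000
    MODEL="meta-llama/Llama-2-7b-hf"   # assumed example; was positional $1
    TOKENS=4096                        # assumed example; was positional $2

    docker run -e "HF_TOKEN=$HF_TOKEN" --gpus all --shm-size 1g -p "$PORT":80 \
        -v "$PWD/data:/data" \
        ghcr.io/huggingface/text-generation-inference:2.2.0 \
        --model-id "$MODEL" \
        --sharded false \
        --max-input-length 1024 \
        --max-total-tokens 2048 \
        --max-best-of 5 \
        --max-concurrent-requests 5000 \
        --max-batch-total-tokens "$TOKENS"

The benchmark client can then be pointed at the container as before, e.g.
with --backend tgi against the chosen port.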