mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-25 17:24:25 +08:00
Simplify!
Signed-off-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
parent
67ba6a9487
commit
b15005dc12
@ -41,37 +41,17 @@ steps:
|
|||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
- block: "Build FlashInfer wheels"
|
- block: "Build FlashInfer wheel"
|
||||||
key: block-build-flashinfer-wheels
|
key: block-build-flashinfer-wheel
|
||||||
depends_on: ~
|
depends_on: ~
|
||||||
|
|
||||||
- label: "Build FlashInfer wheels - CUDA 12.8"
|
- label: "Build and upload FlashInfer wheel - CUDA 12.8"
|
||||||
depends_on: block-build-flashinfer-wheels
|
depends_on: block-build-flashinfer-wheel
|
||||||
id: build-flashinfer-wheel-cuda-12-8
|
id: build-upload-flashinfer-wheel
|
||||||
agents:
|
agents:
|
||||||
queue: cpu_queue_postmerge
|
queue: cpu_queue_postmerge
|
||||||
commands:
|
commands:
|
||||||
- "bash .buildkite/scripts/build-flashinfer-wheel.sh 12.8.1"
|
- "bash .buildkite/scripts/build-upload-flashinfer-wheel.sh 12.8.1"
|
||||||
env:
|
|
||||||
DOCKER_BUILDKIT: "1"
|
|
||||||
|
|
||||||
- label: "Build FlashInfer wheels - CUDA 12.6"
|
|
||||||
depends_on: block-build-flashinfer-wheels
|
|
||||||
id: build-flashinfer-wheel-cuda-12-6
|
|
||||||
agents:
|
|
||||||
queue: cpu_queue_postmerge
|
|
||||||
commands:
|
|
||||||
- "bash .buildkite/scripts/build-flashinfer-wheel.sh 12.6.3"
|
|
||||||
env:
|
|
||||||
DOCKER_BUILDKIT: "1"
|
|
||||||
|
|
||||||
- label: "Build FlashInfer wheels - CUDA 11.8"
|
|
||||||
depends_on: block-build-flashinfer-wheels
|
|
||||||
id: build-flashinfer-wheel-cuda-11-8
|
|
||||||
agents:
|
|
||||||
queue: cpu_queue_postmerge
|
|
||||||
commands:
|
|
||||||
- "bash .buildkite/scripts/build-flashinfer-wheel.sh 11.8.0"
|
|
||||||
env:
|
env:
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
|
|||||||
@ -1,30 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
CUDA_VERSION="${1:-12.8.1}"
|
|
||||||
FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}"
|
|
||||||
|
|
||||||
echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile"
|
|
||||||
|
|
||||||
# Build the FlashInfer wheel using the existing Dockerfile stage
|
|
||||||
DOCKER_BUILDKIT=1 docker build \
|
|
||||||
--build-arg max_jobs=16 \
|
|
||||||
--build-arg USE_SCCACHE=1 \
|
|
||||||
--build-arg CUDA_VERSION="${CUDA_VERSION}" \
|
|
||||||
--build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \
|
|
||||||
--tag flashinfer-wheel-builder:${CUDA_VERSION} \
|
|
||||||
--target flashinfer-wheel-builder \
|
|
||||||
--progress plain \
|
|
||||||
-f docker/Dockerfile .
|
|
||||||
|
|
||||||
# Extract the wheel
|
|
||||||
mkdir -p artifacts/dist
|
|
||||||
docker run --rm -v $(pwd)/artifacts:/output_host flashinfer-wheel-builder:${CUDA_VERSION} \
|
|
||||||
bash -c 'cp /output/*.whl /output_host/dist/ && chmod -R a+rw /output_host'
|
|
||||||
|
|
||||||
# Upload the wheel
|
|
||||||
bash .buildkite/scripts/upload-flashinfer-wheels.sh
|
|
||||||
|
|
||||||
echo "FlashInfer wheel built and uploaded successfully for CUDA ${CUDA_VERSION}"
|
|
||||||
ls -la artifacts/dist/
|
|
||||||
58
.buildkite/scripts/build-upload-flashinfer-wheel.sh
Executable file
58
.buildkite/scripts/build-upload-flashinfer-wheel.sh
Executable file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
CUDA_VERSION="${1:-12.8.1}"
|
||||||
|
FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}"
|
||||||
|
|
||||||
|
echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile"
|
||||||
|
|
||||||
|
# Build the FlashInfer wheel using the existing Dockerfile stage
|
||||||
|
DOCKER_BUILDKIT=1 docker build \
|
||||||
|
--build-arg max_jobs=16 \
|
||||||
|
--build-arg USE_SCCACHE=1 \
|
||||||
|
--build-arg CUDA_VERSION="${CUDA_VERSION}" \
|
||||||
|
--build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \
|
||||||
|
--tag flashinfer-wheel-builder:${CUDA_VERSION} \
|
||||||
|
--target flashinfer-wheel-builder \
|
||||||
|
--progress plain \
|
||||||
|
-f docker/Dockerfile .
|
||||||
|
|
||||||
|
# Extract the wheel
|
||||||
|
mkdir -p artifacts/dist
|
||||||
|
# Copy the built wheel out of the builder image into ./artifacts/dist on the
# host, then relax permissions so the CI user can rename and upload it.
# The bind-mount spec and image tag are quoted so a workspace path containing
# spaces or glob characters reaches docker as a single argument.
docker run --rm -v "$(pwd)/artifacts:/output_host" "flashinfer-wheel-builder:${CUDA_VERSION}" \
  bash -c 'cp /output/*.whl /output_host/dist/ && chmod -R a+rw /output_host'
|
||||||
|
|
||||||
|
# Upload the wheel to S3
|
||||||
|
echo "Uploading FlashInfer wheel to S3..."
|
||||||
|
# Collect the built wheel(s). nullglob makes an unmatched glob expand to
# nothing, so an empty artifacts/dist/ yields a zero-length array instead of a
# one-element array holding the literal pattern 'artifacts/dist/*.whl' (which
# would wrongly satisfy the "exactly one wheel" check that follows).
# It is left enabled: the rest of the script contains no other unquoted globs.
shopt -s nullglob
wheel_files=(artifacts/dist/*.whl)
|
||||||
|
|
||||||
|
# Check that exactly one wheel is found
|
||||||
|
if [[ ${#wheel_files[@]} -ne 1 ]]; then
|
||||||
|
echo "Error: Expected exactly one wheel file in artifacts/dist/, but found ${#wheel_files[@]}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Get the single wheel file
|
||||||
|
wheel="${wheel_files[0]}"
|
||||||
|
echo "Processing FlashInfer wheel: $wheel"
|
||||||
|
|
||||||
|
# Rename 'linux' to 'manylinux1' in the wheel filename for compatibility
|
||||||
|
#######################################
# Map a wheel path with a plain `linux_<arch>` platform tag to the
# corresponding `manylinux1_<arch>` name.
# Only the `-linux_` platform-tag separator is rewritten, so a wheel that is
# already tagged `manylinux*` (or a directory whose name contains "linux")
# is returned unchanged instead of becoming `manymanylinux1...`.
# Arguments: $1 - wheel path
# Outputs:   the (possibly rewritten) wheel path on stdout
#######################################
to_manylinux1_name() {
  printf '%s\n' "${1/-linux_/-manylinux1_}"
}
new_wheel="$(to_manylinux1_name "$wheel")"
|
||||||
|
if [[ "$wheel" != "$new_wheel" ]]; then
|
||||||
|
mv -- "$wheel" "$new_wheel"
|
||||||
|
wheel="$new_wheel"
|
||||||
|
echo "Renamed wheel to: $wheel"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract the version from the wheel
|
||||||
|
version=$(unzip -p "$wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2)
|
||||||
|
wheel_name=$(basename "$wheel")
|
||||||
|
echo "FlashInfer version: $version"
|
||||||
|
|
||||||
|
# Upload the wheel to S3 under flashinfer directory
|
||||||
|
aws s3 cp "$wheel" "s3://vllm-wheels/flashinfer/"
|
||||||
|
|
||||||
|
echo "✅ FlashInfer wheel built and uploaded successfully for CUDA ${CUDA_VERSION}"
|
||||||
|
echo "📦 Wheel: $wheel_name (version $version)"
|
||||||
|
ls -la artifacts/dist/
|
||||||
@ -1,65 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
# Assume wheels are in artifacts/dist/*.whl
|
|
||||||
wheel_files=(artifacts/dist/*.whl)
|
|
||||||
|
|
||||||
# Check that exactly one wheel is found
|
|
||||||
if [[ ${#wheel_files[@]} -ne 1 ]]; then
|
|
||||||
echo "Error: Expected exactly one wheel file in artifacts/dist/, but found ${#wheel_files[@]}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Get the single wheel file
|
|
||||||
wheel="${wheel_files[0]}"
|
|
||||||
|
|
||||||
echo "Processing FlashInfer wheel: $wheel"
|
|
||||||
|
|
||||||
# Rename 'linux' to 'manylinux1' in the wheel filename for compatibility
|
|
||||||
new_wheel="${wheel/linux/manylinux1}"
|
|
||||||
if [[ "$wheel" != "$new_wheel" ]]; then
|
|
||||||
mv -- "$wheel" "$new_wheel"
|
|
||||||
wheel="$new_wheel"
|
|
||||||
echo "Renamed wheel to: $wheel"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Extract the version from the wheel
|
|
||||||
version=$(unzip -p "$wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2)
|
|
||||||
wheel_name=$(basename "$wheel")
|
|
||||||
echo "FlashInfer version: $version"
|
|
||||||
|
|
||||||
# Upload the wheel to S3 under flashinfer directory
|
|
||||||
aws s3 cp "$wheel" "s3://vllm-wheels/flashinfer/"
|
|
||||||
|
|
||||||
# Download existing index if it exists, then rebuild it with all wheels
|
|
||||||
echo "Rebuilding index with all available wheels..."
|
|
||||||
aws s3 ls s3://vllm-wheels/flashinfer/ --recursive | grep '\.whl$' | awk '{print $4}' | sed 's|flashinfer/||' > wheel_list.txt
|
|
||||||
|
|
||||||
# Generate complete index.html for the package (following pip index pattern)
|
|
||||||
cat > flashinfer_index.html << 'EOF'
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
<head><title>Links for flashinfer-python</title></head>
|
|
||||||
<body>
|
|
||||||
<h1>Links for flashinfer-python</h1>
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# Add each wheel to the index
|
|
||||||
while IFS= read -r wheel_file; do
|
|
||||||
if [[ -n "$wheel_file" ]]; then
|
|
||||||
echo "<a href=\"$wheel_file\">$wheel_file</a><br/>" >> flashinfer_index.html
|
|
||||||
fi
|
|
||||||
done < wheel_list.txt
|
|
||||||
|
|
||||||
cat >> flashinfer_index.html << 'EOF'
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
EOF
|
|
||||||
|
|
||||||
aws s3 cp flashinfer_index.html "s3://vllm-wheels/flashinfer/index.html"
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
rm -f flashinfer_index.html wheel_list.txt
|
|
||||||
|
|
||||||
echo "Successfully uploaded FlashInfer wheel $wheel_name (version $version)"
|
|
||||||
@ -274,10 +274,10 @@ ARG CUDA_VERSION
|
|||||||
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||||
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
|
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
|
||||||
|
|
||||||
COPY tools/build-flashinfer.sh /tmp/build-flashinfer.sh
|
COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
. /etc/environment && \
|
. /etc/environment && \
|
||||||
BUILD_WHEEL=true /tmp/build-flashinfer.sh
|
BUILD_WHEEL=true /tmp/flashinfer-build.sh
|
||||||
|
|
||||||
#################### EXTENSION Build IMAGE ####################
|
#################### EXTENSION Build IMAGE ####################
|
||||||
|
|
||||||
@ -406,10 +406,10 @@ ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
|||||||
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
|
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
|
||||||
# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
|
# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
|
||||||
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
|
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
|
||||||
COPY tools/build-flashinfer.sh /tmp/build-flashinfer.sh
|
COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
. /etc/environment && \
|
. /etc/environment && \
|
||||||
/tmp/build-flashinfer.sh
|
/tmp/flashinfer-build.sh
|
||||||
COPY examples examples
|
COPY examples examples
|
||||||
COPY benchmarks benchmarks
|
COPY benchmarks benchmarks
|
||||||
COPY ./vllm/collect_env.py .
|
COPY ./vllm/collect_env.py .
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user