mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-05 20:17:07 +08:00
Consolidate args
Signed-off-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
parent
61d568a19d
commit
02f7780716
@ -3,7 +3,7 @@
|
||||
set -ex
|
||||
|
||||
CUDA_VERSION="${1:-12.8.1}"
|
||||
FLASHINFER_VERSION="${FLASHINFER_VERSION:-v0.2.9rc2}"
|
||||
# FlashInfer version controlled in tools/flashinfer-build.sh
|
||||
|
||||
echo "Building FlashInfer wheel for CUDA ${CUDA_VERSION} using vLLM Dockerfile"
|
||||
|
||||
@ -12,7 +12,6 @@ DOCKER_BUILDKIT=1 docker build \
|
||||
--build-arg max_jobs=16 \
|
||||
--build-arg USE_SCCACHE=1 \
|
||||
--build-arg CUDA_VERSION="${CUDA_VERSION}" \
|
||||
--build-arg FLASHINFER_GIT_REF="${FLASHINFER_VERSION}" \
|
||||
--tag flashinfer-wheel-builder:${CUDA_VERSION} \
|
||||
--target flashinfer-wheel-builder \
|
||||
--progress plain \
|
||||
|
||||
@ -271,8 +271,6 @@ RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \
|
||||
#################### FLASHINFER WHEEL BUILD IMAGE ####################
|
||||
FROM base AS flashinfer-wheel-builder
|
||||
ARG CUDA_VERSION
|
||||
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
|
||||
|
||||
COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
@ -402,10 +400,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
|
||||
# $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/v0.2.6.post1/flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
|
||||
|
||||
# Install FlashInfer from source
|
||||
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
|
||||
# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
|
||||
ARG FLASHINFER_GIT_REF="v0.2.9rc2"
|
||||
# Version controlled in tools/flashinfer-build.sh - keep in sync with requirements/cuda.txt
|
||||
COPY tools/flashinfer-build.sh /tmp/flashinfer-build.sh
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
. /etc/environment && \
|
||||
|
||||
@ -5,8 +5,9 @@ set -ex
|
||||
# Build FlashInfer with AOT kernels
|
||||
# This script is used by both the Dockerfile and standalone wheel building
|
||||
|
||||
FLASHINFER_GIT_REPO="${FLASHINFER_GIT_REPO:-https://github.com/flashinfer-ai/flashinfer.git}"
|
||||
FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF:-v0.2.9rc2}"
|
||||
# FlashInfer configuration - keep FLASHINFER_GIT_REF in sync with requirements/cuda.txt
|
||||
FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||
FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF:-v0.2.9rc2}" # Must match requirements/cuda.txt
|
||||
CUDA_VERSION="${CUDA_VERSION:-12.8.1}"
|
||||
BUILD_WHEEL="${BUILD_WHEEL:-false}"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user