diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index c394f3fd7a0c..c624c893d671 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -1,5 +1,28 @@
 steps:
-  - block: "Build wheels"
+  # Default wheels: this step sits before the manual block, so it runs without an unblock.
+  - label: "Build wheel default - Python {{matrix.python_version}}, CUDA {{matrix.cuda_version}}"
+    agents:
+      queue: cpu_queue
+    commands:
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION={{matrix.cuda_version}} --build-arg PYTHON_VERSION={{matrix.python_version}} --tag vllm-ci:build-image --build-arg CMAKE_BUILD_TYPE=Release --target build --progress plain ."
+      - "mkdir artifacts"
+      - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image cp -r dist /artifacts_host"
+      # Rename the wheels: replace "linux" with "manylinux1" in each filename.
+      # "$$" escapes "$" so `buildkite-agent pipeline upload` does not
+      # interpolate the loop variable; bash expands it at run time.
+      - "for f in artifacts/dist/*.whl; do mv -- \"$$f\" \"$${f/linux/manylinux1}\"; done"
+      - "aws s3 cp --recursive --acl public-read artifacts/dist s3://vllm-wheels/$BUILDKITE_COMMIT/"
+      - "aws s3 cp --recursive --acl public-read artifacts/dist s3://vllm-wheels/nightly/"
+    matrix:
+      setup:
+        cuda_version:
+          - "12.1.0"
+        python_version:
+          - "3.10"
+          - "3.11"
+
+  # Manual gate: the full matrix below only builds after this step is unblocked.
+  - block: "Build wheels full"
 
   - label: "Build wheel - Python {{matrix.python_version}}, CUDA {{matrix.cuda_version}}"
     agents:
@@ -8,7 +31,11 @@ steps:
       - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION={{matrix.cuda_version}} --build-arg PYTHON_VERSION={{matrix.python_version}} --tag vllm-ci:build-image --target build --progress plain ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image cp -r dist /artifacts_host"
-      - "aws s3 cp --recursive artifacts/dist s3://vllm-wheels/$BUILDKITE_COMMIT/"
+      # Rename linux -> manylinux1; "$$" keeps Buildkite's upload-time
+      # interpolation from expanding the loop variable.
+      - "for f in artifacts/dist/*.whl; do mv -- \"$$f\" \"$${f/linux/manylinux1}\"; done"
+      - "aws s3 cp --recursive --acl public-read artifacts/dist s3://vllm-wheels/$BUILDKITE_COMMIT/"
+      - "aws s3 cp --recursive --acl public-read artifacts/dist s3://vllm-wheels/nightly/"
     matrix:
       setup:
         cuda_version:
@@ -19,3 +46,13 @@ steps:
           - "3.9"
           - "3.10"
           - "3.11"
+      # Skip the combinations already produced by the default wheel step.
+      adjustments:
+        - with:
+            cuda_version: "12.1.0"
+            python_version: "3.10"
+          skip: true
+        - with:
+            cuda_version: "12.1.0"
+            python_version: "3.11"
+          skip: true
diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
index d458b0235ecb..a9dfac8ff5af 100644
--- a/docs/source/getting_started/installation.rst
+++ b/docs/source/getting_started/installation.rst
@@ -42,6 +42,20 @@ You can install vLLM using pip:
 
     Therefore, it is recommended to install vLLM with a **fresh new** conda environment. If either you have a different CUDA version or you want to use an existing PyTorch installation, you need to build vLLM from source. See below for instructions.
 
+.. note::
+
+    vLLM also publishes a subset of wheels (Python 3.10, 3.11 with CUDA 12) for every commit since v0.5.3. You can download them with the following command:
+
+    .. code-block:: console
+
+        $ export VLLM_VERSION=0.5.2 # vLLM's main branch version is currently set to latest released tag
+        $ export PYTHON_VERSION=310
+        $ pip install https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-${VLLM_VERSION}-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux1_x86_64.whl
+        $ # You can also access a specific commit
+        $ # export VLLM_COMMIT=...
+        $ # pip install https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_COMMIT}/vllm-${VLLM_VERSION}-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux1_x86_64.whl
+
+
 .. _build_from_source:
 
 Build from source