mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:05:01 +08:00
Revert precompile wheel changes (#22055)
This commit is contained in:
parent
98df153abf
commit
da31f6ad3d
@ -206,7 +206,16 @@ ARG SCCACHE_REGION_NAME=us-west-2
|
|||||||
ARG SCCACHE_S3_NO_CREDENTIALS=0
|
ARG SCCACHE_S3_NO_CREDENTIALS=0
|
||||||
|
|
||||||
# Flag to control whether to use pre-built vLLM wheels
|
# Flag to control whether to use pre-built vLLM wheels
|
||||||
ARG VLLM_USE_PRECOMPILED=""
|
ARG VLLM_USE_PRECOMPILED
|
||||||
|
# TODO: in setup.py VLLM_USE_PRECOMPILED is sensitive to truthiness, it will take =0 as "true", this should be fixed
|
||||||
|
ENV VLLM_USE_PRECOMPILED=""
|
||||||
|
RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
|
||||||
|
export VLLM_USE_PRECOMPILED=1 && \
|
||||||
|
echo "Using precompiled wheels"; \
|
||||||
|
else \
|
||||||
|
unset VLLM_USE_PRECOMPILED && \
|
||||||
|
echo "Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"; \
|
||||||
|
fi
|
||||||
|
|
||||||
# if USE_SCCACHE is set, use sccache to speed up compilation
|
# if USE_SCCACHE is set, use sccache to speed up compilation
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
@ -223,8 +232,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
|
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
|
||||||
&& export SCCACHE_IDLE_TIMEOUT=0 \
|
&& export SCCACHE_IDLE_TIMEOUT=0 \
|
||||||
&& export CMAKE_BUILD_TYPE=Release \
|
&& export CMAKE_BUILD_TYPE=Release \
|
||||||
&& export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
|
|
||||||
&& export VLLM_DOCKER_BUILD_CONTEXT=1 \
|
|
||||||
&& sccache --show-stats \
|
&& sccache --show-stats \
|
||||||
&& python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
|
&& python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
|
||||||
&& sccache --show-stats; \
|
&& sccache --show-stats; \
|
||||||
@ -238,22 +245,9 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
|
|||||||
# Clean any existing CMake artifacts
|
# Clean any existing CMake artifacts
|
||||||
rm -rf .deps && \
|
rm -rf .deps && \
|
||||||
mkdir -p .deps && \
|
mkdir -p .deps && \
|
||||||
export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
|
|
||||||
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
|
|
||||||
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
|
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# When using precompiled wheels, keep only the newest manylinux1 wheel and delete others
|
|
||||||
RUN if [ "$VLLM_USE_PRECOMPILED" = "1" ]; then \
|
|
||||||
echo "Cleaning up extra wheels in dist/..." && \
|
|
||||||
# Identify the most recent manylinux1_x86_64 wheel
|
|
||||||
KEEP_WHEEL=$(ls -t dist/*manylinux1_x86_64.whl 2>/dev/null | head -n1) && \
|
|
||||||
if [ -n "$KEEP_WHEEL" ]; then \
|
|
||||||
echo "Keeping wheel: $KEEP_WHEEL"; \
|
|
||||||
find dist/ -type f -name "*.whl" ! -path "${KEEP_WHEEL}" -delete; \
|
|
||||||
fi; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check the size of the wheel if RUN_WHEEL_CHECK is true
|
# Check the size of the wheel if RUN_WHEEL_CHECK is true
|
||||||
COPY .buildkite/check-wheel-size.py check-wheel-size.py
|
COPY .buildkite/check-wheel-size.py check-wheel-size.py
|
||||||
# sync the default value with .buildkite/check-wheel-size.py
|
# sync the default value with .buildkite/check-wheel-size.py
|
||||||
@ -369,7 +363,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Install vllm wheel first, so that torch etc will be installed.
|
# Install vllm wheel first, so that torch etc will be installed.
|
||||||
# !bang
|
|
||||||
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
||||||
--mount=type=cache,target=/root/.cache/uv \
|
--mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system dist/*.whl --verbose \
|
uv pip install --system dist/*.whl --verbose \
|
||||||
|
|||||||
@ -22,7 +22,9 @@ aiohttp==3.10.11
|
|||||||
aiohttp-cors==0.8.1
|
aiohttp-cors==0.8.1
|
||||||
# via ray
|
# via ray
|
||||||
aiosignal==1.3.1
|
aiosignal==1.3.1
|
||||||
# via aiohttp
|
# via
|
||||||
|
# aiohttp
|
||||||
|
# ray
|
||||||
albucore==0.0.16
|
albucore==0.0.16
|
||||||
# via terratorch
|
# via terratorch
|
||||||
albumentations==1.4.6
|
albumentations==1.4.6
|
||||||
@ -137,7 +139,7 @@ contourpy==1.3.0
|
|||||||
# via matplotlib
|
# via matplotlib
|
||||||
cramjam==2.9.0
|
cramjam==2.9.0
|
||||||
# via fastparquet
|
# via fastparquet
|
||||||
cupy-cuda12x==13.5.1
|
cupy-cuda12x==13.3.0
|
||||||
# via ray
|
# via ray
|
||||||
cycler==0.12.1
|
cycler==0.12.1
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
@ -224,6 +226,7 @@ frozenlist==1.5.0
|
|||||||
# via
|
# via
|
||||||
# aiohttp
|
# aiohttp
|
||||||
# aiosignal
|
# aiosignal
|
||||||
|
# ray
|
||||||
fsspec==2024.9.0
|
fsspec==2024.9.0
|
||||||
# via
|
# via
|
||||||
# datasets
|
# datasets
|
||||||
@ -600,18 +603,10 @@ opencv-python-headless==4.11.0.86
|
|||||||
opentelemetry-api==1.35.0
|
opentelemetry-api==1.35.0
|
||||||
# via
|
# via
|
||||||
# mlflow-skinny
|
# mlflow-skinny
|
||||||
# opentelemetry-exporter-prometheus
|
|
||||||
# opentelemetry-sdk
|
# opentelemetry-sdk
|
||||||
# opentelemetry-semantic-conventions
|
# opentelemetry-semantic-conventions
|
||||||
opentelemetry-exporter-prometheus==0.56b0
|
|
||||||
# via ray
|
|
||||||
opentelemetry-proto==1.36.0
|
|
||||||
# via ray
|
|
||||||
opentelemetry-sdk==1.35.0
|
opentelemetry-sdk==1.35.0
|
||||||
# via
|
# via mlflow-skinny
|
||||||
# mlflow-skinny
|
|
||||||
# opentelemetry-exporter-prometheus
|
|
||||||
# ray
|
|
||||||
opentelemetry-semantic-conventions==0.56b0
|
opentelemetry-semantic-conventions==0.56b0
|
||||||
# via opentelemetry-sdk
|
# via opentelemetry-sdk
|
||||||
packaging==24.2
|
packaging==24.2
|
||||||
@ -702,9 +697,7 @@ pqdm==0.2.0
|
|||||||
pretrainedmodels==0.7.4
|
pretrainedmodels==0.7.4
|
||||||
# via segmentation-models-pytorch
|
# via segmentation-models-pytorch
|
||||||
prometheus-client==0.22.0
|
prometheus-client==0.22.0
|
||||||
# via
|
# via ray
|
||||||
# opentelemetry-exporter-prometheus
|
|
||||||
# ray
|
|
||||||
propcache==0.2.0
|
propcache==0.2.0
|
||||||
# via yarl
|
# via yarl
|
||||||
proto-plus==1.26.1
|
proto-plus==1.26.1
|
||||||
@ -714,7 +707,6 @@ protobuf==5.28.3
|
|||||||
# google-api-core
|
# google-api-core
|
||||||
# googleapis-common-protos
|
# googleapis-common-protos
|
||||||
# mlflow-skinny
|
# mlflow-skinny
|
||||||
# opentelemetry-proto
|
|
||||||
# proto-plus
|
# proto-plus
|
||||||
# ray
|
# ray
|
||||||
# tensorboardx
|
# tensorboardx
|
||||||
@ -862,7 +854,7 @@ rasterio==1.4.3
|
|||||||
# rioxarray
|
# rioxarray
|
||||||
# terratorch
|
# terratorch
|
||||||
# torchgeo
|
# torchgeo
|
||||||
ray==2.48.0
|
ray==2.43.0
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
redis==5.2.0
|
redis==5.2.0
|
||||||
# via tensorizer
|
# via tensorizer
|
||||||
|
|||||||
182
setup.py
182
setup.py
@ -7,7 +7,6 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -282,69 +281,10 @@ class cmake_build_ext(build_ext):
|
|||||||
self.copy_file(file, dst_file)
|
self.copy_file(file, dst_file)
|
||||||
|
|
||||||
|
|
||||||
class precompiled_wheel_utils:
|
class repackage_wheel(build_ext):
|
||||||
"""Extracts libraries and other files from an existing wheel."""
|
"""Extracts libraries and other files from an existing wheel."""
|
||||||
|
|
||||||
@staticmethod
|
def get_base_commit_in_main_branch(self) -> str:
|
||||||
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
|
|
||||||
import tempfile
|
|
||||||
import zipfile
|
|
||||||
|
|
||||||
temp_dir = None
|
|
||||||
try:
|
|
||||||
if not os.path.isfile(wheel_url_or_path):
|
|
||||||
wheel_filename = wheel_url_or_path.split("/")[-1]
|
|
||||||
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
|
|
||||||
wheel_path = os.path.join(temp_dir, wheel_filename)
|
|
||||||
print(f"Downloading wheel from {wheel_url_or_path} "
|
|
||||||
f"to {wheel_path}")
|
|
||||||
from urllib.request import urlretrieve
|
|
||||||
urlretrieve(wheel_url_or_path, filename=wheel_path)
|
|
||||||
else:
|
|
||||||
wheel_path = wheel_url_or_path
|
|
||||||
print(f"Using existing wheel at {wheel_path}")
|
|
||||||
|
|
||||||
package_data_patch = {}
|
|
||||||
|
|
||||||
with zipfile.ZipFile(wheel_path) as wheel:
|
|
||||||
files_to_copy = [
|
|
||||||
"vllm/_C.abi3.so",
|
|
||||||
"vllm/_moe_C.abi3.so",
|
|
||||||
"vllm/_flashmla_C.abi3.so",
|
|
||||||
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
|
|
||||||
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
|
|
||||||
"vllm/cumem_allocator.abi3.so",
|
|
||||||
]
|
|
||||||
|
|
||||||
compiled_regex = re.compile(
|
|
||||||
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
|
|
||||||
file_members = list(
|
|
||||||
filter(lambda x: x.filename in files_to_copy,
|
|
||||||
wheel.filelist))
|
|
||||||
file_members += list(
|
|
||||||
filter(lambda x: compiled_regex.match(x.filename),
|
|
||||||
wheel.filelist))
|
|
||||||
|
|
||||||
for file in file_members:
|
|
||||||
print(f"[extract] {file.filename}")
|
|
||||||
target_path = os.path.join(".", file.filename)
|
|
||||||
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
|
||||||
with wheel.open(file.filename) as src, open(
|
|
||||||
target_path, "wb") as dst:
|
|
||||||
shutil.copyfileobj(src, dst)
|
|
||||||
|
|
||||||
pkg = os.path.dirname(file.filename).replace("/", ".")
|
|
||||||
package_data_patch.setdefault(pkg, []).append(
|
|
||||||
os.path.basename(file.filename))
|
|
||||||
|
|
||||||
return package_data_patch
|
|
||||||
finally:
|
|
||||||
if temp_dir is not None:
|
|
||||||
print(f"Removing temporary directory {temp_dir}")
|
|
||||||
shutil.rmtree(temp_dir)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_base_commit_in_main_branch() -> str:
|
|
||||||
# Force to use the nightly wheel. This is mainly used for CI testing.
|
# Force to use the nightly wheel. This is mainly used for CI testing.
|
||||||
if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
|
if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
|
||||||
return "nightly"
|
return "nightly"
|
||||||
@ -357,10 +297,6 @@ class precompiled_wheel_utils:
|
|||||||
]).decode("utf-8")
|
]).decode("utf-8")
|
||||||
upstream_main_commit = json.loads(resp_json)["sha"]
|
upstream_main_commit = json.loads(resp_json)["sha"]
|
||||||
|
|
||||||
# In Docker build context, .git may be immutable or missing.
|
|
||||||
if envs.VLLM_DOCKER_BUILD_CONTEXT:
|
|
||||||
return upstream_main_commit
|
|
||||||
|
|
||||||
# Check if the upstream_main_commit exists in the local repo
|
# Check if the upstream_main_commit exists in the local repo
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(
|
subprocess.check_output(
|
||||||
@ -393,15 +329,92 @@ class precompiled_wheel_utils:
|
|||||||
"wheel may not be compatible with your dev branch: %s", err)
|
"wheel may not be compatible with your dev branch: %s", err)
|
||||||
return "nightly"
|
return "nightly"
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
assert _is_cuda(
|
||||||
|
), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
|
||||||
|
|
||||||
|
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
|
||||||
|
if wheel_location is None:
|
||||||
|
base_commit = self.get_base_commit_in_main_branch()
|
||||||
|
wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||||
|
# Fallback to nightly wheel if latest commit wheel is unavailable,
|
||||||
|
# in this rare case, the nightly release CI hasn't finished on main.
|
||||||
|
if not is_url_available(wheel_location):
|
||||||
|
wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||||
|
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
if os.path.isfile(wheel_location):
|
||||||
|
wheel_path = wheel_location
|
||||||
|
print(f"Using existing wheel={wheel_path}")
|
||||||
|
else:
|
||||||
|
# Download the wheel from a given URL, assume
|
||||||
|
# the filename is the last part of the URL
|
||||||
|
wheel_filename = wheel_location.split("/")[-1]
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
# create a temporary directory to store the wheel
|
||||||
|
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
|
||||||
|
wheel_path = os.path.join(temp_dir, wheel_filename)
|
||||||
|
|
||||||
|
print(f"Downloading wheel from {wheel_location} to {wheel_path}")
|
||||||
|
|
||||||
|
from urllib.request import urlretrieve
|
||||||
|
|
||||||
|
try:
|
||||||
|
urlretrieve(wheel_location, filename=wheel_path)
|
||||||
|
except Exception as e:
|
||||||
|
from setuptools.errors import SetupError
|
||||||
|
|
||||||
|
raise SetupError(
|
||||||
|
f"Failed to get vLLM wheel from {wheel_location}") from e
|
||||||
|
|
||||||
|
with zipfile.ZipFile(wheel_path) as wheel:
|
||||||
|
files_to_copy = [
|
||||||
|
"vllm/_C.abi3.so",
|
||||||
|
"vllm/_moe_C.abi3.so",
|
||||||
|
"vllm/_flashmla_C.abi3.so",
|
||||||
|
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
|
||||||
|
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
|
||||||
|
"vllm/cumem_allocator.abi3.so",
|
||||||
|
# "vllm/_version.py", # not available in nightly wheels yet
|
||||||
|
]
|
||||||
|
|
||||||
|
file_members = list(
|
||||||
|
filter(lambda x: x.filename in files_to_copy, wheel.filelist))
|
||||||
|
|
||||||
|
# vllm_flash_attn python code:
|
||||||
|
# Regex from
|
||||||
|
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
|
||||||
|
compiled_regex = re.compile(
|
||||||
|
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
|
||||||
|
file_members += list(
|
||||||
|
filter(lambda x: compiled_regex.match(x.filename),
|
||||||
|
wheel.filelist))
|
||||||
|
|
||||||
|
for file in file_members:
|
||||||
|
print(f"Extracting and including {file.filename} "
|
||||||
|
"from existing wheel")
|
||||||
|
package_name = os.path.dirname(file.filename).replace("/", ".")
|
||||||
|
file_name = os.path.basename(file.filename)
|
||||||
|
|
||||||
|
if package_name not in package_data:
|
||||||
|
package_data[package_name] = []
|
||||||
|
|
||||||
|
wheel.extract(file)
|
||||||
|
if file_name.endswith(".py"):
|
||||||
|
# python files shouldn't be added to package_data
|
||||||
|
continue
|
||||||
|
|
||||||
|
package_data[package_name].append(file_name)
|
||||||
|
|
||||||
|
|
||||||
def _no_device() -> bool:
|
def _no_device() -> bool:
|
||||||
return VLLM_TARGET_DEVICE == "empty"
|
return VLLM_TARGET_DEVICE == "empty"
|
||||||
|
|
||||||
|
|
||||||
def _is_cuda() -> bool:
|
def _is_cuda() -> bool:
|
||||||
# Allow forced CUDA in Docker/precompiled builds, even without torch.cuda
|
|
||||||
if envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT:
|
|
||||||
return True
|
|
||||||
has_cuda = torch.version.cuda is not None
|
has_cuda = torch.version.cuda is not None
|
||||||
return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
|
return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
|
||||||
and not (_is_neuron() or _is_tpu()))
|
and not (_is_neuron() or _is_tpu()))
|
||||||
@ -626,37 +639,16 @@ package_data = {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
# If using precompiled, extract and patch package_data (in advance of setup)
|
|
||||||
if envs.VLLM_USE_PRECOMPILED:
|
|
||||||
assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
|
|
||||||
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
|
|
||||||
if wheel_location is not None:
|
|
||||||
wheel_url = wheel_location
|
|
||||||
else:
|
|
||||||
base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
|
|
||||||
wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
|
||||||
from urllib.request import urlopen
|
|
||||||
try:
|
|
||||||
with urlopen(wheel_url) as resp:
|
|
||||||
if resp.status != 200:
|
|
||||||
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[warn] Falling back to nightly wheel: {e}")
|
|
||||||
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
|
||||||
|
|
||||||
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
|
|
||||||
wheel_url)
|
|
||||||
for pkg, files in patch.items():
|
|
||||||
package_data.setdefault(pkg, []).extend(files)
|
|
||||||
|
|
||||||
if _no_device():
|
if _no_device():
|
||||||
ext_modules = []
|
ext_modules = []
|
||||||
|
|
||||||
if not ext_modules or envs.VLLM_USE_PRECOMPILED:
|
if not ext_modules:
|
||||||
# Disable build_ext when using precompiled wheel
|
|
||||||
cmdclass = {}
|
cmdclass = {}
|
||||||
else:
|
else:
|
||||||
cmdclass = {"build_ext": cmake_build_ext}
|
cmdclass = {
|
||||||
|
"build_ext":
|
||||||
|
repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
|
||||||
|
}
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
# static metadata should rather go in pyproject.toml
|
# static metadata should rather go in pyproject.toml
|
||||||
|
|||||||
11
vllm/envs.py
11
vllm/envs.py
@ -68,7 +68,6 @@ if TYPE_CHECKING:
|
|||||||
MAX_JOBS: Optional[str] = None
|
MAX_JOBS: Optional[str] = None
|
||||||
NVCC_THREADS: Optional[str] = None
|
NVCC_THREADS: Optional[str] = None
|
||||||
VLLM_USE_PRECOMPILED: bool = False
|
VLLM_USE_PRECOMPILED: bool = False
|
||||||
VLLM_DOCKER_BUILD_CONTEXT: bool = False
|
|
||||||
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
|
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
|
||||||
VLLM_NO_DEPRECATION_WARNING: bool = False
|
VLLM_NO_DEPRECATION_WARNING: bool = False
|
||||||
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
|
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
|
||||||
@ -228,14 +227,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
|
|
||||||
# If set, vllm will use precompiled binaries (*.so)
|
# If set, vllm will use precompiled binaries (*.so)
|
||||||
"VLLM_USE_PRECOMPILED":
|
"VLLM_USE_PRECOMPILED":
|
||||||
lambda: os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower() in
|
lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")) or bool(
|
||||||
("1", "true") or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
|
os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
|
||||||
|
|
||||||
# Used to mark that setup.py is running in a Docker build context,
|
|
||||||
# in order to force the use of precompiled binaries.
|
|
||||||
"VLLM_DOCKER_BUILD_CONTEXT":
|
|
||||||
lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").strip().lower() in
|
|
||||||
("1", "true"),
|
|
||||||
|
|
||||||
# Whether to force using nightly wheel in python build.
|
# Whether to force using nightly wheel in python build.
|
||||||
# This is used for testing the nightly wheel in python build.
|
# This is used for testing the nightly wheel in python build.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user