mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 06:14:57 +08:00
fix(setup): improve precompiled wheel setup for Docker builds (#22025)
Signed-off-by: dougbtv <dosmith@redhat.com>
This commit is contained in:
parent
7349d5268b
commit
58bb902186
@ -370,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
fi
|
||||
|
||||
# Install vllm wheel first, so that torch etc will be installed.
|
||||
# !bang
|
||||
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system dist/*.whl --verbose \
|
||||
|
||||
@ -22,9 +22,7 @@ aiohttp==3.10.11
|
||||
aiohttp-cors==0.8.1
|
||||
# via ray
|
||||
aiosignal==1.3.1
|
||||
# via
|
||||
# aiohttp
|
||||
# ray
|
||||
# via aiohttp
|
||||
albucore==0.0.16
|
||||
# via terratorch
|
||||
albumentations==1.4.6
|
||||
@ -139,7 +137,7 @@ contourpy==1.3.0
|
||||
# via matplotlib
|
||||
cramjam==2.9.0
|
||||
# via fastparquet
|
||||
cupy-cuda12x==13.3.0
|
||||
cupy-cuda12x==13.5.1
|
||||
# via ray
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
@ -226,7 +224,6 @@ frozenlist==1.5.0
|
||||
# via
|
||||
# aiohttp
|
||||
# aiosignal
|
||||
# ray
|
||||
fsspec==2024.9.0
|
||||
# via
|
||||
# datasets
|
||||
@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
|
||||
opentelemetry-api==1.35.0
|
||||
# via
|
||||
# mlflow-skinny
|
||||
# opentelemetry-exporter-prometheus
|
||||
# opentelemetry-sdk
|
||||
# opentelemetry-semantic-conventions
|
||||
opentelemetry-exporter-prometheus==0.56b0
|
||||
# via ray
|
||||
opentelemetry-proto==1.36.0
|
||||
# via ray
|
||||
opentelemetry-sdk==1.35.0
|
||||
# via mlflow-skinny
|
||||
# via
|
||||
# mlflow-skinny
|
||||
# opentelemetry-exporter-prometheus
|
||||
# ray
|
||||
opentelemetry-semantic-conventions==0.56b0
|
||||
# via opentelemetry-sdk
|
||||
packaging==24.2
|
||||
@ -697,7 +702,9 @@ pqdm==0.2.0
|
||||
pretrainedmodels==0.7.4
|
||||
# via segmentation-models-pytorch
|
||||
prometheus-client==0.22.0
|
||||
# via ray
|
||||
# via
|
||||
# opentelemetry-exporter-prometheus
|
||||
# ray
|
||||
propcache==0.2.0
|
||||
# via yarl
|
||||
proto-plus==1.26.1
|
||||
@ -707,6 +714,7 @@ protobuf==5.28.3
|
||||
# google-api-core
|
||||
# googleapis-common-protos
|
||||
# mlflow-skinny
|
||||
# opentelemetry-proto
|
||||
# proto-plus
|
||||
# ray
|
||||
# tensorboardx
|
||||
@ -854,7 +862,7 @@ rasterio==1.4.3
|
||||
# rioxarray
|
||||
# terratorch
|
||||
# torchgeo
|
||||
ray==2.43.0
|
||||
ray==2.48.0
|
||||
# via -r requirements/test.in
|
||||
redis==5.2.0
|
||||
# via tensorizer
|
||||
|
||||
203
setup.py
203
setup.py
@ -282,10 +282,69 @@ class cmake_build_ext(build_ext):
|
||||
self.copy_file(file, dst_file)
|
||||
|
||||
|
||||
class repackage_wheel(build_ext):
|
||||
class precompiled_wheel_utils:
|
||||
"""Extracts libraries and other files from an existing wheel."""
|
||||
|
||||
def get_base_commit_in_main_branch(self) -> str:
|
||||
@staticmethod
|
||||
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
|
||||
import tempfile
|
||||
import zipfile
|
||||
|
||||
temp_dir = None
|
||||
try:
|
||||
if not os.path.isfile(wheel_url_or_path):
|
||||
wheel_filename = wheel_url_or_path.split("/")[-1]
|
||||
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
|
||||
wheel_path = os.path.join(temp_dir, wheel_filename)
|
||||
print(f"Downloading wheel from {wheel_url_or_path} "
|
||||
f"to {wheel_path}")
|
||||
from urllib.request import urlretrieve
|
||||
urlretrieve(wheel_url_or_path, filename=wheel_path)
|
||||
else:
|
||||
wheel_path = wheel_url_or_path
|
||||
print(f"Using existing wheel at {wheel_path}")
|
||||
|
||||
package_data_patch = {}
|
||||
|
||||
with zipfile.ZipFile(wheel_path) as wheel:
|
||||
files_to_copy = [
|
||||
"vllm/_C.abi3.so",
|
||||
"vllm/_moe_C.abi3.so",
|
||||
"vllm/_flashmla_C.abi3.so",
|
||||
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
|
||||
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
|
||||
"vllm/cumem_allocator.abi3.so",
|
||||
]
|
||||
|
||||
compiled_regex = re.compile(
|
||||
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
|
||||
file_members = list(
|
||||
filter(lambda x: x.filename in files_to_copy,
|
||||
wheel.filelist))
|
||||
file_members += list(
|
||||
filter(lambda x: compiled_regex.match(x.filename),
|
||||
wheel.filelist))
|
||||
|
||||
for file in file_members:
|
||||
print(f"[extract] {file.filename}")
|
||||
target_path = os.path.join(".", file.filename)
|
||||
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
||||
with wheel.open(file.filename) as src, open(
|
||||
target_path, "wb") as dst:
|
||||
shutil.copyfileobj(src, dst)
|
||||
|
||||
pkg = os.path.dirname(file.filename).replace("/", ".")
|
||||
package_data_patch.setdefault(pkg, []).append(
|
||||
os.path.basename(file.filename))
|
||||
|
||||
return package_data_patch
|
||||
finally:
|
||||
if temp_dir is not None:
|
||||
print(f"Removing temporary directory {temp_dir}")
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
@staticmethod
|
||||
def get_base_commit_in_main_branch() -> str:
|
||||
# Force to use the nightly wheel. This is mainly used for CI testing.
|
||||
if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
|
||||
return "nightly"
|
||||
@ -334,115 +393,6 @@ class repackage_wheel(build_ext):
|
||||
"wheel may not be compatible with your dev branch: %s", err)
|
||||
return "nightly"
|
||||
|
||||
def run(self) -> None:
|
||||
assert _is_cuda(
|
||||
), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
|
||||
|
||||
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
|
||||
if wheel_location is None:
|
||||
base_commit = self.get_base_commit_in_main_branch()
|
||||
wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||
# Fallback to nightly wheel if latest commit wheel is unavailable,
|
||||
# in this rare case, the nightly release CI hasn't finished on main.
|
||||
if not is_url_available(wheel_location):
|
||||
wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||
|
||||
import zipfile
|
||||
|
||||
if os.path.isfile(wheel_location):
|
||||
wheel_path = wheel_location
|
||||
print(f"Using existing wheel={wheel_path}")
|
||||
else:
|
||||
# Download the wheel from a given URL, assume
|
||||
# the filename is the last part of the URL
|
||||
wheel_filename = wheel_location.split("/")[-1]
|
||||
|
||||
import tempfile
|
||||
|
||||
# create a temporary directory to store the wheel
|
||||
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
|
||||
wheel_path = os.path.join(temp_dir, wheel_filename)
|
||||
print(f"Downloading wheel from {wheel_location} to {wheel_path}")
|
||||
from urllib.request import urlretrieve
|
||||
try:
|
||||
urlretrieve(wheel_location, filename=wheel_path)
|
||||
except Exception as e:
|
||||
from setuptools.errors import SetupError
|
||||
raise SetupError(
|
||||
f"Failed to get vLLM wheel from {wheel_location}") from e
|
||||
|
||||
# Set the dist_dir for Docker build context
|
||||
dist_dir = ("/workspace/dist"
|
||||
if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
|
||||
os.makedirs(dist_dir, exist_ok=True)
|
||||
|
||||
# Extract only necessary compiled .so files from precompiled wheel
|
||||
with zipfile.ZipFile(wheel_path) as wheel:
|
||||
# Get version from METADATA (optional, mostly useful for logging)
|
||||
metadata_file = next((n for n in wheel.namelist()
|
||||
if n.endswith(".dist-info/METADATA")), None)
|
||||
if not metadata_file:
|
||||
raise RuntimeError(
|
||||
"Could not find METADATA in precompiled wheel.")
|
||||
metadata = wheel.read(metadata_file).decode()
|
||||
version_line = next((line for line in metadata.splitlines()
|
||||
if line.startswith("Version: ")), None)
|
||||
if not version_line:
|
||||
raise RuntimeError(
|
||||
"Could not determine version from METADATA.")
|
||||
version = version_line.split(": ")[1].strip()
|
||||
|
||||
print(f"Extracting precompiled kernels from vLLM wheel version: "
|
||||
f"{version}")
|
||||
|
||||
# List of compiled shared objects to extract
|
||||
files_to_copy = [
|
||||
"vllm/_C.abi3.so",
|
||||
"vllm/_moe_C.abi3.so",
|
||||
"vllm/_flashmla_C.abi3.so",
|
||||
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
|
||||
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
|
||||
"vllm/cumem_allocator.abi3.so",
|
||||
]
|
||||
|
||||
file_members = list(
|
||||
filter(lambda x: x.filename in files_to_copy, wheel.filelist))
|
||||
compiled_regex = re.compile(
|
||||
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
|
||||
file_members += list(
|
||||
filter(lambda x: compiled_regex.match(x.filename),
|
||||
wheel.filelist))
|
||||
|
||||
for file in file_members:
|
||||
print(f"Extracting and including {file.filename} "
|
||||
"from existing wheel")
|
||||
package_name = os.path.dirname(file.filename).replace("/", ".")
|
||||
file_name = os.path.basename(file.filename)
|
||||
|
||||
if package_name not in package_data:
|
||||
package_data[package_name] = []
|
||||
|
||||
output_base = (dist_dir
|
||||
if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
|
||||
target_path = os.path.join(output_base, file.filename)
|
||||
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
||||
with wheel.open(file.filename) as src, open(target_path,
|
||||
"wb") as dst:
|
||||
shutil.copyfileobj(src, dst)
|
||||
|
||||
package_data[package_name].append(file_name)
|
||||
|
||||
# Copy wheel into dist dir for Docker to consume (e.g., via --mount)
|
||||
if envs.VLLM_DOCKER_BUILD_CONTEXT:
|
||||
arch_tag = "cp38-abi3-manylinux1_x86_64"
|
||||
corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
|
||||
final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
|
||||
|
||||
print(
|
||||
"Docker build context detected, copying precompiled wheel to "
|
||||
f"{final_wheel_path}")
|
||||
shutil.copy2(wheel_path, final_wheel_path)
|
||||
|
||||
|
||||
def _no_device() -> bool:
|
||||
return VLLM_TARGET_DEVICE == "empty"
|
||||
@ -676,16 +626,37 @@ package_data = {
|
||||
]
|
||||
}
|
||||
|
||||
# If using precompiled, extract and patch package_data (in advance of setup)
|
||||
if envs.VLLM_USE_PRECOMPILED:
|
||||
assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
|
||||
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
|
||||
if wheel_location is not None:
|
||||
wheel_url = wheel_location
|
||||
else:
|
||||
base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
|
||||
wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||
from urllib.request import urlopen
|
||||
try:
|
||||
with urlopen(wheel_url) as resp:
|
||||
if resp.status != 200:
|
||||
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||
except Exception as e:
|
||||
print(f"[warn] Falling back to nightly wheel: {e}")
|
||||
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
|
||||
|
||||
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
|
||||
wheel_url)
|
||||
for pkg, files in patch.items():
|
||||
package_data.setdefault(pkg, []).extend(files)
|
||||
|
||||
if _no_device():
|
||||
ext_modules = []
|
||||
|
||||
if not ext_modules:
|
||||
if not ext_modules or envs.VLLM_USE_PRECOMPILED:
|
||||
# Disable build_ext when using precompiled wheel
|
||||
cmdclass = {}
|
||||
else:
|
||||
cmdclass = {
|
||||
"build_ext":
|
||||
repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
|
||||
}
|
||||
cmdclass = {"build_ext": cmake_build_ext}
|
||||
|
||||
setup(
|
||||
# static metadata should rather go in pyproject.toml
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user