From 58bb902186a87007deeeef2d2af02ed2b13bb182 Mon Sep 17 00:00:00 2001
From: Doug Smith
Date: Thu, 31 Jul 2025 12:52:48 -0400
Subject: [PATCH] fix(setup): improve precompiled wheel setup for Docker builds (#22025)

Signed-off-by: dougbtv
---
 docker/Dockerfile     |   1 +
 requirements/test.txt |  24 +++--
 setup.py              | 203 ++++++++++++++++++------------------------
 3 files changed, 104 insertions(+), 124 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 43522ef8fb8d..69aeee67a430 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -370,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 
 # Install vllm wheel first, so that torch etc will be installed.
+# !bang
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system dist/*.whl --verbose \
diff --git a/requirements/test.txt b/requirements/test.txt
index d45048aae580..4aaca2afea26 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -22,9 +22,7 @@ aiohttp==3.10.11
 aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.3.1
-    # via
-    #   aiohttp
-    #   ray
+    # via aiohttp
 albucore==0.0.16
     # via terratorch
 albumentations==1.4.6
@@ -139,7 +137,7 @@ contourpy==1.3.0
     # via matplotlib
 cramjam==2.9.0
     # via fastparquet
-cupy-cuda12x==13.3.0
+cupy-cuda12x==13.5.1
     # via ray
 cycler==0.12.1
     # via matplotlib
@@ -226,7 +224,6 @@ frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
-    #   ray
 fsspec==2024.9.0
     # via
     #   datasets
@@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
 opentelemetry-api==1.35.0
     # via
     #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
+opentelemetry-exporter-prometheus==0.56b0
+    # via ray
+opentelemetry-proto==1.36.0
+    # via ray
 opentelemetry-sdk==1.35.0
-    # via mlflow-skinny
+    # via
+    #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
+    #   ray
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
 packaging==24.2
@@ -697,7 +702,9 @@ pqdm==0.2.0
 pretrainedmodels==0.7.4
     # via segmentation-models-pytorch
 prometheus-client==0.22.0
-    # via ray
+    # via
+    #   opentelemetry-exporter-prometheus
+    #   ray
 propcache==0.2.0
     # via yarl
 proto-plus==1.26.1
@@ -707,6 +714,7 @@ protobuf==5.28.3
     #   google-api-core
     #   googleapis-common-protos
     #   mlflow-skinny
+    #   opentelemetry-proto
     #   proto-plus
     #   ray
     #   tensorboardx
@@ -854,7 +862,7 @@ rasterio==1.4.3
     #   rioxarray
     #   terratorch
     #   torchgeo
-ray==2.43.0
+ray==2.48.0
     # via -r requirements/test.in
 redis==5.2.0
     # via tensorizer
diff --git a/setup.py b/setup.py
index bf3391e2db19..6d615d122d69 100644
--- a/setup.py
+++ b/setup.py
@@ -282,10 +282,69 @@ class cmake_build_ext(build_ext):
                 self.copy_file(file, dst_file)
 
 
-class repackage_wheel(build_ext):
+class precompiled_wheel_utils:
     """Extracts libraries and other files from an existing wheel."""
 
-    def get_base_commit_in_main_branch(self) -> str:
+    @staticmethod
+    def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
+        import tempfile
+        import zipfile
+
+        temp_dir = None
+        try:
+            if not os.path.isfile(wheel_url_or_path):
+                wheel_filename = wheel_url_or_path.split("/")[-1]
+                temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+                wheel_path = os.path.join(temp_dir, wheel_filename)
+                print(f"Downloading wheel from {wheel_url_or_path} "
+                      f"to {wheel_path}")
+                from urllib.request import urlretrieve
+                urlretrieve(wheel_url_or_path, filename=wheel_path)
+            else:
+                wheel_path = wheel_url_or_path
+                print(f"Using existing wheel at {wheel_path}")
+
+            package_data_patch = {}
+
+            with zipfile.ZipFile(wheel_path) as wheel:
+                files_to_copy = [
+                    "vllm/_C.abi3.so",
+                    "vllm/_moe_C.abi3.so",
+                    "vllm/_flashmla_C.abi3.so",
+                    "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                    "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+                    "vllm/cumem_allocator.abi3.so",
+                ]
+
+                compiled_regex = re.compile(
+                    r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+                file_members = list(
+                    filter(lambda x: x.filename in files_to_copy,
+                           wheel.filelist))
+                file_members += list(
+                    filter(lambda x: compiled_regex.match(x.filename),
+                           wheel.filelist))
+
+                for file in file_members:
+                    print(f"[extract] {file.filename}")
+                    target_path = os.path.join(".", file.filename)
+                    os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                    with wheel.open(file.filename) as src, open(
+                            target_path, "wb") as dst:
+                        shutil.copyfileobj(src, dst)
+
+                    pkg = os.path.dirname(file.filename).replace("/", ".")
+                    package_data_patch.setdefault(pkg, []).append(
+                        os.path.basename(file.filename))
+
+            return package_data_patch
+        finally:
+            if temp_dir is not None:
+                print(f"Removing temporary directory {temp_dir}")
+                shutil.rmtree(temp_dir)
+
+    @staticmethod
+    def get_base_commit_in_main_branch() -> str:
         # Force to use the nightly wheel. This is mainly used for CI testing.
         if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
             return "nightly"
@@ -334,115 +393,6 @@ class repackage_wheel(build_ext):
                 "wheel may not be compatible with your dev branch: %s", err)
             return "nightly"
 
-    def run(self) -> None:
-        assert _is_cuda(
-        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
-
-        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
-        if wheel_location is None:
-            base_commit = self.get_base_commit_in_main_branch()
-            wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-            # Fallback to nightly wheel if latest commit wheel is unavailable,
-            # in this rare case, the nightly release CI hasn't finished on main.
-            if not is_url_available(wheel_location):
-                wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-
-        import zipfile
-
-        if os.path.isfile(wheel_location):
-            wheel_path = wheel_location
-            print(f"Using existing wheel={wheel_path}")
-        else:
-            # Download the wheel from a given URL, assume
-            # the filename is the last part of the URL
-            wheel_filename = wheel_location.split("/")[-1]
-
-            import tempfile
-
-            # create a temporary directory to store the wheel
-            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
-            wheel_path = os.path.join(temp_dir, wheel_filename)
-            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
-            from urllib.request import urlretrieve
-            try:
-                urlretrieve(wheel_location, filename=wheel_path)
-            except Exception as e:
-                from setuptools.errors import SetupError
-                raise SetupError(
-                    f"Failed to get vLLM wheel from {wheel_location}") from e
-
-        # Set the dist_dir for Docker build context
-        dist_dir = ("/workspace/dist"
-                    if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
-        os.makedirs(dist_dir, exist_ok=True)
-
-        # Extract only necessary compiled .so files from precompiled wheel
-        with zipfile.ZipFile(wheel_path) as wheel:
-            # Get version from METADATA (optional, mostly useful for logging)
-            metadata_file = next((n for n in wheel.namelist()
-                                  if n.endswith(".dist-info/METADATA")), None)
-            if not metadata_file:
-                raise RuntimeError(
-                    "Could not find METADATA in precompiled wheel.")
-            metadata = wheel.read(metadata_file).decode()
-            version_line = next((line for line in metadata.splitlines()
-                                 if line.startswith("Version: ")), None)
-            if not version_line:
-                raise RuntimeError(
-                    "Could not determine version from METADATA.")
-            version = version_line.split(": ")[1].strip()
-
-            print(f"Extracting precompiled kernels from vLLM wheel version: "
-                  f"{version}")
-
-            # List of compiled shared objects to extract
-            files_to_copy = [
-                "vllm/_C.abi3.so",
-                "vllm/_moe_C.abi3.so",
-                "vllm/_flashmla_C.abi3.so",
-                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                "vllm/cumem_allocator.abi3.so",
-            ]
-
-            file_members = list(
-                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
-            compiled_regex = re.compile(
-                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
-            file_members += list(
-                filter(lambda x: compiled_regex.match(x.filename),
-                       wheel.filelist))
-
-            for file in file_members:
-                print(f"Extracting and including {file.filename} "
-                      "from existing wheel")
-                package_name = os.path.dirname(file.filename).replace("/", ".")
-                file_name = os.path.basename(file.filename)
-
-                if package_name not in package_data:
-                    package_data[package_name] = []
-
-                output_base = (dist_dir
-                               if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
-                target_path = os.path.join(output_base, file.filename)
-                os.makedirs(os.path.dirname(target_path), exist_ok=True)
-                with wheel.open(file.filename) as src, open(target_path,
-                                                            "wb") as dst:
-                    shutil.copyfileobj(src, dst)
-
-                package_data[package_name].append(file_name)
-
-        # Copy wheel into dist dir for Docker to consume (e.g., via --mount)
-        if envs.VLLM_DOCKER_BUILD_CONTEXT:
-            arch_tag = "cp38-abi3-manylinux1_x86_64"
-            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
-            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
-
-            print(
-                "Docker build context detected, copying precompiled wheel to "
-                f"{final_wheel_path}")
-            shutil.copy2(wheel_path, final_wheel_path)
-
 
 def _no_device() -> bool:
     return VLLM_TARGET_DEVICE == "empty"
@@ -676,16 +626,37 @@ package_data = {
     ]
 }
 
+# If using precompiled, extract and patch package_data (in advance of setup)
+if envs.VLLM_USE_PRECOMPILED:
+    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
+    if wheel_location is not None:
+        wheel_url = wheel_location
+    else:
+        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
+        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+        from urllib.request import urlopen
+        try:
+            with urlopen(wheel_url) as resp:
+                if resp.status != 200:
+                    wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+        except Exception as e:
+            print(f"[warn] Falling back to nightly wheel: {e}")
+            wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
+        wheel_url)
+    for pkg, files in patch.items():
+        package_data.setdefault(pkg, []).extend(files)
+
 if _no_device():
     ext_modules = []
 
-if not ext_modules:
+if not ext_modules or envs.VLLM_USE_PRECOMPILED:
+    # Disable build_ext when using precompiled wheel
     cmdclass = {}
 else:
-    cmdclass = {
-        "build_ext":
-        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
-    }
+    cmdclass = {"build_ext": cmake_build_ext}
 
 setup(
     # static metadata should rather go in pyproject.toml