fix(setup): improve precompiled wheel setup for Docker builds (#22025)

Signed-off-by: dougbtv <dosmith@redhat.com>
Doug Smith authored 2025-07-31 12:52:48 -04:00, committed by GitHub
parent 7349d5268b
commit 58bb902186
GPG Key ID: B5690EEEBB952194
3 changed files with 104 additions and 124 deletions
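
In brief: setup.py's repackage_wheel build_ext subclass is replaced by a precompiled_wheel_utils helper, the precompiled wheel is now resolved and extracted before setup() runs so that package_data can be patched up front, and build_ext is skipped entirely when VLLM_USE_PRECOMPILED is set. The following is a condensed sketch of the new flow, not a drop-in script: names are taken from the diff below, it assumes the module-level context of the new setup.py (os, package_data, precompiled_wheel_utils), and error handling plus the nightly fallback are omitted.

    # Condensed sketch of the new setup.py flow when VLLM_USE_PRECOMPILED is set.
    base = precompiled_wheel_utils.get_base_commit_in_main_branch()
    wheel_url = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION") or (
        f"https://wheels.vllm.ai/{base}/"
        "vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl")

    # Pull the prebuilt *.so files out of that wheel into the source tree and
    # record them so setuptools ships them as package data instead of rebuilding.
    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(wheel_url)
    for pkg, files in patch.items():
        package_data.setdefault(pkg, []).extend(files)

    cmdclass = {}  # build_ext is skipped entirely on the precompiled path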


@@ -370,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 # Install vllm wheel first, so that torch etc will be installed.
+# !bang
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system dist/*.whl --verbose \


@@ -22,9 +22,7 @@ aiohttp==3.10.11
 aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.3.1
-    # via
-    #   aiohttp
-    #   ray
+    # via aiohttp
 albucore==0.0.16
     # via terratorch
 albumentations==1.4.6
@@ -139,7 +137,7 @@ contourpy==1.3.0
     # via matplotlib
 cramjam==2.9.0
     # via fastparquet
-cupy-cuda12x==13.3.0
+cupy-cuda12x==13.5.1
     # via ray
 cycler==0.12.1
     # via matplotlib
@@ -226,7 +224,6 @@ frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
-    #   ray
 fsspec==2024.9.0
     # via
     #   datasets
@@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
 opentelemetry-api==1.35.0
     # via
     #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
+opentelemetry-exporter-prometheus==0.56b0
+    # via ray
+opentelemetry-proto==1.36.0
+    # via ray
 opentelemetry-sdk==1.35.0
-    # via mlflow-skinny
+    # via
+    #   mlflow-skinny
+    #   opentelemetry-exporter-prometheus
+    #   ray
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
 packaging==24.2
@@ -697,7 +702,9 @@ pqdm==0.2.0
 pretrainedmodels==0.7.4
     # via segmentation-models-pytorch
 prometheus-client==0.22.0
-    # via ray
+    # via
+    #   opentelemetry-exporter-prometheus
+    #   ray
 propcache==0.2.0
     # via yarl
 proto-plus==1.26.1
@@ -707,6 +714,7 @@ protobuf==5.28.3
     #   google-api-core
     #   googleapis-common-protos
     #   mlflow-skinny
+    #   opentelemetry-proto
     #   proto-plus
     #   ray
     #   tensorboardx
@@ -854,7 +862,7 @@ rasterio==1.4.3
     #   rioxarray
     #   terratorch
     #   torchgeo
-ray==2.43.0
+ray==2.48.0
     # via -r requirements/test.in
 redis==5.2.0
     # via tensorizer

setup.py

@@ -282,10 +282,69 @@ class cmake_build_ext(build_ext):
             self.copy_file(file, dst_file)
 
 
-class repackage_wheel(build_ext):
+class precompiled_wheel_utils:
     """Extracts libraries and other files from an existing wheel."""
 
-    def get_base_commit_in_main_branch(self) -> str:
+    @staticmethod
+    def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
+        import tempfile
+        import zipfile
+
+        temp_dir = None
+        try:
+            if not os.path.isfile(wheel_url_or_path):
+                wheel_filename = wheel_url_or_path.split("/")[-1]
+                temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+                wheel_path = os.path.join(temp_dir, wheel_filename)
+                print(f"Downloading wheel from {wheel_url_or_path} "
+                      f"to {wheel_path}")
+                from urllib.request import urlretrieve
+                urlretrieve(wheel_url_or_path, filename=wheel_path)
+            else:
+                wheel_path = wheel_url_or_path
+                print(f"Using existing wheel at {wheel_path}")
+
+            package_data_patch = {}
+
+            with zipfile.ZipFile(wheel_path) as wheel:
+                files_to_copy = [
+                    "vllm/_C.abi3.so",
+                    "vllm/_moe_C.abi3.so",
+                    "vllm/_flashmla_C.abi3.so",
+                    "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                    "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+                    "vllm/cumem_allocator.abi3.so",
+                ]
+
+                compiled_regex = re.compile(
+                    r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+                file_members = list(
+                    filter(lambda x: x.filename in files_to_copy,
+                           wheel.filelist))
+                file_members += list(
+                    filter(lambda x: compiled_regex.match(x.filename),
+                           wheel.filelist))
+
+                for file in file_members:
+                    print(f"[extract] {file.filename}")
+                    target_path = os.path.join(".", file.filename)
+                    os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                    with wheel.open(file.filename) as src, open(
+                            target_path, "wb") as dst:
+                        shutil.copyfileobj(src, dst)
+
+                    pkg = os.path.dirname(file.filename).replace("/", ".")
+                    package_data_patch.setdefault(pkg, []).append(
+                        os.path.basename(file.filename))
+
+            return package_data_patch
+        finally:
+            if temp_dir is not None:
+                print(f"Removing temporary directory {temp_dir}")
+                shutil.rmtree(temp_dir)
+
+    @staticmethod
+    def get_base_commit_in_main_branch() -> str:
         # Force to use the nightly wheel. This is mainly used for CI testing.
         if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
             return "nightly"
@@ -334,115 +393,6 @@ class repackage_wheel(build_ext):
                 "wheel may not be compatible with your dev branch: %s", err)
             return "nightly"
 
-    def run(self) -> None:
-        assert _is_cuda(
-        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
-
-        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
-        if wheel_location is None:
-            base_commit = self.get_base_commit_in_main_branch()
-            wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-            # Fallback to nightly wheel if latest commit wheel is unavailable,
-            # in this rare case, the nightly release CI hasn't finished on main.
-            if not is_url_available(wheel_location):
-                wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-
-        import zipfile
-
-        if os.path.isfile(wheel_location):
-            wheel_path = wheel_location
-            print(f"Using existing wheel={wheel_path}")
-        else:
-            # Download the wheel from a given URL, assume
-            # the filename is the last part of the URL
-            wheel_filename = wheel_location.split("/")[-1]
-
-            import tempfile
-
-            # create a temporary directory to store the wheel
-            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
-            wheel_path = os.path.join(temp_dir, wheel_filename)
-            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
-
-            from urllib.request import urlretrieve
-
-            try:
-                urlretrieve(wheel_location, filename=wheel_path)
-            except Exception as e:
-                from setuptools.errors import SetupError
-                raise SetupError(
-                    f"Failed to get vLLM wheel from {wheel_location}") from e
-
-        # Set the dist_dir for Docker build context
-        dist_dir = ("/workspace/dist"
-                    if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
-        os.makedirs(dist_dir, exist_ok=True)
-
-        # Extract only necessary compiled .so files from precompiled wheel
-        with zipfile.ZipFile(wheel_path) as wheel:
-            # Get version from METADATA (optional, mostly useful for logging)
-            metadata_file = next((n for n in wheel.namelist()
-                                  if n.endswith(".dist-info/METADATA")), None)
-            if not metadata_file:
-                raise RuntimeError(
-                    "Could not find METADATA in precompiled wheel.")
-            metadata = wheel.read(metadata_file).decode()
-            version_line = next((line for line in metadata.splitlines()
-                                 if line.startswith("Version: ")), None)
-            if not version_line:
-                raise RuntimeError(
-                    "Could not determine version from METADATA.")
-            version = version_line.split(": ")[1].strip()
-            print(f"Extracting precompiled kernels from vLLM wheel version: "
-                  f"{version}")
-
-            # List of compiled shared objects to extract
-            files_to_copy = [
-                "vllm/_C.abi3.so",
-                "vllm/_moe_C.abi3.so",
-                "vllm/_flashmla_C.abi3.so",
-                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                "vllm/cumem_allocator.abi3.so",
-            ]
-
-            file_members = list(
-                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
-
-            compiled_regex = re.compile(
-                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
-            file_members += list(
-                filter(lambda x: compiled_regex.match(x.filename),
-                       wheel.filelist))
-
-            for file in file_members:
-                print(f"Extracting and including {file.filename} "
-                      "from existing wheel")
-                package_name = os.path.dirname(file.filename).replace("/", ".")
-                file_name = os.path.basename(file.filename)
-
-                if package_name not in package_data:
-                    package_data[package_name] = []
-
-                output_base = (dist_dir
-                               if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
-                target_path = os.path.join(output_base, file.filename)
-                os.makedirs(os.path.dirname(target_path), exist_ok=True)
-
-                with wheel.open(file.filename) as src, open(target_path,
-                                                            "wb") as dst:
-                    shutil.copyfileobj(src, dst)
-
-                package_data[package_name].append(file_name)
-
-        # Copy wheel into dist dir for Docker to consume (e.g., via --mount)
-        if envs.VLLM_DOCKER_BUILD_CONTEXT:
-            arch_tag = "cp38-abi3-manylinux1_x86_64"
-            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
-            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
-            print(
-                "Docker build context detected, copying precompiled wheel to "
-                f"{final_wheel_path}")
-            shutil.copy2(wheel_path, final_wheel_path)
-
 
 def _no_device() -> bool:
     return VLLM_TARGET_DEVICE == "empty"
@@ -676,16 +626,37 @@ package_data = {
     ]
 }
 
+# If using precompiled, extract and patch package_data (in advance of setup)
+if envs.VLLM_USE_PRECOMPILED:
+    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
+    if wheel_location is not None:
+        wheel_url = wheel_location
+    else:
+        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
+        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+        from urllib.request import urlopen
+        try:
+            with urlopen(wheel_url) as resp:
+                if resp.status != 200:
+                    wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+        except Exception as e:
+            print(f"[warn] Falling back to nightly wheel: {e}")
+            wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
+        wheel_url)
+    for pkg, files in patch.items():
+        package_data.setdefault(pkg, []).extend(files)
+
 if _no_device():
     ext_modules = []
 
-if not ext_modules:
+if not ext_modules or envs.VLLM_USE_PRECOMPILED:
+    # Disable build_ext when using precompiled wheel
     cmdclass = {}
 else:
-    cmdclass = {
-        "build_ext":
-        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
-    }
+    cmdclass = {"build_ext": cmake_build_ext}
 
 setup(
     # static metadata should rather go in pyproject.toml
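
As a quick post-install sanity check for the precompiled path, the extracted extension modules should import without any local compilation. The module names below correspond to the .so files in files_to_copy; this snippet is illustrative rather than part of the commit, and assumes a CUDA environment where vllm itself imports cleanly:

    # Illustrative smoke test: verify the precompiled kernels are importable.
    import importlib

    for mod in ("vllm._C", "vllm._moe_C", "vllm.cumem_allocator"):
        importlib.import_module(mod)  # raises ImportError if extraction failed
    print("precompiled extension modules loaded")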