From f12d9256b39f058b93c201cedc7ffd9e605e9db8 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 30 Jul 2025 13:15:06 -0700 Subject: [PATCH] [Misc] Use dracut on CentOS and skip clone if repo exists for EP kernel installation (#21635) Signed-off-by: Ming Yang --- tools/ep_kernels/configure_system_drivers.sh | 12 +++++- tools/ep_kernels/install_python_libraries.sh | 40 +++++++++++++++++++- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tools/ep_kernels/configure_system_drivers.sh b/tools/ep_kernels/configure_system_drivers.sh index cf15c1daccaec..b8bd8b8f6f550 100644 --- a/tools/ep_kernels/configure_system_drivers.sh +++ b/tools/ep_kernels/configure_system_drivers.sh @@ -2,6 +2,16 @@ set -ex # turn on IBGDA echo 'options nvidia NVreg_EnableStreamMemOPs=1 NVreg_RegistryDwords="PeerMappingOverride=1;"' | tee -a /etc/modprobe.d/nvidia.conf -update-initramfs -u + +if command -v update-initramfs &> /dev/null; then + # for Debian/Ubuntu + sudo update-initramfs -u +elif command -v dracut &> /dev/null; then + # for Fedora/CentOS + sudo dracut --force +else + echo "No supported initramfs update tool found." + exit 1 +fi echo "Please reboot the system to apply the changes" diff --git a/tools/ep_kernels/install_python_libraries.sh b/tools/ep_kernels/install_python_libraries.sh index 83643c084bf9a..9d1b2da3b4122 100644 --- a/tools/ep_kernels/install_python_libraries.sh +++ b/tools/ep_kernels/install_python_libraries.sh @@ -53,9 +53,45 @@ popd export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem_install:$CMAKE_PREFIX_PATH +is_git_dirty() { + local dir=$1 + pushd "$dir" > /dev/null + + if [ -d ".git" ] && [ -n "$(git status --porcelain 2>/dev/null)" ]; then + popd > /dev/null + return 0 # dirty (true) + else + popd > /dev/null + return 1 # clean (false) + fi +} + +# Function to handle git repository cloning with dirty/incomplete checks +clone_repo() { + local repo_url=$1 + local dir_name=$2 + local key_file=$3 + + if [ -d "$dir_name" ]; then + # Check if directory has uncommitted changes (dirty) + if is_git_dirty "$dir_name"; then + echo "$dir_name directory is dirty, skipping clone" + # Check if clone failed (directory exists but not a valid git repo or missing key files) + elif [ ! -d "$dir_name/.git" ] || [ ! -f "$dir_name/$key_file" ]; then + echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning" + rm -rf "$dir_name" + git clone "$repo_url" + else + echo "$dir_name directory exists and appears complete; manually update if needed" + fi + else + git clone "$repo_url" + fi +} + # build and install pplx, require pytorch installed pushd $WORKSPACE -git clone https://github.com/ppl-ai/pplx-kernels +clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py" cd pplx-kernels # see https://github.com/pypa/pip/issues/9955#issuecomment-838065925 # PIP_NO_BUILD_ISOLATION=0 disables build isolation @@ -64,7 +100,7 @@ popd # build and install deepep, require pytorch installed pushd $WORKSPACE -git clone https://github.com/deepseek-ai/DeepEP +clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py" cd DeepEP export NVSHMEM_DIR=$WORKSPACE/nvshmem_install PIP_NO_BUILD_ISOLATION=0 pip install -vvv -e .