From dd8a29da99aaca4aaedf710c813222871245e140 Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:35:11 -0500 Subject: [PATCH] Applying some fixes for K8s agents in CI (#15493) Signed-off-by: Alexei V. Ivanov --- .buildkite/run-amd-test.sh | 10 ++++++---- Dockerfile.rocm | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index 0680bae13ddb..e5a1b760db1f 100755 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -134,9 +134,10 @@ if [[ $commands == *"--shard-id="* ]]; then # assign shard-id for each shard commands_gpu=${commands//"--shard-id= "/"--shard-id=${GPU} "} echo "Shard ${GPU} commands:$commands_gpu" + echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES" docker run \ - --device /dev/kfd --device /dev/dri \ - --network host \ + --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \ + --network=host \ --shm-size=16gb \ --rm \ -e HIP_VISIBLE_DEVICES="${GPU}" \ @@ -163,9 +164,10 @@ if [[ $commands == *"--shard-id="* ]]; then fi done else + echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES" docker run \ - --device /dev/kfd --device /dev/dri \ - --network host \ + --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \ + --network=host \ --shm-size=16gb \ --rm \ -e HIP_VISIBLE_DEVICES=0 \ diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 841e7978a424..f9ebb10ca873 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -12,7 +12,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}} # Install some basic utilities RUN apt-get update -q -y && apt-get install -q -y \ - sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev + sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \ + apt-transport-https ca-certificates wget curl # Remove sccache RUN python3 -m pip install --upgrade pip && pip install setuptools_scm RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"