From d166d83574f1beb0f19ea27a1b761e170dfe674d Mon Sep 17 00:00:00 2001
From: Puneet Matharu
Date: Thu, 4 Dec 2025 12:24:46 +0000
Subject: [PATCH] Add ccache support.

Signed-off-by: Puneet Matharu
---
Review notes (this section is ignored by `git am`):
  * The line structure was restored from a whitespace-collapsed copy of this
    patch. Hunk line counts were checked against the hunk headers, but the
    indentation of context lines is a best-effort reconstruction; if the
    patch does not apply cleanly, retry with `git apply --ignore-whitespace`.
  * build-wheel.sh: the ccache directory is now only chown'ed when it exists
    inside the container, so that `--disable-ccache` builds do not fail on a
    missing path. The diffstat and the last build-wheel.sh hunk header were
    updated by hand; the blob hashes on the `index` lines still refer to the
    original submission.
  * pytorch.yml: the container-ID file path passed to `cat` is now quoted.

 .github/workflows/pytorch.yml                | 30 +++++++++++++-
 ML-Frameworks/pytorch-aarch64/CHANGELOG.md   |  2 +
 ML-Frameworks/pytorch-aarch64/build-wheel.sh | 45 +++++++++++++++++++-
 ML-Frameworks/pytorch-aarch64/build.sh       |  7 +++-
 ML-Frameworks/pytorch-aarch64/get-source.sh  | 12 ++++--
 5 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/pytorch.yml b/.github/workflows/pytorch.yml
index c0edcf25..cdc445b2 100644
--- a/.github/workflows/pytorch.yml
+++ b/.github/workflows/pytorch.yml
@@ -67,6 +67,9 @@ jobs:
           { name: c8g, label: ah-ubuntu_22_04-c8g_8x }
         ]
     runs-on: ${{ matrix.config.label }}
+    env:
+      CCACHE_HOST_DIR: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64/.ccache
+      TORCH_BUILD_CONTAINER_ID_FILE: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64/.torch_build_container_id
     steps:
       - name: Checkout Tool-Solutions
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -76,13 +79,38 @@ jobs:
       - name: Set up Docker
         uses: docker/setup-docker-action@v4
 
+      - name: Create unique cache key from the year and week (YYYY-WW)
+        id: cache_suffix
+        run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT"
+
+      # Restore cache if available. GitHub automatically evicts cache entries that have not been
+      # accessed for over 7 days. We rotate the cache key weekly; if no cache exists for the
+      # current week, a cache from a previous week (via the prefix restore key) will be restored
+      # and then saved under the current week's key at the end of the job. This effectively limits
+      # the cache to at most two weeks of cache data.
+      - name: Restore ccache cache
+        uses: actions/cache@v5
+        with:
+          path: ${{ env.CCACHE_HOST_DIR }}
+          key: ccache-${{ matrix.config.name }}-${{ steps.cache_suffix.outputs.week }}
+          restore-keys: |
+            ccache-${{ matrix.config.name }}-
+
       - name: Build Tool-Solutions PyTorch
         working-directory: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64
         run: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64/build.sh
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CCACHE_HOST_DIR: ${{ env.CCACHE_HOST_DIR }}
+          CCACHE_MAXSIZE: 2G
+
+      - name: Print ccache disk usage
+        run: du -sh "${{ env.CCACHE_HOST_DIR }}" || true
+
+      - name: Report final ccache build stats
+        run: docker exec "$(cat "${{ env.TORCH_BUILD_CONTAINER_ID_FILE }}")" ccache -s || true
 
-      - name: Save image as a artifact
+      - name: Save image as an artifact
         run: docker save toolsolutions-pytorch:latest -o toolsolutions-pytorch-image-${{ matrix.config.name }}.tar
 
       - name: Upload build artifact
diff --git a/ML-Frameworks/pytorch-aarch64/CHANGELOG.md b/ML-Frameworks/pytorch-aarch64/CHANGELOG.md
index c7899788..5f701439 100644
--- a/ML-Frameworks/pytorch-aarch64/CHANGELOG.md
+++ b/ML-Frameworks/pytorch-aarch64/CHANGELOG.md
@@ -8,6 +8,8 @@ where `YY` is the year, and `MM` the month of the increment.
 ## [unreleased]
 
 ### Added
+  - Adds PyTorch [PR #170600](https://github.com/pytorch/pytorch/pull/170600), to patch incremental build support.
+  - Adds PyTorch [PR #170062](https://github.com/pytorch/pytorch/pull/170062), to add ccache support to ACL/OpenBLAS and manywheel build script.
 
 ### Changed
 
diff --git a/ML-Frameworks/pytorch-aarch64/build-wheel.sh b/ML-Frameworks/pytorch-aarch64/build-wheel.sh
index 095c2d27..32b30d29 100755
--- a/ML-Frameworks/pytorch-aarch64/build-wheel.sh
+++ b/ML-Frameworks/pytorch-aarch64/build-wheel.sh
@@ -55,6 +55,29 @@ UTILS=/utils
 COMMON_UTILS=/common_utils
 PYTORCH_FINAL_PACKAGE_DIR=/artifacts
 
+# Enable ccache support by default.
+# NOTE: The default behaviour is to have a project-specific cache directory that we cache
+# build artefacts inside and can be easily wiped. These build artefacts are specific to the
+# manylinux builder container (and thus compilers) that we use to build the torch wheel. As
+# such, you may not want to populate the global ccache cache with them. However, if you wish
+# to do so, simply set CCACHE_HOST_DIR to that directory.
+CCACHE_HOST_DIR=${CCACHE_HOST_DIR:-"${PWD}/.ccache"}
+LOCAL_CCACHE_DIR=/.ccache
+CCACHE_MAXSIZE=${CCACHE_MAXSIZE:-}
+
+# Assemble the docker arguments that enable ccache, unless --disable-ccache was passed
+ccache_args=()
+if [[ "$*" == *--disable-ccache* ]]; then
+    ccache_args+=(-e USE_CCACHE=0)
+else
+    ccache_args+=(-e USE_CCACHE=1)
+    mkdir -p "${CCACHE_HOST_DIR}"
+    ccache_args+=(
+        -e CCACHE_DIR="${LOCAL_CCACHE_DIR}"
+        -v "${CCACHE_HOST_DIR}:${LOCAL_CCACHE_DIR}"
+    )
+fi
+
 # Want a CPU build
 DESIRED_CUDA=cpu
 GPU_ARCH_TYPE=cpu-aarch64
@@ -89,6 +112,7 @@ if ! docker container inspect $TORCH_BUILD_CONTAINER >/dev/null 2>&1 ; then
         -e SKIP_ALL_TESTS=1 \
         -e OPENSSL_ROOT_DIR="${OPENSSL_HOST_DIR}" \
         -e CMAKE_INCLUDE_PATH="${OPENSSL_HOST_DIR}/include" \
+        "${ccache_args[@]}" \
         -v "${PYTORCH_HOST_DIR}:${PYTORCH_ROOT}" \
         -v "${PYTORCH_FINAL_PACKAGE_HOST_DIR}:${PYTORCH_FINAL_PACKAGE_DIR}" \
         -v "${PWD}/utils:${UTILS}" \
@@ -96,6 +120,18 @@ if ! docker container inspect $TORCH_BUILD_CONTAINER >/dev/null 2>&1 ; then
         -w / \
         "${IMAGE_NAME}")
 
+    # Provide ccache support
+    if [[ "$*" != *--disable-ccache* ]]; then
+        docker exec "$TORCH_BUILD_CONTAINER" yum install -y ccache || true
+        if [ -n "${CCACHE_MAXSIZE}" ]; then
+            docker exec "$TORCH_BUILD_CONTAINER" ccache --max-size="$CCACHE_MAXSIZE" || true
+        fi
+        docker exec "$TORCH_BUILD_CONTAINER" ccache -z || true
+        docker exec "$TORCH_BUILD_CONTAINER" ccache -o compression=true || true
+        docker exec "$TORCH_BUILD_CONTAINER" ccache -o compression_level=6 || true
+        docker exec "$TORCH_BUILD_CONTAINER" ccache -s || true
+    fi
+
     # Currently changes in these scripts will not be applied without a clean
     # build, which is not ideal for dev work. But we have to balance this with
     # extra time/network traffic when rebuilding many times.
@@ -134,9 +170,16 @@ OVERRIDE_PACKAGE_VERSION="${version%??}.dev${build_date}${TORCH_RELEASE_ID:+"+$T
 docker exec $TORCH_BUILD_CONTAINER bash -lc "
     source /tmp/env &&
     BUILD_TEST=0 \
+    DO_SETUP_PY_CLEAN_BEFORE_BUILD=0 \
+    WIPE_RH_CUDA_AFTER_BUILD=0 \
     OVERRIDE_PACKAGE_VERSION=$OVERRIDE_PACKAGE_VERSION \
     bash ${PYTORCH_ROOT}/.ci/manywheel/build.sh
 "
 
 # directories generated by the docker container are owned by root, so transfer ownership to user
-docker exec $TORCH_BUILD_CONTAINER chown -R "$(id -u)":"$(id -g)" "${PYTORCH_ROOT}" /artifacts
+# (the ccache directory is only present when it was bind-mounted at container creation)
+chown_paths=("${PYTORCH_ROOT}" "${PYTORCH_FINAL_PACKAGE_DIR}")
+if docker exec "$TORCH_BUILD_CONTAINER" test -d "${LOCAL_CCACHE_DIR}"; then
+    chown_paths+=("${LOCAL_CCACHE_DIR}")
+fi
+docker exec "$TORCH_BUILD_CONTAINER" chown -R "$(id -u)":"$(id -g)" "${chown_paths[@]}"
diff --git a/ML-Frameworks/pytorch-aarch64/build.sh b/ML-Frameworks/pytorch-aarch64/build.sh
index 6f538c93..4785167e 100755
--- a/ML-Frameworks/pytorch-aarch64/build.sh
+++ b/ML-Frameworks/pytorch-aarch64/build.sh
@@ -83,7 +83,12 @@ if ! [[ $* == *--use-existing-sources* ]]; then
     ./get-source.sh
 fi
 
-./build-wheel.sh
+# We build the wheel with ccache by default; allow disabling it via the --disable-ccache flag
+build_wheel_args=()
+if [[ "$*" == *--disable-ccache* ]]; then
+    build_wheel_args+=(--disable-ccache)
+fi
+./build-wheel.sh "${build_wheel_args[@]}"
 
 [[ $* == *--wheel-only* ]] && exit 0
 
diff --git a/ML-Frameworks/pytorch-aarch64/get-source.sh b/ML-Frameworks/pytorch-aarch64/get-source.sh
index b2172f55..524c3da2 100755
--- a/ML-Frameworks/pytorch-aarch64/get-source.sh
+++ b/ML-Frameworks/pytorch-aarch64/get-source.sh
@@ -33,10 +33,14 @@ git-shallow-clone https://github.com/pytorch/pytorch.git $PYTORCH_HASH
     cd pytorch
 
     # https://github.com/pytorch/pytorch/pull/167829 - Refactor ACL and OpenBLAS install scripts on AArch64
-    # Note: as part of this patch, setuptools is pinned to ~= 78.1.1 which is not affected by
-    # CVE-2025-47273 and CVE-2024-6345
-    apply-github-patch pytorch/pytorch 69db12b465887df96d27fe2bb93746ac334577f1
-    apply-github-patch pytorch/pytorch 5184c373a8bc77809b6e59361e191d4e78d6a824
+    apply-github-patch pytorch/pytorch f5e7b3ab44b14902f1e44ac138006b04bd9b7728
+
+    # https://github.com/pytorch/pytorch/pull/170062 - Add ccache support to ACL/OpenBLAS and manywheel
+    # build script.
+    apply-github-patch pytorch/pytorch 327b118078869b85d979d9f7eb1038b8a53c8a49
+
+    # https://github.com/pytorch/pytorch/pull/170600 - Gate deletion of clean-up steps in build_common.sh
+    apply-github-patch pytorch/pytorch e368ec2693b8b2b8ba35d0913f1d663ba2fdc804
 
     # FIXME: Temporarily disabled; to be updated in a later PR
     # # https://github.com/pytorch/pytorch/pull/160184 - Draft: separate reqs for manywheel build and pin