Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion .github/workflows/pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ jobs:
{ name: c8g, label: ah-ubuntu_22_04-c8g_8x }
]
runs-on: ${{ matrix.config.label }}
env:
CCACHE_HOST_DIR: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64/.ccache
TORCH_BUILD_CONTAINER_ID_FILE: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64/.torch_build_container_id
steps:
- name: Checkout Tool-Solutions
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -76,13 +79,38 @@ jobs:
- name: Set up Docker
uses: docker/setup-docker-action@v4

- name: Create unique cache key from the year and week (YYYY-WW)
id: cache_suffix
run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT"

# Restore the ccache directory if a cache is available. GitHub automatically evicts cache
# entries that have not been accessed for over 7 days. We rotate the cache key weekly: if no
# cache exists yet for the current week, the most recent cache from a previous week is
# restored (via the prefix restore key) and then saved under the current week's key at the end
# of the job. On an exact key hit nothing is re-saved, so each weekly entry is written once.
# This effectively limits the cache to at most two weeks of cache data.
- name: Restore ccache cache
uses: actions/cache@v5
with:
path: ${{ env.CCACHE_HOST_DIR }}
key: ccache-${{ matrix.config.name }}-${{ steps.cache_suffix.outputs.week }}
restore-keys: |
ccache-${{ matrix.config.name }}-

- name: Build Tool-Solutions PyTorch
working-directory: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64
run: ${{ github.workspace }}/Tool-Solutions/ML-Frameworks/pytorch-aarch64/build.sh
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CCACHE_HOST_DIR: ${{ env.CCACHE_HOST_DIR }}
CCACHE_MAXSIZE: 2G

- name: Print ccache disk usage
run: du -sh "${{ env.CCACHE_HOST_DIR }}" || true

- name: Report final ccache build stats
run: docker exec "$(cat ${{ env.TORCH_BUILD_CONTAINER_ID_FILE }})" ccache -s || true

- name: Save image as a artifact
- name: Save image as an artifact
run: docker save toolsolutions-pytorch:latest -o toolsolutions-pytorch-image-${{ matrix.config.name }}.tar

- name: Upload build artifact
Expand Down
2 changes: 2 additions & 0 deletions ML-Frameworks/pytorch-aarch64/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ where `YY` is the year, and `MM` the month of the increment.
## [unreleased]

### Added
- Adds PyTorch [PR #170600](https://github.com/pytorch/pytorch/pull/170600) to patch in incremental build support.
- Adds PyTorch [PR #170062](https://github.com/pytorch/pytorch/pull/170062), which adds ccache support to ACL/OpenBLAS and the manywheel build script.

### Changed

Expand Down
41 changes: 40 additions & 1 deletion ML-Frameworks/pytorch-aarch64/build-wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,29 @@ UTILS=/utils
COMMON_UTILS=/common_utils
PYTORCH_FINAL_PACKAGE_DIR=/artifacts

# Enable ccache support by default.
# NOTE: The default behaviour is to have a project-specific cache directory that we cache
# build artefacts inside and can be easily wiped. These build artefacts are specific to the
# manylinux builder container (and thus compilers) that we use to build the torch wheel. As
# such, you may not want to populate the global ccache cache with them. However, if you wish
# to do so, simply set CCACHE_HOST_DIR to that directory.
CCACHE_HOST_DIR=${CCACHE_HOST_DIR:-"${PWD}/.ccache"}
LOCAL_CCACHE_DIR=/.ccache
CCACHE_MAXSIZE=${CCACHE_MAXSIZE:-}

# Build the docker arguments for ccache: enabled by default, disabled with --disable-ccache
ccache_args=()
if [[ "$*" == *--disable-ccache* ]]; then
ccache_args+=(-e USE_CCACHE=0)
else
ccache_args+=(-e USE_CCACHE=1)
mkdir -p "${CCACHE_HOST_DIR}"
ccache_args+=(
-e CCACHE_DIR="${LOCAL_CCACHE_DIR}"
-v "${CCACHE_HOST_DIR}:${LOCAL_CCACHE_DIR}"
)
fi

# Want a CPU build
DESIRED_CUDA=cpu
GPU_ARCH_TYPE=cpu-aarch64
Expand Down Expand Up @@ -89,13 +112,26 @@ if ! docker container inspect $TORCH_BUILD_CONTAINER >/dev/null 2>&1 ; then
-e SKIP_ALL_TESTS=1 \
-e OPENSSL_ROOT_DIR="${OPENSSL_HOST_DIR}" \
-e CMAKE_INCLUDE_PATH="${OPENSSL_HOST_DIR}/include" \
"${ccache_args[@]}" \
-v "${PYTORCH_HOST_DIR}:${PYTORCH_ROOT}" \
-v "${PYTORCH_FINAL_PACKAGE_HOST_DIR}:${PYTORCH_FINAL_PACKAGE_DIR}" \
-v "${PWD}/utils:${UTILS}" \
-v "${PWD}/../utils:${COMMON_UTILS}" \
-w / \
"${IMAGE_NAME}")

# Install and configure ccache inside the build container (best effort: failures are ignored)
if [[ "$*" != *--disable-ccache* ]]; then
docker exec "$TORCH_BUILD_CONTAINER" yum install -y ccache || true
if [ -n "${CCACHE_MAXSIZE}" ]; then
docker exec "$TORCH_BUILD_CONTAINER" ccache --max-size="$CCACHE_MAXSIZE" || true
fi
docker exec "$TORCH_BUILD_CONTAINER" ccache -z || true
docker exec "$TORCH_BUILD_CONTAINER" ccache -o compression=true || true
docker exec "$TORCH_BUILD_CONTAINER" ccache -o compression_level=6 || true
docker exec "$TORCH_BUILD_CONTAINER" ccache -s || true
fi

# Currently changes in these scripts will not be applied without a clean
# build, which is not ideal for dev work. But we have to balance this with
# extra time/network traffic when rebuilding many times.
Expand Down Expand Up @@ -134,9 +170,12 @@ OVERRIDE_PACKAGE_VERSION="${version%??}.dev${build_date}${TORCH_RELEASE_ID:+"+$T
docker exec $TORCH_BUILD_CONTAINER bash -lc "
source /tmp/env &&
BUILD_TEST=0 \
DO_SETUP_PY_CLEAN_BEFORE_BUILD=0 \
WIPE_RH_CUDA_AFTER_BUILD=0 \
OVERRIDE_PACKAGE_VERSION=$OVERRIDE_PACKAGE_VERSION \
bash ${PYTORCH_ROOT}/.ci/manywheel/build.sh
"

# directories generated by the docker container are owned by root, so transfer ownership to user
docker exec $TORCH_BUILD_CONTAINER chown -R "$(id -u)":"$(id -g)" "${PYTORCH_ROOT}" /artifacts
# The ccache directory is only present inside the container when it was bind-mounted at
# container creation time (i.e. ccache was not disabled). chown exits non-zero if any operand
# is missing, so only include the ccache directory when it actually exists in the container.
chown_targets=("${PYTORCH_ROOT}" "${PYTORCH_FINAL_PACKAGE_DIR}")
if docker exec "$TORCH_BUILD_CONTAINER" test -d "${LOCAL_CCACHE_DIR}"; then
    chown_targets+=("${LOCAL_CCACHE_DIR}")
fi
docker exec "$TORCH_BUILD_CONTAINER" chown -R "$(id -u)":"$(id -g)" "${chown_targets[@]}"
7 changes: 6 additions & 1 deletion ML-Frameworks/pytorch-aarch64/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ if ! [[ $* == *--use-existing-sources* ]]; then
./get-source.sh
fi

./build-wheel.sh
# We build the wheel with ccache by default; allow disabling it via the --disable-ccache flag
build_wheel_args=()
if [[ "$*" == *--disable-ccache* ]]; then
build_wheel_args+=(--disable-ccache)
fi
./build-wheel.sh "${build_wheel_args[@]}"

[[ $* == *--wheel-only* ]] && exit 0

Expand Down
12 changes: 8 additions & 4 deletions ML-Frameworks/pytorch-aarch64/get-source.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@ git-shallow-clone https://github.com/pytorch/pytorch.git $PYTORCH_HASH
cd pytorch

# https://github.com/pytorch/pytorch/pull/167829 - Refactor ACL and OpenBLAS install scripts on AArch64
# Note: as part of this patch, setuptools is pinned to ~= 78.1.1 which is not affected by
# CVE-2025-47273 and CVE-2024-6345
apply-github-patch pytorch/pytorch 69db12b465887df96d27fe2bb93746ac334577f1
apply-github-patch pytorch/pytorch 5184c373a8bc77809b6e59361e191d4e78d6a824
apply-github-patch pytorch/pytorch f5e7b3ab44b14902f1e44ac138006b04bd9b7728

# https://github.com/pytorch/pytorch/pull/170062 - Add ccache support to ACL/OpenBLAS and manywheel
# build script.
apply-github-patch pytorch/pytorch 327b118078869b85d979d9f7eb1038b8a53c8a49

# https://github.com/pytorch/pytorch/pull/170600 - Gate deletion of clean-up steps in build_common.sh
apply-github-patch pytorch/pytorch e368ec2693b8b2b8ba35d0913f1d663ba2fdc804

# FIXME: Temporarily disabled; to be updated in a later PR
# # https://github.com/pytorch/pytorch/pull/160184 - Draft: separate reqs for manywheel build and pin
Expand Down