From 764fffdea539e7d0541f55ef8e8149c9a454025e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Tue, 17 Dec 2024 14:05:04 +0000
Subject: [PATCH 1/7] add tests for gpus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
 .github/workflows/nightly_tests.yaml | 57 ++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/.github/workflows/nightly_tests.yaml b/.github/workflows/nightly_tests.yaml
index 2dff1261c..e89e868e1 100644
--- a/.github/workflows/nightly_tests.yaml
+++ b/.github/workflows/nightly_tests.yaml
@@ -22,6 +22,7 @@ on:
 env:
   # Names must be unique in parallel running tests.
   EMPTY_CLUSTER_NAME: nightly-xpk-zero-nodepools
+  GPU_CLUSTER_NAME: nightly-gpu
   PRIVATE_CLUSTER_NAME: nightly-xpk-private-2-v4-8-nodepools
   TPU_CLUSTER_NAME: nightly-xpk-2-v4-8-nodepools
   WORKLOAD_NAME: xpktest-nightly-${{ github.run_attempt }}
@@ -31,6 +32,62 @@ env:
   RAYCLUSTER_TPU_CLUSTER_NAME: rc-nightly-test-2-v4-8-nodepools
 
 jobs:
+  gpu-workloads:
+    runs-on: [ubuntu-22.04]
+    concurrency: # We support one build test to run at a time currently.
+      group: nightly-test-cluster-group
+      cancel-in-progress: false
+    strategy:
+      matrix:
+        gpu-type: ["a100-40gb-1", "h100-80gb-8"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install expect package
+        run: sudo apt-get install expect
+      - uses: 'google-github-actions/auth@v2'
+        with:
+          credentials_json: '${{ secrets.GCP_SA_KEY }}'
+      - uses: google-github-actions/setup-gcloud@v2
+        with:
+          version: '>= 363.0.0'
+          install_components: 'beta,gke-gcloud-auth-plugin'
+      - name: Verify gcp setup
+        run: gcloud info
+      - name: Set Google Cloud CLI properties to a unused zone to verify --zone arg is passed properly in commands.
+        run: |
+          gcloud config set compute/zone us-east4-a
+          gcloud config get compute/zone
+      - name: Install xpk dependencies
+        run: |
+          make install
+          echo $PWD/bin >> "$GITHUB_PATH"
+      - name: Check xpk installation
+        run: xpk --help
+      - name: Create an XPK Cluster with one gpu nodepool
+        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=${{ matrix.gpu-type }} --zone=us-central2-b --default-pool-cpu-machine-type=n1-standard-16 --spot --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
+      - name: Authenticate Docker
+        run: gcloud auth configure-docker --quiet
+      - name: Create test script to execute in workloads
+        run: echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > workload.sh
+      - name: Run a base-docker-image workload
+        run: python xpk.py workload create --cluster $GPU_CLUSTER_NAME --workload $WORKLOAD_NAME --command "bash workload.sh" --device-type=${{ matrix.gpu-type }} --zone=us-central2-b
+      - name: List out the workloads on the cluster
+        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --zone=us-central2-b
+      - name: Run xpk inspector with the workload created above
+        run: python3 xpk.py inspector --cluster $GPU_CLUSTER_NAME --zone=us-central2-b --workload $WORKLOAD_NAME
+      - name: Wait for workload completion and confirm it succeeded
+        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --zone=us-central2-b --wait-for-job-completion $WORKLOAD_NAME --timeout 300
+      - name: Run xpk info command
+        run: python3 xpk.py info --cluster $GPU_CLUSTER_NAME --zone=us-central2-b
+      - name: Delete the workload on the cluster
+        run: python3 xpk.py workload delete --workload $WORKLOAD_NAME --cluster $GPU_CLUSTER_NAME --zone=us-central2-b
+      - name: Delete the cluster created
+        if: always()
+        run: python xpk.py cluster delete --cluster $GPU_CLUSTER_NAME --zone=us-central2-b --force
+
   cluster-create-and-delete:
     runs-on: [ubuntu-22.04]
     concurrency: # We support one build test to run at a time currently.

From 7a12505daffeaf3ae837d0a4b086daeb43ee670c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Thu, 23 Jan 2025 12:06:31 +0000
Subject: [PATCH 2/7] add tests for a3u and a3mega in nightly tests and on dispatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
Review notes (this section sits between the `---` marker and the diffstat,
so it is not part of the commit message or of any hunk):
  * a3mega-workload.yaml requests --device-type=h100-mega-80gb-8, the same
    value as the dispatch option that selects that workflow in
    nightly_tests.yaml; a3u-workload.yaml keeps h200-141gb-8.
  * Every xpk command in the two GPU workflows passes the same --project
    secret as `cluster create`, including the `if: always()` cluster delete.
  * NOTE(review): the `index` blob ids in this series were not regenerated
    after these edits, and context-line indentation for nightly_tests.yaml
    was reconstructed by convention - confirm against the real file.

 .github/workflows/a3mega-workload.yaml | 77 ++++++++++++++++++++++++++
 .github/workflows/a3u-workload.yaml    | 77 ++++++++++++++++++++++++++
 .github/workflows/nightly_tests.yaml   | 73 ++++++------------------
 3 files changed, 170 insertions(+), 57 deletions(-)
 create mode 100644 .github/workflows/a3mega-workload.yaml
 create mode 100644 .github/workflows/a3u-workload.yaml

diff --git a/.github/workflows/a3mega-workload.yaml b/.github/workflows/a3mega-workload.yaml
new file mode 100644
index 000000000..e38d002ad
--- /dev/null
+++ b/.github/workflows/a3mega-workload.yaml
@@ -0,0 +1,77 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+name: Nightly Tests
+
+on:
+  workflow_call:
+
+env:
+  # Names must be unique in parallel running tests.
+  GPU_CLUSTER_NAME: nightly-gpu-a3mega
+  WORKLOAD_NAME: xpktest-nightly-${{ github.run_attempt }}
+
+jobs:
+  gpu-a3u-workload:
+    runs-on: [ubuntu-22.04]
+    concurrency: # We support one build test to run at a time currently.
+      group: nightly-test-cluster-group
+      cancel-in-progress: false
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install expect package
+        run: sudo apt-get install expect
+      - uses: 'google-github-actions/auth@v2'
+        with:
+          credentials_json: '${{ secrets.GCP_SA_KEY }}'
+      - uses: google-github-actions/setup-gcloud@v2
+        with:
+          version: '>= 363.0.0'
+          install_components: 'beta,gke-gcloud-auth-plugin'
+      - name: Verify gcp setup
+        run: gcloud info
+      - name: Set Google Cloud CLI properties to a unused zone to verify --zone arg is passed properly in commands.
+        run: |
+          gcloud config set compute/zone us-east4-a
+          gcloud config get compute/zone
+      - name: Install xpk dependencies
+        run: |
+          make install
+          echo $PWD/bin >> "$GITHUB_PATH"
+      - name: Check xpk installation
+        run: xpk --help
+      - name: Create an XPK Cluster with one gpu nodepool
+        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=h100-mega-80gb-8 --num-nodes=1 --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}} --reservation=${{secrets.A3M_RESERVATION}}'
+      - name: Authenticate Docker
+        run: gcloud auth configure-docker --quiet
+      - name: Create test script to execute in workloads
+        run: echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > workload.sh
+      - name: Run a base-docker-image workload
+        run: python xpk.py workload create --cluster $GPU_CLUSTER_NAME --workload $WORKLOAD_NAME --command "bash workload.sh" --device-type=h100-mega-80gb-8 --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}}
+      - name: List out the workloads on the cluster
+        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}}
+      - name: Run xpk inspector with the workload created above
+        run: python3 xpk.py inspector --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}} --workload $WORKLOAD_NAME
+      - name: Wait for workload completion and confirm it succeeded
+        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}} --wait-for-job-completion $WORKLOAD_NAME --timeout 300
+      - name: Run xpk info command
+        run: python3 xpk.py info --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}}
+      - name: Delete the workload on the cluster
+        run: python3 xpk.py workload delete --workload $WORKLOAD_NAME --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}}
+      - name: Delete the cluster created
+        if: always()
+        run: python xpk.py cluster delete --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}} --force
diff --git a/.github/workflows/a3u-workload.yaml b/.github/workflows/a3u-workload.yaml
new file mode 100644
index 000000000..78a404a0b
--- /dev/null
+++ b/.github/workflows/a3u-workload.yaml
@@ -0,0 +1,77 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+name: Nightly Tests
+
+on:
+  workflow_call:
+
+env:
+  # Names must be unique in parallel running tests.
+  GPU_CLUSTER_NAME: nightly-gpu-a3ultra
+  WORKLOAD_NAME: xpktest-nightly-${{ github.run_attempt }}
+
+jobs:
+  gpu-a3u-workload:
+    runs-on: [ubuntu-22.04]
+    concurrency: # We support one build test to run at a time currently.
+      group: nightly-test-cluster-group
+      cancel-in-progress: false
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install expect package
+        run: sudo apt-get install expect
+      - uses: 'google-github-actions/auth@v2'
+        with:
+          credentials_json: '${{ secrets.GCP_SA_KEY }}'
+      - uses: google-github-actions/setup-gcloud@v2
+        with:
+          version: '>= 363.0.0'
+          install_components: 'beta,gke-gcloud-auth-plugin'
+      - name: Verify gcp setup
+        run: gcloud info
+      - name: Set Google Cloud CLI properties to a unused zone to verify --zone arg is passed properly in commands.
+        run: |
+          gcloud config set compute/zone us-east4-a
+          gcloud config get compute/zone
+      - name: Install xpk dependencies
+        run: |
+          make install
+          echo $PWD/bin >> "$GITHUB_PATH"
+      - name: Check xpk installation
+        run: xpk --help
+      - name: Create an XPK Cluster with one gpu nodepool
+        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=h200-141gb-8 --num-nodes=1 --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}} --reservation=${{secrets.A3U_RESERVATION}}'
+      - name: Authenticate Docker
+        run: gcloud auth configure-docker --quiet
+      - name: Create test script to execute in workloads
+        run: echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > workload.sh
+      - name: Run a base-docker-image workload
+        run: python xpk.py workload create --cluster $GPU_CLUSTER_NAME --workload $WORKLOAD_NAME --command "bash workload.sh" --device-type=h200-141gb-8 --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}}
+      - name: List out the workloads on the cluster
+        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}}
+      - name: Run xpk inspector with the workload created above
+        run: python3 xpk.py inspector --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}} --workload $WORKLOAD_NAME
+      - name: Wait for workload completion and confirm it succeeded
+        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}} --wait-for-job-completion $WORKLOAD_NAME --timeout 300
+      - name: Run xpk info command
+        run: python3 xpk.py info --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}}
+      - name: Delete the workload on the cluster
+        run: python3 xpk.py workload delete --workload $WORKLOAD_NAME --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}}
+      - name: Delete the cluster created
+        if: always()
+        run: python xpk.py cluster delete --cluster $GPU_CLUSTER_NAME --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}} --force
diff --git a/.github/workflows/nightly_tests.yaml b/.github/workflows/nightly_tests.yaml
index e89e868e1..7be7f73bf 100644
--- a/.github/workflows/nightly_tests.yaml
+++ b/.github/workflows/nightly_tests.yaml
@@ -16,13 +16,22 @@ name: Nightly Tests
 
 on:
   workflow_dispatch:
+    inputs:
+      gpu-type:
+        description: 'GPU Type'
+        required: false
+        default: ''
+        type: choice
+        options:
+          - 'h200-141gb-8'
+          - 'h100-mega-80gb-8'
+          - 'h100-80gb-8'
   schedule:
     # Schedule the job run at 12AM PST daily.
     - cron: '0 8 * * *'
 env:
   # Names must be unique in parallel running tests.
   EMPTY_CLUSTER_NAME: nightly-xpk-zero-nodepools
-  GPU_CLUSTER_NAME: nightly-gpu
   PRIVATE_CLUSTER_NAME: nightly-xpk-private-2-v4-8-nodepools
   TPU_CLUSTER_NAME: nightly-xpk-2-v4-8-nodepools
   WORKLOAD_NAME: xpktest-nightly-${{ github.run_attempt }}
@@ -32,62 +41,12 @@ env:
   RAYCLUSTER_TPU_CLUSTER_NAME: rc-nightly-test-2-v4-8-nodepools
 
 jobs:
-  gpu-workloads:
-    runs-on: [ubuntu-22.04]
-    concurrency: # We support one build test to run at a time currently.
-      group: nightly-test-cluster-group
-      cancel-in-progress: false
-    strategy:
-      matrix:
-        gpu-type: ["a100-40gb-1", "h100-80gb-8"]
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.10'
-      - name: Install expect package
-        run: sudo apt-get install expect
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          credentials_json: '${{ secrets.GCP_SA_KEY }}'
-      - uses: google-github-actions/setup-gcloud@v2
-        with:
-          version: '>= 363.0.0'
-          install_components: 'beta,gke-gcloud-auth-plugin'
-      - name: Verify gcp setup
-        run: gcloud info
-      - name: Set Google Cloud CLI properties to a unused zone to verify --zone arg is passed properly in commands.
-        run: |
-          gcloud config set compute/zone us-east4-a
-          gcloud config get compute/zone
-      - name: Install xpk dependencies
-        run: |
-          make install
-          echo $PWD/bin >> "$GITHUB_PATH"
-      - name: Check xpk installation
-        run: xpk --help
-      - name: Create an XPK Cluster with one gpu nodepool
-        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=${{ matrix.gpu-type }} --zone=us-central2-b --default-pool-cpu-machine-type=n1-standard-16 --spot --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
-      - name: Authenticate Docker
-        run: gcloud auth configure-docker --quiet
-      - name: Create test script to execute in workloads
-        run: echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > workload.sh
-      - name: Run a base-docker-image workload
-        run: python xpk.py workload create --cluster $GPU_CLUSTER_NAME --workload $WORKLOAD_NAME --command "bash workload.sh" --device-type=${{ matrix.gpu-type }} --zone=us-central2-b
-      - name: List out the workloads on the cluster
-        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --zone=us-central2-b
-      - name: Run xpk inspector with the workload created above
-        run: python3 xpk.py inspector --cluster $GPU_CLUSTER_NAME --zone=us-central2-b --workload $WORKLOAD_NAME
-      - name: Wait for workload completion and confirm it succeeded
-        run: python3 xpk.py workload list --cluster $GPU_CLUSTER_NAME --zone=us-central2-b --wait-for-job-completion $WORKLOAD_NAME --timeout 300
-      - name: Run xpk info command
-        run: python3 xpk.py info --cluster $GPU_CLUSTER_NAME --zone=us-central2-b
-      - name: Delete the workload on the cluster
-        run: python3 xpk.py workload delete --workload $WORKLOAD_NAME --cluster $GPU_CLUSTER_NAME --zone=us-central2-b
-      - name: Delete the cluster created
-        if: always()
-        run: python xpk.py cluster delete --cluster $GPU_CLUSTER_NAME --zone=us-central2-b --force
-
+  a3u-test:
+    if: inputs.gpu-type == 'h200-141gb-8'
+    uses: ./.github/workflows/a3u-workload.yaml
+  a3mega-test:
+    if: inputs.gpu-type == 'h100-mega-80gb-8'
+    uses: ./.github/workflows/a3mega-workload.yaml
   cluster-create-and-delete:
     runs-on: [ubuntu-22.04]
     concurrency: # We support one build test to run at a time currently.

From ea077aa71e83bccf7c5908fbf935796fab0a6bee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Thu, 23 Jan 2025 12:53:06 +0000
Subject: [PATCH 3/7] fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
 .github/workflows/a3mega-workload.yaml | 2 +-
 .github/workflows/a3u-workload.yaml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/a3mega-workload.yaml b/.github/workflows/a3mega-workload.yaml
index e38d002ad..a8e26b268 100644
--- a/.github/workflows/a3mega-workload.yaml
+++ b/.github/workflows/a3mega-workload.yaml
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
-name: Nightly Tests
+name: a3mega-nightly
 
 on:
   workflow_call:
diff --git a/.github/workflows/a3u-workload.yaml b/.github/workflows/a3u-workload.yaml
index 78a404a0b..325ad0ab1 100644
--- a/.github/workflows/a3u-workload.yaml
+++ b/.github/workflows/a3u-workload.yaml
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
-name: Nightly Tests
+name: a3u-nightly
 
 on:
   workflow_call:

From c4175dcb7e504ab8b288226b3651591bedb108fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Thu, 23 Jan 2025 12:56:02 +0000
Subject: [PATCH 4/7] change concurrency groups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
 .github/workflows/a3mega-workload.yaml | 2 +-
 .github/workflows/a3u-workload.yaml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/a3mega-workload.yaml b/.github/workflows/a3mega-workload.yaml
index a8e26b268..28fa4e76c 100644
--- a/.github/workflows/a3mega-workload.yaml
+++ b/.github/workflows/a3mega-workload.yaml
@@ -26,7 +26,7 @@ jobs:
   gpu-a3u-workload:
     runs-on: [ubuntu-22.04]
     concurrency: # We support one build test to run at a time currently.
-      group: nightly-test-cluster-group
+      group: nightly-test-cluster-group-gpu
       cancel-in-progress: false
     steps:
       - uses: actions/checkout@v4
diff --git a/.github/workflows/a3u-workload.yaml b/.github/workflows/a3u-workload.yaml
index 325ad0ab1..433f3f270 100644
--- a/.github/workflows/a3u-workload.yaml
+++ b/.github/workflows/a3u-workload.yaml
@@ -26,7 +26,7 @@ jobs:
   gpu-a3u-workload:
     runs-on: [ubuntu-22.04]
     concurrency: # We support one build test to run at a time currently.
-      group: nightly-test-cluster-group
+      group: nightly-test-cluster-group-gpu
       cancel-in-progress: false
     steps:
       - uses: actions/checkout@v4

From 13d917364e5557a57946df40e126cefeeab9e0ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Thu, 23 Jan 2025 13:01:24 +0000
Subject: [PATCH 5/7] fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
 .github/workflows/a3mega-workload.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/a3mega-workload.yaml b/.github/workflows/a3mega-workload.yaml
index 28fa4e76c..e0601ba4c 100644
--- a/.github/workflows/a3mega-workload.yaml
+++ b/.github/workflows/a3mega-workload.yaml
@@ -23,7 +23,7 @@ env:
   WORKLOAD_NAME: xpktest-nightly-${{ github.run_attempt }}
 
 jobs:
-  gpu-a3u-workload:
+  gpu-a3mega-workload:
     runs-on: [ubuntu-22.04]
     concurrency: # We support one build test to run at a time currently.
       group: nightly-test-cluster-group-gpu

From 762a4e0e83119f57033354a07e58451969a5d6c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Thu, 23 Jan 2025 13:02:59 +0000
Subject: [PATCH 6/7] fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
 .github/workflows/a3mega-workload.yaml | 2 --
 .github/workflows/nightly_tests.yaml   | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/a3mega-workload.yaml b/.github/workflows/a3mega-workload.yaml
index e0601ba4c..244c1e664 100644
--- a/.github/workflows/a3mega-workload.yaml
+++ b/.github/workflows/a3mega-workload.yaml
@@ -33,8 +33,6 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: '3.10'
-      - name: Install expect package
-        run: sudo apt-get install expect
       - uses: 'google-github-actions/auth@v2'
         with:
           credentials_json: '${{ secrets.GCP_SA_KEY }}'
diff --git a/.github/workflows/nightly_tests.yaml b/.github/workflows/nightly_tests.yaml
index 7be7f73bf..682c771bb 100644
--- a/.github/workflows/nightly_tests.yaml
+++ b/.github/workflows/nightly_tests.yaml
@@ -44,9 +44,11 @@ jobs:
   a3u-test:
     if: inputs.gpu-type == 'h200-141gb-8'
     uses: ./.github/workflows/a3u-workload.yaml
+    secrets: inherit
   a3mega-test:
     if: inputs.gpu-type == 'h100-mega-80gb-8'
     uses: ./.github/workflows/a3mega-workload.yaml
+    secrets: inherit
   cluster-create-and-delete:
     runs-on: [ubuntu-22.04]
     concurrency: # We support one build test to run at a time currently.

From 48c66d0e04bbb51a7e3cd049ba08eee3549e3c3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Paw=C5=82owski?=
Date: Thu, 23 Jan 2025 13:06:32 +0000
Subject: [PATCH 7/7] fix typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Piotr Pawłowski
---
 .github/workflows/a3mega-workload.yaml | 2 +-
 .github/workflows/a3u-workload.yaml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/a3mega-workload.yaml b/.github/workflows/a3mega-workload.yaml
index 244c1e664..08fd1b449 100644
--- a/.github/workflows/a3mega-workload.yaml
+++ b/.github/workflows/a3mega-workload.yaml
@@ -53,7 +53,7 @@ jobs:
       - name: Check xpk installation
         run: xpk --help
       - name: Create an XPK Cluster with one gpu nodepool
-        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=h100-mega-80gb-8 --num-nodes=1 --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}} --reservation=${{secrets.A3M_RESERVATION}}'
+        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=h100-mega-80gb-8 --num-nodes=1 --project=${{secrets.A3M_GPU_PROJECT}} --zone=${{secrets.A3M_GPU_ZONE}} --reservation=${{secrets.A3M_RESERVATION}}
       - name: Authenticate Docker
         run: gcloud auth configure-docker --quiet
       - name: Create test script to execute in workloads
diff --git a/.github/workflows/a3u-workload.yaml b/.github/workflows/a3u-workload.yaml
index 433f3f270..eda178cce 100644
--- a/.github/workflows/a3u-workload.yaml
+++ b/.github/workflows/a3u-workload.yaml
@@ -55,7 +55,7 @@ jobs:
       - name: Check xpk installation
         run: xpk --help
       - name: Create an XPK Cluster with one gpu nodepool
-        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=h200-141gb-8 --num-nodes=1 --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}} --reservation=${{secrets.A3U_RESERVATION}}'
+        run: python xpk.py cluster create --cluster $GPU_CLUSTER_NAME --device-type=h200-141gb-8 --num-nodes=1 --project=${{secrets.A3U_GPU_PROJECT}} --zone=${{secrets.A3U_GPU_ZONE}} --reservation=${{secrets.A3U_RESERVATION}}
       - name: Authenticate Docker
         run: gcloud auth configure-docker --quiet
       - name: Create test script to execute in workloads