Skip to content

Commit 40ba831

Browse files
committed
update workflow
1 parent 69c5bff commit 40ba831

File tree

4 files changed

+153
-159
lines changed

4 files changed

+153
-159
lines changed

.github/workflows/01C_Deployment_PR.yaml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,20 @@ on:
88
workflow_dispatch:
99

1010
jobs:
11-
trigger-build-image:
12-
uses: ./.github/workflows/01D_Deployment_PR_Push_Image.yaml
13-
secrets:
14-
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
15-
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
16-
AWS_REGION: ${{ secrets.AWS_REGION }}
17-
AWS_ECR_LOGIN_URI: ${{ secrets.AWS_ECR_LOGIN_URI }}
18-
ECR_REPOSITORY_NAME: ${{ secrets.ECR_REPOSITORY_NAME }}
19-
AWS_SECURITY_GROUP_ID: ${{ secrets.AWS_SECURITY_GROUP_ID }}
20-
AWS_SUBNET_ID_1: ${{ secrets.AWS_SUBNET_ID_1 }}
21-
PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
11+
# trigger-build-image:
12+
# uses: ./.github/workflows/01D_Deployment_PR_Push_Image.yaml
13+
# secrets:
14+
# AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
15+
# AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
16+
# AWS_REGION: ${{ secrets.AWS_REGION }}
17+
# AWS_ECR_LOGIN_URI: ${{ secrets.AWS_ECR_LOGIN_URI }}
18+
# ECR_REPOSITORY_NAME: ${{ secrets.ECR_REPOSITORY_NAME }}
19+
# AWS_SECURITY_GROUP_ID: ${{ secrets.AWS_SECURITY_GROUP_ID }}
20+
# AWS_SUBNET_ID_1: ${{ secrets.AWS_SUBNET_ID_1 }}
21+
# PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
2222

2323
trigger-train:
24-
needs: trigger-build-image
24+
# needs: trigger-build-image
2525
uses: ./.github/workflows/01D_Deployment_PR_Train_And_Store.yaml
2626
secrets:
2727
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}

.github/workflows/01D_Deployment_PR_Train_And_Store.yaml

Lines changed: 66 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Deployment 01D - PR - Train and Store
1+
name: Deployment 01E - PR - Train and Store
22

33
on:
44
workflow_call:
@@ -41,66 +41,66 @@ jobs:
4141
#----------------------------------------
4242
# JOB 1: Build and push Docker image to ECR
4343
#----------------------------------------
44-
# build-and-push-ecr-image:
45-
# name: 📦 Build and Push Docker Image
46-
# runs-on: ubuntu-latest
47-
# outputs:
48-
# commit_id: ${{ steps.get_commit_id.outputs.commit_id }}
49-
# registry: ${{ steps.login-ecr.outputs.registry }}
50-
# docker_username: ${{ steps.login-ecr.outputs.docker_username_306093656765_dkr_ecr_ap_south_1_amazonaws_com }}
51-
# docker_password: ${{ steps.login-ecr.outputs.docker_password_306093656765_dkr_ecr_ap_south_1_amazonaws_com }}
52-
# steps:
53-
# - name: Checkout Code
54-
# uses: actions/checkout@v3
55-
56-
# - name: Install Utilities
57-
# run: |
58-
# sudo apt-get update
59-
# sudo apt-get install -y jq unzip
60-
61-
# - name: Configure AWS Credentials
62-
# uses: aws-actions/configure-aws-credentials@v4
63-
# with:
64-
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
65-
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
66-
# aws-region: ${{ secrets.AWS_REGION }}
67-
68-
# - name: Login to Amazon ECR
69-
# id: login-ecr
70-
# uses: aws-actions/amazon-ecr-login@v2
71-
# with:
72-
# mask-password: 'false'
73-
74-
# - name: Get Latest Commit ID
75-
# id: get_commit_id
76-
# run: |
77-
# latest_commit=$(git rev-parse HEAD)
78-
# echo "commit_id=$latest_commit" >> $GITHUB_OUTPUT
44+
build-and-push-ecr-image:
45+
name: 📦 Build and Push Docker Image
46+
runs-on: ubuntu-22.04
47+
outputs:
48+
# commit_id: ${{ steps.get_commit_id.outputs.commit_id }}
49+
registry: ${{ steps.login-ecr.outputs.registry }}
50+
docker_username: ${{ steps.login-ecr.outputs.docker_username_306093656765_dkr_ecr_ap_south_1_amazonaws_com }}
51+
docker_password: ${{ steps.login-ecr.outputs.docker_password_306093656765_dkr_ecr_ap_south_1_amazonaws_com }}
52+
steps:
53+
- name: Checkout Code
54+
uses: actions/checkout@v3
55+
56+
- name: Install Utilities
57+
run: |
58+
sudo apt-get update
59+
sudo apt-get install -y jq unzip
60+
61+
- name: Configure AWS Credentials
62+
uses: aws-actions/configure-aws-credentials@v4
63+
with:
64+
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
65+
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
66+
aws-region: ${{ secrets.AWS_REGION }}
67+
68+
- name: Login to Amazon ECR
69+
id: login-ecr
70+
uses: aws-actions/amazon-ecr-login@v2
71+
with:
72+
mask-password: 'false'
73+
74+
# - name: Get Latest Commit ID
75+
# id: get_commit_id
76+
# run: |
77+
# latest_commit=$(git rev-parse HEAD)
78+
# echo "commit_id=$latest_commit" >> $GITHUB_OUTPUT
7979

80-
# - name: Display the commit ID
81-
# run: |
82-
# echo "Latest commit ID is: ${{ steps.get_commit_id.outputs.commit_id }}"
83-
84-
# # - name: Build and Push Docker Image
85-
# # id: build-image
86-
# # env:
87-
# # ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
88-
# # IMAGE_TAG: latest
89-
# # run: |
90-
# # # Build development container image and push to ECR
91-
# # echo "Building and pushing image to $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
92-
# # docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
93-
# # docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
94-
# # echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT
80+
# - name: Display the commit ID
81+
# run: |
82+
# echo "Latest commit ID is: ${{ steps.get_commit_id.outputs.commit_id }}"
83+
84+
- name: Build and Push Docker Image
85+
id: build-image
86+
env:
87+
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
88+
IMAGE_TAG: latest
89+
run: |
90+
# Build development container image and push to ECR
91+
echo "Building and pushing image to $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
92+
docker build --platform linux/amd64 -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
93+
docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
94+
echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT
9595
9696
# #----------------------------------------
9797
# # JOB 2: Launch EC2 instance with GPU for training
9898
# #----------------------------------------
9999

100100
launch-runner:
101101
name: 🚀 Launch EC2 GPU Runner
102-
runs-on: ubuntu-latest
103-
# needs: build-and-push-ecr-image
102+
runs-on: ubuntu-22.04
103+
needs: build-and-push-ecr-image
104104
outputs:
105105
label: ${{ steps.start-ec2-runner.outputs.label }}
106106
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
@@ -158,7 +158,7 @@ jobs:
158158
runs-on: ${{ needs.launch-runner.outputs.label }}
159159
# outputs:
160160
# commit_id: ${{ steps.get_commit_id_ec2.outputs.commit_id }}
161-
timeout-minutes: 45
161+
timeout-minutes: 30
162162

163163
steps:
164164
# - uses: iterative/setup-cml@v2
@@ -197,11 +197,21 @@ jobs:
197197
# pip install -r requirements.cpu.txt
198198
# pip install pyopenssl --upgrade
199199

200-
# 306093656765.dkr.ecr.ap-south-1.amazonaws.com/emlo-session-10-image
200+
- name: Verify GPU Access via torch and lightining
201+
run: |
202+
docker run --gpus all \
203+
--privileged --ipc=host \
204+
-e NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \
205+
-e NVIDIA_VISIBLE_DEVICES=all \
206+
--rm ${{ secrets.AWS_ECR_LOGIN_URI }}/${{ secrets.ECR_REPOSITORY_NAME }}:latest \
207+
python -c "import torch; from lightning.pytorch.accelerators import CUDAAccelerator; print(f'PyTorch CUDA: {torch.cuda.is_available()}'); print(f'Lightning GPUs: {CUDAAccelerator.auto_device_count()}')"
201208
202209
- name: Run DVC commands in container
203210
run: |
204211
docker run --gpus all \
212+
--privileged --ipc=host \
213+
-e NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \
214+
-e NVIDIA_VISIBLE_DEVICES=all \
205215
--name session-18-container \
206216
--shm-size=8g \
207217
-v "$(pwd):/workspace" \
@@ -282,7 +292,7 @@ jobs:
282292
needs:
283293
- launch-runner
284294
- do-the-job
285-
runs-on: ubuntu-latest
295+
runs-on: ubuntu-22.04
286296
if: ${{ always() }}
287297
steps:
288298
- name: Configure AWS credentials

0 commit comments

Comments
 (0)