|
1 | | -name: Deployment 01D - PR - Train and Store |
| 1 | +name: Deployment 01E - PR - Train and Store |
2 | 2 |
|
3 | 3 | on: |
4 | 4 | workflow_call: |
@@ -41,66 +41,66 @@ jobs: |
41 | 41 | #---------------------------------------- |
42 | 42 | # JOB 1: Build and push Docker image to ECR |
43 | 43 | #---------------------------------------- |
44 | | - # build-and-push-ecr-image: |
45 | | - # name: 📦 Build and Push Docker Image |
46 | | - # runs-on: ubuntu-latest |
47 | | - # outputs: |
48 | | - # commit_id: ${{ steps.get_commit_id.outputs.commit_id }} |
49 | | - # registry: ${{ steps.login-ecr.outputs.registry }} |
50 | | - # docker_username: ${{ steps.login-ecr.outputs.docker_username_306093656765_dkr_ecr_ap_south_1_amazonaws_com }} |
51 | | - # docker_password: ${{ steps.login-ecr.outputs.docker_password_306093656765_dkr_ecr_ap_south_1_amazonaws_com }} |
52 | | - # steps: |
53 | | - # - name: Checkout Code |
54 | | - # uses: actions/checkout@v3 |
55 | | - |
56 | | - # - name: Install Utilities |
57 | | - # run: | |
58 | | - # sudo apt-get update |
59 | | - # sudo apt-get install -y jq unzip |
60 | | - |
61 | | - # - name: Configure AWS Credentials |
62 | | - # uses: aws-actions/configure-aws-credentials@v4 |
63 | | - # with: |
64 | | - # aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} |
65 | | - # aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} |
66 | | - # aws-region: ${{ secrets.AWS_REGION }} |
67 | | - |
68 | | - # - name: Login to Amazon ECR |
69 | | - # id: login-ecr |
70 | | - # uses: aws-actions/amazon-ecr-login@v2 |
71 | | - # with: |
72 | | - # mask-password: 'false' |
73 | | - |
74 | | - # - name: Get Latest Commit ID |
75 | | - # id: get_commit_id |
76 | | - # run: | |
77 | | - # latest_commit=$(git rev-parse HEAD) |
78 | | - # echo "commit_id=$latest_commit" >> $GITHUB_OUTPUT |
| 44 | + build-and-push-ecr-image: |
| 45 | + name: 📦 Build and Push Docker Image |
| 46 | + runs-on: ubuntu-22.04 |
| 47 | + outputs: |
| 48 | + # commit_id: ${{ steps.get_commit_id.outputs.commit_id }} |
| 49 | + registry: ${{ steps.login-ecr.outputs.registry }} |
| 50 | + docker_username: ${{ steps.login-ecr.outputs.docker_username_306093656765_dkr_ecr_ap_south_1_amazonaws_com }} |
| 51 | + docker_password: ${{ steps.login-ecr.outputs.docker_password_306093656765_dkr_ecr_ap_south_1_amazonaws_com }} |
| 52 | + steps: |
| 53 | + - name: Checkout Code |
| 54 | + uses: actions/checkout@v3 |
| 55 | + |
| 56 | + - name: Install Utilities |
| 57 | + run: | |
| 58 | + sudo apt-get update |
| 59 | + sudo apt-get install -y jq unzip |
| 60 | +
|
| 61 | + - name: Configure AWS Credentials |
| 62 | + uses: aws-actions/configure-aws-credentials@v4 |
| 63 | + with: |
| 64 | + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} |
| 65 | + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} |
| 66 | + aws-region: ${{ secrets.AWS_REGION }} |
| 67 | + |
| 68 | + - name: Login to Amazon ECR |
| 69 | + id: login-ecr |
| 70 | + uses: aws-actions/amazon-ecr-login@v2 |
| 71 | + with: |
| 72 | + mask-password: 'false' |
| 73 | + |
| 74 | + # - name: Get Latest Commit ID |
| 75 | + # id: get_commit_id |
| 76 | + # run: | |
| 77 | + # latest_commit=$(git rev-parse HEAD) |
| 78 | + # echo "commit_id=$latest_commit" >> $GITHUB_OUTPUT |
79 | 79 |
|
80 | | - # - name: Display the commit ID |
81 | | - # run: | |
82 | | - # echo "Latest commit ID is: ${{ steps.get_commit_id.outputs.commit_id }}" |
83 | | - |
84 | | - # # - name: Build and Push Docker Image |
85 | | - # # id: build-image |
86 | | - # # env: |
87 | | - # # ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} |
88 | | - # # IMAGE_TAG: latest |
89 | | - # # run: | |
90 | | - # # # Build development container image and push to ECR |
91 | | - # # echo "Building and pushing image to $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" |
92 | | - # # docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . |
93 | | - # # docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG |
94 | | - # # echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT |
| 80 | + # - name: Display the commit ID |
| 81 | + # run: | |
| 82 | + # echo "Latest commit ID is: ${{ steps.get_commit_id.outputs.commit_id }}" |
| 83 | + |
| 84 | + - name: Build and Push Docker Image |
| 85 | + id: build-image |
| 86 | + env: |
| 87 | + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} |
| 88 | + IMAGE_TAG: latest |
| 89 | + run: | |
| 90 | + # Build development container image and push to ECR |
| 91 | + echo "Building and pushing image to $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" |
| 92 | + docker build --platform linux/amd64 -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . |
| 93 | + docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG |
| 94 | + echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT |
95 | 95 |
|
96 | 96 | # #---------------------------------------- |
97 | 97 | # # JOB 2: Launch EC2 instance with GPU for training |
98 | 98 | # #---------------------------------------- |
99 | 99 |
|
100 | 100 | launch-runner: |
101 | 101 | name: 🚀 Launch EC2 GPU Runner |
102 | | - runs-on: ubuntu-latest |
103 | | - # needs: build-and-push-ecr-image |
| 102 | + runs-on: ubuntu-22.04 |
| 103 | + needs: build-and-push-ecr-image |
104 | 104 | outputs: |
105 | 105 | label: ${{ steps.start-ec2-runner.outputs.label }} |
106 | 106 | ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} |
@@ -158,7 +158,7 @@ jobs: |
158 | 158 | runs-on: ${{ needs.launch-runner.outputs.label }} |
159 | 159 | # outputs: |
160 | 160 | # commit_id: ${{ steps.get_commit_id_ec2.outputs.commit_id }} |
161 | | - timeout-minutes: 45 |
| 161 | + timeout-minutes: 30 |
162 | 162 |
|
163 | 163 | steps: |
164 | 164 | # - uses: iterative/setup-cml@v2 |
@@ -197,11 +197,21 @@ jobs: |
197 | 197 | # pip install -r requirements.cpu.txt |
198 | 198 | # pip install pyopenssl --upgrade |
199 | 199 |
|
200 | | - # 306093656765.dkr.ecr.ap-south-1.amazonaws.com/emlo-session-10-image |
| 200 | + - name: Verify GPU Access via torch and lightning |
| 201 | + run: | |
| 202 | + docker run --gpus all \ |
| 203 | + --privileged --ipc=host \ |
| 204 | + -e NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \ |
| 205 | + -e NVIDIA_VISIBLE_DEVICES=all \ |
| 206 | + --rm ${{ secrets.AWS_ECR_LOGIN_URI }}/${{ secrets.ECR_REPOSITORY_NAME }}:latest \ |
| 207 | + python -c "import torch; from lightning.pytorch.accelerators import CUDAAccelerator; print(f'PyTorch CUDA: {torch.cuda.is_available()}'); print(f'Lightning GPUs: {CUDAAccelerator.auto_device_count()}')" |
201 | 208 |
|
202 | 209 | - name: Run DVC commands in container |
203 | 210 | run: | |
204 | 211 | docker run --gpus all \ |
| 212 | + --privileged --ipc=host \ |
| 213 | + -e NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \ |
| 214 | + -e NVIDIA_VISIBLE_DEVICES=all \ |
205 | 215 | --name session-18-container \ |
206 | 216 | --shm-size=8g \ |
207 | 217 | -v "$(pwd):/workspace" \ |
@@ -282,7 +292,7 @@ jobs: |
282 | 292 | needs: |
283 | 293 | - launch-runner |
284 | 294 | - do-the-job |
285 | | - runs-on: ubuntu-latest |
| 295 | + runs-on: ubuntu-22.04 |
286 | 296 | if: ${{ always() }} |
287 | 297 | steps: |
288 | 298 | - name: Configure AWS credentials |
|
0 commit comments