From fb8b9a1ba0c31f17ebda6aacfde266f835e25013 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Fri, 14 Nov 2025 10:53:27 +0100
Subject: [PATCH 01/15] chore: added CI model layer caching

---
 .github/workflows/cicd.yml | 37 ++++++++++++++++++++++++++++++++++++-
 docker/vllm.Dockerfile     | 21 +++++++++++++--------
 2 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 99857cc6..bf2ec049 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -110,14 +110,49 @@ jobs:
         include:
           - component: api
             build_args: "--target nilai --platform linux/amd64"
+          - component: vllm
+            model_to_cache: "openai/gpt-oss-20b"
     steps:
       - name: Checkout
         uses: actions/checkout@v2
 
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Build ${{ matrix.component }} image
         run: |
           echo "Building ${{ matrix.component }} image..."
-          docker build -t nillion/nilai-${{ matrix.component }}:latest -f docker/${{ matrix.component }}.Dockerfile ${{ matrix.build_args || '' }} .
+
+          # Convert repository name to lowercase for Docker registry compatibility
+          REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
+
+          # Set cache and build args based on component
+          CACHE_FROM="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache"
+          CACHE_TO="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache,mode=max"
+
+          # Add model caching for vllm component
+          EXTRA_BUILD_ARGS=""
+          if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then
+            EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }} --build-arg HF_TOKEN=${{ secrets.HF_TOKEN }}"
+          fi
+
+          docker buildx build \
+            -t nillion/nilai-${{ matrix.component }}:latest \
+            -f docker/${{ matrix.component }}.Dockerfile \
+            --cache-from=${CACHE_FROM} \
+            --cache-to=${CACHE_TO} \
+            --load \
+            ${{ matrix.build_args || '' }} \
+            ${EXTRA_BUILD_ARGS} \
+            .
+
           echo "✅ ${{ matrix.component }} build completed successfully"
 
   e2e-tests:
diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile
index eb938667..c3259434 100644
--- a/docker/vllm.Dockerfile
+++ b/docker/vllm.Dockerfile
@@ -1,13 +1,8 @@
 FROM vllm/vllm-openai:v0.10.1
 
-# # Specify model name and path during build
-# ARG MODEL_NAME=llama_1b_cpu
-# ARG MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct
-
-# # Set environment variables
-# ENV MODEL_NAME=${MODEL_NAME}
-# ENV MODEL_PATH=${MODEL_PATH}
-# ENV EXEC_PATH=nilai_models.models.${MODEL_NAME}:app
+# Specify model to pre-download during build (optional, for caching)
+ARG MODEL_TO_CACHE=""
+ARG HF_TOKEN=""
 
 COPY --link . /daemon/
 COPY --link vllm_templates /opt/vllm/templates
@@ -22,6 +17,16 @@ RUN apt-get update && \
     apt-get autoremove && \
     rm -rf /var/lib/apt/lists/*
 
+# Pre-download model if MODEL_TO_CACHE is provided
+# This creates a cached layer with the model to avoid re-downloading in CI
+RUN if [ -n "$MODEL_TO_CACHE" ]; then \
+        echo "Pre-downloading model: $MODEL_TO_CACHE"; \
+        export HF_TOKEN="${HF_TOKEN}"; \
+        python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')"; \
+    else \
+        echo "No model specified for caching, will download at runtime"; \
+    fi
+
 # Expose port 8000 for incoming requests
 EXPOSE 8000
 

From 0bc77ce5cbe6b95420b8dfe42abe394cf40bf120 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Fri, 14 Nov 2025 11:52:48 +0100
Subject: [PATCH 02/15] fix: removed secrets from build

---
 .github/workflows/cicd.yml | 11 ++++++++++-
 docker/vllm.Dockerfile     | 11 +++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index bf2ec049..099d9ab0 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -11,6 +11,7 @@ on:
 permissions:
   id-token: write # Required for OIDC
   contents: read  # Required for checkout
+  packages: write # Required for pushing cache layers to GHCR
 
 jobs:
   test:
@@ -118,6 +119,9 @@ jobs:
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: image=moby/buildkit:latest
+          buildkitd-flags: --allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host
 
       - name: Login to GitHub Container Registry
         uses: docker/login-action@v3
@@ -127,6 +131,8 @@ jobs:
           password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Build ${{ matrix.component }} image
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           echo "Building ${{ matrix.component }} image..."
 
@@ -139,8 +145,10 @@ jobs:
 
           # Add model caching for vllm component
           EXTRA_BUILD_ARGS=""
+          SECRET_ARGS=""
           if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then
-            EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }} --build-arg HF_TOKEN=${{ secrets.HF_TOKEN }}"
+            EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }}"
+            SECRET_ARGS="--secret id=hf_token,env=HF_TOKEN"
           fi
 
           docker buildx build \
@@ -151,6 +159,7 @@ jobs:
             --load \
             ${{ matrix.build_args || '' }} \
             ${EXTRA_BUILD_ARGS} \
+            ${SECRET_ARGS} \
             .
 
           echo "✅ ${{ matrix.component }} build completed successfully"
diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile
index c3259434..3f2d8fa3 100644
--- a/docker/vllm.Dockerfile
+++ b/docker/vllm.Dockerfile
@@ -2,7 +2,6 @@ FROM vllm/vllm-openai:v0.10.1
 
 # Specify model to pre-download during build (optional, for caching)
 ARG MODEL_TO_CACHE=""
-ARG HF_TOKEN=""
 
 COPY --link . /daemon/
 COPY --link vllm_templates /opt/vllm/templates
@@ -19,10 +18,14 @@ RUN apt-get update && \
 
 # Pre-download model if MODEL_TO_CACHE is provided
 # This creates a cached layer with the model to avoid re-downloading in CI
-RUN if [ -n "$MODEL_TO_CACHE" ]; then \
+RUN --mount=type=secret,id=hf_token \
+    if [ -n "$MODEL_TO_CACHE" ]; then \
         echo "Pre-downloading model: $MODEL_TO_CACHE"; \
-        export HF_TOKEN="${HF_TOKEN}"; \
-        python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')"; \
+        if [ -f /run/secrets/hf_token ]; then \
+            export HF_TOKEN="$(cat /run/secrets/hf_token)"; \
+        fi; \
+        python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')" \
+        || { echo >&2 "ERROR: Failed to pre-download model '$MODEL_TO_CACHE'. Check your network connection, HF_TOKEN, and model name."; exit 1; }; \
     else \
         echo "No model specified for caching, will download at runtime"; \
     fi

From 2ed95a19165f0cc05fac942af5c02fc5032ad811 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Fri, 14 Nov 2025 16:02:25 +0100
Subject: [PATCH 03/15] feat: cache runner model

---
 .github/workflows/cicd.yml                    | 43 ++++++++++++++-----
 .../docker-compose.gemma-4b-gpu.ci.yml        |  4 +-
 .../compose/docker-compose.gpt-20b-gpu.ci.yml |  4 +-
 .../docker-compose.llama-1b-gpu.ci.yml        |  4 +-
 .../compose/docker-compose.qwen-2b-gpu.ci.yml |  5 +--
 docker/vllm.Dockerfile                        | 18 +-------
 6 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 099d9ab0..961683f3 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -130,9 +130,40 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Build ${{ matrix.component }} image
+      - name: Pre-pull Docker base image (for vllm)
+        if: matrix.component == 'vllm'
+        run: |
+          echo "Pre-pulling vllm base image to avoid rate limiting during build..."
+          docker pull vllm/vllm-openai:v0.10.1
+
+      - name: Setup HuggingFace cache directory
+        if: matrix.component == 'vllm' && matrix.model_to_cache != ''
+        run: |
+          mkdir -p /home/ec2-user/.cache/huggingface
+          echo "Cache directory created at /home/ec2-user/.cache/huggingface"
+
+      - name: Cache HuggingFace models
+        if: matrix.component == 'vllm' && matrix.model_to_cache != ''
+        uses: actions/cache@v4
+        id: cache-hf-models
+        with:
+          path: /home/ec2-user/.cache/huggingface
+          key: huggingface-models-${{ matrix.model_to_cache }}-v1
+          restore-keys: |
+            huggingface-models-${{ matrix.model_to_cache }}-
+            huggingface-models-
+
+      - name: Download HuggingFace model
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          echo "Downloading model ${{ matrix.model_to_cache }} to cache..."
+          python3 -c "from huggingface_hub import snapshot_download; import os; os.environ['HF_TOKEN'] = '${{ secrets.HF_TOKEN }}'; snapshot_download('${{ matrix.model_to_cache }}', cache_dir='/home/ec2-user/.cache/huggingface'); print('Model cached successfully')" \
+            || { echo "Failed to download model"; exit 1; }
+          echo "Model download completed successfully"
+
+      - name: Build ${{ matrix.component }} image
         run: |
           echo "Building ${{ matrix.component }} image..."
 
@@ -143,14 +174,6 @@ jobs:
           CACHE_FROM="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache"
           CACHE_TO="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache,mode=max"
 
-          # Add model caching for vllm component
-          EXTRA_BUILD_ARGS=""
-          SECRET_ARGS=""
-          if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then
-            EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }}"
-            SECRET_ARGS="--secret id=hf_token,env=HF_TOKEN"
-          fi
-
           docker buildx build \
             -t nillion/nilai-${{ matrix.component }}:latest \
             -f docker/${{ matrix.component }}.Dockerfile \
@@ -158,8 +181,6 @@ jobs:
             --cache-to=${CACHE_TO} \
             --load \
             ${{ matrix.build_args || '' }} \
-            ${EXTRA_BUILD_ARGS} \
-            ${SECRET_ARGS} \
             .
 
           echo "✅ ${{ matrix.component }} build completed successfully"
diff --git a/docker/compose/docker-compose.gemma-4b-gpu.ci.yml b/docker/compose/docker-compose.gemma-4b-gpu.ci.yml
index 29423275..f80076a2 100644
--- a/docker/compose/docker-compose.gemma-4b-gpu.ci.yml
+++ b/docker/compose/docker-compose.gemma-4b-gpu.ci.yml
@@ -36,12 +36,10 @@ services:
       - VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
       - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
     volumes:
-      - hugging_face_models:/root/.cache/huggingface
+      - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface  # Mount runner's HF cache
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
       interval: 30s
       retries: 3
       start_period: 60s
       timeout: 10s
-volumes:
-  hugging_face_models:
diff --git a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
index dcfef4cb..9aa1dc47 100644
--- a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
+++ b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
@@ -34,12 +34,10 @@ services:
       - ETCD_PORT=2379
       - TOOL_SUPPORT=true
     volumes:
-      - hugging_face_models:/root/.cache/huggingface  # cache models
+      - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface  # Mount runner's HF cache
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
       interval: 30s
       retries: 10
       start_period: 900s
       timeout: 15s
-volumes:
-  hugging_face_models:
diff --git a/docker/compose/docker-compose.llama-1b-gpu.ci.yml b/docker/compose/docker-compose.llama-1b-gpu.ci.yml
index cca105f7..1c82d2a9 100644
--- a/docker/compose/docker-compose.llama-1b-gpu.ci.yml
+++ b/docker/compose/docker-compose.llama-1b-gpu.ci.yml
@@ -37,12 +37,10 @@ services:
       - TOOL_SUPPORT=true
       - CUDA_LAUNCH_BLOCKING=1
     volumes:
-      - hugging_face_models:/root/.cache/huggingface  # cache models
+      - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface  # Mount runner's HF cache
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
       interval: 30s
       retries: 3
       start_period: 60s
       timeout: 10s
-volumes:
-  hugging_face_models:
diff --git a/docker/compose/docker-compose.qwen-2b-gpu.ci.yml b/docker/compose/docker-compose.qwen-2b-gpu.ci.yml
index 7d040caf..14a31815 100644
--- a/docker/compose/docker-compose.qwen-2b-gpu.ci.yml
+++ b/docker/compose/docker-compose.qwen-2b-gpu.ci.yml
@@ -52,13 +52,10 @@ services:
       VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
       PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"
     volumes:
-      - hugging_face_models:/root/.cache/huggingface
+      - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface  # Mount runner's HF cache
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
       interval: 30s
       retries: 3
       start_period: 60s
       timeout: 10s
-
-volumes:
-  hugging_face_models:
diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile
index 3f2d8fa3..2b7018d9 100644
--- a/docker/vllm.Dockerfile
+++ b/docker/vllm.Dockerfile
@@ -1,8 +1,5 @@
 FROM vllm/vllm-openai:v0.10.1
 
-# Specify model to pre-download during build (optional, for caching)
-ARG MODEL_TO_CACHE=""
-
 COPY --link . /daemon/
 COPY --link vllm_templates /opt/vllm/templates
 
@@ -16,19 +13,8 @@ RUN apt-get update && \
     apt-get autoremove && \
     rm -rf /var/lib/apt/lists/*
 
-# Pre-download model if MODEL_TO_CACHE is provided
-# This creates a cached layer with the model to avoid re-downloading in CI
-RUN --mount=type=secret,id=hf_token \
-    if [ -n "$MODEL_TO_CACHE" ]; then \
-        echo "Pre-downloading model: $MODEL_TO_CACHE"; \
-        if [ -f /run/secrets/hf_token ]; then \
-            export HF_TOKEN="$(cat /run/secrets/hf_token)"; \
-        fi; \
-        python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')" \
-        || { echo >&2 "ERROR: Failed to pre-download model '$MODEL_TO_CACHE'. Check your network connection, HF_TOKEN, and model name."; exit 1; }; \
-    else \
-        echo "No model specified for caching, will download at runtime"; \
-    fi
+# Create cache directory structure (will be mounted from host at runtime)
+RUN mkdir -p /root/.cache/huggingface
 
 # Expose port 8000 for incoming requests
 EXPOSE 8000

From 796fca01709eff5ca5b5b2803fe3299b3193dca7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Fri, 14 Nov 2025 17:31:09 +0100
Subject: [PATCH 04/15] chore: bump checkout, cache and setup-uv action versions

---
 .github/workflows/cicd.yml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 961683f3..48459d9b 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -20,13 +20,13 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - uses: astral-sh/setup-uv@v4
+      - uses: astral-sh/setup-uv@v7
         with:
           enable-cache: true
           cache-dependency-glob: "**/pyproject.toml"
 
       - name: Cache dependencies
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ${{ env.UV_CACHE_DIR }}
           key: ${{ runner.os }}-uv-${{ hashFiles('**/pyproject.toml') }}
@@ -115,8 +115,7 @@ jobs:
             model_to_cache: "openai/gpt-oss-20b"
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
-
+        uses: actions/checkout@v4
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
         with:
@@ -191,9 +190,9 @@ jobs:
     runs-on: ${{ needs.start-runner.outputs.label }}
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
-      - uses: astral-sh/setup-uv@v4
+      - uses: astral-sh/setup-uv@v7
         with:
           enable-cache: true
           cache-dependency-glob: "**/pyproject.toml"

From 83f82c059c401c2b5abef3d577bee1948af5aae0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Fri, 14 Nov 2025 17:42:10 +0100
Subject: [PATCH 05/15] feat: added buildx

---
 .github/workflows/cicd.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 48459d9b..51d6a88b 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -116,6 +116,17 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+
+      - name: Install Docker Buildx plugin
+        run: |
+          set -euo pipefail
+          BUILDX_VERSION="v0.14.1"  # buildx release downloaded below
+          mkdir -p ~/.docker/cli-plugins
+          curl -fsSL "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64" \
+            -o ~/.docker/cli-plugins/docker-buildx  # -f: fail on HTTP errors instead of saving the error page as the binary
+          chmod +x ~/.docker/cli-plugins/docker-buildx
+          docker buildx version  # sanity check: with `set -e` the step aborts if the plugin cannot run
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
         with:

From e39c55a1580107a8ab3b6c2857ea94e82972aee8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Wed, 19 Nov 2025 09:36:25 +0100
Subject: [PATCH 06/15] feat: added uv

---
 .github/workflows/cicd.yml | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 51d6a88b..be23c0ad 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -163,13 +163,27 @@ jobs:
             huggingface-models-${{ matrix.model_to_cache }}-
             huggingface-models-
 
+      - name: Setup uv for model download
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+          cache-dependency-glob: "**/pyproject.toml"
+
+      - name: Install dependencies for model download
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
+        run: |
+          export ACLOCAL=aclocal
+          export AUTOMAKE=automake
+          uv sync
+
       - name: Download HuggingFace model
         if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           echo "Downloading model ${{ matrix.model_to_cache }} to cache..."
-          python3 -c "from huggingface_hub import snapshot_download; import os; os.environ['HF_TOKEN'] = '${{ secrets.HF_TOKEN }}'; snapshot_download('${{ matrix.model_to_cache }}', cache_dir='/home/ec2-user/.cache/huggingface'); print('Model cached successfully')" \
+          uv run python -c "from huggingface_hub import snapshot_download; snapshot_download('${{ matrix.model_to_cache }}', cache_dir='/home/ec2-user/.cache/huggingface'); print('Model cached successfully')" \
             || { echo "Failed to download model"; exit 1; }
           echo "Model download completed successfully"
 

From 99963d680878d8914ef0feb0c066330cf9fde079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Wed, 19 Nov 2025 09:52:42 +0100
Subject: [PATCH 07/15] fix: install automake and other build dependencies before uv sync

---
 .github/workflows/cicd.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index be23c0ad..24e5f311 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -173,6 +173,7 @@ jobs:
       - name: Install dependencies for model download
         if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
         run: |
+          apt-get update && apt-get install curl git pkg-config automake file python3.12-dev -y
           export ACLOCAL=aclocal
           export AUTOMAKE=automake
           uv sync

From d42456b9d0a7cbf1f65c7b7cb6219b979d9db4ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Wed, 19 Nov 2025 10:40:32 +0100
Subject: [PATCH 08/15] fix: nilai build cache

---
 .github/workflows/cicd.yml | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 24e5f311..c10a2031 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -195,15 +195,25 @@ jobs:
           # Convert repository name to lowercase for Docker registry compatibility
           REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
 
-          # Set cache and build args based on component
-          CACHE_FROM="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache"
-          CACHE_TO="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache,mode=max"
+          # Set cache references
+          CACHE_REF="ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache"
+
+          # Check if cache exists and is accessible
+          echo "Checking cache availability..."
+          CACHE_ARGS=""
+          if docker manifest inspect ${CACHE_REF} >/dev/null 2>&1; then
+            echo "✅ Cache found, using registry cache"
+            CACHE_ARGS="--cache-from=type=registry,ref=${CACHE_REF} --cache-to=type=registry,ref=${CACHE_REF},mode=max"
+          else
+            echo "⚠️  No cache found or cache inaccessible, building without import cache"
+            CACHE_ARGS="--cache-to=type=registry,ref=${CACHE_REF},mode=max"
+          fi
 
+          # Build with appropriate cache configuration
           docker buildx build \
             -t nillion/nilai-${{ matrix.component }}:latest \
             -f docker/${{ matrix.component }}.Dockerfile \
-            --cache-from=${CACHE_FROM} \
-            --cache-to=${CACHE_TO} \
+            ${CACHE_ARGS} \
             --load \
             ${{ matrix.build_args || '' }} \
             .

From 5f1d23176e0f24ab5d48a6074d438a2411c54286 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Wed, 19 Nov 2025 10:53:57 +0100
Subject: [PATCH 09/15] feat: retry logic + diagnosis

---
 .github/workflows/cicd.yml | 67 +++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index c10a2031..4136afcc 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -140,6 +140,16 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
+      - name: Check system resources
+        run: |
+          echo "=== System Resources ==="
+          df -h
+          free -h
+          echo "=== Docker Info ==="
+          docker info
+          echo "=== Docker System Usage ==="
+          docker system df
+
       - name: Pre-pull Docker base image (for vllm)
         if: matrix.component == 'vllm'
         run: |
@@ -209,14 +219,55 @@ jobs:
             CACHE_ARGS="--cache-to=type=registry,ref=${CACHE_REF},mode=max"
           fi
 
-          # Build with appropriate cache configuration
-          docker buildx build \
-            -t nillion/nilai-${{ matrix.component }}:latest \
-            -f docker/${{ matrix.component }}.Dockerfile \
-            ${CACHE_ARGS} \
-            --load \
-            ${{ matrix.build_args || '' }} \
-            .
+          # build_with_retry: run the buildx build up to 3 times, using less registry cache on each retry; returns 0 on success, 1 if all attempts fail
+          build_with_retry() {
+            local attempt=1
+            local max_attempts=3
+
+            while [ $attempt -le $max_attempts ]; do
+              echo "🔄 Build attempt $attempt of $max_attempts..."
+
+              if docker buildx build \
+                -t nillion/nilai-${{ matrix.component }}:latest \
+                -f docker/${{ matrix.component }}.Dockerfile \
+                ${CACHE_ARGS} \
+                --load \
+                ${{ matrix.build_args || '' }} \
+                .; then
+                echo "✅ Build succeeded on attempt $attempt"
+                return 0
+              else
+                echo "❌ Build failed on attempt $attempt"
+                if [ $attempt -lt $max_attempts ]; then
+                  echo "⏳ Waiting 30 seconds before retry..."
+                  sleep 30
+
+                  # Prune unused Docker data left behind by the failed attempt (best effort)
+                  echo "🧹 Cleaning up Docker system..."
+                  docker system prune -f || true
+
+                  # $attempt is the attempt that just failed: after the 1st failure, retry with cache import only
+                  if [ $attempt -eq 1 ]; then
+                    echo "⚠️  Disabling cache export for retry..."
+                    CACHE_ARGS="--cache-from=type=registry,ref=${CACHE_REF}"
+                  fi
+
+                  # After the 2nd failure, run the final attempt with no registry cache at all
+                  if [ $attempt -eq 2 ]; then
+                    echo "⚠️  Disabling all cache for final retry..."
+                    CACHE_ARGS=""
+                  fi
+                fi
+                attempt=$((attempt + 1))
+              fi
+            done
+
+            echo "💥 All build attempts failed"
+            return 1
+          }
+
+          # Execute build with retry logic
+          build_with_retry
 
           echo "✅ ${{ matrix.component }} build completed successfully"
 

From c56633247acef94bd0d1dd5e9be4c57ae8fcf76d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= <jose.cabrero@nillion.com>
Date: Wed, 19 Nov 2025 11:14:57 +0100
Subject: [PATCH 10/15] feat: disable unattended upgrades

---
 .github/workflows/cicd.yml | 52 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 4136afcc..6e1cb701 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -117,6 +117,32 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Disable unattended upgrades
+        run: |
+          echo "Disabling unattended upgrades to prevent interference with CI builds..."
+
+          # Stop unattended-upgrades service
+          sudo systemctl stop unattended-upgrades || true
+          sudo systemctl disable unattended-upgrades || true
+
+          # Kill any running unattended-upgrades processes
+          sudo pkill -f unattended-upgrade || true
+
+          # Remove or disable the unattended-upgrades configuration
+          sudo systemctl mask unattended-upgrades || true
+
+          # Wait for any ongoing package operations to complete
+          while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
+            echo "Waiting for package manager lock to be released..."
+            sleep 5
+          done
+
+          # Disable automatic updates in APT configuration
+          echo 'APT::Periodic::Update-Package-Lists "0";' | sudo tee /etc/apt/apt.conf.d/20auto-upgrades
+          echo 'APT::Periodic::Unattended-Upgrade "0";' | sudo tee -a /etc/apt/apt.conf.d/20auto-upgrades
+
+          echo "✅ Unattended upgrades disabled successfully"
+
       - name: Install Docker Buildx plugin
         run: |
           set -euo pipefail
@@ -279,6 +305,32 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Disable unattended upgrades
+        run: |
+          echo "Disabling unattended upgrades to prevent interference with CI builds..."
+
+          # Stop unattended-upgrades service
+          sudo systemctl stop unattended-upgrades || true
+          sudo systemctl disable unattended-upgrades || true
+
+          # Kill any running unattended-upgrades processes
+          sudo pkill -f unattended-upgrade || true
+
+          # Remove or disable the unattended-upgrades configuration
+          sudo systemctl mask unattended-upgrades || true
+
+          # Wait for any ongoing package operations to complete
+          while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
+            echo "Waiting for package manager lock to be released..."
+            sleep 5
+          done
+
+          # Disable automatic updates in APT configuration
+          echo 'APT::Periodic::Update-Package-Lists "0";' | sudo tee /etc/apt/apt.conf.d/20auto-upgrades
+          echo 'APT::Periodic::Unattended-Upgrade "0";' | sudo tee -a /etc/apt/apt.conf.d/20auto-upgrades
+
+          echo "✅ Unattended upgrades disabled successfully"
+
       - uses: astral-sh/setup-uv@v7
         with:
           enable-cache: true

From 947e67eec367d7f05dc8b223caf9280a6dd77de9 Mon Sep 17 00:00:00 2001
From: blefo <lefort.baptiste@yahoo.fr>
Date: Wed, 26 Nov 2025 16:14:30 +0100
Subject: [PATCH 11/15] feat: enhance model caching with GHCR integration and
 debugging steps

---
 .github/workflows/cicd.yml | 79 ++++++++++++++++++++++++++++++++------
 1 file changed, 67 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 6e1cb701..cf30688d 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -188,26 +188,61 @@ jobs:
           mkdir -p /home/ec2-user/.cache/huggingface
           echo "Cache directory created at /home/ec2-user/.cache/huggingface"
 
-      - name: Cache HuggingFace models
+      - name: Restore model from GHCR
         if: matrix.component == 'vllm' && matrix.model_to_cache != ''
-        uses: actions/cache@v4
-        id: cache-hf-models
-        with:
-          path: /home/ec2-user/.cache/huggingface
-          key: huggingface-models-${{ matrix.model_to_cache }}-v1
-          restore-keys: |
-            huggingface-models-${{ matrix.model_to_cache }}-
-            huggingface-models-
+        id: restore-model
+        run: |
+          MODEL_CACHE_DIR="/home/ec2-user/.cache/huggingface"
+          HF_DIR_NAME="models--$(echo ${{ matrix.model_to_cache }} | sed 's/\//--/g')"
+          FULL_PATH="$MODEL_CACHE_DIR/$HF_DIR_NAME"
+
+          if [ -d "$FULL_PATH" ]; then
+            echo "Model found on host filesystem at $FULL_PATH"
+            echo "Skipping GHCR pull to save I/O."
+            echo "cache-hit=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1"
+          MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]')
+
+          echo "Attempting to pull model cache image: $MODEL_IMAGE"
+
+          if docker pull "$MODEL_IMAGE"; then
+            echo "Image found. Copying model files to host..."
+            mkdir -p "$MODEL_CACHE_DIR"
+
+            CONTAINER_ID=$(docker create "$MODEL_IMAGE" /bin/true)  # cache image is FROM scratch (no CMD): create needs a placeholder command; it is never run
+            docker cp "$CONTAINER_ID":/model/. "$MODEL_CACHE_DIR/"  # copy the image's /model tree into the host cache directory
+            docker rm "$CONTAINER_ID"  # the container was only a handle for docker cp
+            echo "Model restored from GHCR."
+            echo "cache-hit=true" >> $GITHUB_OUTPUT
+          else
+            echo "Model cache not found in GHCR."
+            echo "cache-hit=false" >> $GITHUB_OUTPUT
+          fi
 
+      - name: DEBUG - Verify Cache Structure
+        if: matrix.component == 'vllm'
+        run: |
+          echo "Listing /home/ec2-user/.cache/huggingface contents:"
+          ls -F /home/ec2-user/.cache/huggingface/ || echo "Directory not found"
+          
+          echo "Checking for specific model folder:"
+          ls -F /home/ec2-user/.cache/huggingface/models--openai--gpt-oss-20b/ || echo "Model folder not found"
+          
+          echo "Checking snapshot content (first few files):"
+          find /home/ec2-user/.cache/huggingface -maxdepth 4 | head -n 10    
+          
       - name: Setup uv for model download
-        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true'
         uses: astral-sh/setup-uv@v7
         with:
           enable-cache: true
           cache-dependency-glob: "**/pyproject.toml"
 
       - name: Install dependencies for model download
-        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true'
         run: |
           apt-get update && apt-get install curl git pkg-config automake file python3.12-dev -y
           export ACLOCAL=aclocal
@@ -215,7 +250,7 @@ jobs:
           uv sync
 
       - name: Download HuggingFace model
-        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true'
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true'
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
@@ -224,6 +259,26 @@ jobs:
             || { echo "Failed to download model"; exit 1; }
           echo "Model download completed successfully"
 
+      - name: Save model to GHCR
+        if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true'
+        run: |
+          echo "Saving model to GHCR..."
+
+          MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1"
+          MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]')
+
+          echo "FROM scratch" > Dockerfile.model
+          echo "COPY . /model" >> Dockerfile.model
+
+          cd /home/ec2-user/.cache/huggingface  # the build context (".") is the HuggingFace cache root
+
+          echo "Building cache image..."
+          docker build -t "$MODEL_IMAGE" -f "$GITHUB_WORKSPACE/Dockerfile.model" .
+
+          echo "Pushing cache image to GHCR..."
+          docker push "$MODEL_IMAGE"
+          echo "Model cached to GHCR."
+
       - name: Build ${{ matrix.component }} image
         run: |
           echo "Building ${{ matrix.component }} image..."

From 2747772888eef82ffbd6434a0874f66e48c50fdd Mon Sep 17 00:00:00 2001
From: blefo <lefort.baptiste@yahoo.fr>
Date: Wed, 26 Nov 2025 16:29:04 +0100
Subject: [PATCH 12/15] fix: remove the debug step

---
 .github/workflows/cicd.yml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index cf30688d..89537ed4 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -222,18 +222,6 @@ jobs:
             echo "cache-hit=false" >> $GITHUB_OUTPUT
           fi
 
-      - name: DEBUG - Verify Cache Structure
-        if: matrix.component == 'vllm'
-        run: |
-          echo "Listing /home/ec2-user/.cache/huggingface contents:"
-          ls -F /home/ec2-user/.cache/huggingface/ || echo "Directory not found"
-          
-          echo "Checking for specific model folder:"
-          ls -F /home/ec2-user/.cache/huggingface/models--openai--gpt-oss-20b/ || echo "Model folder not found"
-          
-          echo "Checking snapshot content (first few files):"
-          find /home/ec2-user/.cache/huggingface -maxdepth 4 | head -n 10    
-          
       - name: Setup uv for model download
         if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true'
         uses: astral-sh/setup-uv@v7

From 44884fe6ba449af938fd6a8a059480c3c5e342b8 Mon Sep 17 00:00:00 2001
From: blefo <lefort.baptiste@yahoo.fr>
Date: Wed, 26 Nov 2025 16:31:51 +0100
Subject: [PATCH 13/15] fix: resolve conflict

---
 .github/workflows/cicd.yml | 12 ++++++++++++
 docker/vllm.Dockerfile     | 17 ++++++++++++-----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 89537ed4..cf30688d 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -222,6 +222,18 @@ jobs:
             echo "cache-hit=false" >> $GITHUB_OUTPUT
           fi
 
+      - name: DEBUG - Verify Cache Structure
+        if: matrix.component == 'vllm'
+        run: |
+          echo "Listing /home/ec2-user/.cache/huggingface contents:"
+          ls -F /home/ec2-user/.cache/huggingface/ || echo "Directory not found"
+          
+          echo "Checking for specific model folder:"
+          ls -F /home/ec2-user/.cache/huggingface/models--openai--gpt-oss-20b/ || echo "Model folder not found"
+          
+          echo "Checking snapshot content (first few files):"
+          find /home/ec2-user/.cache/huggingface -maxdepth 4 | head -n 10    
+          
       - name: Setup uv for model download
         if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true'
         uses: astral-sh/setup-uv@v7
diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile
index 2b7018d9..c7602047 100644
--- a/docker/vllm.Dockerfile
+++ b/docker/vllm.Dockerfile
@@ -1,5 +1,15 @@
-FROM vllm/vllm-openai:v0.10.1
+FROM vllm/vllm-openai:v0.11.2
 
+# # Specify model name and path during build
+# ARG MODEL_NAME=llama_1b_cpu
+# ARG MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct
+
+# # Set environment variables
+# ENV MODEL_NAME=${MODEL_NAME}
+# ENV MODEL_PATH=${MODEL_PATH}
+# ENV EXEC_PATH=nilai_models.models.${MODEL_NAME}:app
+
+ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 COPY --link . /daemon/
 COPY --link vllm_templates /opt/vllm/templates
 
@@ -13,12 +23,9 @@ RUN apt-get update && \
     apt-get autoremove && \
     rm -rf /var/lib/apt/lists/*
 
-# Create cache directory structure (will be mounted from host at runtime)
-RUN mkdir -p /root/.cache/huggingface
-
 # Expose port 8000 for incoming requests
 EXPOSE 8000
 
 ENTRYPOINT ["bash", "run.sh"]
 
-CMD [""]
+CMD [""]
\ No newline at end of file

From ab1e28d78116b83631b824341790c83cb70b26c9 Mon Sep 17 00:00:00 2001
From: blefo <lefort.baptiste@yahoo.fr>
Date: Thu, 27 Nov 2025 10:29:08 +0100
Subject: [PATCH 14/15] refactor: improve model image tagging in CI/CD workflow

---
 .github/workflows/cicd.yml | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index cf30688d..5efc6b4d 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -180,7 +180,7 @@ jobs:
         if: matrix.component == 'vllm'
         run: |
           echo "Pre-pulling vllm base image to avoid rate limiting during build..."
-          docker pull vllm/vllm-openai:v0.10.1
+          docker pull vllm/vllm-openai:v0.11.2
 
       - name: Setup HuggingFace cache directory
         if: matrix.component == 'vllm' && matrix.model_to_cache != ''
@@ -203,8 +203,10 @@ jobs:
             exit 0
           fi
 
-          MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1"
-          MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]')
+          OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+          RAW_TAG="${{ matrix.model_to_cache }}-v1"
+          SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g')
+          MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}"
 
           echo "Attempting to pull model cache image: $MODEL_IMAGE"
 
@@ -264,8 +266,12 @@ jobs:
         run: |
           echo "Saving model to GHCR..."
 
-          MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1"
-          MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]')
+          OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+          RAW_TAG="${{ matrix.model_to_cache }}-v1"
+          SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g')
+          MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}"
+
+          echo "Using cache image: $MODEL_IMAGE"
 
           echo "FROM scratch" > Dockerfile.model
           echo "COPY . /model" >> Dockerfile.model

From 348e6ef206941fdd337b8a9f91b127aee8385cc4 Mon Sep 17 00:00:00 2001
From: blefo <lefort.baptiste@yahoo.fr>
Date: Thu, 27 Nov 2025 12:18:29 +0100
Subject: [PATCH 15/15] refactor: update CI/CD workflow to use repository name
 for model image tagging

---
 .github/workflows/cicd.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 5efc6b4d..04ebbebd 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -164,7 +164,7 @@ jobs:
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+          password: ${{ github.token }}
 
       - name: Check system resources
         run: |
@@ -203,10 +203,10 @@ jobs:
             exit 0
           fi
 
-          OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+          REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
           RAW_TAG="${{ matrix.model_to_cache }}-v1"
           SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g')
-          MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}"
+          MODEL_IMAGE="ghcr.io/${REPO_LOWER}/nilai-model-cache:${SAFE_TAG}"
 
           echo "Attempting to pull model cache image: $MODEL_IMAGE"
 
@@ -266,10 +266,10 @@ jobs:
         run: |
           echo "Saving model to GHCR..."
 
-          OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+          REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
           RAW_TAG="${{ matrix.model_to_cache }}-v1"
           SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g')
-          MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}"
+          MODEL_IMAGE="ghcr.io/${REPO_LOWER}/nilai-model-cache:${SAFE_TAG}"
 
           echo "Using cache image: $MODEL_IMAGE"