From fb8b9a1ba0c31f17ebda6aacfde266f835e25013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Fri, 14 Nov 2025 10:53:27 +0100 Subject: [PATCH 01/15] chore: added CI model layer caching --- .github/workflows/cicd.yml | 37 ++++++++++++++++++++++++++++++++++++- docker/vllm.Dockerfile | 21 +++++++++++++-------- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 99857cc6..bf2ec049 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -110,14 +110,49 @@ jobs: include: - component: api build_args: "--target nilai --platform linux/amd64" + - component: vllm + model_to_cache: "openai/gpt-oss-20b" steps: - name: Checkout uses: actions/checkout@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build ${{ matrix.component }} image run: | echo "Building ${{ matrix.component }} image..." - docker build -t nillion/nilai-${{ matrix.component }}:latest -f docker/${{ matrix.component }}.Dockerfile ${{ matrix.build_args || '' }} . + + # Convert repository name to lowercase for Docker registry compatibility + REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') + + # Set cache and build args based on component + CACHE_FROM="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache" + CACHE_TO="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache,mode=max" + + # Add model caching for vllm component + EXTRA_BUILD_ARGS="" + if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then + EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }} --build-arg HF_TOKEN=${{ secrets.HF_TOKEN }}" + fi + + docker buildx build \ + -t nillion/nilai-${{ matrix.component }}:latest \ + -f docker/${{ matrix.component }}.Dockerfile \ + --cache-from=${CACHE_FROM} \ + --cache-to=${CACHE_TO} \ + --load \ + ${{ matrix.build_args || '' }} \ + ${EXTRA_BUILD_ARGS} \ + . + echo "✅ ${{ matrix.component }} build completed successfully" e2e-tests: diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile index eb938667..c3259434 100644 --- a/docker/vllm.Dockerfile +++ b/docker/vllm.Dockerfile @@ -1,13 +1,8 @@ FROM vllm/vllm-openai:v0.10.1 -# # Specify model name and path during build -# ARG MODEL_NAME=llama_1b_cpu -# ARG MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct - -# # Set environment variables -# ENV MODEL_NAME=${MODEL_NAME} -# ENV MODEL_PATH=${MODEL_PATH} -# ENV EXEC_PATH=nilai_models.models.${MODEL_NAME}:app +# Specify model to pre-download during build (optional, for caching) +ARG MODEL_TO_CACHE="" +ARG HF_TOKEN="" COPY --link . /daemon/ COPY --link vllm_templates /opt/vllm/templates @@ -22,6 +17,16 @@ RUN apt-get update && \ apt-get autoremove && \ rm -rf /var/lib/apt/lists/* +# Pre-download model if MODEL_TO_CACHE is provided +# This creates a cached layer with the model to avoid re-downloading in CI +RUN if [ -n "$MODEL_TO_CACHE" ]; then \ + echo "Pre-downloading model: $MODEL_TO_CACHE"; \ + export HF_TOKEN="${HF_TOKEN}"; \ + python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')"; \ + else \ + echo "No model specified for caching, will download at runtime"; \ + fi + # Expose port 8000 for incoming requests EXPOSE 8000 From 0bc77ce5cbe6b95420b8dfe42abe394cf40bf120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Fri, 14 Nov 2025 11:52:48 +0100 Subject: [PATCH 02/15] fix: removed secrets from build --- .github/workflows/cicd.yml | 11 ++++++++++- docker/vllm.Dockerfile | 11 +++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index bf2ec049..099d9ab0 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -11,6 +11,7 @@ on: permissions: id-token: write # Required for OIDC contents: read # Required for checkout + packages: write # Required for pushing cache layers to GHCR jobs: test: @@ -118,6 +119,9 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + with: + driver-opts: image=moby/buildkit:latest + buildkitd-flags: --allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host - name: Login to GitHub Container Registry uses: docker/login-action@v3 @@ -127,6 +131,8 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build ${{ matrix.component }} image + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | echo "Building ${{ matrix.component }} image..." @@ -139,8 +145,10 @@ jobs: # Add model caching for vllm component EXTRA_BUILD_ARGS="" + SECRET_ARGS="" if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then - EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }} --build-arg HF_TOKEN=${{ secrets.HF_TOKEN }}" + EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }}" + SECRET_ARGS="--secret id=hf_token,env=HF_TOKEN" fi docker buildx build \ @@ -151,6 +159,7 @@ jobs: --load \ ${{ matrix.build_args || '' }} \ ${EXTRA_BUILD_ARGS} \ + ${SECRET_ARGS} \ . echo "✅ ${{ matrix.component }} build completed successfully" diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile index c3259434..3f2d8fa3 100644 --- a/docker/vllm.Dockerfile +++ b/docker/vllm.Dockerfile @@ -2,7 +2,6 @@ FROM vllm/vllm-openai:v0.10.1 # Specify model to pre-download during build (optional, for caching) ARG MODEL_TO_CACHE="" -ARG HF_TOKEN="" COPY --link . /daemon/ COPY --link vllm_templates /opt/vllm/templates @@ -19,10 +18,14 @@ RUN apt-get update && \ # Pre-download model if MODEL_TO_CACHE is provided # This creates a cached layer with the model to avoid re-downloading in CI -RUN if [ -n "$MODEL_TO_CACHE" ]; then \ +RUN --mount=type=secret,id=hf_token \ + if [ -n "$MODEL_TO_CACHE" ]; then \ echo "Pre-downloading model: $MODEL_TO_CACHE"; \ - export HF_TOKEN="${HF_TOKEN}"; \ - python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')"; \ + if [ -f /run/secrets/hf_token ]; then \ + export HF_TOKEN="$(cat /run/secrets/hf_token)"; \ + fi; \ + python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')" \ + || { echo >&2 "ERROR: Failed to pre-download model '$MODEL_TO_CACHE'. Check your network connection, HF_TOKEN, and model name."; exit 1; }; \ else \ echo "No model specified for caching, will download at runtime"; \ fi From 2ed95a19165f0cc05fac942af5c02fc5032ad811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Fri, 14 Nov 2025 16:02:25 +0100 Subject: [PATCH 03/15] feat: cache runner model --- .github/workflows/cicd.yml | 43 ++++++++++++++----- .../docker-compose.gemma-4b-gpu.ci.yml | 4 +- .../compose/docker-compose.gpt-20b-gpu.ci.yml | 4 +- .../docker-compose.llama-1b-gpu.ci.yml | 4 +- .../compose/docker-compose.qwen-2b-gpu.ci.yml | 5 +-- docker/vllm.Dockerfile | 18 +------- 6 files changed, 38 insertions(+), 40 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 099d9ab0..961683f3 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -130,9 +130,40 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build ${{ matrix.component }} image + - name: Pre-pull Docker base image (for vllm) + if: matrix.component == 'vllm' + run: | + echo "Pre-pulling vllm base image to avoid rate limiting during build..." + docker pull vllm/vllm-openai:v0.10.1 + + - name: Setup HuggingFace cache directory + if: matrix.component == 'vllm' && matrix.model_to_cache != '' + run: | + mkdir -p /home/ec2-user/.cache/huggingface + echo "Cache directory created at /home/ec2-user/.cache/huggingface" + + - name: Cache HuggingFace models + if: matrix.component == 'vllm' && matrix.model_to_cache != '' + uses: actions/cache@v4 + id: cache-hf-models + with: + path: /home/ec2-user/.cache/huggingface + key: huggingface-models-${{ matrix.model_to_cache }}-v1 + restore-keys: | + huggingface-models-${{ matrix.model_to_cache }}- + huggingface-models- + + - name: Download HuggingFace model + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + echo "Downloading model ${{ matrix.model_to_cache }} to cache..." + python3 -c "from huggingface_hub import snapshot_download; import os; os.environ['HF_TOKEN'] = '${{ secrets.HF_TOKEN }}'; snapshot_download('${{ matrix.model_to_cache }}', cache_dir='/home/ec2-user/.cache/huggingface'); print('Model cached successfully')" \ + || { echo "Failed to download model"; exit 1; } + echo "Model download completed successfully" + + - name: Build ${{ matrix.component }} image run: | echo "Building ${{ matrix.component }} image..." @@ -143,14 +174,6 @@ jobs: CACHE_FROM="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache" CACHE_TO="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache,mode=max" - # Add model caching for vllm component - EXTRA_BUILD_ARGS="" - SECRET_ARGS="" - if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then - EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }}" - SECRET_ARGS="--secret id=hf_token,env=HF_TOKEN" - fi - docker buildx build \ -t nillion/nilai-${{ matrix.component }}:latest \ -f docker/${{ matrix.component }}.Dockerfile \ @@ -158,8 +181,6 @@ jobs: --cache-to=${CACHE_TO} \ --load \ ${{ matrix.build_args || '' }} \ - ${EXTRA_BUILD_ARGS} \ - ${SECRET_ARGS} \ . echo "✅ ${{ matrix.component }} build completed successfully" diff --git a/docker/compose/docker-compose.gemma-4b-gpu.ci.yml b/docker/compose/docker-compose.gemma-4b-gpu.ci.yml index 29423275..f80076a2 100644 --- a/docker/compose/docker-compose.gemma-4b-gpu.ci.yml +++ b/docker/compose/docker-compose.gemma-4b-gpu.ci.yml @@ -36,12 +36,10 @@ services: - VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True volumes: - - hugging_face_models:/root/.cache/huggingface + - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface # Mount runner's HF cache healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] interval: 30s retries: 3 start_period: 60s timeout: 10s -volumes: - hugging_face_models: diff --git a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml index dcfef4cb..9aa1dc47 100644 --- a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml +++ b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml @@ -34,12 +34,10 @@ services: - ETCD_PORT=2379 - TOOL_SUPPORT=true volumes: - - hugging_face_models:/root/.cache/huggingface # cache models + - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface # Mount runner's HF cache healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] interval: 30s retries: 10 start_period: 900s timeout: 15s -volumes: - hugging_face_models: diff --git a/docker/compose/docker-compose.llama-1b-gpu.ci.yml b/docker/compose/docker-compose.llama-1b-gpu.ci.yml index cca105f7..1c82d2a9 100644 --- a/docker/compose/docker-compose.llama-1b-gpu.ci.yml +++ b/docker/compose/docker-compose.llama-1b-gpu.ci.yml @@ -37,12 +37,10 @@ services: - TOOL_SUPPORT=true - CUDA_LAUNCH_BLOCKING=1 volumes: - - hugging_face_models:/root/.cache/huggingface # cache models + - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface # Mount runner's HF cache healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] interval: 30s retries: 3 start_period: 60s timeout: 10s -volumes: - hugging_face_models: diff --git a/docker/compose/docker-compose.qwen-2b-gpu.ci.yml b/docker/compose/docker-compose.qwen-2b-gpu.ci.yml index 7d040caf..14a31815 100644 --- a/docker/compose/docker-compose.qwen-2b-gpu.ci.yml +++ b/docker/compose/docker-compose.qwen-2b-gpu.ci.yml @@ -52,13 +52,10 @@ services: VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1" PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True" volumes: - - hugging_face_models:/root/.cache/huggingface + - /home/ec2-user/.cache/huggingface:/root/.cache/huggingface # Mount runner's HF cache healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] interval: 30s retries: 3 start_period: 60s timeout: 10s - -volumes: - hugging_face_models: diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile index 3f2d8fa3..2b7018d9 100644 --- a/docker/vllm.Dockerfile +++ b/docker/vllm.Dockerfile @@ -1,8 +1,5 @@ FROM vllm/vllm-openai:v0.10.1 -# Specify model to pre-download during build (optional, for caching) -ARG MODEL_TO_CACHE="" - COPY --link . /daemon/ COPY --link vllm_templates /opt/vllm/templates @@ -16,19 +13,8 @@ RUN apt-get update && \ apt-get autoremove && \ rm -rf /var/lib/apt/lists/* -# Pre-download model if MODEL_TO_CACHE is provided -# This creates a cached layer with the model to avoid re-downloading in CI -RUN --mount=type=secret,id=hf_token \ - if [ -n "$MODEL_TO_CACHE" ]; then \ - echo "Pre-downloading model: $MODEL_TO_CACHE"; \ - if [ -f /run/secrets/hf_token ]; then \ - export HF_TOKEN="$(cat /run/secrets/hf_token)"; \ - fi; \ - python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')" \ - || { echo >&2 "ERROR: Failed to pre-download model '$MODEL_TO_CACHE'. Check your network connection, HF_TOKEN, and model name."; exit 1; }; \ - else \ - echo "No model specified for caching, will download at runtime"; \ - fi +# Create cache directory structure (will be mounted from host at runtime) +RUN mkdir -p /root/.cache/huggingface # Expose port 8000 for incoming requests EXPOSE 8000 From 796fca01709eff5ca5b5b2803fe3299b3193dca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Fri, 14 Nov 2025 17:31:09 +0100 Subject: [PATCH 04/15] feat: improvements --- .github/workflows/cicd.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 961683f3..48459d9b 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -20,13 +20,13 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: astral-sh/setup-uv@v4 + - uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" - name: Cache dependencies - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.UV_CACHE_DIR }} key: ${{ runner.os }}-uv-${{ hashFiles('**/pyproject.toml') }} @@ -115,8 +115,7 @@ jobs: model_to_cache: "openai/gpt-oss-20b" steps: - name: Checkout - uses: actions/checkout@v2 - + uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: @@ -191,9 +190,9 @@ jobs: runs-on: ${{ needs.start-runner.outputs.label }} steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - - uses: astral-sh/setup-uv@v4 + - uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" From 83f82c059c401c2b5abef3d577bee1948af5aae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Fri, 14 Nov 2025 17:42:10 +0100 Subject: [PATCH 05/15] feat: added buildx --- .github/workflows/cicd.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 48459d9b..51d6a88b 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -116,6 +116,17 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + + - name: Install Docker Buildx plugin + run: | + set -euo pipefail + BUILDX_VERSION="v0.14.1" + mkdir -p ~/.docker/cli-plugins + curl -sSL "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64" \ + -o ~/.docker/cli-plugins/docker-buildx + chmod +x ~/.docker/cli-plugins/docker-buildx + docker buildx version + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: From e39c55a1580107a8ab3b6c2857ea94e82972aee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Wed, 19 Nov 2025 09:36:25 +0100 Subject: [PATCH 06/15] feat: added uv --- .github/workflows/cicd.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 51d6a88b..be23c0ad 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -163,13 +163,27 @@ jobs: huggingface-models-${{ matrix.model_to_cache }}- huggingface-models- + - name: Setup uv for model download + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" + + - name: Install dependencies for model download + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' + run: | + export ACLOCAL=aclocal + export AUTOMAKE=automake + uv sync + - name: Download HuggingFace model if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | echo "Downloading model ${{ matrix.model_to_cache }} to cache..." - python3 -c "from huggingface_hub import snapshot_download; import os; os.environ['HF_TOKEN'] = '${{ secrets.HF_TOKEN }}'; snapshot_download('${{ matrix.model_to_cache }}', cache_dir='/home/ec2-user/.cache/huggingface'); print('Model cached successfully')" \ + uv run python -c "from huggingface_hub import snapshot_download; import os; os.environ['HF_TOKEN'] = '${{ secrets.HF_TOKEN }}'; snapshot_download('${{ matrix.model_to_cache }}', cache_dir='/home/ec2-user/.cache/huggingface'); print('Model cached successfully')" \ || { echo "Failed to download model"; exit 1; } echo "Model download completed successfully" From 99963d680878d8914ef0feb0c066330cf9fde079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Wed, 19 Nov 2025 09:52:42 +0100 Subject: [PATCH 07/15] fix: autoconf --- .github/workflows/cicd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index be23c0ad..24e5f311 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -173,6 +173,7 @@ jobs: - name: Install dependencies for model download if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' run: | + apt-get update && apt-get install curl git pkg-config automake file python3.12-dev -y export ACLOCAL=aclocal export AUTOMAKE=automake uv sync From d42456b9d0a7cbf1f65c7b7cb6219b979d9db4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Wed, 19 Nov 2025 10:40:32 +0100 Subject: [PATCH 08/15] fix: nilai build cache --- .github/workflows/cicd.yml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 24e5f311..c10a2031 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -195,15 +195,25 @@ jobs: # Convert repository name to lowercase for Docker registry compatibility REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') - # Set cache and build args based on component - CACHE_FROM="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache" - CACHE_TO="type=registry,ref=ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache,mode=max" + # Set cache references + CACHE_REF="ghcr.io/${REPO_LOWER}/nilai-${{ matrix.component }}:buildcache" + + # Check if cache exists and is accessible + echo "Checking cache availability..." + CACHE_ARGS="" + if docker manifest inspect ${CACHE_REF} >/dev/null 2>&1; then + echo "✅ Cache found, using registry cache" + CACHE_ARGS="--cache-from=type=registry,ref=${CACHE_REF} --cache-to=type=registry,ref=${CACHE_REF},mode=max" + else + echo "⚠️ No cache found or cache inaccessible, building without import cache" + CACHE_ARGS="--cache-to=type=registry,ref=${CACHE_REF},mode=max" + fi + # Build with appropriate cache configuration docker buildx build \ -t nillion/nilai-${{ matrix.component }}:latest \ -f docker/${{ matrix.component }}.Dockerfile \ - --cache-from=${CACHE_FROM} \ - --cache-to=${CACHE_TO} \ + ${CACHE_ARGS} \ --load \ ${{ matrix.build_args || '' }} \ . From 5f1d23176e0f24ab5d48a6074d438a2411c54286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Wed, 19 Nov 2025 10:53:57 +0100 Subject: [PATCH 09/15] feat: retry logic + diagnosis --- .github/workflows/cicd.yml | 67 +++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index c10a2031..4136afcc 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -140,6 +140,16 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Check system resources + run: | + echo "=== System Resources ===" + df -h + free -h + echo "=== Docker Info ===" + docker info + echo "=== Docker System Usage ===" + docker system df + - name: Pre-pull Docker base image (for vllm) if: matrix.component == 'vllm' run: | @@ -209,14 +219,55 @@ jobs: CACHE_ARGS="--cache-to=type=registry,ref=${CACHE_REF},mode=max" fi - # Build with appropriate cache configuration - docker buildx build \ - -t nillion/nilai-${{ matrix.component }}:latest \ - -f docker/${{ matrix.component }}.Dockerfile \ - ${CACHE_ARGS} \ - --load \ - ${{ matrix.build_args || '' }} \ - . + # Function to build with retry logic + build_with_retry() { + local attempt=1 + local max_attempts=3 + + while [ $attempt -le $max_attempts ]; do + echo "🔄 Build attempt $attempt of $max_attempts..." + + if docker buildx build \ + -t nillion/nilai-${{ matrix.component }}:latest \ + -f docker/${{ matrix.component }}.Dockerfile \ + ${CACHE_ARGS} \ + --load \ + ${{ matrix.build_args || '' }} \ + .; then + echo "✅ Build succeeded on attempt $attempt" + return 0 + else + echo "❌ Build failed on attempt $attempt" + if [ $attempt -lt $max_attempts ]; then + echo "⏳ Waiting 30 seconds before retry..." + sleep 30 + + # Clean up any partial builds + echo "🧹 Cleaning up Docker system..." + docker system prune -f || true + + # On retry, disable cache export to reduce complexity + if [ $attempt -eq 2 ]; then + echo "⚠️ Disabling cache export for retry..." + CACHE_ARGS="--cache-from=type=registry,ref=${CACHE_REF}" + fi + + # On final retry, disable all cache + if [ $attempt -eq 3 ]; then + echo "⚠️ Disabling all cache for final retry..." + CACHE_ARGS="" + fi + fi + attempt=$((attempt + 1)) + fi + done + + echo "💥 All build attempts failed" + return 1 + } + + # Execute build with retry logic + build_with_retry echo "✅ ${{ matrix.component }} build completed successfully" From c56633247acef94bd0d1dd5e9be4c57ae8fcf76d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Cabrero-Holgueras?= Date: Wed, 19 Nov 2025 11:14:57 +0100 Subject: [PATCH 10/15] feat: disable unattended upgrades --- .github/workflows/cicd.yml | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 4136afcc..6e1cb701 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -117,6 +117,32 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Disable unattended upgrades + run: | + echo "Disabling unattended upgrades to prevent interference with CI builds..." + + # Stop unattended-upgrades service + sudo systemctl stop unattended-upgrades || true + sudo systemctl disable unattended-upgrades || true + + # Kill any running unattended-upgrades processes + sudo pkill -f unattended-upgrade || true + + # Remove or disable the unattended-upgrades configuration + sudo systemctl mask unattended-upgrades || true + + # Wait for any ongoing package operations to complete + while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do + echo "Waiting for package manager lock to be released..." + sleep 5 + done + + # Disable automatic updates in APT configuration + echo 'APT::Periodic::Update-Package-Lists "0";' | sudo tee /etc/apt/apt.conf.d/20auto-upgrades + echo 'APT::Periodic::Unattended-Upgrade "0";' | sudo tee -a /etc/apt/apt.conf.d/20auto-upgrades + + echo "✅ Unattended upgrades disabled successfully" + - name: Install Docker Buildx plugin run: | set -euo pipefail @@ -279,6 +305,32 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Disable unattended upgrades + run: | + echo "Disabling unattended upgrades to prevent interference with CI builds..." + + # Stop unattended-upgrades service + sudo systemctl stop unattended-upgrades || true + sudo systemctl disable unattended-upgrades || true + + # Kill any running unattended-upgrades processes + sudo pkill -f unattended-upgrade || true + + # Remove or disable the unattended-upgrades configuration + sudo systemctl mask unattended-upgrades || true + + # Wait for any ongoing package operations to complete + while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do + echo "Waiting for package manager lock to be released..." + sleep 5 + done + + # Disable automatic updates in APT configuration + echo 'APT::Periodic::Update-Package-Lists "0";' | sudo tee /etc/apt/apt.conf.d/20auto-upgrades + echo 'APT::Periodic::Unattended-Upgrade "0";' | sudo tee -a /etc/apt/apt.conf.d/20auto-upgrades + + echo "✅ Unattended upgrades disabled successfully" + - uses: astral-sh/setup-uv@v7 with: enable-cache: true From 947e67eec367d7f05dc8b223caf9280a6dd77de9 Mon Sep 17 00:00:00 2001 From: blefo Date: Wed, 26 Nov 2025 16:14:30 +0100 Subject: [PATCH 11/15] feat: enhance model caching with GHCR integration and debugging steps --- .github/workflows/cicd.yml | 79 ++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 6e1cb701..cf30688d 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -188,26 +188,61 @@ jobs: mkdir -p /home/ec2-user/.cache/huggingface echo "Cache directory created at /home/ec2-user/.cache/huggingface" - - name: Cache HuggingFace models + - name: Restore model from GHCR if: matrix.component == 'vllm' && matrix.model_to_cache != '' - uses: actions/cache@v4 - id: cache-hf-models - with: - path: /home/ec2-user/.cache/huggingface - key: huggingface-models-${{ matrix.model_to_cache }}-v1 - restore-keys: | - huggingface-models-${{ matrix.model_to_cache }}- - huggingface-models- + id: restore-model + run: | + MODEL_CACHE_DIR="/home/ec2-user/.cache/huggingface" + HF_DIR_NAME="models--$(echo ${{ matrix.model_to_cache }} | sed 's/\//--/g')" + FULL_PATH="$MODEL_CACHE_DIR/$HF_DIR_NAME" + + if [ -d "$FULL_PATH" ]; then + echo "Model found on host filesystem at $FULL_PATH" + echo "Skipping GHCR pull to save I/O." + echo "cache-hit=true" >> $GITHUB_OUTPUT + exit 0 + fi + + MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1" + MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]') + + echo "Attempting to pull model cache image: $MODEL_IMAGE" + + if docker pull "$MODEL_IMAGE"; then + echo "Image found. Copying model files to host..." + mkdir -p "$MODEL_CACHE_DIR" + + CONTAINER_ID=$(docker create "$MODEL_IMAGE") + docker cp "$CONTAINER_ID":/model/. "$MODEL_CACHE_DIR/" + docker rm "$CONTAINER_ID" + echo "Model restored from GHCR." + echo "cache-hit=true" >> $GITHUB_OUTPUT + else + echo "Model cache not found in GHCR." + echo "cache-hit=false" >> $GITHUB_OUTPUT + fi + - name: DEBUG - Verify Cache Structure + if: matrix.component == 'vllm' + run: | + echo "Listing /home/ec2-user/.cache/huggingface contents:" + ls -F /home/ec2-user/.cache/huggingface/ || echo "Directory not found" + + echo "Checking for specific model folder:" + ls -F /home/ec2-user/.cache/huggingface/models--openai--gpt-oss-20b/ || echo "Model folder not found" + + echo "Checking snapshot content (first few files):" + find /home/ec2-user/.cache/huggingface -maxdepth 4 | head -n 10 + - name: Setup uv for model download - if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true' uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "**/pyproject.toml" - name: Install dependencies for model download - if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true' run: | apt-get update && apt-get install curl git pkg-config automake file python3.12-dev -y export ACLOCAL=aclocal @@ -215,7 +250,7 @@ jobs: uv sync - name: Download HuggingFace model - if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.cache-hf-models.outputs.cache-hit != 'true' + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -224,6 +259,26 @@ jobs: || { echo "Failed to download model"; exit 1; } echo "Model download completed successfully" + - name: Save model to GHCR + if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true' + run: | + echo "Saving model to GHCR..." + + MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1" + MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]') + + echo "FROM scratch" > Dockerfile.model + echo "COPY . /model" >> Dockerfile.model + + cd /home/ec2-user/.cache/huggingface + + echo "Building cache image..." + docker build -t "$MODEL_IMAGE" -f $GITHUB_WORKSPACE/Dockerfile.model . + + echo "Pushing cache image to GHCR..." + docker push "$MODEL_IMAGE" + echo "Model cached to GHCR." + - name: Build ${{ matrix.component }} image run: | echo "Building ${{ matrix.component }} image..." From 2747772888eef82ffbd6434a0874f66e48c50fdd Mon Sep 17 00:00:00 2001 From: blefo Date: Wed, 26 Nov 2025 16:29:04 +0100 Subject: [PATCH 12/15] fix: remove the debug step --- .github/workflows/cicd.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index cf30688d..89537ed4 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -222,18 +222,6 @@ jobs: echo "cache-hit=false" >> $GITHUB_OUTPUT fi - - name: DEBUG - Verify Cache Structure - if: matrix.component == 'vllm' - run: | - echo "Listing /home/ec2-user/.cache/huggingface contents:" - ls -F /home/ec2-user/.cache/huggingface/ || echo "Directory not found" - - echo "Checking for specific model folder:" - ls -F /home/ec2-user/.cache/huggingface/models--openai--gpt-oss-20b/ || echo "Model folder not found" - - echo "Checking snapshot content (first few files):" - find /home/ec2-user/.cache/huggingface -maxdepth 4 | head -n 10 - - name: Setup uv for model download if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true' uses: astral-sh/setup-uv@v7 From 44884fe6ba449af938fd6a8a059480c3c5e342b8 Mon Sep 17 00:00:00 2001 From: blefo Date: Wed, 26 Nov 2025 16:31:51 +0100 Subject: [PATCH 13/15] fix: resolve conflict --- .github/workflows/cicd.yml | 12 ++++++++++++ docker/vllm.Dockerfile | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 89537ed4..cf30688d 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -222,6 +222,18 @@ jobs: echo "cache-hit=false" >> $GITHUB_OUTPUT fi + - name: DEBUG - Verify Cache Structure + if: matrix.component == 'vllm' + run: | + echo "Listing /home/ec2-user/.cache/huggingface contents:" + ls -F /home/ec2-user/.cache/huggingface/ || echo "Directory not found" + + echo "Checking for specific model folder:" + ls -F /home/ec2-user/.cache/huggingface/models--openai--gpt-oss-20b/ || echo "Model folder not found" + + echo "Checking snapshot content (first few files):" + find /home/ec2-user/.cache/huggingface -maxdepth 4 | head -n 10 + - name: Setup uv for model download if: matrix.component == 'vllm' && matrix.model_to_cache != '' && steps.restore-model.outputs.cache-hit != 'true' uses: astral-sh/setup-uv@v7 diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile index 2b7018d9..c7602047 100644 --- a/docker/vllm.Dockerfile +++ b/docker/vllm.Dockerfile @@ -1,5 +1,15 @@ -FROM vllm/vllm-openai:v0.10.1 +FROM vllm/vllm-openai:v0.11.2 +# # Specify model name and path during build +# ARG MODEL_NAME=llama_1b_cpu +# ARG MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct + +# # Set environment variables +# ENV MODEL_NAME=${MODEL_NAME} +# ENV MODEL_PATH=${MODEL_PATH} +# ENV EXEC_PATH=nilai_models.models.${MODEL_NAME}:app + +ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True COPY --link . /daemon/ COPY --link vllm_templates /opt/vllm/templates @@ -13,12 +23,9 @@ RUN apt-get update && \ apt-get autoremove && \ rm -rf /var/lib/apt/lists/* -# Create cache directory structure (will be mounted from host at runtime) -RUN mkdir -p /root/.cache/huggingface - # Expose port 8000 for incoming requests EXPOSE 8000 ENTRYPOINT ["bash", "run.sh"] -CMD [""] +CMD [""] \ No newline at end of file From ab1e28d78116b83631b824341790c83cb70b26c9 Mon Sep 17 00:00:00 2001 From: blefo Date: Thu, 27 Nov 2025 10:29:08 +0100 Subject: [PATCH 14/15] refactor: improve model image tagging in CI/CD workflow --- .github/workflows/cicd.yml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index cf30688d..5efc6b4d 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -180,7 +180,7 @@ jobs: if: matrix.component == 'vllm' run: | echo "Pre-pulling vllm base image to avoid rate limiting during build..." - docker pull vllm/vllm-openai:v0.10.1 + docker pull vllm/vllm-openai:v0.11.2 - name: Setup HuggingFace cache directory if: matrix.component == 'vllm' && matrix.model_to_cache != '' @@ -203,8 +203,10 @@ jobs: exit 0 fi - MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1" - MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]') + OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + RAW_TAG="${{ matrix.model_to_cache }}-v1" + SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g') + MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}" echo "Attempting to pull model cache image: $MODEL_IMAGE" @@ -264,8 +266,12 @@ jobs: run: | echo "Saving model to GHCR..." - MODEL_IMAGE="ghcr.io/${{ github.repository_owner }}/nilai-model-cache:${{ matrix.model_to_cache }}-v1" - MODEL_IMAGE=$(echo "$MODEL_IMAGE" | tr '[:upper:]' '[:lower:]') + OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + RAW_TAG="${{ matrix.model_to_cache }}-v1" + SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g') + MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}" + + echo "Using cache image: $MODEL_IMAGE" echo "FROM scratch" > Dockerfile.model echo "COPY . /model" >> Dockerfile.model From 348e6ef206941fdd337b8a9f91b127aee8385cc4 Mon Sep 17 00:00:00 2001 From: blefo Date: Thu, 27 Nov 2025 12:18:29 +0100 Subject: [PATCH 15/15] refactor: update CI/CD workflow to use repository name for model image tagging --- .github/workflows/cicd.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 5efc6b4d..04ebbebd 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -164,7 +164,7 @@ jobs: with: registry: ghcr.io username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + password: ${{ github.token }} - name: Check system resources run: | @@ -203,10 +203,10 @@ jobs: exit 0 fi - OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') RAW_TAG="${{ matrix.model_to_cache }}-v1" SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g') - MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}" + MODEL_IMAGE="ghcr.io/${REPO_LOWER}/nilai-model-cache:${SAFE_TAG}" echo "Attempting to pull model cache image: $MODEL_IMAGE" @@ -266,10 +266,10 @@ jobs: run: | echo "Saving model to GHCR..." - OWNER_LOWER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') RAW_TAG="${{ matrix.model_to_cache }}-v1" SAFE_TAG=$(echo "$RAW_TAG" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9_.-]/-/g') - MODEL_IMAGE="ghcr.io/${OWNER_LOWER}/nilai-model-cache:${SAFE_TAG}" + MODEL_IMAGE="ghcr.io/${REPO_LOWER}/nilai-model-cache:${SAFE_TAG}" echo "Using cache image: $MODEL_IMAGE"