From c2498122a1df45cf5c70b6a0f1cfe0dc806946af Mon Sep 17 00:00:00 2001 From: Emanuel Joivo Date: Mon, 29 Sep 2025 14:49:35 -0300 Subject: [PATCH] feat: implement retry mechanism with Slack notifications for concurrency failures - Add exponential backoff retry logic for failed pushes (up to 3 attempts, waiting 5s and then 10s between them) - Implement git pull --rebase to handle concurrent changes gracefully - Add comprehensive error handling for fetch, pull, commit, and push operations - Add rich Slack notifications with detailed failure context - Include repository, environment, app name, image tag, and workflow run links - Add optional slack_webhook_url input parameter - Maintain backward compatibility with existing workflows - Add detailed logging for each retry attempt Resolves concurrency failures in deployment-catalog commits when multiple deployments run simultaneously, reducing the need for manual intervention. --- action.yml | 4 ++ entrypoint.sh | 152 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 147 insertions(+), 9 deletions(-) diff --git a/action.yml b/action.yml index 6137265..516fae4 100644 --- a/action.yml +++ b/action.yml @@ -56,6 +56,10 @@ inputs: description: "GitHub Token with read-only access (usually GITHUB_TOKEN). Used to facilitate Docker builds that need to pull private repos." required: false default: "" + slack_webhook_url: + description: "Slack webhook URL for deployment failure notifications. Optional - if not provided, notifications will be skipped." 
+ required: false + default: "" runs: using: "docker" image: "Dockerfile" diff --git a/entrypoint.sh b/entrypoint.sh index eaf21e8..822b746 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -307,17 +307,151 @@ check_if_is_already_updated() { fi } -push() { +# Retry mechanism with exponential backoff +push_with_retry() { + local max_retries=3 + local base_delay=5 + local attempt=1 + cd "$DEPLOYMENT_REPO_PATH" - git fetch || exit 1 - if [[ $(git rev-parse HEAD) != $(git rev-parse @{u}) ]]; then - echo "Remote has changes, pulling them." - git pull || exit 1 - else - echo "Remote is up to date." + + while [[ $attempt -le $max_retries ]]; do + echo -e "${YELLOW}🔄 Attempt $attempt/$max_retries: Syncing with remote repository...${NC}" + + # Fetch latest changes + if ! git fetch; then + echo -e "${RED}❌ Failed to fetch from remote${NC}" + if [[ $attempt -eq $max_retries ]]; then + send_slack_notification "Failed to fetch from deployment-catalog after $max_retries attempts" + exit 1 + fi + ((attempt++)) + continue + fi + + # Check if remote has changes and pull with rebase + if [[ $(git rev-parse HEAD) != $(git rev-parse @{u}) ]]; then + echo -e "${YELLOW}📥 Remote has changes, pulling with rebase...${NC}" + if ! git pull --rebase; then + echo -e "${RED}❌ Failed to pull with rebase${NC}" + if [[ $attempt -eq $max_retries ]]; then + send_slack_notification "Failed to pull changes from deployment-catalog after $max_retries attempts" + exit 1 + fi + ((attempt++)) + continue + fi + else + echo -e "${GREEN}✅ Remote is up to date${NC}" + fi + + # Commit changes + echo -e "${YELLOW}💾 Committing changes...${NC}" + if ! 
git commit -m "chore(${APP_NAME}/${ENVIRONMENT}): updating image tag :)"; then + echo -e "${RED}❌ Failed to commit changes${NC}" + if [[ $attempt -eq $max_retries ]]; then + send_slack_notification "Failed to commit changes to deployment-catalog after $max_retries attempts" + exit 1 + fi + ((attempt++)) + continue + fi + + # Push changes + echo -e "${YELLOW}📤 Pushing changes...${NC}" + if git push; then + echo -e "${GREEN}✅ Successfully pushed changes to deployment-catalog${NC}" + return 0 + else + local push_exit_code=$? + echo -e "${RED}❌ Failed to push changes (exit code: $push_exit_code)${NC}" + + if [[ $attempt -eq $max_retries ]]; then + send_slack_notification "Failed to push changes to deployment-catalog after $max_retries attempts. Manual intervention required." + exit 1 + fi + + # Calculate exponential backoff delay + local delay=$((base_delay * (2 ** (attempt - 1)))) + echo -e "${YELLOW}⏳ Waiting ${delay}s before retry...${NC}" + sleep $delay + ((attempt++)) + fi + done +} + +# Slack notification function +send_slack_notification() { + local message="$1" + local slack_webhook="${INPUT_SLACK_WEBHOOK_URL:-}" + + if [[ -z "$slack_webhook" ]]; then + echo -e "${YELLOW}⚠️ Slack webhook not configured, skipping notification${NC}" + return 0 fi - git commit -m "chore(${APP_NAME}/${ENVIRONMENT}): updating image tag :)" || exit 1 - git push || exit 1 + + local payload=$(cat <", + "short": false + } + ], + "footer": "cluster-cd-action", + "ts": $(date +%s) + } + ] +} +EOF +) + + echo -e "${YELLOW}📢 Sending Slack notification...${NC}" + if curl -X POST -H 'Content-type: application/json' \ + --data "$payload" \ + "$slack_webhook" > /dev/null 2>&1; then + echo -e "${GREEN}✅ Slack notification sent successfully${NC}" + else + echo -e "${RED}❌ Failed to send Slack notification${NC}" + fi +} + +# Legacy push function for backward compatibility +push() { + push_with_retry } done_msg() {