From edfc381768b2519a1ed556fda2133a30a1e2549d Mon Sep 17 00:00:00 2001 From: ssvoss Date: Mon, 22 Dec 2025 11:39:06 -0500 Subject: [PATCH 01/11] move normalize baseline to shared script --- .github/workflows/run_semgrep_scan.yml | 97 ++++---- package-lock.json | 45 ++++ package.json | 3 + .../index.integration.test.js | 11 +- .../index.integration.test.js | 11 +- scripts/shared/normalize-push-baseline.js | 120 ++++++++++ .../shared/normalize-push-baseline.test.js | 215 ++++++++++++++++++ scripts/util/test-helpers.js | 15 ++ 8 files changed, 438 insertions(+), 79 deletions(-) create mode 100644 scripts/shared/normalize-push-baseline.js create mode 100644 scripts/shared/normalize-push-baseline.test.js create mode 100644 scripts/util/test-helpers.js diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index 6f66db9..e2617fb 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -8,49 +8,49 @@ on: workflow_call: inputs: commit_identifier: - description: "Commit SHA or ref to scan (default: current ref)" + description: 'Commit SHA or ref to scan (default: current ref)' type: string required: true cancel_in_progress: - description: "Cancel in-progress run for the same ref" + description: 'Cancel in-progress run for the same ref' type: boolean default: true semgrep_config: - description: "Rulesets to run with Semgrep" + description: 'Rulesets to run with Semgrep' type: string - default: "p/default" + default: 'p/default' fail_severity: - description: "error | warning | info" + description: 'error | warning | info' type: string - default: "error" + default: 'error' scan_mode: description: What should Semgrep scan? "full | diff | baseline" type: string - default: "full" + default: 'full' pr_filter_mode: description: What should reviewdog display (does NOT change what Semgrep scans)? 
"added | diff_context | nofilter" type: string - default: "added" + default: 'added' baseline_ref: - description: "Ref for diff/baseline (e.g., origin/main)" + description: 'Ref for diff/baseline (e.g., origin/main)' type: string - default: "origin/main" + default: 'origin/main' pr_reporter: - description: "review output: github-pr-review | github-pr-check" + description: 'review output: github-pr-review | github-pr-check' type: string - default: "github-pr-review" + default: 'github-pr-review' outputs: total_findings: - description: "Total number of findings" + description: 'Total number of findings' value: ${{ jobs.semgrep.outputs.total_findings }} error_count: - description: "Number of ERROR findings" + description: 'Number of ERROR findings' value: ${{ jobs.semgrep.outputs.error_count }} warning_count: - description: "Number of WARNING findings" + description: 'Number of WARNING findings' value: ${{ jobs.semgrep.outputs.warning_count }} info_count: - description: "Number of INFO findings" + description: 'Number of INFO findings' value: ${{ jobs.semgrep.outputs.info_count }} workflow_dispatch: @@ -74,9 +74,9 @@ jobs: outputs: total_findings: ${{ steps.semgrep_metrics.outputs.total }} - error_count: ${{ steps.semgrep_metrics.outputs.errors }} - warning_count: ${{ steps.semgrep_metrics.outputs.warnings }} - info_count: ${{ steps.semgrep_metrics.outputs.info }} + error_count: ${{ steps.semgrep_metrics.outputs.errors }} + warning_count: ${{ steps.semgrep_metrics.outputs.warnings }} + info_count: ${{ steps.semgrep_metrics.outputs.info }} steps: - name: Checkout code @@ -93,47 +93,29 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} commit-identifier: ${{ inputs.commit_identifier }} - - name: Compute baseline (for diff/baseline) + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version-file: .nvmrc + + - name: Install dependencies + run: npm ci --omit=dev + + - name: Normalize diff/baseline (push vs PR) if: ${{ inputs.scan_mode != 'full' }} + id: 
normalize_baseline env: - EVENT_NAME: ${{ github.event_name }} - BASE_REF: ${{ github.base_ref }} - BASELINE_REF: ${{ inputs.baseline_ref }} + INPUT_BASELINE: ${{ inputs.baseline_ref }} HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} - run: | - set -Eeuo pipefail - - if [[ "$EVENT_NAME" == "pull_request" || "$HAS_PR" == "true" ]]; then - # Resolve the PR base branch name - if [[ "$EVENT_NAME" == "pull_request" && -n "$BASE_REF" ]]; then - base="$BASE_REF" - else - # push + open PR → ask GitHub for the base branch of that PR - base="$(gh pr list --state open --head "$GITHUB_REF_NAME" --json baseRefName -q '.[0].baseRefName' || true)" - fi - - if [[ -z "$base" ]]; then - echo "Could not resolve PR base; falling back to configured baseline: $BASELINE_REF" - echo "BASELINE=$BASELINE_REF" >> "$GITHUB_ENV" - git fetch origin "${BASELINE_REF#origin/}:${BASELINE_REF#origin/}" --depth=1 || true - else - echo "Using PR base: origin/$base" - echo "BASELINE=origin/$base" >> "$GITHUB_ENV" - git fetch origin "$base:$base" --depth=1 - fi - else - echo "Using configured baseline: $BASELINE_REF" - echo "BASELINE=$BASELINE_REF" >> "$GITHUB_ENV" - git fetch origin "${BASELINE_REF#origin/}:${BASELINE_REF#origin/}" --depth=1 || true - fi + run: node scripts/shared/normalize-push-baseline.js - name: Set up Reviewdog uses: reviewdog/action-setup@v1 with: reviewdog_version: v0.20.3 - - name: Override Reviewdog settings if not a PR - id: verified_settings + - name: Normalize Reviewdog settings (push vs PR) + id: normalized_settings env: EVENT_NAME: ${{ github.event_name }} REPORTER: ${{ inputs.pr_reporter }} @@ -145,10 +127,8 @@ jobs: echo "Input reporter: $REPORTER" echo "Input filter mode: $FILTER_MODE" - if [[ "$EVENT_NAME" == "pull_request" || "$HAS_PR" == "true" ]]; then - # Keep PR-oriented settings - : - else + # Normalize settings so later steps do not need to branch on PR context + if [[ "$EVENT_NAME" != "pull_request" && "$HAS_PR" != "true" ]]; then echo "No PR context; using 
github-check reporter with nofilter" REPORTER=github-check FILTER_MODE=nofilter @@ -163,10 +143,10 @@ jobs: - name: Run Semgrep id: semgrep env: - SCAN_MODE: ${{ inputs.scan_mode }} - BASELINE: ${{ env.BASELINE }} + SCAN_MODE: ${{ inputs.scan_mode }} + BASELINE: ${{ env.BASELINE }} SEMGREP_RULES: ${{ inputs.semgrep_config }} - FAIL_LEVEL: ${{ inputs.fail_severity }} + FAIL_LEVEL: ${{ inputs.fail_severity }} run: | set -euo pipefail @@ -370,7 +350,6 @@ jobs: }); } - - name: Fail on findings at/above threshold if: | (inputs.fail_severity == 'error' && fromJSON(steps.semgrep_metrics.outputs.errors) > 0) || diff --git a/package-lock.json b/package-lock.json index 15a9728..5f5049e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,9 @@ "name": "core-github-actions", "version": "1.0.0", "license": "UNLICENSED", + "dependencies": { + "node-fetch": "2.7.0" + }, "devDependencies": { "@eslint/js": "9.39.2", "@eslint/json": "0.14.0", @@ -5175,6 +5178,26 @@ "dev": true, "license": "MIT" }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/node-int64": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", @@ -5956,6 +5979,12 @@ "node": ">=8.0" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/tslib": { "version": "2.8.1", "resolved": 
"https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -6167,6 +6196,22 @@ "makeerror": "1.0.12" } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 8443723..716eec4 100644 --- a/package.json +++ b/package.json @@ -31,5 +31,8 @@ "format:fix": "prettier --write './*.js' './*.mjs' './*.json' './*.md' 'scripts/**/*.js' '.github/actions/**/*.*'", "scan": "semgrep --config=p/ci --config=p/security-audit --config=p/javascript ./*.js ./*.mjs ./*.json scripts/ .github/actions/", "ci": "npm run audit && npm run test && npm run lint:check && npm run format:check && npm run scan" + }, + "dependencies": { + "node-fetch": "2.7.0" } } diff --git a/scripts/internal-ci/get-version-tags/index.integration.test.js b/scripts/internal-ci/get-version-tags/index.integration.test.js index f6ac87d..368ff9e 100644 --- a/scripts/internal-ci/get-version-tags/index.integration.test.js +++ b/scripts/internal-ci/get-version-tags/index.integration.test.js @@ -1,4 +1,5 @@ const { versionLabelPrefix, untrackedLabel } = require('../validate-version-labels/.'); +const { parseGithubOutput } = require('../../util/test-helpers'); describe('get-version-tags main module integration', () => { const fs = require('fs'); @@ -8,16 +9,6 @@ 
describe('get-version-tags main module integration', () => { const scriptPath = path.resolve(__dirname, 'index.js'); const projectRoot = path.resolve(__dirname, '../../..'); - function parseGithubOutput(file) { - return Object.fromEntries( - fs - .readFileSync(file, 'utf8') - .split('\n') - .filter(Boolean) - .map(line => line.split(/=(.*)/).slice(0, 2)) - ); - } - it('outputs correct values when untracked label is provided', () => { const labelInput = `${untrackedLabel}`; const unique = Date.now() + Math.random(); diff --git a/scripts/internal-ci/validate-version-labels/index.integration.test.js b/scripts/internal-ci/validate-version-labels/index.integration.test.js index fc2c869..08cb8c0 100644 --- a/scripts/internal-ci/validate-version-labels/index.integration.test.js +++ b/scripts/internal-ci/validate-version-labels/index.integration.test.js @@ -1,4 +1,5 @@ const { versionLabelPrefix, untrackedLabel } = require('.'); +const { parseGithubOutput } = require('../../util/test-helpers'); describe('validate-version-labels main module integration', () => { const fs = require('fs'); @@ -8,16 +9,6 @@ describe('validate-version-labels main module integration', () => { const scriptPath = path.resolve(__dirname, 'index.js'); const projectRoot = path.resolve(__dirname, '../../..'); - function parseGithubOutput(file) { - return Object.fromEntries( - fs - .readFileSync(file, 'utf8') - .split('\n') - .filter(Boolean) - .map(line => line.split(/=(.*)/).slice(0, 2)) - ); - } - it('accepts stdin, succeeds with valid component version label, and writes correct env values', () => { const labelInput = `${versionLabelPrefix}actions/pr-open-check/1.0.0`; const outputFile = path.join(tmp, 'gha_output.txt'); diff --git a/scripts/shared/normalize-push-baseline.js b/scripts/shared/normalize-push-baseline.js new file mode 100644 index 0000000..c5e65db --- /dev/null +++ b/scripts/shared/normalize-push-baseline.js @@ -0,0 +1,120 @@ +#!/usr/bin/env node +/** + * Normalize the baseline ref for 
push events in GitHub Actions. + * + * If a push event is detected but an open PR exists for the branch, + * this script outputs the PR's base branch as the baseline. Otherwise, + * it uses the provided INPUT_BASELINE environment variable. + * + * Usage: node normalize-push-baseline.js + * + * Inputs (via environment variables): + * HAS_PR - 'true' if the current context has an associated PR + * INPUT_BASELINE - the input baseline ref + * GITHUB_EVENT_NAME - GitHub provided environment variable for the GitHub event name (e.g., 'push', 'pull_request') + * GITHUB_BASE_REF - GitHub provided environment variable for the base ref from GitHub event (if any) + * GITHUB_REF_NAME - GitHub provided environment variable for the branch name of the current ref + * GITHUB_TOKEN - GitHub provided environment variable for API access token + * GITHUB_REPOSITORY - GitHub provided environment variable for the repository in 'owner/repo' format + * GITHUB_OUTPUT - GitHub provided environment variable for step outputs file path + * + * Outputs the resolved baseline ref to GITHUB_OUTPUT for GitHub Actions. 
+ */ + +const fetch = require('node-fetch'); + +async function getPrBaseBranch(owner, repo, branch, token) { + // Use GitHub API to find open PR for the branch and get its base branch + const url = `https://api.github.com/repos/${owner}/${repo}/pulls?state=open&head=${owner}:${branch}`; + const res = await fetch(url, { + headers: { + Authorization: `Bearer ${token}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'normalize-push-baseline-script', + }, + }); + if (!res.ok) return null; + const prs = await res.json(); + if (prs.length > 0 && prs[0].base && prs[0].base.ref) { + return prs[0].base.ref; + } + return null; +} + +function validateEnvVar(name) { + if (!process.env[name]) { + console.error(`::error::Environment variable ${name} is required`); + process.exit(1); + } +} + +async function normalizeBaseline({ + hasPr, + inputBaseline, + eventName, + ghBaseRef, + githubRefName, + githubToken, + repoFull, +}) { + const [owner, repo] = repoFull.split('/'); + let baseline = inputBaseline; + if (eventName === 'pull_request' || hasPr === 'true') { + let base = ghBaseRef; + if (!(eventName === 'pull_request' && ghBaseRef)) { + // Not a PR event or baseRef not set, try to resolve via API + base = await getPrBaseBranch(owner, repo, githubRefName, githubToken); + } + if (base) { + baseline = `origin/${base}`; + } else { + baseline = inputBaseline; + } + } + return baseline; +} + +if (require.main === module) { + validateEnvVar('HAS_PR'); + validateEnvVar('INPUT_BASELINE'); + validateEnvVar('GITHUB_EVENT_NAME'); + validateEnvVar('GITHUB_REF_NAME'); + validateEnvVar('GITHUB_TOKEN'); + validateEnvVar('GITHUB_REPOSITORY'); + + const hasPr = process.env.HAS_PR; + const inputBaseline = process.env.INPUT_BASELINE; + const eventName = process.env.GITHUB_EVENT_NAME; + const ghBaseRef = process.env.GITHUB_BASE_REF; + const githubRefName = process.env.GITHUB_REF_NAME; + const githubToken = process.env.GITHUB_TOKEN; + const repoFull = process.env.GITHUB_REPOSITORY; + + 
(async () => { + try { + const baseline = await normalizeBaseline({ + hasPr, + inputBaseline, + eventName, + ghBaseRef, + githubRefName, + githubToken, + repoFull, + }); + + // Output to GITHUB_OUTPUT for GitHub Actions step output + const githubOutput = process.env.GITHUB_OUTPUT; + if (githubOutput) { + const fs = require('fs'); + fs.appendFileSync(githubOutput, `baseline=${baseline}\n`); + } else { + console.log(baseline); + } + } catch (err) { + console.error(err.message || err); + process.exit(1); + } + })(); +} + +module.exports = { validateEnvVar, getPrBaseBranch, normalizeBaseline }; diff --git a/scripts/shared/normalize-push-baseline.test.js b/scripts/shared/normalize-push-baseline.test.js new file mode 100644 index 0000000..3fd5c23 --- /dev/null +++ b/scripts/shared/normalize-push-baseline.test.js @@ -0,0 +1,215 @@ +jest.mock('node-fetch'); +const fetch = require('node-fetch'); +const { validateEnvVar, getPrBaseBranch, normalizeBaseline } = require('./normalize-push-baseline'); +const { parseGithubOutput } = require('../util/test-helpers'); + +describe('getPrBaseBranch', () => { + const OWNER = 'test-owner'; + const REPO = 'test-repo'; + const BRANCH = 'feature-branch'; + const TOKEN = 'ghp_testtoken'; + + afterEach(() => { + fetch.mockClear(); + }); + + it('returns base branch when PR exists', async () => { + const mockResponse = [ + { + base: { ref: 'main' }, + }, + ]; + fetch.mockResolvedValue({ + ok: true, + json: async () => mockResponse, + }); + + const baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); + expect(baseBranch).toBe('main'); + }); + + it('returns null when no PR exists', async () => { + fetch.mockResolvedValue({ + ok: true, + json: async () => [], + }); + + const baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); + expect(baseBranch).toBeNull(); + }); + + it('returns null on fetch error', async () => { + fetch.mockResolvedValue({ + ok: false, + }); + + const baseBranch = await getPrBaseBranch(OWNER, REPO, 
BRANCH, TOKEN); + expect(baseBranch).toBeNull(); + }); +}); + +describe('normalizeBaseline', () => { + const REPO_FULL = 'test-owner/test-repo'; + const GITHUB_TOKEN = 'ghp_testtoken'; + + afterEach(() => { + fetch.mockClear(); + }); + + it('returns input baseline for non-PR event', async () => { + const baseline = await normalizeBaseline({ + hasPr: 'false', + inputBaseline: 'origin/main', + eventName: 'push', + ghBaseRef: '', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repoFull: REPO_FULL, + }); + expect(baseline).toBe('origin/main'); + }); + + it('returns origin/baseRef for PR event with baseRef', async () => { + const baseline = await normalizeBaseline({ + hasPr: 'true', + inputBaseline: 'origin/main', + eventName: 'pull_request', + ghBaseRef: 'develop', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repoFull: REPO_FULL, + }); + expect(baseline).toBe('origin/develop'); + }); + + it('fetches base branch when baseRef is not provided', async () => { + const mockResponse = [ + { + base: { ref: 'staging' }, + }, + ]; + fetch.mockResolvedValue({ + ok: true, + json: async () => mockResponse, + }); + + const baseline = await normalizeBaseline({ + hasPr: 'true', + inputBaseline: 'origin/main', + eventName: 'pull_request', + ghBaseRef: '', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repoFull: REPO_FULL, + }); + expect(baseline).toBe('origin/staging'); + }); + + it('falls back to input baseline when base branch cannot be fetched', async () => { + fetch.mockResolvedValue({ + ok: false, + }); + + const baseline = await normalizeBaseline({ + hasPr: 'true', + inputBaseline: 'origin/main', + eventName: 'pull_request', + ghBaseRef: '', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repoFull: REPO_FULL, + }); + expect(baseline).toBe('origin/main'); + }); +}); + +describe('validateEnvVar', () => { + const ORIGINAL_EXIT = process.exit; + const ORIGINAL_CONSOLE_ERROR = console.error; + + beforeEach(() 
=> { + process.exit = jest.fn(); + console.error = jest.fn(); + }); + + afterEach(() => { + process.exit = ORIGINAL_EXIT; + console.error = ORIGINAL_CONSOLE_ERROR; + }); + + it('does not exit when env var is set', () => { + process.env.TEST_VAR = 'value'; + validateEnvVar('TEST_VAR'); + expect(process.exit).not.toHaveBeenCalled(); + expect(console.error).not.toHaveBeenCalled(); + delete process.env.TEST_VAR; + }); + + it('exits with error when env var is not set', () => { + delete process.env.TEST_VAR; + validateEnvVar('TEST_VAR'); + expect(console.error).toHaveBeenCalledWith( + '::error::Environment variable TEST_VAR is required' + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); +}); + +describe('normalize-push-baseline main module integration', () => { + const fs = require('fs'); + const path = require('path'); + const { spawnSync } = require('child_process'); + const tmp = require('os').tmpdir(); + const scriptPath = path.resolve(__dirname, 'normalize-push-baseline.js'); + const ORIGINAL_EXIT = process.exit; + const ORIGINAL_CONSOLE_ERROR = console.error; + const ORIGINAL_CONSOLE_LOG = console.log; + + beforeEach(() => { + process.exit = jest.fn(); + console.error = jest.fn(); + console.log = jest.fn(); + fetch.mockClear(); + }); + + afterEach(() => { + process.exit = ORIGINAL_EXIT; + console.error = ORIGINAL_CONSOLE_ERROR; + console.log = ORIGINAL_CONSOLE_LOG; + }); + + it('exits with error if required env vars are missing', async () => { + const result = spawnSync('node', [scriptPath], { + cwd: tmp, + env: { ...process.env }, + encoding: 'utf-8', + }); + + expect(result.status).not.toBe(0); + expect(result.stderr).toContain('::error::Environment variable'); + }); + + it('writes github action output with normalized baseline', async () => { + const outputFile = path.join(tmp, 'gha_output.txt'); + const result = spawnSync('node', [scriptPath], { + cwd: tmp, + env: { + ...process.env, + HAS_PR: 'true', + INPUT_BASELINE: 'origin/main', + GITHUB_EVENT_NAME: 
'pull_request', + GITHUB_REF_NAME: 'feature-branch', + GITHUB_TOKEN: 'ghp_testtoken', + GITHUB_REPOSITORY: 'test-owner/test-repo', + GITHUB_BASE_REF: 'develop', + GITHUB_OUTPUT: outputFile, + }, + encoding: 'utf-8', + }); + + expect(result.status).toBe(0); + const outputs = parseGithubOutput(outputFile); + expect(outputs.baseline).toBe('origin/develop'); + fs.unlinkSync(outputFile); + }); +}); diff --git a/scripts/util/test-helpers.js b/scripts/util/test-helpers.js new file mode 100644 index 0000000..d920bb9 --- /dev/null +++ b/scripts/util/test-helpers.js @@ -0,0 +1,15 @@ +const fs = require('fs'); + +function parseGithubOutput(file) { + return Object.fromEntries( + fs + .readFileSync(file, 'utf8') + .split('\n') + .filter(Boolean) + .map(line => line.split(/=(.*)/).slice(0, 2)) + ); +} + +module.exports = { + parseGithubOutput, +}; From bb28a5cd98a02713d4bfed2c1a6ffbe799157608 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Mon, 22 Dec 2025 11:45:43 -0500 Subject: [PATCH 02/11] switch to standard gha runner instead of semgrep container --- .github/workflows/run_semgrep_scan.yml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index e2617fb..29e7135 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -69,9 +69,6 @@ jobs: name: Run Semgrep runs-on: ubuntu-latest - container: - image: semgrep/semgrep:1.137.0 - outputs: total_findings: ${{ steps.semgrep_metrics.outputs.total }} error_count: ${{ steps.semgrep_metrics.outputs.errors }} @@ -92,12 +89,6 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} commit-identifier: ${{ inputs.commit_identifier }} - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version-file: .nvmrc - - name: Install dependencies run: npm ci --omit=dev @@ -109,6 +100,9 @@ jobs: HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} run: node 
scripts/shared/normalize-push-baseline.js + - name: Install Semgrep + run: python3 -m pip install semgrep + - name: Set up Reviewdog uses: reviewdog/action-setup@v1 with: @@ -137,9 +131,6 @@ jobs: echo "REPORTER=$REPORTER" >> "$GITHUB_OUTPUT" echo "FILTER_MODE=$FILTER_MODE" >> "$GITHUB_OUTPUT" - - name: Install jq - run: apk add --no-cache jq - - name: Run Semgrep id: semgrep env: From 4792da82522e9a96f451cb282b3bb4aa69c75bb2 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Mon, 22 Dec 2025 11:47:37 -0500 Subject: [PATCH 03/11] fixes for normalize-push-baseline script --- .github/actions/pr-open-check/action.yml | 2 ++ .github/workflows/run_semgrep_scan.yml | 36 +++++++++++++----------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/.github/actions/pr-open-check/action.yml b/.github/actions/pr-open-check/action.yml index ffabc38..d754e7a 100644 --- a/.github/actions/pr-open-check/action.yml +++ b/.github/actions/pr-open-check/action.yml @@ -57,10 +57,12 @@ runs: pr_url=$(jq -r '[.[] | select(.state == "open")][0].html_url // empty' <<<"$pr_json") if [[ -n "$pr_number" ]]; then + echo "Found open PR #$pr_number for commit '$COMMIT'" echo "pr_exists=true" >> "$GITHUB_OUTPUT" echo "pr_number=$pr_number" >> "$GITHUB_OUTPUT" echo "pr_url=$pr_url" >> "$GITHUB_OUTPUT" else + echo "No open PR found for commit '$COMMIT'" echo "pr_exists=false" >> "$GITHUB_OUTPUT" echo "pr_number=" >> "$GITHUB_OUTPUT" echo "pr_url=" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index 29e7135..f0618ff 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -89,6 +89,7 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} commit-identifier: ${{ inputs.commit_identifier }} + - name: Install dependencies run: npm ci --omit=dev @@ -98,12 +99,13 @@ jobs: env: INPUT_BASELINE: ${{ inputs.baseline_ref }} HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} + GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} run: node scripts/shared/normalize-push-baseline.js - name: Install Semgrep run: python3 -m pip install semgrep - - name: Set up Reviewdog + - name: Set up Reviewdogß uses: reviewdog/action-setup@v1 with: reviewdog_version: v0.20.3 @@ -117,25 +119,25 @@ jobs: HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} run: | set -Eeuo pipefail - echo "event=$EVENT_NAME, has_pr=$HAS_PR" - echo "Input reporter: $REPORTER" - echo "Input filter mode: $FILTER_MODE" + echo "Inputs: event=$EVENT_NAME, has_pr=$HAS_PR" + echo "- reporter: $REPORTER" + echo "- filter mode: $FILTER_MODE" # Normalize settings so later steps do not need to branch on PR context if [[ "$EVENT_NAME" != "pull_request" && "$HAS_PR" != "true" ]]; then - echo "No PR context; using github-check reporter with nofilter" - REPORTER=github-check - FILTER_MODE=nofilter + REPORTER="github-check" + FILTER_MODE="nofilter" + echo -e "\nNo PR context; using github-check reporter with nofilter" fi - echo "REPORTER=$REPORTER" >> "$GITHUB_OUTPUT" - echo "FILTER_MODE=$FILTER_MODE" >> "$GITHUB_OUTPUT" + echo "reporter=$REPORTER" >> "$GITHUB_OUTPUT" + echo "filterMode=$FILTER_MODE" >> "$GITHUB_OUTPUT" - name: Run Semgrep id: semgrep env: SCAN_MODE: ${{ inputs.scan_mode }} - BASELINE: ${{ env.BASELINE }} + BASELINE: ${{ steps.normalize_baseline.outputs.normalized_baseline }} SEMGREP_RULES: ${{ inputs.semgrep_config }} FAIL_LEVEL: ${{ inputs.fail_severity }} run: | @@ -195,10 +197,10 @@ jobs: INFO: ${{ steps.semgrep_metrics.outputs.info }} SEMGREP_RULES: ${{ inputs.semgrep_config }} SCAN_MODE: ${{ inputs.scan_mode }} - BASELINE: ${{ env.BASELINE }} + BASELINE: ${{ steps.normalize_baseline.outputs.normalized_baseline }} FAIL_LEVEL: ${{ inputs.fail_severity }} - FILTER_MODE: ${{ steps.verified_settings.outputs.FILTER_MODE }} - REVIEWDOG_REPORTER: ${{ steps.verified_settings.outputs.REPORTER }} + FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} + REVIEWDOG_REPORTER: ${{ 
steps.normalized_settings.outputs.reporter }} HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} PR_NUMBER: ${{ steps.pr_check.outputs.pr_number }} PR_URL: ${{ steps.pr_check.outputs.pr_url }} @@ -244,8 +246,8 @@ jobs: if: ${{ steps.semgrep_metrics.outputs.total != '0' }} env: REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REVIEWDOG_REPORTER: ${{ steps.verified_settings.outputs.REPORTER }} - REVIEWDOG_FILTER_MODE: ${{ steps.verified_settings.outputs.FILTER_MODE }} + REVIEWDOG_REPORTER: ${{ steps.normalized_settings.outputs.reporter }} + REVIEWDOG_FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} REVIEWDOG_FAIL_LEVEL: ${{ inputs.fail_severity }} run: | cat reviewdog_input.txt | reviewdog \ @@ -273,8 +275,8 @@ jobs: const scanMode = "${{ inputs.scan_mode }}"; const rules = "${{ inputs.semgrep_config }}"; const failLevel = "${{ inputs.fail_severity }}"; - const filterMode = "${{ steps.verified_settings.outputs.FILTER_MODE }}"; - const reporter = "${{ steps.verified_settings.outputs.REPORTER }}"; + const filterMode = "${{ steps.normalized_settings.outputs.filterMode }}"; + const reporter = "${{ steps.normalized_settings.outputs.reporter }}"; let emoji = "✅"; let status = "passed"; From 24ea42ea16d13bd1c71d9b49bf12572b2d6b9289 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Mon, 22 Dec 2025 14:28:25 -0500 Subject: [PATCH 04/11] update semgrep scan to handle multiple configs and targets --- .github/workflows/internal_on_push_ci.yml | 9 ++-- .github/workflows/run_semgrep_scan.yml | 58 +++++++++++++++-------- .vscode/settings.json | 2 +- scripts/shared/normalize-push-baseline.js | 3 +- 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/.github/workflows/internal_on_push_ci.yml b/.github/workflows/internal_on_push_ci.yml index 614fe8f..3870b67 100644 --- a/.github/workflows/internal_on_push_ci.yml +++ b/.github/workflows/internal_on_push_ci.yml @@ -45,8 +45,9 @@ jobs: with: commit_identifier: ${{ github.sha }} cancel_in_progress: true - 
semgrep_config: 'p/default' + semgrep_config: 'p/ci p/security-audit p/javascript' + semgrep_targets: './*.js ./*.mjs ./*.json scripts/ .github/actions/' ## only scanning recently changed files, eventually should cover whole .github/actions/ folder fail_severity: 'error' - scan_mode: 'diff' - pr_filter_mode: 'added' - pr_reporter: 'github-pr-review' + semgrep_scan_mode: 'diff' + reviewdog_filter_mode: 'added' + reviewdog_reporter: 'github-pr-review' diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index f0618ff..2c6e131 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -16,27 +16,31 @@ on: type: boolean default: true semgrep_config: - description: 'Rulesets to run with Semgrep' + description: 'Rulesets to run with Semgrep (space or newline separated)' type: string default: 'p/default' + semgrep_targets: + description: 'Files or directories to scan (space or newline separated; default: entire repo)' + type: string + default: '' fail_severity: description: 'error | warning | info' type: string default: 'error' - scan_mode: + semgrep_scan_mode: description: What should Semgrep scan? "full | diff | baseline" type: string default: 'full' - pr_filter_mode: - description: What should reviewdog display (does NOT change what Semgrep scans)? "added | diff_context | nofilter" - type: string - default: 'added' baseline_ref: description: 'Ref for diff/baseline (e.g., origin/main)' type: string default: 'origin/main' - pr_reporter: - description: 'review output: github-pr-review | github-pr-check' + reviewdog_filter_mode: + description: What should reviewdog display (does not change what Semgrep scans)? 
"added | diff_context | nofilter" + type: string + default: 'nofilter' + reviewdog_reporter: + description: 'review output: github-pr-review | github-pr-check (only applies if PR context exists, on push github-check is used)' type: string default: 'github-pr-review' outputs: @@ -81,7 +85,7 @@ jobs: with: ref: ${{ inputs.commit_identifier }} # Full history only when diff/baseline is requested - fetch-depth: ${{ inputs.scan_mode == 'full' && '1' || '0' }} + fetch-depth: ${{ inputs.semgrep_scan_mode == 'full' && '1' || '0' }} - name: Check for open PR (by commit) id: pr_check @@ -94,7 +98,7 @@ jobs: run: npm ci --omit=dev - name: Normalize diff/baseline (push vs PR) - if: ${{ inputs.scan_mode != 'full' }} + if: ${{ inputs.semgrep_scan_mode != 'full' }} id: normalize_baseline env: INPUT_BASELINE: ${{ inputs.baseline_ref }} @@ -105,7 +109,7 @@ jobs: - name: Install Semgrep run: python3 -m pip install semgrep - - name: Set up Reviewdogß + - name: Set up Reviewdog uses: reviewdog/action-setup@v1 with: reviewdog_version: v0.20.3 @@ -114,8 +118,8 @@ jobs: id: normalized_settings env: EVENT_NAME: ${{ github.event_name }} - REPORTER: ${{ inputs.pr_reporter }} - FILTER_MODE: ${{ inputs.pr_filter_mode }} + REPORTER: ${{ inputs.reviewdog_reporter }} + FILTER_MODE: ${{ inputs.reviewdog_filter_mode }} HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} run: | set -Eeuo pipefail @@ -136,24 +140,40 @@ jobs: - name: Run Semgrep id: semgrep env: - SCAN_MODE: ${{ inputs.scan_mode }} + SCAN_MODE: ${{ inputs.semgrep_scan_mode }} BASELINE: ${{ steps.normalize_baseline.outputs.normalized_baseline }} - SEMGREP_RULES: ${{ inputs.semgrep_config }} + SEMGREP_CONFIG: ${{ inputs.semgrep_config }} + SEMGREP_TARGETS: ${{ inputs.semgrep_targets }} FAIL_LEVEL: ${{ inputs.fail_severity }} run: | set -euo pipefail - echo "▶️ Semgrep rules: ${SEMGREP_RULES}" + echo "▶️ Semgrep config: ${SEMGREP_CONFIG}" + echo "▶️ Semgrep targets: ${SEMGREP_TARGETS}" echo "🔎 Scan mode: ${SCAN_MODE}" [ "${SCAN_MODE}" = 
"full" ] && echo "⚠️ Full scan may be slow for large repositories" [ "${SCAN_MODE}" != "full" ] && echo "📍 Baseline commit: ${BASELINE}" echo "⛔ Fail level: ${FAIL_LEVEL}" - CMD=(semgrep --config="${SEMGREP_RULES}" --severity="${FAIL_LEVEL^^}" --json) + # Build config args + CONFIG_ARGS=() + while read -r config; do + [ -n "$config" ] && CONFIG_ARGS+=(--config="$config") + done <<< "${SEMGREP_CONFIG}" + + # Build target args + TARGET_ARGS=() + while read -r target; do + [ -n "$target" ] && TARGET_ARGS+=("$target") + done <<< "${SEMGREP_TARGETS}" + + CMD=(semgrep "${CONFIG_ARGS[@]}" --severity="${FAIL_LEVEL^^}" --json) if [ "${SCAN_MODE}" = "diff" ] || [ "${SCAN_MODE}" = "baseline" ]; then CMD+=(--baseline-commit="${BASELINE}") fi + CMD+=("${TARGET_ARGS[@]}") + echo "Running: ${CMD[*]}" # Execute; don't fail the step on Semgrep exit (we compute status ourselves) if ! "${CMD[@]}" > semgrep.json; then echo "Semgrep exited non-zero; proceeding with whatever results are available." @@ -196,7 +216,7 @@ jobs: WARN: ${{ steps.semgrep_metrics.outputs.warnings }} INFO: ${{ steps.semgrep_metrics.outputs.info }} SEMGREP_RULES: ${{ inputs.semgrep_config }} - SCAN_MODE: ${{ inputs.scan_mode }} + SCAN_MODE: ${{ inputs.semgrep_scan_mode }} BASELINE: ${{ steps.normalize_baseline.outputs.normalized_baseline }} FAIL_LEVEL: ${{ inputs.fail_severity }} FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} @@ -272,7 +292,7 @@ jobs: const warns = Number("${{ steps.semgrep_metrics.outputs.warnings || '0' }}"); const info = Number("${{ steps.semgrep_metrics.outputs.info || '0' }}"); - const scanMode = "${{ inputs.scan_mode }}"; + const scanMode = "${{ inputs.semgrep_scan_mode }}"; const rules = "${{ inputs.semgrep_config }}"; const failLevel = "${{ inputs.fail_severity }}"; const filterMode = "${{ steps.normalized_settings.outputs.filterMode }}"; diff --git a/.vscode/settings.json b/.vscode/settings.json index 7f3605b..dc3cf2e 100644 --- a/.vscode/settings.json +++ 
b/.vscode/settings.json @@ -43,5 +43,5 @@ // Optional: Spell checker "cSpell.enabled": true, - "cSpell.words": ["opensesame", "semgrep"] + "cSpell.words": ["opensesame", "reviewdog", "semgrep"] } diff --git a/scripts/shared/normalize-push-baseline.js b/scripts/shared/normalize-push-baseline.js index c5e65db..cd32ae4 100644 --- a/scripts/shared/normalize-push-baseline.js +++ b/scripts/shared/normalize-push-baseline.js @@ -107,9 +107,8 @@ if (require.main === module) { if (githubOutput) { const fs = require('fs'); fs.appendFileSync(githubOutput, `baseline=${baseline}\n`); - } else { - console.log(baseline); } + console.log(`Normalized baseline: ${baseline}`); } catch (err) { console.error(err.message || err); process.exit(1); From 0ad66c3d3f87de3a42756e39b3fe3f7e4ca48377 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Mon, 22 Dec 2025 14:45:58 -0500 Subject: [PATCH 05/11] additional inputs for flexibility --- .github/workflows/run_semgrep_scan.yml | 45 +++++++++++++++++++------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index 2c6e131..df15584 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -16,11 +16,19 @@ on: type: boolean default: true semgrep_config: - description: 'Rulesets to run with Semgrep (space or newline separated)' + description: Semgrep rulesets to use. Accepts a YAML array, newline or space-separated list. Default is "p/default" type: string - default: 'p/default' + default: p/default semgrep_targets: - description: 'Files or directories to scan (space or newline separated; default: entire repo)' + description: les/directories to scan. Accepts a YAML array, newline or space-separated list. Default is current directory (`.`) + type: string + default: . + extra_args: + description: 'Additional arguments to pass to Semgrep (e.g., --exclude, --timeout). Optional.' 
+ type: string + default: '' + semgrep_version: + description: 'Semgrep version to install. Default: latest.' type: string default: '' fail_severity: @@ -107,7 +115,14 @@ jobs: run: node scripts/shared/normalize-push-baseline.js - name: Install Semgrep - run: python3 -m pip install semgrep + env: + SEMGREP_VERSION: ${{ inputs.semgrep_version }} + run: | + if [ -n "$SEMGREP_VERSION" ]; then + python3 -m pip install semgrep==$SEMGREP_VERSION + else + python3 -m pip install semgrep + fi - name: Set up Reviewdog uses: reviewdog/action-setup@v1 @@ -150,26 +165,34 @@ jobs: echo "▶️ Semgrep config: ${SEMGREP_CONFIG}" echo "▶️ Semgrep targets: ${SEMGREP_TARGETS}" + echo "▶️ Extra args: ${EXTRA_ARGS}" echo "🔎 Scan mode: ${SCAN_MODE}" [ "${SCAN_MODE}" = "full" ] && echo "⚠️ Full scan may be slow for large repositories" [ "${SCAN_MODE}" != "full" ] && echo "📍 Baseline commit: ${BASELINE}" echo "⛔ Fail level: ${FAIL_LEVEL}" - # Build config args + # Parse configs as YAML array, newline, or space separated + parse_list() { + python3 -c 'import sys, yaml; print("\n".join(yaml.safe_load(sys.stdin)))' 2>/dev/null || cat + } + CONFIG_ARGS=() - while read -r config; do + echo "$SEMGREP_CONFIG" | parse_list | while read -r config; do [ -n "$config" ] && CONFIG_ARGS+=(--config="$config") - done <<< "${SEMGREP_CONFIG}" + done - # Build target args TARGET_ARGS=() - while read -r target; do + echo "$SEMGREP_TARGETS" | parse_list | while read -r target; do [ -n "$target" ] && TARGET_ARGS+=("$target") - done <<< "${SEMGREP_TARGETS}" + done CMD=(semgrep "${CONFIG_ARGS[@]}" --severity="${FAIL_LEVEL^^}" --json) if [ "${SCAN_MODE}" = "diff" ] || [ "${SCAN_MODE}" = "baseline" ]; then - CMD+=(--baseline-commit="${BASELINE}") + CMD+=(--baseline-commit="$BASELINE") + fi + if [ -n "$EXTRA_ARGS" ]; then + # shellcheck disable=SC2206 + CMD+=( $EXTRA_ARGS ) fi CMD+=("${TARGET_ARGS[@]}") From 7f1c3f27cce899e1b029f52ed47e8716acac3c1d Mon Sep 17 00:00:00 2001 From: ssvoss Date: Tue, 23 Dec 2025 09:06:41 
-0500 Subject: [PATCH 06/11] consolidate run semgrep into external script file --- .github/workflows/run_semgrep_scan.yml | 210 ++++------- scripts/shared/normalize-push-baseline.js | 119 ------ .../shared/normalize-push-baseline.test.js | 215 ----------- scripts/shared/run-semgrep.js | 343 +++++++++++++++++ scripts/shared/run-semgrep.unit.test.js | 347 ++++++++++++++++++ scripts/util/env-helpers.js | 10 + scripts/util/env-helpers.unit.test.js | 33 ++ 7 files changed, 796 insertions(+), 481 deletions(-) delete mode 100644 scripts/shared/normalize-push-baseline.js delete mode 100644 scripts/shared/normalize-push-baseline.test.js create mode 100644 scripts/shared/run-semgrep.js create mode 100644 scripts/shared/run-semgrep.unit.test.js create mode 100644 scripts/util/env-helpers.js create mode 100644 scripts/util/env-helpers.unit.test.js diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index df15584..3b48736 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -1,4 +1,4 @@ -name: Semgrep CE Scan +name: Semgrep Scan defaults: run: @@ -82,10 +82,13 @@ jobs: runs-on: ubuntu-latest outputs: - total_findings: ${{ steps.semgrep_metrics.outputs.total }} - error_count: ${{ steps.semgrep_metrics.outputs.errors }} - warning_count: ${{ steps.semgrep_metrics.outputs.warnings }} - info_count: ${{ steps.semgrep_metrics.outputs.info }} + total_findings: ${{ steps.semgrep.outputs.totalFindings }} + error_count: ${{ steps.semgrep.outputs.numErrors }} + warning_count: ${{ steps.semgrep.outputs.numWarnings }} + info_count: ${{ steps.semgrep.outputs.numInfo }} + scan_status: ${{ steps.semgrep.outputs.scanStatus }} + scan_md_summary: ${{ steps.semgrep.outputs.scanFindings }} + normalized_baseline: ${{ steps.semgrep.outputs.normalizedBaseline }} steps: - name: Checkout code @@ -102,18 +105,14 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} commit-identifier: ${{ inputs.commit_identifier }} + - 
name: Set up Node + uses: actions/setup-node@v4 + with: + node-version-file: .nvmrc + - name: Install dependencies run: npm ci --omit=dev - - name: Normalize diff/baseline (push vs PR) - if: ${{ inputs.semgrep_scan_mode != 'full' }} - id: normalize_baseline - env: - INPUT_BASELINE: ${{ inputs.baseline_ref }} - HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: node scripts/shared/normalize-push-baseline.js - - name: Install Semgrep env: SEMGREP_VERSION: ${{ inputs.semgrep_version }} @@ -124,13 +123,37 @@ jobs: python3 -m pip install semgrep fi + - name: Run Semgrep + id: semgrep + env: + INPUT_BASELINE: ${{ inputs.baseline_ref }} + HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SCAN_MODE: ${{ inputs.semgrep_scan_mode }} + SEMGREP_CONFIG: ${{ inputs.semgrep_config }} + SEMGREP_TARGETS: ${{ inputs.semgrep_targets }} + FAIL_LEVEL: ${{ inputs.fail_severity }} + EXTRA_ARGS: ${{ inputs.extra_args }} + run: node scripts/shared/run-semgrep.js + + - name: Upload Semgrep JSON artifact + if: ${{ steps.semgrep.outputs.totalFindings > 0 }} + uses: actions/upload-artifact@v4 + with: + name: semgrep-${{ github.run_id }}.json + path: semgrep_results.json + if-no-files-found: error + retention-days: 7 + - name: Set up Reviewdog + if: ${{ steps.semgrep.outputs.totalFindings > 0 }} uses: reviewdog/action-setup@v1 with: reviewdog_version: v0.20.3 - name: Normalize Reviewdog settings (push vs PR) id: normalized_settings + if: ${{ steps.semgrep.outputs.totalFindings > 0 }} env: EVENT_NAME: ${{ github.event_name }} REPORTER: ${{ inputs.reviewdog_reporter }} @@ -152,101 +175,30 @@ jobs: echo "reporter=$REPORTER" >> "$GITHUB_OUTPUT" echo "filterMode=$FILTER_MODE" >> "$GITHUB_OUTPUT" - - name: Run Semgrep - id: semgrep + - name: Reviewdog report + if: ${{ steps.semgrep.outputs.totalFindings > 0 }} env: - SCAN_MODE: ${{ inputs.semgrep_scan_mode }} - BASELINE: ${{ 
steps.normalize_baseline.outputs.normalized_baseline }} - SEMGREP_CONFIG: ${{ inputs.semgrep_config }} - SEMGREP_TARGETS: ${{ inputs.semgrep_targets }} - FAIL_LEVEL: ${{ inputs.fail_severity }} - run: | - set -euo pipefail - - echo "▶️ Semgrep config: ${SEMGREP_CONFIG}" - echo "▶️ Semgrep targets: ${SEMGREP_TARGETS}" - echo "▶️ Extra args: ${EXTRA_ARGS}" - echo "🔎 Scan mode: ${SCAN_MODE}" - [ "${SCAN_MODE}" = "full" ] && echo "⚠️ Full scan may be slow for large repositories" - [ "${SCAN_MODE}" != "full" ] && echo "📍 Baseline commit: ${BASELINE}" - echo "⛔ Fail level: ${FAIL_LEVEL}" - - # Parse configs as YAML array, newline, or space separated - parse_list() { - python3 -c 'import sys, yaml; print("\n".join(yaml.safe_load(sys.stdin)))' 2>/dev/null || cat - } - - CONFIG_ARGS=() - echo "$SEMGREP_CONFIG" | parse_list | while read -r config; do - [ -n "$config" ] && CONFIG_ARGS+=(--config="$config") - done - - TARGET_ARGS=() - echo "$SEMGREP_TARGETS" | parse_list | while read -r target; do - [ -n "$target" ] && TARGET_ARGS+=("$target") - done - - CMD=(semgrep "${CONFIG_ARGS[@]}" --severity="${FAIL_LEVEL^^}" --json) - if [ "${SCAN_MODE}" = "diff" ] || [ "${SCAN_MODE}" = "baseline" ]; then - CMD+=(--baseline-commit="$BASELINE") - fi - if [ -n "$EXTRA_ARGS" ]; then - # shellcheck disable=SC2206 - CMD+=( $EXTRA_ARGS ) - fi - CMD+=("${TARGET_ARGS[@]}") - - echo "Running: ${CMD[*]}" - # Execute; don't fail the step on Semgrep exit (we compute status ourselves) - if ! "${CMD[@]}" > semgrep.json; then - echo "Semgrep exited non-zero; proceeding with whatever results are available." 
- fi - test -s semgrep.json || echo '{"results":[]}' > semgrep.json - echo "Semgrep JSON size: $(wc -c < semgrep.json) bytes" - - - name: Extract Semgrep metrics - id: semgrep_metrics + REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REVIEWDOG_REPORTER: ${{ steps.normalized_settings.outputs.reporter }} + REVIEWDOG_FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} + REVIEWDOG_FAIL_LEVEL: ${{ inputs.fail_severity }} run: | - set -euo pipefail - # Safety: empty file → empty results array - test -s semgrep.json || echo '{"results":[]}' > semgrep.json - - TOTAL=$(jq '.results | length' semgrep.json) - ERR=$(jq '[.results[] | select(.extra.severity == "ERROR")] | length' semgrep.json) - WARN=$(jq '[.results[] | select(.extra.severity == "WARNING")] | length' semgrep.json) - INFO=$(jq '[.results[] | select(.extra.severity == "INFO")] | length' semgrep.json) - - { - echo "total=${TOTAL}" - echo "errors=${ERR}" - echo "warnings=${WARN}" - echo "info=${INFO}" - } >> "$GITHUB_OUTPUT" - - - name: Upload Semgrep JSON artifact - if: ${{ fromJSON(steps.semgrep_metrics.outputs.total) > 0 }} - uses: actions/upload-artifact@v4 - with: - name: semgrep-${{ github.run_id }}.json - path: semgrep.json - if-no-files-found: error - retention-days: 7 + cat reviewdog_input.txt | reviewdog \ + -efm="%t:%f:%l %m" \ + -name="semgrep" \ + -reporter="$REVIEWDOG_REPORTER" \ + -filter-mode="$REVIEWDOG_FILTER_MODE" \ + -fail-level="$REVIEWDOG_FAIL_LEVEL" - name: Job summary env: - TOTAL: ${{ steps.semgrep_metrics.outputs.total }} - ERR: ${{ steps.semgrep_metrics.outputs.errors }} - WARN: ${{ steps.semgrep_metrics.outputs.warnings }} - INFO: ${{ steps.semgrep_metrics.outputs.info }} - SEMGREP_RULES: ${{ inputs.semgrep_config }} - SCAN_MODE: ${{ inputs.semgrep_scan_mode }} - BASELINE: ${{ steps.normalize_baseline.outputs.normalized_baseline }} - FAIL_LEVEL: ${{ inputs.fail_severity }} - FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} - REVIEWDOG_REPORTER: ${{ 
steps.normalized_settings.outputs.reporter }} HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} PR_NUMBER: ${{ steps.pr_check.outputs.pr_number }} PR_URL: ${{ steps.pr_check.outputs.pr_url }} + SCAN_SUMMARY: ${{ steps.semgrep.outputs.scanSummary }} + CONFIG_SUMMARY: ${{ steps.semgrep.outputs.configSummary }} + FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} + REVIEWDOG_REPORTER: ${{ steps.normalized_settings.outputs.reporter }} run: | { echo "## 🔎 Semgrep Summary" @@ -259,46 +211,13 @@ jobs: echo "- **PR context**: None for this push; PR-only input settings were overridden." fi fi + } >> $GITHUB_STEP_SUMMARY - echo "" - echo "| Total | Errors | Warnings | Info |" - echo "|------:|------:|--------:|----:|" - echo "| ${TOTAL:-0} | ${ERR:-0} | ${WARN:-0} | ${INFO:-0} |" - echo "" - echo "- **Rules**: \`${SEMGREP_RULES}\`" - echo "- **Scan mode**: \`${SCAN_MODE}\`" - if [ "${SCAN_MODE}" != "full" ]; then - echo "- **Baseline**: \`${BASELINE}\`" - else - echo "- **Baseline**: \`n/a\`" - fi - echo "- **Fail level**: \`${FAIL_LEVEL}\`" - echo "- **Review filter**: \`${FILTER_MODE}\`" - echo "- **Reviewdog reporter**: \`${REVIEWDOG_REPORTER}\`" - } >> "$GITHUB_STEP_SUMMARY" - - - name: Prepare reviewdog input - id: semgrep_reviewdog_input - if: ${{ steps.semgrep_metrics.outputs.total != '0' }} - run: | - jq -r '.results[] | "\(.extra.severity[0:1]):\(.path):\(.end.line) \(.extra.message)"' \ - semgrep.json > reviewdog_input.txt || : - # If jq finds no results, file will be empty; that's fine. 
+ echo -e "${SCAN_SUMMARY}" >> $GITHUB_STEP_SUMMARY + echo -e "${CONFIG_SUMMARY}" >> $GITHUB_STEP_SUMMARY - - name: Reviewdog report - if: ${{ steps.semgrep_metrics.outputs.total != '0' }} - env: - REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REVIEWDOG_REPORTER: ${{ steps.normalized_settings.outputs.reporter }} - REVIEWDOG_FILTER_MODE: ${{ steps.normalized_settings.outputs.filterMode }} - REVIEWDOG_FAIL_LEVEL: ${{ inputs.fail_severity }} - run: | - cat reviewdog_input.txt | reviewdog \ - -efm="%t:%f:%l %m" \ - -name="semgrep" \ - -reporter="$REVIEWDOG_REPORTER" \ - -filter-mode="$REVIEWDOG_FILTER_MODE" \ - -fail-level="$REVIEWDOG_FAIL_LEVEL" + echo "- **Review filter**: \`${FILTER_MODE}\`" + echo "- **Reviewdog reporter**: \`${REVIEWDOG_REPORTER}\`" - name: Upsert PR summary comment if: ${{ github.event_name == 'pull_request' || steps.pr_check.outputs.pr_exists == 'true' }} @@ -310,10 +229,10 @@ jobs: script: | const prNumber = context.payload.pull_request?.number ?? process.env.PR_NUMBER; - const total = Number("${{ steps.semgrep_metrics.outputs.total || '0' }}"); - const errors = Number("${{ steps.semgrep_metrics.outputs.errors || '0' }}"); - const warns = Number("${{ steps.semgrep_metrics.outputs.warnings || '0' }}"); - const info = Number("${{ steps.semgrep_metrics.outputs.info || '0' }}"); + const total = Number("${{ steps.semgrep.outputs.totalFindings || '0' }}"); + const errors = Number("${{ steps.semgrep.outputs.numErrors || '0' }}"); + const warns = Number("${{ steps.semgrep.outputs.numWarnings || '0' }}"); + const info = Number("${{ steps.semgrep.outputs.numInfo || '0' }}"); const scanMode = "${{ inputs.semgrep_scan_mode }}"; const rules = "${{ inputs.semgrep_config }}"; @@ -387,10 +306,7 @@ jobs: } - name: Fail on findings at/above threshold - if: | - (inputs.fail_severity == 'error' && fromJSON(steps.semgrep_metrics.outputs.errors) > 0) || - (inputs.fail_severity == 'warning' && (fromJSON(steps.semgrep_metrics.outputs.errors) > 0 || 
fromJSON(steps.semgrep_metrics.outputs.warnings) > 0)) || - (inputs.fail_severity == 'info' && fromJSON(steps.semgrep_metrics.outputs.total) > 0) + if: ${{ steps.semgrep.outputs.scanStatus == 'failed' }} env: FAIL_SEVERITY: ${{ inputs.fail_severity }} run: | diff --git a/scripts/shared/normalize-push-baseline.js b/scripts/shared/normalize-push-baseline.js deleted file mode 100644 index cd32ae4..0000000 --- a/scripts/shared/normalize-push-baseline.js +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env node -/** - * Normalize the baseline ref for push events in GitHub Actions. - * - * If a push event is detected but an open PR exists for the branch, - * this script outputs the PR's base branch as the baseline. Otherwise, - * it uses the provided INPUT_BASELINE environment variable. - * - * Usage: node normalize-push-baseline.js - * - * Inputs (via environment variables): - * HAS_PR - 'true' if the current context has an associated PR - * INPUT_BASELINE - the input baseline ref - * GITHUB_EVENT_NAME - GitHub provided environment variable for the GitHub event name (e.g., 'push', 'pull_request') - * GITHUB_BASE_REF - GitHub provided environment variable for the base ref from GitHub event (if any) - * GITHUB_REF_NAME - GitHub provided environment variable for the branch name of the current ref - * GITHUB_TOKEN - GitHub provided environment variable for API access token - * GITHUB_REPOSITORY - GitHub provided environment variable for the repository in 'owner/repo' format - * GITHUB_OUTPUT - GitHub provided environment variable for step outputs file path - * - * Outputs the resolved baseline ref to GITHUB_OUTPUT for GitHub Actions. 
- */ - -const fetch = require('node-fetch'); - -async function getPrBaseBranch(owner, repo, branch, token) { - // Use GitHub API to find open PR for the branch and get its base branch - const url = `https://api.github.com/repos/${owner}/${repo}/pulls?state=open&head=${owner}:${branch}`; - const res = await fetch(url, { - headers: { - Authorization: `Bearer ${token}`, - Accept: 'application/vnd.github.v3+json', - 'User-Agent': 'normalize-push-baseline-script', - }, - }); - if (!res.ok) return null; - const prs = await res.json(); - if (prs.length > 0 && prs[0].base && prs[0].base.ref) { - return prs[0].base.ref; - } - return null; -} - -function validateEnvVar(name) { - if (!process.env[name]) { - console.error(`::error::Environment variable ${name} is required`); - process.exit(1); - } -} - -async function normalizeBaseline({ - hasPr, - inputBaseline, - eventName, - ghBaseRef, - githubRefName, - githubToken, - repoFull, -}) { - const [owner, repo] = repoFull.split('/'); - let baseline = inputBaseline; - if (eventName === 'pull_request' || hasPr === 'true') { - let base = ghBaseRef; - if (!(eventName === 'pull_request' && ghBaseRef)) { - // Not a PR event or baseRef not set, try to resolve via API - base = await getPrBaseBranch(owner, repo, githubRefName, githubToken); - } - if (base) { - baseline = `origin/${base}`; - } else { - baseline = inputBaseline; - } - } - return baseline; -} - -if (require.main === module) { - validateEnvVar('HAS_PR'); - validateEnvVar('INPUT_BASELINE'); - validateEnvVar('GITHUB_EVENT_NAME'); - validateEnvVar('GITHUB_REF_NAME'); - validateEnvVar('GITHUB_TOKEN'); - validateEnvVar('GITHUB_REPOSITORY'); - - const hasPr = process.env.HAS_PR; - const inputBaseline = process.env.INPUT_BASELINE; - const eventName = process.env.GITHUB_EVENT_NAME; - const ghBaseRef = process.env.GITHUB_BASE_REF; - const githubRefName = process.env.GITHUB_REF_NAME; - const githubToken = process.env.GITHUB_TOKEN; - const repoFull = process.env.GITHUB_REPOSITORY; - - 
(async () => { - try { - const baseline = await normalizeBaseline({ - hasPr, - inputBaseline, - eventName, - ghBaseRef, - githubRefName, - githubToken, - repoFull, - }); - - // Output to GITHUB_OUTPUT for GitHub Actions step output - const githubOutput = process.env.GITHUB_OUTPUT; - if (githubOutput) { - const fs = require('fs'); - fs.appendFileSync(githubOutput, `baseline=${baseline}\n`); - } - console.log(`Normalized baseline: ${baseline}`); - } catch (err) { - console.error(err.message || err); - process.exit(1); - } - })(); -} - -module.exports = { validateEnvVar, getPrBaseBranch, normalizeBaseline }; diff --git a/scripts/shared/normalize-push-baseline.test.js b/scripts/shared/normalize-push-baseline.test.js deleted file mode 100644 index 3fd5c23..0000000 --- a/scripts/shared/normalize-push-baseline.test.js +++ /dev/null @@ -1,215 +0,0 @@ -jest.mock('node-fetch'); -const fetch = require('node-fetch'); -const { validateEnvVar, getPrBaseBranch, normalizeBaseline } = require('./normalize-push-baseline'); -const { parseGithubOutput } = require('../util/test-helpers'); - -describe('getPrBaseBranch', () => { - const OWNER = 'test-owner'; - const REPO = 'test-repo'; - const BRANCH = 'feature-branch'; - const TOKEN = 'ghp_testtoken'; - - afterEach(() => { - fetch.mockClear(); - }); - - it('returns base branch when PR exists', async () => { - const mockResponse = [ - { - base: { ref: 'main' }, - }, - ]; - fetch.mockResolvedValue({ - ok: true, - json: async () => mockResponse, - }); - - const baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); - expect(baseBranch).toBe('main'); - }); - - it('returns null when no PR exists', async () => { - fetch.mockResolvedValue({ - ok: true, - json: async () => [], - }); - - const baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); - expect(baseBranch).toBeNull(); - }); - - it('returns null on fetch error', async () => { - fetch.mockResolvedValue({ - ok: false, - }); - - const baseBranch = await 
getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); - expect(baseBranch).toBeNull(); - }); -}); - -describe('normalizeBaseline', () => { - const REPO_FULL = 'test-owner/test-repo'; - const GITHUB_TOKEN = 'ghp_testtoken'; - - afterEach(() => { - fetch.mockClear(); - }); - - it('returns input baseline for non-PR event', async () => { - const baseline = await normalizeBaseline({ - hasPr: 'false', - inputBaseline: 'origin/main', - eventName: 'push', - ghBaseRef: '', - githubRefName: 'feature-branch', - githubToken: GITHUB_TOKEN, - repoFull: REPO_FULL, - }); - expect(baseline).toBe('origin/main'); - }); - - it('returns origin/baseRef for PR event with baseRef', async () => { - const baseline = await normalizeBaseline({ - hasPr: 'true', - inputBaseline: 'origin/main', - eventName: 'pull_request', - ghBaseRef: 'develop', - githubRefName: 'feature-branch', - githubToken: GITHUB_TOKEN, - repoFull: REPO_FULL, - }); - expect(baseline).toBe('origin/develop'); - }); - - it('fetches base branch when baseRef is not provided', async () => { - const mockResponse = [ - { - base: { ref: 'staging' }, - }, - ]; - fetch.mockResolvedValue({ - ok: true, - json: async () => mockResponse, - }); - - const baseline = await normalizeBaseline({ - hasPr: 'true', - inputBaseline: 'origin/main', - eventName: 'pull_request', - ghBaseRef: '', - githubRefName: 'feature-branch', - githubToken: GITHUB_TOKEN, - repoFull: REPO_FULL, - }); - expect(baseline).toBe('origin/staging'); - }); - - it('falls back to input baseline when base branch cannot be fetched', async () => { - fetch.mockResolvedValue({ - ok: false, - }); - - const baseline = await normalizeBaseline({ - hasPr: 'true', - inputBaseline: 'origin/main', - eventName: 'pull_request', - ghBaseRef: '', - githubRefName: 'feature-branch', - githubToken: GITHUB_TOKEN, - repoFull: REPO_FULL, - }); - expect(baseline).toBe('origin/main'); - }); -}); - -describe('validateEnvVar', () => { - const ORIGINAL_EXIT = process.exit; - const ORIGINAL_CONSOLE_ERROR = 
console.error; - - beforeEach(() => { - process.exit = jest.fn(); - console.error = jest.fn(); - }); - - afterEach(() => { - process.exit = ORIGINAL_EXIT; - console.error = ORIGINAL_CONSOLE_ERROR; - }); - - it('does not exit when env var is set', () => { - process.env.TEST_VAR = 'value'; - validateEnvVar('TEST_VAR'); - expect(process.exit).not.toHaveBeenCalled(); - expect(console.error).not.toHaveBeenCalled(); - delete process.env.TEST_VAR; - }); - - it('exits with error when env var is not set', () => { - delete process.env.TEST_VAR; - validateEnvVar('TEST_VAR'); - expect(console.error).toHaveBeenCalledWith( - '::error::Environment variable TEST_VAR is required' - ); - expect(process.exit).toHaveBeenCalledWith(1); - }); -}); - -describe('normalize-push-baseline main module integration', () => { - const fs = require('fs'); - const path = require('path'); - const { spawnSync } = require('child_process'); - const tmp = require('os').tmpdir(); - const scriptPath = path.resolve(__dirname, 'normalize-push-baseline.js'); - const ORIGINAL_EXIT = process.exit; - const ORIGINAL_CONSOLE_ERROR = console.error; - const ORIGINAL_CONSOLE_LOG = console.log; - - beforeEach(() => { - process.exit = jest.fn(); - console.error = jest.fn(); - console.log = jest.fn(); - fetch.mockClear(); - }); - - afterEach(() => { - process.exit = ORIGINAL_EXIT; - console.error = ORIGINAL_CONSOLE_ERROR; - console.log = ORIGINAL_CONSOLE_LOG; - }); - - it('exits with error if required env vars are missing', async () => { - const result = spawnSync('node', [scriptPath], { - cwd: tmp, - env: { ...process.env }, - encoding: 'utf-8', - }); - - expect(result.status).not.toBe(0); - expect(result.stderr).toContain('::error::Environment variable'); - }); - - it('writes github action output with normalized baseline', async () => { - const outputFile = path.join(tmp, 'gha_output.txt'); - const result = spawnSync('node', [scriptPath], { - cwd: tmp, - env: { - ...process.env, - HAS_PR: 'true', - INPUT_BASELINE: 
'origin/main', - GITHUB_EVENT_NAME: 'pull_request', - GITHUB_REF_NAME: 'feature-branch', - GITHUB_TOKEN: 'ghp_testtoken', - GITHUB_REPOSITORY: 'test-owner/test-repo', - GITHUB_BASE_REF: 'develop', - GITHUB_OUTPUT: outputFile, - }, - encoding: 'utf-8', - }); - - expect(result.status).toBe(0); - const outputs = parseGithubOutput(outputFile); - expect(outputs.baseline).toBe('origin/develop'); - fs.unlinkSync(outputFile); - }); -}); diff --git a/scripts/shared/run-semgrep.js b/scripts/shared/run-semgrep.js new file mode 100644 index 0000000..42026ae --- /dev/null +++ b/scripts/shared/run-semgrep.js @@ -0,0 +1,343 @@ +/* + * Run Semgrep scan + * Normalizes baseline for diff scans depending on push vs PR context + * + * Expects the following environment variables: + * HAS_PR - whether the current context has an associated PR (true/false) + * PR_NUMBER - PR number if applicable + * PR_URL - PR URL if applicable + * INPUT_BASELINE - baseline ref to use for diffing (e.g., origin/main) + * GITHUB_EVENT_NAME - GitHub provided environment variable for event name (e.g., push, pull_request) + * GITHUB_REF - Github provided environment variable for the git ref that triggered the workflow + * GITHUB_REF_NAME - GitHub provided environment variable for the branch or tag name that triggered the workflow + * GITHUB_BASE_REF - GitHub provided environment variable for the base ref of a PR (if applicable) + * GITHUB_REPOSITORY - GitHub provided environment variable for the repository (e.g., owner/repo) + * GITHUB_TOKEN - GitHub token for API access + * SCAN_MODE - 'diff' or 'full' scan mode + * SEMGREP_CONFIG - Semgrep ruleset(s) to use + * SEMGREP_TARGETS - Targets to scan (default: current directory) + * FAIL_LEVEL - Severity level to fail on (e.g., ERROR, WARNING) + * EXTRA_ARGS - Additional arguments to pass to Semgrep + * + * Outputs: + * - Writes file for reviewdog annotations, reviewdog_input.txt + * - Sets GitHub Action outputs + * - normalizedBaseline - the resolved baseline ref 
+ * - totalFindings - total number of findings + * - numErrors - number of ERROR severity findings + * - numWarnings - number of WARNING severity findings + * - numInfo - number of INFO severity findings + * - scanSummary - summary of findings in md format + * - configSummary - summary of scan config in md format + * - scanStatus - 'success' or 'failure' based on findings and fail level + */ + +const { spawnSync } = require('child_process'); +const fs = require('fs'); +const fetch = require('node-fetch'); +const { validateEnvVar } = require('../util/env-helpers'); + +const SEMGREP_RESULTS_FILE_NAME = 'semgrep_results.json'; +const REVIEWDOG_INPUT_FILE_NAME = 'reviewdog_input.txt'; + +async function getPrBaseBranch(owner, repo, branch, token) { + // Use GitHub API to find open PR for the branch and get its base branch + const url = `https://api.github.com/repos/${owner}/${repo}/pulls?state=open&head=${owner}:${branch}`; + const res = await fetch(url, { + headers: { + Authorization: `Bearer ${token}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'normalize-push-baseline-script', + }, + }); + if (!res.ok) return null; + const prs = await res.json(); + if (prs.length > 0 && prs[0].base && prs[0].base.ref) { + return prs[0].base.ref; + } + return null; +} + +/* Normalize the baseline ref for push events in GitHub Actions. + * + * If a push event is detected but an open PR exists for the branch, + * this script outputs the PR's base branch as the baseline. Otherwise, + * it uses the provided INPUT_BASELINE environment variable. 
+ */ +async function normalizeBaseline(hasPr, inputBaseline, githubDetails) { + const { repo, eventName, refName, baseRef, githubToken } = githubDetails; + const [repoOwner, repoName] = repo.split('/'); + let baseline = inputBaseline; + if (eventName === 'pull_request' || hasPr === 'true') { + let base = baseRef; + if (!(eventName === 'pull_request' && baseRef)) { + // Not a PR event or baseRef not set, try to resolve via API + base = await getPrBaseBranch(repoOwner, repoName, refName, githubToken); + } + if (base) { + baseline = `origin/${base}`; + } else { + baseline = inputBaseline; + } + } + console.log(`Normalized baseline: ${baseline}\n`); + return baseline; +} + +function constructSemgrepCommand(baseline, config, resultsFileName) { + const { rules, targets, scanMode, failLevel, extraArgs } = config; + let cmd = 'semgrep'; + + // Add each --config argument + rules + .split(/[,\s]+/) + .filter(Boolean) + .forEach(cfg => { + cmd += ` --config ${cfg}`; + }); + + cmd += ` --severity ${failLevel.toUpperCase()} --json --output ${resultsFileName}`; + + if (scanMode === 'diff') { + cmd += ` --baseline-commit ${baseline}`; + } + + if (extraArgs) { + cmd += ` ${extraArgs}`; + } + + targets + .split(/[,\s]+/) + .filter(Boolean) + .forEach(tgt => { + cmd += ` ${tgt}`; + }); + + console.log(`Generated Semgrep command: ${cmd}`); + return cmd; +} + +function runSemgrepAndCapture(semgrepCmd, resultsFileName) { + // we are trusting internal engineers not to pass untrusted arguments to semgrep GHA + // nosemgrep: javascript.lang.security.detect-child-process.detect-child-process + const result = spawnSync(semgrepCmd, { encoding: 'utf-8', shell: true }); + + if (result.error) { + // This is a Node.js error, not a Semgrep error + console.error(`::error title=Semgrep spawn error::${result.error.message}`); + process.exit(2); + } + + if (result.stderr) { + console.error(`::error title=Semgrep stderr::${result.stderr}`); + } + + // code 0 (no findings) or 1 (has findings): 
continue, findings handled by scanStatus + if (result.status > 1) { + console.error(`::error title=Semgrep execution error::Exited with code ${result.status}`); + try { + const content = fs.readFileSync(resultsFileName, 'utf-8'); + console.log(`Semgrep results file content:\n${content}`); + } catch (e) { + console.log(`Error reading semgrep results file: ${e.message}`); + } + process.exit(result.status); + } + + // Ensure results file exists even if no findings + if (!fs.existsSync(resultsFileName) || fs.statSync(resultsFileName).size === 0) { + console.log('No results file found or file is empty, creating empty results file.'); + fs.writeFileSync(resultsFileName, '{"results":[]}'); + } +} + +function stageResultsForReviewdog(resultsFileName) { + const data = fs.readFileSync(resultsFileName, 'utf8'); + const resultsJson = JSON.parse(data); + + const lines = resultsJson.results.map(result => { + const severityInitial = result.extra.severity ? result.extra.severity.charAt(0) : 'I'; + return `${severityInitial}:${result.path}:${result.end.line} ${result.extra.message}`; + }); + + fs.writeFileSync(REVIEWDOG_INPUT_FILE_NAME, lines.join('\n')); +} + +function getSemgrepMetrics(resultsFileName) { + const data = fs.readFileSync(resultsFileName, 'utf8'); + const resultsJson = JSON.parse(data); + + let totalFindings = 0; + let numErrors = 0; + let numWarnings = 0; + let numInfo = 0; + + if (resultsJson.results && Array.isArray(resultsJson.results)) { + totalFindings = resultsJson.results.length; + resultsJson.results.forEach(result => { + switch (result.extra.severity) { + case 'ERROR': + numErrors += 1; + break; + case 'WARNING': + numWarnings += 1; + break; + case 'INFO': + numInfo += 1; + break; + default: + break; + } + }); + } + + return { totalFindings, numErrors, numWarnings, numInfo }; +} + +function writeFindingsMarkdown(metrics) { + const { totalFindings, numErrors, numWarnings, numInfo } = metrics; + + let lines = []; + lines.push('### Scan Findings'); + 
lines.push('| Total | Errors | Warnings | Info |'); + lines.push('|------:|------:|--------:|----:|'); + lines.push(`| ${totalFindings} | ${numErrors} | ${numWarnings} | ${numInfo} |`); + + return lines.join('\n'); +} + +function writeConfigMarkdown(baseline, config) { + const { rules, targets, scanMode, failLevel, extraArgs } = config; + + let lines = []; + lines.push('### Scan Config'); + lines.push(`- **Rules**: \`${rules}\``); + lines.push(`- **Targets**: \`${targets}\``); + lines.push(`- **Scan mode**: \`${scanMode}\``); + if (scanMode !== 'full') { + lines.push(`- **Baseline**: \`${baseline}\``); + } else { + lines.push('- **Baseline**: `n/a`'); + } + lines.push(`- **Fail level**: \`${failLevel}\``); + lines.push(`- **Extra args**: \`${extraArgs || 'n/a'}\``); + + return lines.join('\n'); +} + +function evaluateScanStatus(failLevel, metrics) { + const { numErrors, numWarnings, numInfo } = metrics; + const level = failLevel.toUpperCase(); + + let status = 'success'; + switch (level) { + case 'CRITICAL': + case 'ERROR': + case 'HIGH': + if (numErrors > 0) status = 'failure'; + break; + case 'WARNING': + case 'MEDIUM': + if (numErrors + numWarnings > 0) status = 'failure'; + break; + case 'INFO': + case 'LOW': + if (numErrors + numWarnings + numInfo > 0) status = 'failure'; + break; + default: + break; + } + return status; +} + +async function main() { + [ + 'HAS_PR', + 'INPUT_BASELINE', + 'GITHUB_EVENT_NAME', + 'GITHUB_REF_NAME', + 'GITHUB_TOKEN', + 'GITHUB_REPOSITORY', + 'SEMGREP_CONFIG', + 'SEMGREP_TARGETS', + 'SCAN_MODE', + 'FAIL_LEVEL', + ].forEach(key => validateEnvVar(key)); + + const prDetails = { + hasPr: process.env.HAS_PR, + prNumber: process.env.PR_NUMBER || '', + prUrl: process.env.PR_URL || '', + }; + const semgrepConfig = { + rules: process.env.SEMGREP_CONFIG, + targets: process.env.SEMGREP_TARGETS, + scanMode: process.env.SCAN_MODE, + failLevel: process.env.FAIL_LEVEL, + extraArgs: process.env.EXTRA_ARGS || '', + }; + const githubDetails = { + 
repo: process.env.GITHUB_REPOSITORY, + eventName: process.env.GITHUB_EVENT_NAME, + refName: process.env.GITHUB_REF_NAME, + baseRef: process.env.GITHUB_BASE_REF, + githubToken: process.env.GITHUB_TOKEN, + }; + const inputBaseline = process.env.INPUT_BASELINE; + + console.log(`prDetails: ${JSON.stringify(prDetails)}\n`); + console.log(`semgrepConfig: ${JSON.stringify(semgrepConfig)}\n`); + console.log(`githubDetails: ${JSON.stringify(githubDetails)}\n`); + console.log(`inputBaseline: ${inputBaseline}`); + + const baseline = await normalizeBaseline(prDetails.hasPr, inputBaseline, githubDetails); + + const semgrepCmd = constructSemgrepCommand(baseline, semgrepConfig, SEMGREP_RESULTS_FILE_NAME); + + runSemgrepAndCapture(semgrepCmd, SEMGREP_RESULTS_FILE_NAME); + + const metrics = getSemgrepMetrics(SEMGREP_RESULTS_FILE_NAME); + + stageResultsForReviewdog(SEMGREP_RESULTS_FILE_NAME); + + const scanSummary = writeFindingsMarkdown(metrics, baseline, semgrepConfig); + const configSummary = writeConfigMarkdown(baseline, semgrepConfig); + + const scanStatus = evaluateScanStatus(semgrepConfig.failLevel, metrics); + + // write GitHub Action outputs + const githubOutput = process.env.GITHUB_OUTPUT; + if (githubOutput) { + fs.appendFileSync(githubOutput, `normalizedBaseline=${baseline}\n`); + fs.appendFileSync(githubOutput, `scanSummary=${scanSummary.replace(/\n/g, '\\n')}\n`); + fs.appendFileSync(githubOutput, `configSummary=${configSummary.replace(/\n/g, '\\n')}\n`); + fs.appendFileSync(githubOutput, `scanStatus=${scanStatus}\n`); + fs.appendFileSync(githubOutput, `totalFindings=${metrics.totalFindings}\n`); + fs.appendFileSync(githubOutput, `numErrors=${metrics.numErrors}\n`); + fs.appendFileSync(githubOutput, `numWarnings=${metrics.numWarnings}\n`); + fs.appendFileSync(githubOutput, `numInfo=${metrics.numInfo}\n`); + } +} + +// Run the main function if this script is executed directly +if (require.main === module) { + main().catch(err => { + console.error(`::error title=Run 
Semgrep Scan::${err.message}`);
+    process.exit(1);
+  });
+}
+
+module.exports = {
+  main,
+  getPrBaseBranch,
+  normalizeBaseline,
+  constructSemgrepCommand,
+  runSemgrepAndCapture,
+  stageResultsForReviewdog,
+  getSemgrepMetrics,
+  writeFindingsMarkdown,
+  writeConfigMarkdown,
+  evaluateScanStatus,
+  SEMGREP_RESULTS_FILE_NAME,
+  REVIEWDOG_INPUT_FILE_NAME,
+};
diff --git a/scripts/shared/run-semgrep.unit.test.js b/scripts/shared/run-semgrep.unit.test.js
new file mode 100644
index 0000000..6871f03
--- /dev/null
+++ b/scripts/shared/run-semgrep.unit.test.js
@@ -0,0 +1,347 @@
+const fetch = require('node-fetch');
+const fs = require('fs');
+const {
+  getPrBaseBranch,
+  normalizeBaseline,
+  constructSemgrepCommand,
+  stageResultsForReviewdog,
+  getSemgrepMetrics,
+  writeFindingsMarkdown,
+  writeConfigMarkdown,
+  evaluateScanStatus,
+  REVIEWDOG_INPUT_FILE_NAME,
+} = require('./run-semgrep');
+
+const exampleSemgrepOutput = {
+  results: [
+    {
+      path: 'src/error.js',
+      end: { line: 111 },
+      extra: {
+        severity: 'ERROR',
+        message: 'This is an error message',
+      },
+    },
+    {
+      path: 'src/warning1.js',
+      end: { line: 222 },
+      extra: {
+        severity: 'WARNING',
+        message: 'This is a warning message #1',
+      },
+    },
+    {
+      path: 'src/info.js',
+      end: { line: 333 },
+      extra: {
+        severity: 'INFO',
+        message: 'This is an info message',
+      },
+    },
+    {
+      path: 'src/warning2.js',
+      end: { line: 444 },
+      extra: {
+        severity: 'WARNING',
+        message: 'This is a warning message #2',
+      },
+    },
+  ],
+};
+const emptySemgrepOutput = { results: [] };
+
+jest.mock('node-fetch');
+
+describe('getPrBaseBranch', () => {
+  const OWNER = 'test-owner';
+  const REPO = 'test-repo';
+  const BRANCH = 'feature-branch';
+  const TOKEN = 'ghp_testtoken';
+
+  afterEach(() => {
+    fetch.mockClear();
+  });
+
+  it('returns base branch when PR exists', async () => {
+    const mockResponse = [
+      {
+        base: { ref: 'main' },
+      },
+    ];
+    fetch.mockResolvedValue({
+      ok: true,
+      json: async () => mockResponse,
+    });
+
+    const 
baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); + expect(baseBranch).toBe('main'); + }); + + it('returns null when no PR exists', async () => { + fetch.mockResolvedValue({ + ok: true, + json: async () => [], + }); + + const baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); + expect(baseBranch).toBeNull(); + }); + + it('returns null on fetch error', async () => { + fetch.mockResolvedValue({ + ok: false, + }); + + const baseBranch = await getPrBaseBranch(OWNER, REPO, BRANCH, TOKEN); + expect(baseBranch).toBeNull(); + }); +}); + +describe('normalizeBaseline', () => { + const FULL_REPO_NAME = 'repo-owner/test-repo'; + const GITHUB_TOKEN = 'ghp_testtoken'; + const inputBaseline = 'origin/main'; + + afterEach(() => { + fetch.mockClear(); + }); + + it('returns input baseline for non-PR event', async () => { + const hasPr = 'false'; + const githubDetails = { + eventName: 'push', + baseRef: '', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repo: FULL_REPO_NAME, + }; + const baseline = await normalizeBaseline(hasPr, inputBaseline, githubDetails); + + expect(baseline).toBe('origin/main'); + }); + + it('returns origin/baseRef for PR event with baseRef', async () => { + const hasPr = 'true'; + const githubDetails = { + eventName: 'pull_request', + baseRef: 'develop', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repo: FULL_REPO_NAME, + }; + + const baseline = await normalizeBaseline(hasPr, inputBaseline, githubDetails); + + expect(baseline).toBe(`origin/${githubDetails.baseRef}`); + }); + + it('fetches base branch when baseRef is not provided', async () => { + const mockResponse = [ + { + base: { ref: 'staging' }, + }, + ]; + fetch.mockResolvedValue({ + ok: true, + json: async () => mockResponse, + }); + + const hasPr = 'true'; + const githubDetails = { + eventName: 'push', + baseRef: '', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repo: FULL_REPO_NAME, + }; + + const baseline = await 
normalizeBaseline(hasPr, inputBaseline, githubDetails); + + expect(baseline).toBe('origin/staging'); + }); + + it('falls back to input baseline when base branch cannot be fetched', async () => { + fetch.mockResolvedValue({ + ok: false, + }); + + const hasPr = 'true'; + const githubDetails = { + eventName: 'push', + baseRef: '', + githubRefName: 'feature-branch', + githubToken: GITHUB_TOKEN, + repo: FULL_REPO_NAME, + }; + + const baseline = await normalizeBaseline(hasPr, inputBaseline, githubDetails); + + expect(baseline).toBe('origin/main'); + }); +}); + +describe('constructSemgrepCommand', () => { + it('constructs args correctly for diff scan mode', () => { + const baseline = 'origin/main'; + const semgrepConfig = { + scanMode: 'diff', + rules: 'p/rule1 p/rule2', + targets: './src,./lib', + failLevel: 'warning', + extraArgs: '--other arg1 --another arg2', + }; + const cmd = constructSemgrepCommand(baseline, semgrepConfig, 'temp-results.json'); + + expect(cmd).toEqual( + 'semgrep --config p/rule1 --config p/rule2 --severity WARNING --json --output temp-results.json --baseline-commit origin/main --other arg1 --another arg2 ./src ./lib' + ); + }); + + it('constructs args correctly for full scan mode', () => { + const baseline = 'origin/main'; + const semgrepConfig = { + scanMode: 'full', + rules: 'p/rule1', + targets: '', + failLevel: 'error', + extraArgs: '', + }; + + const cmd = constructSemgrepCommand(baseline, semgrepConfig, 'temp-results.json'); + + expect(cmd).toEqual( + 'semgrep --config p/rule1 --severity ERROR --json --output temp-results.json' + ); + }); +}); + +describe('stageResultsForReviewdog', () => { + it('stages results file when it exists', () => { + const fakeInputFileName = 'fake-results.json'; + + jest.spyOn(fs, 'readFileSync').mockImplementation((fileName, encoding) => { + if (fileName === fakeInputFileName) { + return JSON.stringify(exampleSemgrepOutput); + } + }); + + let writtenContent = ''; + jest.spyOn(fs, 
'writeFileSync').mockImplementation((fileName, data) => { + if (fileName === REVIEWDOG_INPUT_FILE_NAME) { + writtenContent = data; + } + }); + + stageResultsForReviewdog(fakeInputFileName); + + expect(writtenContent).toContain('E:src/error.js:111 This is an error message'); + expect(writtenContent).toContain('W:src/warning1.js:222 This is a warning message #1'); + expect(writtenContent).toContain('I:src/info.js:333 This is an info message'); + expect(writtenContent).toContain('W:src/warning2.js:444 This is a warning message #2'); + + jest.restoreAllMocks(); + }); +}); + +describe('getSemgrepMetrics', () => { + it('correctly parses semgrep JSON output', () => { + const fakeInputFileName = 'fake-results.json'; + + jest.spyOn(fs, 'readFileSync').mockImplementation((fileName, encoding) => { + if (fileName === fakeInputFileName) { + return JSON.stringify(exampleSemgrepOutput); + } + }); + + const metrics = getSemgrepMetrics(fakeInputFileName); + + expect(metrics.totalFindings).toBe(4); + expect(metrics.numErrors).toBe(1); + expect(metrics.numWarnings).toBe(2); + expect(metrics.numInfo).toBe(1); + + jest.restoreAllMocks(); + }); + + it('handles empty results', () => { + const fakeInputFileName = 'fake-results.json'; + + jest.spyOn(fs, 'readFileSync').mockImplementation((fileName, encoding) => { + if (fileName === fakeInputFileName) { + return JSON.stringify(emptySemgrepOutput); + } + }); + + const metrics = getSemgrepMetrics(fakeInputFileName); + + expect(metrics.totalFindings).toBe(0); + expect(metrics.numErrors).toBe(0); + expect(metrics.numWarnings).toBe(0); + expect(metrics.numInfo).toBe(0); + + jest.restoreAllMocks(); + }); +}); + +describe('writeFindingsMarkdown', () => { + it('writes markdown correctly', () => { + const metrics = { + totalFindings: 6, + numErrors: 1, + numWarnings: 2, + numInfo: 3, + }; + + const markdown = writeFindingsMarkdown(metrics); + + expect(markdown).toContain('### Scan Findings\n'); + expect(markdown).toContain('| Total | Errors | 
Warnings | Info |\n'); + expect(markdown).toContain('| 6 | 1 | 2 | 3 |'); + }); +}); + +describe('writeConfigMarkdown', () => { + it('writes config markdown correctly', () => { + const config = { + rules: 'p/rule1 p/rule2', + targets: './src ./lib', + scanMode: 'diff', + failLevel: 'warning', + extraArgs: '--json', + }; + const baseline = 'origin/main'; + + const markdown = writeConfigMarkdown(baseline, config); + + expect(markdown).toContain('### Scan Config\n'); + expect(markdown).toContain(`- **Rules**: \`${config.rules}\`\n`); + expect(markdown).toContain(`- **Targets**: \`${config.targets}\`\n`); + expect(markdown).toContain(`- **Scan mode**: \`${config.scanMode}\`\n`); + expect(markdown).toContain(`- **Baseline**: \`${baseline}\`\n`); + expect(markdown).toContain(`- **Fail level**: \`${config.failLevel}\`\n`); + expect(markdown).toContain(`- **Extra args**: \`${config.extraArgs}\``); + }); +}); + +describe('evaluateScanStatus', () => { + it('returns failed for error severity with errors', () => { + const metrics = { numErrors: 1 }; + const status = evaluateScanStatus('error', metrics); + expect(status).toBe('failure'); + }); + it('returns failed for warning severity with warnings', () => { + const metrics = { numErrors: 0, numWarnings: 1 }; + const status = evaluateScanStatus('warning', metrics); + expect(status).toBe('failure'); + }); + it('returns failed for info severity with any findings', () => { + const metrics = { numErrors: 0, numWarnings: 0, numInfo: 1 }; + const status = evaluateScanStatus('info', metrics); + expect(status).toBe('failure'); + }); + it('returns passed when no findings exceed severity', () => { + const metrics = { numErrors: 0, numWarnings: 1 }; + const status = evaluateScanStatus('error', metrics); + expect(status).toBe('success'); + }); +}); diff --git a/scripts/util/env-helpers.js b/scripts/util/env-helpers.js new file mode 100644 index 0000000..1851876 --- /dev/null +++ b/scripts/util/env-helpers.js @@ -0,0 +1,10 @@ +function 
validateEnvVar(name) { + if (!process.env[name]) { + console.error(`::error::Environment variable ${name} is required`); + process.exit(1); + } +} + +module.exports = { + validateEnvVar, +}; diff --git a/scripts/util/env-helpers.unit.test.js b/scripts/util/env-helpers.unit.test.js new file mode 100644 index 0000000..73c0020 --- /dev/null +++ b/scripts/util/env-helpers.unit.test.js @@ -0,0 +1,33 @@ +const { validateEnvVar } = require('./env-helpers'); + +describe('validateEnvVar', () => { + const ORIGINAL_EXIT = process.exit; + const ORIGINAL_CONSOLE_ERROR = console.error; + + beforeEach(() => { + process.exit = jest.fn(); + console.error = jest.fn(); + }); + + afterEach(() => { + process.exit = ORIGINAL_EXIT; + console.error = ORIGINAL_CONSOLE_ERROR; + }); + + it('does not exit when env var is set', () => { + process.env.TEST_VAR = 'value'; + validateEnvVar('TEST_VAR'); + expect(process.exit).not.toHaveBeenCalled(); + expect(console.error).not.toHaveBeenCalled(); + delete process.env.TEST_VAR; + }); + + it('exits with error when env var is not set', () => { + delete process.env.TEST_VAR; + validateEnvVar('TEST_VAR'); + expect(console.error).toHaveBeenCalledWith( + '::error::Environment variable TEST_VAR is required' + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); +}); From 50ebe2954211092134b0ffa7e7f75bd42c187bea Mon Sep 17 00:00:00 2001 From: ssvoss Date: Tue, 23 Dec 2025 14:08:57 -0500 Subject: [PATCH 07/11] new action for upserting pr comment, switch semgrep scan workflow to use new action --- .../actions/upsert-pr-comment/CHANGELOG.md | 18 +++ .github/actions/upsert-pr-comment/README.md | 67 +++++++++++ .github/actions/upsert-pr-comment/action.yml | 55 +++++++++ .github/workflows/run_semgrep_scan.yml | 107 ++++++------------ .vscode/settings.json | 2 +- package.json | 2 +- 6 files changed, 179 insertions(+), 72 deletions(-) create mode 100644 .github/actions/upsert-pr-comment/CHANGELOG.md create mode 100644 .github/actions/upsert-pr-comment/README.md 
create mode 100644 .github/actions/upsert-pr-comment/action.yml diff --git a/.github/actions/upsert-pr-comment/CHANGELOG.md b/.github/actions/upsert-pr-comment/CHANGELOG.md new file mode 100644 index 0000000..e9b9dc1 --- /dev/null +++ b/.github/actions/upsert-pr-comment/CHANGELOG.md @@ -0,0 +1,18 @@ +# upsert-pr-comment action Changelog + +All notable changes to the **upsert-pr-comment** action are documented in this file. + +## 1.0.0 + +### Added + +- Initial release of the reusable composite action for upserting PR comments. +- Supports creating or updating a PR comment based on a unique hidden marker. +- Accepts the following required inputs: + - `github-token`: GitHub token with repo scope + - `pr-number`: Pull Request number + - `comment-marker`: Unique marker to identify the comment + - `body-content`: Markdown content for the comment body +- Uses `actions/github-script@v7` to interact with the GitHub REST API. +- Automatically updates an existing comment if the marker is found, or creates a new comment if not. +- No external dependencies required beyond GitHub Actions standard runners. diff --git a/.github/actions/upsert-pr-comment/README.md b/.github/actions/upsert-pr-comment/README.md new file mode 100644 index 0000000..125507d --- /dev/null +++ b/.github/actions/upsert-pr-comment/README.md @@ -0,0 +1,67 @@ +# Upsert PR Comment Action + +## 🧭 Summary + +Creates or updates a comment on a GitHub Pull Request. The comments are identified by a unique hidden marker. This ensures only one comment per marker is present, updating the comment if one already exists for the marker or creating a new one if not. + +## Scope/Limitations + +- Supports upserting comments on an open PR in any repository where the action is used. +- Only works for PRs (not issues or other event types). +- Requires a GitHub token with appropriate permissions. +- The marker must be unique per comment type to avoid accidental overwrites. 
+ +## 🔒 Permissions + +The following GitHub Actions permissions are required: + +```yaml +permissions: + contents: read + pull-requests: write +``` + +## Dependencies + +- Uses `actions/github-script@v7` +- Uses the GitHub REST API +- Runs on any GitHub-hosted runner + +## ⚙️ Inputs + +| Name | Required | Description | +| ---------------- | -------- | ---------------------------------------------------------------- | +| `github-token` | ✅ | GitHub token with repo scope (use `${{ secrets.GITHUB_TOKEN }}`) | +| `pr-number` | ✅ | Pull Request number | +| `comment-marker` | ✅ | Unique marker to identify the comment (hidden in HTML comment) | +| `body-content` | ✅ | Markdown content for the comment body | + +## 📤 Outputs + +This action does not set any outputs. + +## 🚀 Usage + +Basic usage example: + +```yaml +- name: Upsert PR summary comment + uses: ./.github/actions/upsert-pr-comment + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + pr-number: ${{ github.event.pull_request.number }} + comment-marker: 'my-unique-marker' + body-content: | + ## PR Scan Results + - All checks passed! +``` + +## 🧠 Notes + +- The comment marker is embedded as an HTML comment and should be unique for each comment type you want to upsert. +- If multiple workflows use the same marker, they will overwrite each other's comments. +- The action uses the GitHub REST API to list, update, or create comments. + +## Versioning + +This action uses namespaced tags for versioning and is tracked in the repository CHANGELOG. 
diff --git a/.github/actions/upsert-pr-comment/action.yml b/.github/actions/upsert-pr-comment/action.yml
new file mode 100644
index 0000000..702eba0
--- /dev/null
+++ b/.github/actions/upsert-pr-comment/action.yml
@@ -0,0 +1,55 @@
+name: Upsert PR Comment
+description: Creates or updates a PR comment based on a unique marker
+inputs:
+  github-token:
+    description: GitHub token with repo scope (use GITHUB_TOKEN)
+    required: true
+  pr-number:
+    description: PR number
+    required: true
+  comment-marker:
+    description: Unique marker to identify the comment for upsert, value will be hidden in HTML comment
+    required: true
+  body-content:
+    description: Markdown content for the comment body
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Upsert PR summary comment
+      uses: actions/github-script@v7
+      env:
+        PR_NUMBER: ${{ inputs.pr-number }}
+        COMMENT_MARKER: ${{ inputs.comment-marker }}
+        BODY_CONTENT: ${{ inputs.body-content }}
+      with:
+        github-token: ${{ inputs.github-token }}
+        script: |
+          const body = `<!-- ${process.env.COMMENT_MARKER} -->
+          ${process.env.BODY_CONTENT}
+          `;
+
+          // Upsert by hidden marker to avoid duplicate comments
+          const { data: comments } = await github.rest.issues.listComments({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            issue_number: process.env.PR_NUMBER,
+            per_page: 100,
+          });
+
+          const existing = comments.find(c => c.body && c.body.includes(`<!-- ${process.env.COMMENT_MARKER} -->`));
+          if (existing) {
+            await github.rest.issues.updateComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: existing.id,
+              body,
+            });
+          } else {
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: process.env.PR_NUMBER,
+              body,
+            });
+          }
diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml
index 3b48736..f9032b2 100644
--- a/.github/workflows/run_semgrep_scan.yml
+++ b/.github/workflows/run_semgrep_scan.yml
@@ -136,7 +136,7 @@ jobs:
           EXTRA_ARGS: ${{ inputs.extra_args }}
         run: node 
scripts/shared/run-semgrep.js - - name: Upload Semgrep JSON artifact + - name: Upload Artifact if: ${{ steps.semgrep.outputs.totalFindings > 0 }} uses: actions/upload-artifact@v4 with: @@ -190,7 +190,7 @@ jobs: -filter-mode="$REVIEWDOG_FILTER_MODE" \ -fail-level="$REVIEWDOG_FAIL_LEVEL" - - name: Job summary + - name: Write job summary env: HAS_PR: ${{ steps.pr_check.outputs.pr_exists }} PR_NUMBER: ${{ steps.pr_check.outputs.pr_number }} @@ -219,91 +219,58 @@ jobs: echo "- **Review filter**: \`${FILTER_MODE}\`" echo "- **Reviewdog reporter**: \`${REVIEWDOG_REPORTER}\`" - - name: Upsert PR summary comment + - name: Generate PR comment body if: ${{ github.event_name == 'pull_request' || steps.pr_check.outputs.pr_exists == 'true' }} + id: generate_body uses: actions/github-script@v7 env: PR_NUMBER: ${{ steps.pr_check.outputs.pr_number }} + SCAN_STATUS: ${{ steps.semgrep.outputs.scanStatus }} + TOTAL_FINDINGS: ${{ steps.semgrep.outputs.totalFindings }} + NUM_ERRORS: ${{ steps.semgrep.outputs.numErrors }} + NUM_WARNINGS: ${{ steps.semgrep.outputs.numWarnings }} + NUM_INFO: ${{ steps.semgrep.outputs.numInfo }} + CONFIG_SUMMARY: ${{ steps.semgrep.outputs.configSummary }} with: - github-token: ${{ secrets.GITHUB_TOKEN }} + result-encoding: string script: | - const prNumber = context.payload.pull_request?.number ?? 
process.env.PR_NUMBER; - - const total = Number("${{ steps.semgrep.outputs.totalFindings || '0' }}"); - const errors = Number("${{ steps.semgrep.outputs.numErrors || '0' }}"); - const warns = Number("${{ steps.semgrep.outputs.numWarnings || '0' }}"); - const info = Number("${{ steps.semgrep.outputs.numInfo || '0' }}"); - - const scanMode = "${{ inputs.semgrep_scan_mode }}"; - const rules = "${{ inputs.semgrep_config }}"; - const failLevel = "${{ inputs.fail_severity }}"; - const filterMode = "${{ steps.normalized_settings.outputs.filterMode }}"; - const reporter = "${{ steps.normalized_settings.outputs.reporter }}"; - - let emoji = "✅"; - let status = "passed"; - - if (failLevel === "error") { - if (errors > 0) { emoji = "❌"; status = "failed (errors)"; } - else if (warns > 0) { emoji = "⚠️"; status = "warnings"; } - else if (info > 0) { emoji = "ℹ️"; status = "info only"; } - } - else if (failLevel === "warning") { - if (errors > 0 || warns > 0) { emoji = "❌"; status = "failed (errors/warnings)"; } - else if (info > 0) { emoji = "ℹ️"; status = "info only"; } - } - else if (failLevel === "info") { - if (total > 0) { emoji = "❌"; status = "failed (findings)"; } - } + const scanStatus = process.env.SCAN_STATUS; + const total = Number(process.env.TOTAL_FINDINGS || '0'); + const errors = Number(process.env.NUM_ERRORS || '0'); + const warns = Number(process.env.NUM_WARNINGS || '0'); + const info = Number(process.env.NUM_INFO || '0'); + const configSummary = process.env.CONFIG_SUMMARY || ''; + + const emoji = scanStatus === 'success' ? "✅" : "❌"; + const statusText = scanStatus === 'success' ? "passed" : "failed"; + + const heading = `## ${emoji} Semgrep Security Scan ${statusText.charAt(0).toUpperCase() + statusText.slice(1)}` const parts = []; if (errors > 0) parts.push(`${errors} error${errors === 1 ? "" : "s"}`); if (warns > 0) parts.push(`${warns} warning${warns === 1 ? "" : "s"}`); if (info > 0) parts.push(`${info} info`); - const findings = total > 0 ? 
`\n\n**Findings:** ${parts.join(", ")}` :
-              (scanMode === "diff" ? "\n\n🎉 No security issues found in changed code!" : "\n\n🎉 No security issues found!");
-
-            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
-
-            const body = `
-            ## ${emoji} Semgrep Security Scan ${status.charAt(0).toUpperCase() + status.slice(1)}
+            const findings = total > 0 ? `\n\n**Findings:** ${parts.join(", ")}` : "\n\n🎉 No security issues found!";
 
-            **Configuration:** \`${rules}\`
-            **Scan mode:** \`${scanMode}\`${scanMode === "diff" ? " (changed code only)" : ""}
-            **Severity threshold:** \`${failLevel}\`
-            **Review filter:** \`${filterMode}\`
-            **Reporter:** \`${reporter}\`${findings}
+            const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`;
+            const body = `${heading}
+            ${configSummary}
+            ${findings}
 
             [View run](${runUrl})
-
-            ---
             🤖 Powered by Semgrep + reviewdog`;
 
-            // Upsert by hidden marker to avoid duplicate comments
-            const { data: comments } = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: prNumber,
-              per_page: 100,
-            });
-
-            const existing = comments.find(c => c.body && c.body.includes(""));
-            if (existing) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: existing.id,
-                body,
-              });
-            } else {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: prNumber,
-                body,
-              });
-            }
+            core.setOutput('body', body);
+
+      - name: Upsert PR comment
+        if: ${{ github.event_name == 'pull_request' || steps.pr_check.outputs.pr_exists == 'true' }}
+        uses: ./.github/actions/upsert-pr-comment
+        with:
+          pr-number: ${{ steps.pr_check.outputs.pr_number }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-marker: semgrep-summary
+          body-content: ${{ steps.generate_body.outputs.body }}
 
       - name: Fail on findings at/above 
threshold if: ${{ steps.semgrep.outputs.scanStatus == 'failed' }} diff --git a/.vscode/settings.json b/.vscode/settings.json index dc3cf2e..ef7602b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -43,5 +43,5 @@ // Optional: Spell checker "cSpell.enabled": true, - "cSpell.words": ["opensesame", "reviewdog", "semgrep"] + "cSpell.words": ["opensesame", "reviewdog", "semgrep", "upserting"] } diff --git a/package.json b/package.json index 716eec4..2868b5b 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,7 @@ "format:check": "prettier --check './*.js' './*.mjs' './*.json' './*.md' 'scripts/**/*.js' '.github/actions/**/*.*'", "format:fix": "prettier --write './*.js' './*.mjs' './*.json' './*.md' 'scripts/**/*.js' '.github/actions/**/*.*'", "scan": "semgrep --config=p/ci --config=p/security-audit --config=p/javascript ./*.js ./*.mjs ./*.json scripts/ .github/actions/", - "ci": "npm run audit && npm run test && npm run lint:check && npm run format:check && npm run scan" + "check": "npm run audit && npm run test && npm run lint:check && npm run format:check && npm run scan" }, "dependencies": { "node-fetch": "2.7.0" From 6bd07bc1778e32520db77af6c00cce54bdc27aa6 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Tue, 23 Dec 2025 14:24:22 -0500 Subject: [PATCH 08/11] causing semgrep finding --- scripts/shared/run-semgrep.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/shared/run-semgrep.js b/scripts/shared/run-semgrep.js index 42026ae..6f22668 100644 --- a/scripts/shared/run-semgrep.js +++ b/scripts/shared/run-semgrep.js @@ -119,7 +119,7 @@ function constructSemgrepCommand(baseline, config, resultsFileName) { function runSemgrepAndCapture(semgrepCmd, resultsFileName) { // we are trusting internal engineers not to pass untrusted arguments to semgrep GHA - // nosemgrep: javascript.lang.security.detect-child-process.detect-child-process + const result = spawnSync(semgrepCmd, { encoding: 'utf-8', shell: true }); if (result.error) { 
From ff19f64938390f55c24c926f7e1f288ed3962689 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Tue, 23 Dec 2025 14:31:21 -0500 Subject: [PATCH 09/11] small fixes for validation --- ...ernal_on_pr_validate_component_version.yml | 96 +++++++------------ .github/workflows/run_semgrep_scan.yml | 10 +- .../validate-version-labels/index.js | 11 ++- 3 files changed, 45 insertions(+), 72 deletions(-) diff --git a/.github/workflows/internal_on_pr_validate_component_version.yml b/.github/workflows/internal_on_pr_validate_component_version.yml index 5885230..ccfcba0 100644 --- a/.github/workflows/internal_on_pr_validate_component_version.yml +++ b/.github/workflows/internal_on_pr_validate_component_version.yml @@ -68,93 +68,61 @@ jobs: run: | echo "## Validation Outcome" >> "$GITHUB_STEP_SUMMARY" - if [ "${{ env.NO_LABELS }}" === "true" ]; then + if [ "${{ env.NO_LABELS }}" == "true" ]; then echo "❌ No labels found on the PR. Add at least one version label." >> "$GITHUB_STEP_SUMMARY" fi - if [ "${{ env.IS_VALID }}" === "false" ]; then + if [ "${{ env.IS_VALID }}" == "false" ]; then echo "❌ Version label validation failed." 
>> "$GITHUB_STEP_SUMMARY" if [ -n "${{ env.VALIDATION_MESSAGE }}" ]; then echo "${{ env.VALIDATION_MESSAGE }}" >> "$GITHUB_STEP_SUMMARY" echo "" >> "$GITHUB_STEP_SUMMARY" fi if [ -n "${{ env.INVALID_VERSION_LABELS }}" ]; then - echo "Invalid version labels: ${{ env.INVALID_VERSION_LABELS }}" >> "$GITHUB_STEP_SUMMARY" + echo "**Invalid version labels**: ${{ env.INVALID_VERSION_LABELS }}" >> "$GITHUB_STEP_SUMMARY" + fi + if [ -n "${{ env.INVALID_COMPONENTS }}" ]; then + echo "**Nonexistent/Invalid components**: ${{ env.INVALID_COMPONENTS }}" >> "$GITHUB_STEP_SUMMARY" + fi + if [ -n "${{ env.MISSING_CHANGELOGS }}" ]; then + echo "**Components missing changelogs/entries**: ${{ env.MISSING_CHANGELOGS }}" >> "$GITHUB_STEP_SUMMARY" fi else - echo -e "✅ Version label validation passed\n" >> "$GITHUB_STEP_SUMMARY" + echo "✅ Version label validation passed" >> "$GITHUB_STEP_SUMMARY" fi - echo "**Untracked Version**: ${{ env.HAS_UNTRACKED_VERSION }}" >> "$GITHUB_STEP_SUMMARY" + echo -e "\n**Untracked Version**: ${{ env.HAS_UNTRACKED_VERSION }}" >> "$GITHUB_STEP_SUMMARY" if [ -n "${{ env.COMPONENT_VERSION_LABELS }}" ]; then echo "**Component Versions**: ${{ env.COMPONENT_VERSION_LABELS }}" >> "$GITHUB_STEP_SUMMARY" fi - - name: Tags Job Summary + - name: Write Tags Job Summary + id: tags_summary env: HAS_UNTRACKED_VERSION: ${{ steps.validate.outputs.hasUntrackedVersion }} VALID_COMPONENTS: ${{ steps.validate.outputs.validComponents }} run: | - echo "## Tags" >> "$GITHUB_STEP_SUMMARY" - if [ "${{ env.HAS_UNTRACKED_VERSION }}" = "true" ]; then - echo "- No Tags will be created on main" >> "$GITHUB_STEP_SUMMARY" + summary="## Tags\n" + if [ "${HAS_UNTRACKED_VERSION}" = "true" ]; then + summary+="- No Tags will be created on main" else - echo -e "The following tags will be created on main after merge\n" >> "$GITHUB_STEP_SUMMARY" - echo "${{ env.VALID_COMPONENTS }}" | tr ',' '\n' | while read -r component_version; do - echo "🏷️ \`$component_version\`" >> "$GITHUB_STEP_SUMMARY" + 
summary+="The following tags will be created on main after merge\n\n" + IFS=',' read -ra components <<< "${VALID_COMPONENTS}" + for component_version in "${components[@]}"; do + summary+="🏷️ \\`$component_version\\`\n" done + # Remove trailing newline + summary="${summary%\\n}" fi - - name: Tags PR Summary - uses: actions/github-script@v7 - env: - PR_NUMBER: ${{ github.event.pull_request.number }} - HAS_UNTRACKED_VERSION: ${{ steps.validate.outputs.hasUntrackedVersion }} - VALID_COMPONENTS: ${{ steps.validate.outputs.validComponents }} + echo -e "$summary" >> "$GITHUB_STEP_SUMMARY" + + summary_escaped="${summary//$'\n'/\\n}" + echo "summary=$summary_escaped" >> "$GITHUB_OUTPUT" + + - name: Write Tags to PR Comment + uses: ./.github/actions/upsert-pr-comment with: + pr-number: ${{ github.event.pull_request.number }} github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const prNumber = parseInt(process.env.PR_NUMBER, 10); - const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; - let summary = ''; - - if (process.env.HAS_UNTRACKED_VERSION === 'true') { - summary += '- No Tags will be created on main after merge.\n'; - } else { - summary += 'The following tags will be created on main after merge:\n\n'; - const components = process.env.VALID_COMPONENTS.split(','); - for (const component_version of components) { - summary += `🏷️ \`${component_version.trim()}\`\n`; - } - } - - const body = ` - ## Version Tags - ${summary.trim()} - - [View run](${runUrl}) - `; - - // Upsert by hidden marker to avoid duplicate comments - const { data: comments } = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - per_page: 100, - }); - - const existing = comments.find(c => c.body && c.body.includes("")); - if (existing) { - await github.rest.issues.updateComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: existing.id, - body, - }); 
- } else { - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - body, - }); - } + comment-marker: version-summary + body-content: ${{ steps.tags_summary.outputs.summary }} diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index f9032b2..86b6a79 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -239,7 +239,7 @@ jobs: const errors = Number(process.env.NUM_ERRORS || '0'); const warns = Number(process.env.NUM_WARNINGS || '0'); const info = Number(process.env.NUM_INFO || '0'); - const configSummary = process.env.CONFIG_SUMMARY || ''; + const configSummary = (process.env.CONFIG_SUMMARY || '').replace(/\\n/g, '\n'); const emoji = scanStatus === 'success' ? "✅" : "❌"; const statusText = scanStatus === 'success' ? "passed" : "failed"; @@ -251,13 +251,15 @@ jobs: if (warns > 0) parts.push(`${warns} warning${warns === 1 ? "" : "s"}`); if (info > 0) parts.push(`${info} info`); - const findings = total > 0 ? `\n\n**Findings:** ${parts.join(", ")}` : "\n\n🎉 No security issues found!"; + const findings = total > 0 ? 
`\n\n${parts.join(", ")}` : "\n\n🎉 No security issues found!"; const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`; const body = `${heading} ${configSummary} + ### Findings ${findings} + [View run](${runUrl}) 🤖 Powered by Semgrep + reviewdog`; @@ -265,7 +267,7 @@ jobs: - name: Upsert PR comment if: ${{ github.event_name == 'pull_request' || steps.pr_check.outputs.pr_exists == 'true' }} - uses: ./.github/actions/pr-comment-upsert + uses: ./.github/actions/upsert-pr-comment with: pr-number: ${{ steps.pr_check.outputs.pr_number }} github-token: ${{ secrets.GITHUB_TOKEN }} @@ -273,7 +275,7 @@ jobs: body-content: ${{ steps.generate_body.outputs.body }} - name: Fail on findings at/above threshold - if: ${{ steps.semgrep.outputs.scanStatus == 'failed' }} + if: ${{ steps.semgrep.outputs.scanStatus == 'failure' }} env: FAIL_SEVERITY: ${{ inputs.fail_severity }} run: | diff --git a/scripts/internal-ci/validate-version-labels/index.js b/scripts/internal-ci/validate-version-labels/index.js index 6fd4162..77bae1b 100644 --- a/scripts/internal-ci/validate-version-labels/index.js +++ b/scripts/internal-ci/validate-version-labels/index.js @@ -205,11 +205,14 @@ if (require.main === module) { const githubOutput = process.env.GITHUB_OUTPUT; if (githubOutput) { fs.appendFileSync(githubOutput, `isValid=${isValid}\n`); - fs.appendFileSync(githubOutput, `invalidVersionLabels=${invalidVersionLabels.join(',')}\n`); + fs.appendFileSync(githubOutput, `invalidVersionLabels=${invalidVersionLabels.join(', ')}\n`); fs.appendFileSync(githubOutput, `hasUntrackedVersion=${hasUntrackedVersion}\n`); - fs.appendFileSync(githubOutput, `componentVersionLabels=${componentVersionLabels.join(',')}\n`); - fs.appendFileSync(githubOutput, `invalidComponents=${invalidComponents.join(',')}\n`); - fs.appendFileSync(githubOutput, `missingChangelogs=${missingChangelogs.join(',')}\n`); + fs.appendFileSync( + githubOutput, + 
`componentVersionLabels=${componentVersionLabels.join(', ')}\n` + ); + fs.appendFileSync(githubOutput, `invalidComponents=${invalidComponents.join(', ')}\n`); + fs.appendFileSync(githubOutput, `missingChangelogs=${missingChangelogs.join(', ')}\n`); fs.appendFileSync( githubOutput, `validationMessage=${validationMessage.replace(/\n/g, '\\n')}\n` From 4dfd1f216dfe22dfe28bab211016f65d76d413f2 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Tue, 23 Dec 2025 14:54:56 -0500 Subject: [PATCH 10/11] remove finding --- scripts/shared/run-semgrep.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/shared/run-semgrep.js b/scripts/shared/run-semgrep.js index 6f22668..42026ae 100644 --- a/scripts/shared/run-semgrep.js +++ b/scripts/shared/run-semgrep.js @@ -119,7 +119,7 @@ function constructSemgrepCommand(baseline, config, resultsFileName) { function runSemgrepAndCapture(semgrepCmd, resultsFileName) { // we are trusting internal engineers not to pass untrusted arguments to semgrep GHA - + // nosemgrep: javascript.lang.security.detect-child-process.detect-child-process const result = spawnSync(semgrepCmd, { encoding: 'utf-8', shell: true }); if (result.error) { From 9af23952fb80dcc417c34f5f474a31cf99c78471 Mon Sep 17 00:00:00 2001 From: ssvoss Date: Tue, 23 Dec 2025 17:06:27 -0500 Subject: [PATCH 11/11] add support for workflow versioning --- .../workflows/CHANGELOGS/run_semgrep_scan.md | 17 +++++ .github/workflows/README.md | 47 +++++++++++-- .github/workflows/READMES/run_semgrep_scan.md | 69 +++++++++++++++++++ ...ernal_on_pr_validate_component_version.yml | 5 +- .github/workflows/run_semgrep_scan.yml | 1 + README.md | 23 ++++--- VERSIONING.md | 26 ++++--- .../validate-version-labels/index.js | 26 +++++-- .../index.unit.test.js | 66 +++++++++++++----- 9 files changed, 231 insertions(+), 49 deletions(-) create mode 100644 .github/workflows/CHANGELOGS/run_semgrep_scan.md create mode 100644 .github/workflows/READMES/run_semgrep_scan.md diff --git 
a/.github/workflows/CHANGELOGS/run_semgrep_scan.md b/.github/workflows/CHANGELOGS/run_semgrep_scan.md new file mode 100644 index 0000000..7e432a7 --- /dev/null +++ b/.github/workflows/CHANGELOGS/run_semgrep_scan.md @@ -0,0 +1,17 @@ +# Run Semgrep Scan Workflow Changelog + +All notable changes to the **run_semgrep_scan** callable workflow are documented in this file. + +## 1.0.0 + +### Added + +- First official release of the `run_semgrep_scan` workflow. +- Supports both full and diff/baseline scan modes. +- Configurable via `workflow_call` inputs for rulesets, targets, fail severity, and more. +- Integrates with PRs and pushes, posting findings to Actions UI, Job Summary, PR comments, and Reviewdog. +- Outputs scan results, config summary, and normalized baseline for downstream jobs. +- Replaces previous usage under the `legacy-stable` tag with a versioned, documented workflow. + - Refactored code for maintainability. + - Added support for specifying Semgrep version, multiple rulesets, specific targets, and extra arguments. + - Note: Some input defaults have changed and may be breaking for consumers. diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 7384500..72770b1 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -1,9 +1,44 @@ -# GitHub Action Workflows +# GitHub Workflows Directory -## Naming Convention +This directory contains externally reusable and internal, project-specific GitHub Actions workflows for this repository. -- **Reusable workflows** (those that expose `workflow_call`) are treated as **products** of this repo. - Their filenames should describe what they do, e.g. `deploy_environment.yml`, `tf_apply.yml`. +## Internal Workflows -- **Internal workflows** (used only by this repository and never exposed via `workflow_call`) - must be prefixed with: `internal_` +Internal workflows are used by this repository for the pipeline of its products. 
They must never expose `workflow_call` and must be prefixed with `internal_`
+
+## Reusable Workflows
+
+This repository exposes externally reusable workflows, those that expose `workflow_call`. These workflows are treated as **products** of this repo.
+
+### Requirements
+
+- Only `.yml` files are considered valid workflow definitions.
+- Workflow, changelog, and README file names must match the workflow name (excluding the `.yml` extension).
+- CHANGELOGs and READMEs must be kept up to date with any changes to the workflow.
+
+- **Workflow YAML files:**
+  - Workflow definitions must be placed directly in this directory.
+  - Their filenames should describe what they do, e.g. `deploy_environment.yml`, `tf_apply.yml`.
+  - File name: `{workflow_name}.yml`
+  - Example: `run_semgrep_scan.yml`
+
+- **Changelog files:**
+  - Each workflow must have a corresponding changelog documenting all notable changes.
+  - Path: `CHANGELOGS/{workflow_name}.md`
+  - Example: `CHANGELOGS/run_semgrep_scan.md`
+
+- **README files:**
+  - Each workflow should have a README describing its purpose, usage, inputs, and outputs.
+  - Path: `READMES/{workflow_name}.md`
+  - Example: `READMES/run_semgrep_scan.md`
+
+### Example Structure
+
+```text
+.github/workflows/
+  run_semgrep_scan.yml
+  CHANGELOGS/
+    run_semgrep_scan.md
+  READMES/
+    run_semgrep_scan.md
+```
diff --git a/.github/workflows/READMES/run_semgrep_scan.md b/.github/workflows/READMES/run_semgrep_scan.md
new file mode 100644
index 0000000..a836408
--- /dev/null
+++ b/.github/workflows/READMES/run_semgrep_scan.md
@@ -0,0 +1,69 @@
+# Run Semgrep Scan
+
+This workflow runs [Semgrep](https://semgrep.dev/) on your repository to perform static code analysis and report security issues, bugs, and code quality problems.
+
+## Purpose
+
+The `run_semgrep_scan` workflow is designed to be reusable and configurable for different scan scenarios.
It supports both full and differential scans, integrates with pushes and PRs, and can be customized for different rule sets, targets, and failure thresholds. The workflow is intended to: + +- Enforce code security and quality standards +- Catch issues early in the development lifecycle +- Provide actionable feedback directly in GitHub + +## Usage + +This workflow is intended to be called by other workflows using `workflow_call`. + +### Inputs + +You can customize the scan by providing the following inputs: + +| Input Name | Type | Default | Description | +|-------------------------|---------|-------------------|-------------------------------------------------------------------| +| `commit_identifier` | string | (required) | Commit SHA or ref to scan | +| `cancel_in_progress` | boolean | true | Cancel in-progress run for the same ref | +| `semgrep_config` | string | p/default | Semgrep rulesets to use (YAML array, newline, or space-separated) | +| `semgrep_targets` | string | . | Files/directories to scan | +| `extra_args` | string | '' | Additional arguments to pass to Semgrep | +| `semgrep_version` | string | '' | Semgrep version to install | +| `fail_severity` | string | error | Minimum severity to fail the workflow (`error`, `warning`, `info`)| +| `semgrep_scan_mode` | string | full | Scan mode: `full`, `diff`, or `baseline` | +| `baseline_ref` | string | origin/main | Ref for diff/baseline scans | +| `reviewdog_filter_mode` | string | nofilter | Reviewdog display filter: `added`, `diff_context`, `nofilter` | +| `reviewdog_reporter` | string | github-pr-review | Reviewdog reporter type | + +See the workflow file for full input documentation and defaults. + +### How it works + +1. Checks out the code at the specified commit or ref. +2. Checks for an open PR and normalizes settings if one is found. +3. Installs dependencies and Semgrep (customizable version). +4. Runs Semgrep with the provided configuration and scan mode. +5. 
Summarizes findings and posts results to the Actions UI, Job Summary, and if applicable, PR comments and Reviewdog review. +6. Fails the workflow if findings meet or exceed the configured severity threshold. + +## Outputs + +The workflow provides the following outputs for use in downstream jobs or for reporting: + +- `total_findings`: Total number of findings +- `error_count`: Number of ERROR findings +- `warning_count`: Number of WARNING findings +- `info_count`: Number of INFO findings +- `scan_status`: `success` or `failure` based on findings and fail threshold +- `scan_md_summary`: Markdown summary of findings +- `config_md_summary`: Markdown summary of the config settings used +- `normalized_baseline`: The resolved baseline ref used for diff/baseline scans + +Findings are also posted as PR comments and Reviewdog annotations (if enabled), and a summary is written to the GitHub Actions job summary. + +## Contribution + +- Update the workflow file and related javascript file +- Update the README and CHANGELOG +- Create a PR and set a version label following [versioning instructions](../../../VERSIONING.md) + +## References + +- [Semgrep Documentation](https://semgrep.dev/docs/) diff --git a/.github/workflows/internal_on_pr_validate_component_version.yml b/.github/workflows/internal_on_pr_validate_component_version.yml index ccfcba0..9fed760 100644 --- a/.github/workflows/internal_on_pr_validate_component_version.yml +++ b/.github/workflows/internal_on_pr_validate_component_version.yml @@ -101,13 +101,14 @@ jobs: HAS_UNTRACKED_VERSION: ${{ steps.validate.outputs.hasUntrackedVersion }} VALID_COMPONENTS: ${{ steps.validate.outputs.validComponents }} run: | + set -x summary="## Tags\n" if [ "${HAS_UNTRACKED_VERSION}" = "true" ]; then summary+="- No Tags will be created on main" else summary+="The following tags will be created on main after merge\n\n" - IFS=',' read -ra components <<< "${VALID_COMPONENTS}" - for component_version in "${components[@]}"; do + IFS=',' + 
for component_version in $VALID_COMPONENTS; do summary+="🏷️ \\`$component_version\\`\n" done # Remove trailing newline diff --git a/.github/workflows/run_semgrep_scan.yml b/.github/workflows/run_semgrep_scan.yml index 86b6a79..00dfb89 100644 --- a/.github/workflows/run_semgrep_scan.yml +++ b/.github/workflows/run_semgrep_scan.yml @@ -88,6 +88,7 @@ jobs: info_count: ${{ steps.semgrep.outputs.numInfo }} scan_status: ${{ steps.semgrep.outputs.scanStatus }} scan_md_summary: ${{ steps.semgrep.outputs.scanFindings }} + config_md_summary: ${{ steps.semgrep.outputs.configSummary }} normalized_baseline: ${{ steps.semgrep.outputs.normalizedBaseline }} steps: diff --git a/README.md b/README.md index 98d5d92..0f68366 100644 --- a/README.md +++ b/README.md @@ -68,13 +68,23 @@ To get started: - install semgrep globally `brew install semgrep` - install project dependencies `npm ci` +### 🏷️ Versioning Policy Overview + +A complete policy is defined in [VERSIONING.md](VERSIONING.md). Highlights: + +- Versioned components use namespaced tags: actions/{component-name}/vX.Y.Z +- PRs modifying a component must include a version label +- version:untracked is allowed for non-behavior changes +- Version/changelog validation runs automatically on PRs +- Tags are automatically created when changes merge into main + ### 🚧 Reusable Workflows (Work in Progress) The Core Services team is moving away from composite actions and building **reusable workflows** in this repository. -### Current scope +At this stage, the reusable workflows support **Terraform-only projects**. They are still evolving and are not yet versioned. While they can be consumed by other repositories, their API is not considered stable. Their contracts remain subject to change until the versioning model expands to reusable workflows. These workflows should be referenced by the `legacy-stable` tag. This allows us to make changes to bring the workflows under versioning safely. 
-At this stage, the reusable workflows support **Terraform-only projects**. They are still evolving and are not yet versioned. While they can be consumed by other repositories, their API is not considered stable. Their contracts remain subject to change until the versioning model expands to reusable workflows. +You can check the `./github/workflows/CHANGELOGS` directory to know which workflows are under versioning and you should reference them by their namespaced version tags. ### 🧩 Versioned Composite Actions @@ -92,12 +102,3 @@ Each action: - Requires version labels on PRs (e.g., version:pr-open-check/1.0.0) Versioning ensures that consumers can safely upgrade without unexpected breaking changes. - -🏷️ Versioning Policy Overview - -- A complete policy is defined in VERSIONING.md. Highlights: -- Versioned components use namespaced tags: actions/{component-name}/vX.Y.Z -- PRs modifying a component must include a version label -- version:untracked is allowed for non-behavior changes -- Version/changelog validation runs automatically on PRs -- Tags are automatically created when changes merge into main diff --git a/VERSIONING.md b/VERSIONING.md index 3b29476..358bde5 100644 --- a/VERSIONING.md +++ b/VERSIONING.md @@ -7,9 +7,6 @@ This repository contains **reusable GitHub Actions and reusable workflows** that **Current** - Composite Actions that live under `./github/actions` - -**Future** - - Reusable workflows that live under `./github/workflows` **Excluded** @@ -33,14 +30,15 @@ Versioned components use namespaced semantic version tags so multiple components **Format** ```text -action//vX.Y.Z +actions//vX.Y.Z ``` **Examples** ```text -action/pr-open-check/v1.0.0 -action/pr-upsert-comment/v0.2.3 +actions/pr-open-check/2.0.0 +actions/upsert-pr-comment/1.0.0 +workflows/run_semgrep_scan/1.0.0 ``` **Semantic Versioning** @@ -112,15 +110,23 @@ A PR cannot merge until version validation passes. Each versioned component must have its own changelog. 
The changelog is the authoritative source of truth for released versions of that component. **Located At** +Action changelogs live in their component directory under `.github/actions/`. ```text .github/actions//CHANGELOG.md ``` +Workflow changelogs live in a component file under the `.github/workflows/CHANGELOGS` directory + +```text +.github/workflows/CHANGELOGS/.md +``` + **Examples** ```text .github/actions/pr-open-check/CHANGELOG.md +.github/workflows/CHANGELOGS/run_semgrep_scan.md ``` ### Required Format @@ -151,15 +157,17 @@ Minimum requirements: - The header must contain `## X.Y.Z` exactly (this is what validation looks for). - The PR must add or update an entry for the version used in the label - (version:/X.Y.Z). + (version:{component-name}/X.Y.Z). The rest of the content (sections and bullets) is for humans, but strongly recommended. ### Interaction with Labels For versioned releases, A label like `version:actions/pr-open-check/1.2.0` -requires that CHANGELOG.md under `.github/actions/pr-open-check/` contain a `## 1.2.0` entry. +requires that the `CHANGELOG.md` file under `.github/actions/pr-open-check/` contain a `## 1.2.0` entry. + +A label like `version:workflows/run_semgrep_scan/1.0.0` requires a `run_semgrep_scan.md` file under `.github/workflows/CHANGELOGS` containing a `## 1.0.0` entry. Validation will fail if: -- A vX.Y.Z label is present but `## X.Y.Z` does not appear in the matching component’s CHANGELOG.md. 
+- A X.Y.Z label is present but `## X.Y.Z` does not appear in the matching component’s CHANGELOG
diff --git a/scripts/internal-ci/validate-version-labels/index.js b/scripts/internal-ci/validate-version-labels/index.js
index 77bae1b..0938e01 100644
--- a/scripts/internal-ci/validate-version-labels/index.js
+++ b/scripts/internal-ci/validate-version-labels/index.js
@@ -80,13 +80,18 @@ function getInvalidComponents(componentVersionMap) {
   let invalidComponents = [];
 
   for (const path of Object.keys(componentVersionMap)) {
-    if (!path.startsWith('actions/')) {
-      invalidComponents.push(path);
-    } else {
+    if (path.startsWith('actions/')) {
       const expectedDir = `.github/${path}`;
       if (!fs.existsSync(expectedDir) || !fs.lstatSync(expectedDir).isDirectory()) {
         invalidComponents.push(path);
       }
+    } else if (path.startsWith('workflows/')) {
+      const expectedFile = `.github/${path}.yml`;
+      if (!fs.existsSync(expectedFile) || !fs.lstatSync(expectedFile).isFile()) {
+        invalidComponents.push(path);
+      }
+    } else {
+      invalidComponents.push(path);
     }
   }
 
@@ -98,8 +103,19 @@ function getMissingChangelogs(componentVersionMap) {
   let missingChangelogs = [];
 
   for (const path of Object.keys(componentVersionMap)) {
-    // Expected changelog path: .github//CHANGELOG.md
-    const changelogPath = `.github/${path}/CHANGELOG.md`;
+    let changelogPath;
+    if (path.startsWith('actions/')) {
+      // Expected changelog path: .github//CHANGELOG.md
+      changelogPath = `.github/${path}/CHANGELOG.md`;
+    } else if (path.startsWith('workflows/')) {
+      // Expected changelog path: .github/workflows/CHANGELOGS/.md
+      const workflowName = path.replace(/^workflows\//, '');
+      changelogPath = `.github/workflows/CHANGELOGS/${workflowName}.md`;
+    } else {
+      // Unknown component type; flag it as missing a changelog
+      missingChangelogs.push(path);
+      continue;
+    }
     if (!fs.existsSync(changelogPath)) {
       missingChangelogs.push(path);
       continue;
diff --git a/scripts/internal-ci/validate-version-labels/index.unit.test.js
b/scripts/internal-ci/validate-version-labels/index.unit.test.js index aeb2daa..d048fa8 100644 --- a/scripts/internal-ci/validate-version-labels/index.unit.test.js +++ b/scripts/internal-ci/validate-version-labels/index.unit.test.js @@ -96,22 +96,23 @@ describe('getInvalidComponents', () => { let lstatSyncMock; // Simulate a static directory structure - const directorySet = new Set([ - '.github/actions/valid-component', - '.github/actions/not-a-directory', - ]); - const fileTypeMap = { - '.github/actions/valid-component': true, - '.github/actions/not-a-directory': false, + const pathTypeMap = { + '.github/actions/valid-component': 'directory', // valid action directory + '.github/workflows/valid-workflow.yml': 'file', // valid workflow file + '.github/actions/not-a-directory': 'file', // invalid action, extensionless file instead of directory + '.github/workflows/not-a-file': 'directory', // invalid workflow - subdirectory instead of file + '.github/workflows/not-yaml.txt': 'file', // invalid workflow - wrong file type + '.github/foo/bar.yml': 'file', // invalid type }; beforeAll(() => { existsSyncMock = jest .spyOn(fs, 'existsSync') - .mockImplementation(path => directorySet.has(path)); - lstatSyncMock = jest - .spyOn(fs, 'lstatSync') - .mockImplementation(path => ({ isDirectory: () => fileTypeMap[path] === true })); + .mockImplementation(path => Object.prototype.hasOwnProperty.call(pathTypeMap, path)); + lstatSyncMock = jest.spyOn(fs, 'lstatSync').mockImplementation(path => ({ + isDirectory: () => pathTypeMap[path] === 'directory', + isFile: () => pathTypeMap[path] === 'file', + })); }); afterAll(() => { @@ -125,20 +126,28 @@ describe('getInvalidComponents', () => { expect(invalidComponents).toEqual([]); }); - it('should return paths not starting with actions/', () => { + it('should return paths not starting with actions/ or workflows/', () => { const componentVersionMap = { - 'dir/component': '1.0.0', + 'actions/valid-component': '1.0.0', + 
'workflows/valid-workflow': '1.0.0', + 'foo/bar': '1.0.0', }; const invalidComponents = getInvalidComponents(componentVersionMap); - expect(invalidComponents).toEqual(['dir/component']); + expect(invalidComponents).toEqual(['foo/bar']); }); - it('should return paths where the expected directory does not exist', () => { + it('should return paths where the expected path does not exist', () => { const componentVersionMap = { 'actions/nonexistent-component': '1.0.0', + 'workflows/nonexistent-workflow': '1.0.0', + 'actions/valid-component': '1.0.0', + 'workflows/valid-workflow': '1.0.0', }; const invalidComponents = getInvalidComponents(componentVersionMap); - expect(invalidComponents).toEqual(['actions/nonexistent-component']); + expect(invalidComponents).toEqual([ + 'actions/nonexistent-component', + 'workflows/nonexistent-workflow', + ]); }); it('should return paths where the expected directory is not a directory', () => { @@ -152,10 +161,20 @@ describe('getInvalidComponents', () => { it('should return an empty array when all components are valid', () => { const componentVersionMap = { 'actions/valid-component': '1.0.0', + 'workflows/valid-workflow': '1.0.0', }; const invalidComponents = getInvalidComponents(componentVersionMap); expect(invalidComponents).toEqual([]); }); + + it('should return paths where the expected workflow is not a YAML file', () => { + const componentVersionMap = { + 'workflows/not-a-file': '1.0.0', + 'workflows/not-yaml': '1.0.0', + }; + const invalidComponents = getInvalidComponents(componentVersionMap); + expect(invalidComponents).toEqual(['workflows/not-a-file', 'workflows/not-yaml']); + }); }); // It is assumed that path is correctly formatted when this function is called (dir1/dir2) @@ -171,10 +190,17 @@ describe('getMissingChangelogs', () => { '.github/actions/component-with-changelog/CHANGELOG.md', '.github/actions/component-without-changelog', '.github/actions/component-missing-changelog-entry/CHANGELOG.md', + 
'.github/workflows/CHANGELOGS/workflow-with-changelog.md', + '.github/workflows/CHANGELOGS/workflow-missing-changelog-entry.md', + '.github/workflows/CHANGELOGS/workflow-without-changelog', + '.github/workflows/wrong_location_CHANGELOG.md', ]); const fileContentMap = { '.github/actions/component-with-changelog/CHANGELOG.md': '## 1.0.0\n- Initial release', '.github/actions/component-missing-changelog-entry/CHANGELOG.md': '', + '.github/workflows/CHANGELOGS/workflow-with-changelog.md': '## 2.0.0\n- Major update', + '.github/workflows/CHANGELOGS/workflow-missing-changelog-entry.md': '', + '.github/workflows/wrong_location_CHANGELOG.md': '## 1.0.0\n- Initial release', }; beforeAll(() => { @@ -200,17 +226,25 @@ describe('getMissingChangelogs', () => { 'actions/component-with-changelog': '1.0.0', 'actions/component-without-changelog': '1.0.0', 'actions/component-missing-changelog-entry': '1.0.0', + 'workflows/workflow-with-changelog': '2.0.0', + 'workflows/workflow-without-changelog': '1.0.0', + 'workflows/workflow-missing-changelog-entry': '2.0.0', + 'workflows/wrong_location': '1.0.0', }; const missingChangelogs = getMissingChangelogs(componentVersionMap); expect(missingChangelogs).toEqual([ 'actions/component-without-changelog', 'actions/component-missing-changelog-entry', + 'workflows/workflow-without-changelog', + 'workflows/workflow-missing-changelog-entry', + 'workflows/wrong_location', ]); }); it('should return an empty array when all changelogs are present and correct', () => { const componentVersionMap = { 'actions/component-with-changelog': '1.0.0', + 'workflows/workflow-with-changelog': '2.0.0', }; const missingChangelogs = getMissingChangelogs(componentVersionMap); expect(missingChangelogs).toEqual([]);