diff --git a/.github/workflows/bench-command.yml b/.github/workflows/bench-command.yml
new file mode 100644
index 0000000..6756703
--- /dev/null
+++ b/.github/workflows/bench-command.yml
@@ -0,0 +1,592 @@
+name: Benchmark Command
+# Trigger on-demand benchmarks via PR comments
+# Usage: /bench <ref1> <ref2> [iterations] [sizes]
+# Examples:
+#   /bench main v0.13.0
+#   /bench abc123 def456 100 1000,5000,10000
+# Only the repository owner can trigger this command
+
+on:
+  issue_comment:
+    types: [created]
+
+# Prevent concurrent benchmark runs on the same PR
+concurrency:
+  group: bench-${{ github.event.issue.number }}
+  cancel-in-progress: true
+
+jobs:
+  check-permission:
+    name: Check Command Permission
+    # Only run on PR comments (not regular issues)
+    if: |
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/bench ')
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    outputs:
+      authorized: ${{ steps.check.outputs.authorized }}
+      ref1: ${{ steps.parse.outputs.ref1 }}
+      ref2: ${{ steps.parse.outputs.ref2 }}
+      iterations: ${{ steps.parse.outputs.iterations }}
+      sizes: ${{ steps.parse.outputs.sizes }}
+    steps:
+      - name: Check if commenter is repo owner
+        id: check
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const commenter = context.payload.comment.user.login;
+            const owner = context.payload.repository.owner.login;
+            const isOwner = commenter === owner;
+
+            console.log(`Commenter: ${commenter}`);
+            console.log(`Repository owner: ${owner}`);
+            console.log(`Is owner: ${isOwner}`);
+
+            if (!isOwner) {
+              await github.rest.reactions.createForIssueComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: context.payload.comment.id,
+                content: '-1'
+              });
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: `❌ **Permission denied**: Only @${owner} can trigger benchmark comparisons.`
+              });
+            } else {
+              await github.rest.reactions.createForIssueComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: context.payload.comment.id,
+                content: 'eyes'
+              });
+            }
+
+            core.setOutput('authorized', isOwner);
+
+      - name: Parse benchmark command
+        id: parse
+        if: steps.check.outputs.authorized == 'true'
+        continue-on-error: true
+        run: |
+          set -euo pipefail
+          COMMENT="${{ github.event.comment.body }}"
+
+          # Parse command: /bench ref1 ref2 [iterations] [sizes]
+          # Remove the /bench prefix and extract parameters
+          PARAMS=$(echo "$COMMENT" | sed 's|^/bench[[:space:]]*||')
+
+          # Extract parameters
+          REF1=$(echo "$PARAMS" | awk '{print $1}')
+          REF2=$(echo "$PARAMS" | awk '{print $2}')
+          ITERATIONS=$(echo "$PARAMS" | awk '{print $3}')
+          SIZES=$(echo "$PARAMS" | awk '{print $4}')
+
+          # Validate required parameters
+          if [ -z "$REF1" ] || [ -z "$REF2" ]; then
+            echo "error=Invalid format. Missing required parameters." >> $GITHUB_OUTPUT
+            echo "parse_failed=true" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          # Set defaults for optional parameters
+          if [ -z "$ITERATIONS" ] || ! [[ "$ITERATIONS" =~ ^[0-9]+$ ]]; then
+            ITERATIONS=100
+          fi
+
+          if [ -z "$SIZES" ]; then
+            SIZES="1000,5000,10000"
+          fi
+
+          # Validate sizes format (comma-separated numbers)
+          if ! echo "$SIZES" | grep -qE '^[0-9]+(,[0-9]+)*$'; then
echo "$SIZES" | grep -qE '^[0-9]+(,[0-9]+)*$'; then + echo "error=Invalid sizes format: $SIZES" >> $GITHUB_OUTPUT + echo "parse_failed=true" >> $GITHUB_OUTPUT + exit 1 + fi + + echo "ref1=$REF1" >> $GITHUB_OUTPUT + echo "ref2=$REF2" >> $GITHUB_OUTPUT + echo "iterations=$ITERATIONS" >> $GITHUB_OUTPUT + echo "sizes=$SIZES" >> $GITHUB_OUTPUT + echo "parse_failed=false" >> $GITHUB_OUTPUT + + echo "Parsed parameters:" + echo " ref1: $REF1" + echo " ref2: $REF2" + echo " iterations: $ITERATIONS" + echo " sizes: $SIZES" + + - name: Post parse error + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'failure' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'confused' + }); + + const errorMessage = [ + '❌ **Invalid command format**', + '', + '**Usage:** `/bench [iterations] [sizes]`', + '', + '**Examples:**', + '```', + '/bench main v0.13.0', + '/bench abc123 def456 100 1000,5000,10000', + '```', + '', + '**Parameters:**', + '- `ref1` (required): Baseline git reference', + '- `ref2` (required): Current git reference', + '- `iterations` (optional): Number of iterations (default: 100)', + '- `sizes` (optional): Comma-separated sizes (default: 1000,5000,10000)' + ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: errorMessage + }); + + - name: Post acknowledgment + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'success' + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ steps.parse.outputs.ref1 }}'; + const ref2 = '${{ steps.parse.outputs.ref2 }}'; + const iterations = '${{ steps.parse.outputs.iterations }}'; + const sizes = '${{ steps.parse.outputs.sizes }}'; + + const message = [ + '🚀 **Benchmark comparison started**', + '', + '**Comparing:**', + `- **Baseline**: \`${ref1}\``, + `- **Current**: \`${ref2}\``, + '', + '**Parameters:**', + `- **Iterations**: ${iterations}`, + `- **Sizes**: ${sizes}`, + '', + 'Results will be posted here when complete...' 
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: message
+            });
+
+  run-benchmarks:
+    name: Run Benchmark Comparison
+    needs: check-permission
+    if: needs.check-permission.outputs.authorized == 'true' && needs.check-permission.outputs.ref1 != ''
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Need full history to access all refs
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Rust dependencies
+        uses: Swatinem/rust-cache@v2
+
+      - name: Fetch refs from remote
+        run: |
+          set -euo pipefail
+          REF1="${{ needs.check-permission.outputs.ref1 }}"
+          REF2="${{ needs.check-permission.outputs.ref2 }}"
+
+          echo "Fetching ref1: $REF1"
+          git fetch origin "$REF1" || git fetch origin "refs/tags/$REF1" || git fetch origin "refs/heads/$REF1" || true
+
+          echo "Fetching ref2: $REF2"
+          git fetch origin "$REF2" || git fetch origin "refs/tags/$REF2" || git fetch origin "refs/heads/$REF2" || true
+
+          # Update remote refs
+          git fetch origin --tags
+
+      - name: Validate and order refs
+        id: validate
+        run: |
+          set -euo pipefail
+          REF1="${{ needs.check-permission.outputs.ref1 }}"
+          REF2="${{ needs.check-permission.outputs.ref2 }}"
+
+          # Validate both refs exist
+          if ! git rev-parse --verify "$REF1" >/dev/null 2>&1; then
+            echo "error=Ref '$REF1' not found" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          if ! git rev-parse --verify "$REF2" >/dev/null 2>&1; then
+            echo "error=Ref '$REF2' not found" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          # Resolve to full SHAs
+          SHA1=$(git rev-parse "$REF1")
+          SHA2=$(git rev-parse "$REF2")
+
+          # Check if both refs resolve to the same commit
+          if [ "$SHA1" = "$SHA2" ]; then
+            echo "same_commit=true" >> $GITHUB_OUTPUT
+            echo "ref1_sha=$(git rev-parse --short $REF1)" >> $GITHUB_OUTPUT
+            echo "ref2_sha=$(git rev-parse --short $REF2)" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "same_commit=false" >> $GITHUB_OUTPUT
+
+          # Determine which is older (baseline) and newer (current)
+          # Get commit timestamps
+          TIMESTAMP1=$(git log -1 --format=%ct "$SHA1")
+          TIMESTAMP2=$(git log -1 --format=%ct "$SHA2")
+
+          if [ "$TIMESTAMP1" -le "$TIMESTAMP2" ]; then
+            # REF1 is older or same age -> baseline
+            BASELINE_REF="$REF1"
+            BASELINE_SHA="$SHA1"
+            CURRENT_REF="$REF2"
+            CURRENT_SHA="$SHA2"
+          else
+            # REF2 is older -> baseline
+            BASELINE_REF="$REF2"
+            BASELINE_SHA="$SHA2"
+            CURRENT_REF="$REF1"
+            CURRENT_SHA="$SHA1"
+          fi
+
+          # Output the determined ordering
+          echo "baseline_ref=$BASELINE_REF" >> $GITHUB_OUTPUT
+          echo "baseline_sha=$(git rev-parse --short $BASELINE_SHA)" >> $GITHUB_OUTPUT
+          echo "current_ref=$CURRENT_REF" >> $GITHUB_OUTPUT
+          echo "current_sha=$(git rev-parse --short $CURRENT_SHA)" >> $GITHUB_OUTPUT
+
+          # Keep original refs for display
+          echo "ref1_sha=$(git rev-parse --short $REF1)" >> $GITHUB_OUTPUT
+          echo "ref2_sha=$(git rev-parse --short $REF2)" >> $GITHUB_OUTPUT
+
+          echo "Determined ordering:"
+          echo "  Baseline (older): $BASELINE_REF ($BASELINE_SHA)"
+          echo "  Current (newer):  $CURRENT_REF ($CURRENT_SHA)"
+
+      - name: Handle same commit case
+        if: steps.validate.outputs.same_commit == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const ref1 = '${{ needs.check-permission.outputs.ref1 }}';
+            const ref2 = '${{ needs.check-permission.outputs.ref2 }}';
+            const sha = '${{ steps.validate.outputs.ref1_sha }}';
+
+            const message = [
+              '⚠️ **Same commit detected**',
+              '',
+              `Both \`${ref1}\` and \`${ref2}\` resolve to the same commit: \`${sha}\``,
+              '',
+              'No benchmark comparison needed - the refs are identical.',
+              '',
+              '**Tip:** To compare different versions, use refs that point to different commits.'
+            ].join('\n');
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'eyes'
+            });
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: message
+            });
+
+      - name: Check benchmark tool exists in baseline
+        if: steps.validate.outputs.same_commit == 'false'
+        id: check_baseline_tool
+        run: |
+          set -euo pipefail
+          BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}"
+          echo "Checking out $BASELINE_REF..."
+          git checkout "$BASELINE_REF"
+
+          # Check if the bench_throughput binary is defined in Cargo.toml
+          if ! grep -q 'name = "bench_throughput"' Cargo.toml 2>/dev/null; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool 'bench_throughput' not found in $BASELINE_REF"
+            exit 0
+          fi
+
+          # Check that a source path is declared for the binary in Cargo.toml
+          if ! grep -A 2 'name = "bench_throughput"' Cargo.toml | grep -q 'path.*='; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool source path not declared in $BASELINE_REF"
+            exit 0
+          fi
+
+          echo "exists=true" >> $GITHUB_OUTPUT
+          echo "✓ Benchmark tool found in $BASELINE_REF"
+
+      - name: Check benchmark tool exists in current
+        if: steps.validate.outputs.same_commit == 'false'
+        id: check_current_tool
+        run: |
+          set -euo pipefail
+          CURRENT_REF="${{ steps.validate.outputs.current_ref }}"
+          echo "Checking out $CURRENT_REF..."
+          git checkout "$CURRENT_REF"
+
+          # Check if the bench_throughput binary is defined in Cargo.toml
+          if ! grep -q 'name = "bench_throughput"' Cargo.toml 2>/dev/null; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool 'bench_throughput' not found in $CURRENT_REF"
+            exit 0
+          fi
+
+          # Check that a source path is declared for the binary in Cargo.toml
+          if ! grep -A 2 'name = "bench_throughput"' Cargo.toml | grep -q 'path.*='; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool source path not declared in $CURRENT_REF"
+            exit 0
+          fi
+
+          echo "exists=true" >> $GITHUB_OUTPUT
+          echo "✓ Benchmark tool found in $CURRENT_REF"
+
+      - name: Post missing tool error
+        if: steps.validate.outputs.same_commit == 'false' && (steps.check_baseline_tool.outputs.exists == 'false' || steps.check_current_tool.outputs.exists == 'false')
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const baseline_ref = '${{ steps.validate.outputs.baseline_ref }}';
+            const current_ref = '${{ steps.validate.outputs.current_ref }}';
+            const baseline_exists = '${{ steps.check_baseline_tool.outputs.exists }}' === 'true';
+            const current_exists = '${{ steps.check_current_tool.outputs.exists }}' === 'true';
+
+            let message = '❌ **Benchmark comparison failed**\n\n';
+            message += '**Reason**: The benchmark tool (`bench_throughput`) does not exist in ';
+
+            if (!baseline_exists && !current_exists) {
+              message += `both refs:\n- \`${baseline_ref}\` (baseline/older)\n- \`${current_ref}\` (current/newer)`;
+            } else if (!baseline_exists) {
+              message += `baseline ref: \`${baseline_ref}\` (older commit)`;
+            } else {
+              message += `current ref: \`${current_ref}\` (newer commit)`;
+            }
+
+            message += '\n\n**Solution**: The benchmark tool was added in commit `d264124`. Please use refs that include this commit or later.';
+            message += '\n\n**Example**: `/bench main HEAD` (if both include the benchmark tool)';
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'confused'
+            });
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: message
+            });
+
+      - name: Benchmark baseline (older commit)
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        run: |
+          set -euo pipefail
+          BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}"
+          ITERATIONS="${{ needs.check-permission.outputs.iterations }}"
+          SIZES="${{ needs.check-permission.outputs.sizes }}"
+
+          echo "Checking out baseline: $BASELINE_REF..."
+          git checkout "$BASELINE_REF"
+
+          echo "Building benchmark tool..."
+          if ! cargo build --release --bin bench_throughput 2>&1 | tee build_baseline.log; then
+            echo "❌ Failed to build benchmark tool for $BASELINE_REF"
+            exit 1
+          fi
+
+          echo "Running benchmarks on baseline..."
+          ./target/release/bench_throughput \
+            --sizes "$SIZES" \
+            --iterations "$ITERATIONS" \
+            --format json \
+            --output benchmark_baseline.json
+
+      - name: Benchmark current (newer commit)
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        run: |
+          set -euo pipefail
+          CURRENT_REF="${{ steps.validate.outputs.current_ref }}"
+          ITERATIONS="${{ needs.check-permission.outputs.iterations }}"
+          SIZES="${{ needs.check-permission.outputs.sizes }}"
+
+          echo "Checking out current: $CURRENT_REF..."
+          git checkout "$CURRENT_REF"
+
+          # Rebuild in case dependencies changed
+          echo "Building benchmark tool..."
+          if ! cargo build --release --bin bench_throughput 2>&1 | tee build_current.log; then
+            echo "❌ Failed to build benchmark tool for $CURRENT_REF"
+            exit 1
+          fi
+
+          echo "Running benchmarks on current..."
+          ./target/release/bench_throughput \
+            --sizes "$SIZES" \
+            --iterations "$ITERATIONS" \
+            --format json \
+            --output benchmark_current.json
+
+      - name: Compare results
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        run: |
+          set -euo pipefail
+          # Use the comparison script from current (newer) commit
+          if [ -f scripts/compare_benchmarks.py ]; then
+            python3 scripts/compare_benchmarks.py \
+              benchmark_baseline.json \
+              benchmark_current.json > comparison.md
+          else
+            echo "❌ Comparison script not found"
+            exit 1
+          fi
+
+      - name: Post results to PR
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const comparison = fs.readFileSync('comparison.md', 'utf8');
+            const baseline_ref = '${{ steps.validate.outputs.baseline_ref }}';
+            const current_ref = '${{ steps.validate.outputs.current_ref }}';
+            const baseline_sha = '${{ steps.validate.outputs.baseline_sha }}';
+            const current_sha = '${{ steps.validate.outputs.current_sha }}';
+            const iterations = '${{ needs.check-permission.outputs.iterations }}';
+            const sizes = '${{ needs.check-permission.outputs.sizes }}';
+
+            const body = [
+              '## 🔬 Benchmark Comparison Report',
+              '',
+              '**Requested by:** @${{ github.event.comment.user.login }}',
+              '',
+              '**Comparison:**',
+              `- **Baseline** (older): \`${baseline_ref}\` (${baseline_sha})`,
+              `- **Current** (newer): \`${current_ref}\` (${current_sha})`,
+              '',
+              '**Parameters:**',
+              `- **Iterations**: ${iterations}`,
+              `- **Sizes**: ${sizes}`,
+              '',
+              '---',
+              '',
+              comparison,
+              '',
+              '---',
+              '',
+              'Triggered by [/bench command](${{ github.event.comment.html_url }})'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });
+
+      - name: Upload benchmark artifacts
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-comparison-${{ github.event.comment.id }}
+          path: |
+            benchmark_baseline.json
+            benchmark_current.json
+            comparison.md
+            build_baseline.log
+            build_current.log
+          retention-days: 30
+
+      - name: Add success reaction
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'rocket'
+            });
+
+  handle-error:
+    name: Handle Errors
+    needs: [check-permission, run-benchmarks]
+    if: failure() && needs.check-permission.outputs.authorized == 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+    steps:
+      - name: Post error message
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const ref1 = '${{ needs.check-permission.outputs.ref1 }}';
+            const ref2 = '${{ needs.check-permission.outputs.ref2 }}';
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'confused'
+            });
+
+            const errorBody = [
+              '❌ **Benchmark comparison failed**',
+              '',
+              `Failed to compare \`${ref1}\` and \`${ref2}\`.`,
+              '',
+              '**Please check:**',
+              '- Both refs exist and are valid git references (branches, tags, or commits)',
+              '- The benchmark tool exists in both refs',
+              '- The code at those refs compiles successfully',
+              '- Parameters are in the correct format: `/bench <ref1> <ref2> [iterations] [sizes]`',
+              '',
+              '**See the [workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.**'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: errorBody
+            });
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
deleted file mode 100644
index 7f937ae..0000000
--- a/.github/workflows/benchmark.yml
+++ /dev/null
@@ -1,96 +0,0 @@
-name: Performance Benchmarks
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-
-jobs:
-  benchmark:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      pull-requests: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Cache Rust dependencies
-        uses: Swatinem/rust-cache@v2
-
-      - name: Build benchmark tool
-        run: cargo build --release --bin bench_throughput
-
-      - name: Run benchmarks
-        run: |
-          # Run benchmarks with multiple sizes and save to JSON
-          ./target/release/bench_throughput \
-            --sizes 1000,5000,10000 \
-            --iterations 100 \
-            --format json \
-            --output benchmark_results.json
-
-      - name: Download baseline benchmark
-        id: download-baseline
-        continue-on-error: true
-        uses: dawidd6/action-download-artifact@v3
-        with:
-          workflow: benchmark.yml
-          branch: main
-          name: benchmark-baseline
-          path: baseline
-          if_no_artifact_found: warn
-
-      - name: Compare with baseline
-        id: compare
-        run: |
-          if [ -f baseline/benchmark_results.json ]; then
-            echo "Baseline found, comparing results..."
-            python3 scripts/compare_benchmarks.py \
-              baseline/benchmark_results.json \
-              benchmark_results.json > comparison.md
-            echo "comparison_available=true" >> $GITHUB_OUTPUT
-          else
-            echo "No baseline found, this will become the new baseline"
-            echo "comparison_available=false" >> $GITHUB_OUTPUT
-            echo "## Benchmark Results\n\nNo baseline available for comparison. These results will be used as the baseline for future comparisons." > comparison.md
-          fi
-
-      - name: Comment PR with results
-        if: github.event_name == 'pull_request' && steps.compare.outputs.comparison_available == 'true'
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require('fs');
-            const comparison = fs.readFileSync('comparison.md', 'utf8');
-
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: comparison
-            });
-
-      - name: Upload current results as artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-current
-          path: |
-            benchmark_results.json
-            comparison.md
-
-      - name: Fail if significant performance regression
-        if: steps.compare.outputs.comparison_available == 'true'
-        run: |
-          if grep -q "⚠️ PERFORMANCE REGRESSION" comparison.md; then
-            echo "::warning::Performance regression detected. Review comparison.md for details."
-            # Uncomment the next line to fail the build on regression
-            # exit 1
-          fi
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 570e3ca..943063c 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -217,9 +217,8 @@ jobs:
           zsh xz-utils liblz4-tool musl-tools brotli zstd
 
       - name: Install Rust
-        uses: dtolnay/rust-toolchain@master
+        uses: dtolnay/rust-toolchain@stable
         with:
-          toolchain: nightly
           target: ${{ matrix.target }}
 
       # for some reason, the above action doesn't seem to set the target correctly
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c0ce61..67746bc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       - name: Checkout repository
        uses: actions/checkout@v4
       - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@nightly
+        uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Run tests
        run: cargo test --locked --all-features --workspace -- --nocapture
@@ -28,7 +28,7 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@nightly
+        uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - uses: Swatinem/rust-cache@v2
diff --git a/.github/workflows/update-baseline.yml b/.github/workflows/update-baseline.yml
deleted file mode 100644
index 46c1a24..0000000
--- a/.github/workflows/update-baseline.yml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: Update Benchmark Baseline
-
-on:
-  workflow_dispatch:
-    inputs:
-      ref:
-        description: 'Git ref (tag, branch, or commit SHA) to benchmark'
-        required: true
-        default: 'main'
-        type: string
-      iterations:
-        description: 'Number of benchmark iterations'
-        required: false
-        default: '100'
-        type: string
-
-jobs:
-  update-baseline:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-
-    steps:
-      - name: Checkout repository at specified ref
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.ref }}
-          fetch-depth: 0
-
-      - name: Get commit info
-        id: commit-info
-        run: |
-          echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
-          echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
-          echo "date=$(git log -1 --format=%ci)" >> $GITHUB_OUTPUT
-          echo "message=$(git log -1 --format=%s)" >> $GITHUB_OUTPUT
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Cache Rust dependencies
-        uses: Swatinem/rust-cache@v2
-
-      - name: Build benchmark tool
-        run: cargo build --release --bin bench_throughput
-
-      - name: Run benchmarks
-        run: |
-          echo "Running benchmarks for commit ${{ steps.commit-info.outputs.short_sha }}"
-          ./target/release/bench_throughput \
-            --sizes 1000,5000,10000 \
-            --iterations ${{ inputs.iterations }} \
-            --format json \
-            --output benchmark_results.json
-
-      - name: Add workflow summary
-        run: |
-          echo "## Baseline Update Summary" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "**Reference:** \`${{ inputs.ref }}\`" >> $GITHUB_STEP_SUMMARY
-          echo "**Commit:** ${{ steps.commit-info.outputs.sha }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Date:** ${{ steps.commit-info.outputs.date }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Message:** ${{ steps.commit-info.outputs.message }}" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "✅ Baseline has been updated successfully." >> $GITHUB_STEP_SUMMARY
-
-      - name: Upload new baseline
-        uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-baseline
-          path: benchmark_results.json
-          retention-days: 90
-
-      - name: Baseline updated successfully
-        run: |
-          echo "::notice::Baseline successfully updated to commit ${{ steps.commit-info.outputs.short_sha }}"
diff --git a/scripts/README.md b/scripts/README.md
index 4061422..fb338ba 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -4,12 +4,84 @@ This directory contains scripts used by the GitHub Actions CI/CD pipeline to tra
 
 ## Overview
 
-The benchmark CI/CD system automatically:
-1. Runs performance benchmarks on every push to `main` and on pull requests
-2. Compares results against the baseline (last `main` branch results)
-3. Generates a detailed comparison report
-4. Comments on PRs with performance changes
-5. Warns about significant performance regressions
+The benchmark system uses an **on-demand approach** triggered via PR comments. There are no automatic benchmark runs - all comparisons are triggered manually by the repository owner using the `/bench` command.
+
+## The `/bench` Command
+
+### Command Syntax
+
+```bash
+/bench <ref1> <ref2> [iterations] [sizes]
+```
+
+**Parameters:**
+- `ref1` (required): Baseline git reference (commit, branch, or tag)
+- `ref2` (required): Current git reference to compare against the baseline
+- `iterations` (optional): Number of benchmark iterations (default: 100)
+- `sizes` (optional): Comma-separated input sizes (default: 1000,5000,10000)
+
+### Examples
+
+```bash
+# Basic comparison with defaults (100 iterations, sizes: 1000,5000,10000)
+/bench main v0.13.0
+
+# Compare two commits
+/bench abc123 def456
+
+# Custom iterations
+/bench main HEAD 200
+
+# Custom iterations and sizes
+/bench v0.12.0 v0.13.0 100 1000,5000,10000,50000
+
+# Compare feature branch vs main
+/bench feature-branch main
+```
+
+### Security
+
+- ⚠️ **Owner-only**: Only the repository owner can trigger benchmarks
+- ✅ **PR-only**: Works only on pull request comments (not regular issues)
+- ✅ **Safe**: No arbitrary code execution - only validated git refs
+
+### Workflow
+
+1. **Post command** in a PR comment: `/bench main HEAD`
+2. **Bot acknowledges** with a 👀 reaction and a status message
+3. **Validation** checks:
+   - User is the repository owner
+   - Both refs exist
+   - The benchmark tool exists in both refs
+   - Parameters are valid
+4. **Benchmarks run** on both refs
+5. **Results posted** as a PR comment with a detailed comparison
+6. **Success reaction** 🚀 (or 😕 on failure)
+7. **Artifacts uploaded** for 30 days
+
+### Error Handling
+
+The workflow handles several error cases gracefully:
+
+**Missing benchmark tool:**
+```
+❌ Benchmark comparison failed
+
+Reason: The benchmark tool (bench_throughput) does not exist in ref: v0.10.0
+
+Solution: The benchmark tool was added in commit d264124.
+Please use refs that include this commit or later.
+
+Example: /bench main HEAD (if both include the benchmark tool)
+```
+
+**Invalid parameters:**
+```
+❌ Invalid format. Usage: /bench <ref1> <ref2> [iterations] [sizes]
+```
+
+**Build failures:**
+The workflow will report build errors with logs attached as artifacts.
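+
+### Reproducing a Comparison Locally
+
+The following is a minimal local sketch of roughly what the workflow does for `/bench <ref1> <ref2>`; the refs below are placeholders, and a clean working tree plus the default parameters are assumed:
+
+```bash
+REF1=main         # baseline (older) ref - placeholder
+REF2=my-feature   # current (newer) ref - placeholder
+
+# Build and benchmark the baseline ref
+git checkout "$REF1"
+cargo build --release --bin bench_throughput
+./target/release/bench_throughput \
+  --sizes 1000,5000,10000 \
+  --iterations 100 \
+  --format json \
+  --output benchmark_baseline.json
+
+# Build and benchmark the current ref (rebuild in case dependencies changed)
+git checkout "$REF2"
+cargo build --release --bin bench_throughput
+./target/release/bench_throughput \
+  --sizes 1000,5000,10000 \
+  --iterations 100 \
+  --format json \
+  --output benchmark_current.json
+
+# Generate the markdown comparison report
+python3 scripts/compare_benchmarks.py benchmark_baseline.json benchmark_current.json > comparison.md
+```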
 
 ## Files
 
@@ -25,7 +97,7 @@ python3 scripts/compare_benchmarks.py baseline.json current.json > report.md
 
 **Features:**
 - Detects performance regressions (>5% slower)
 - Highlights improvements (>5% faster)
-- Compares avg/path latency, p99, and throughput
+- Compares avg/path latency, p95, p99, and throughput
 - Color-coded indicators:
   - 🟢 Significant improvement (>5% faster)
   - ✅ Improvement (2-5% faster)
@@ -36,33 +108,30 @@ python3 scripts/compare_benchmarks.py baseline.json current.json > report.md
 
 ## GitHub Actions Workflow
 
-The benchmark workflow (`.github/workflows/benchmark.yml`) runs automatically on:
-- Pushes to `main` branch
-- Pull requests
+### Benchmark Command (`.github/workflows/bench-command.yml`)
 
-### Workflow Steps
+The single workflow that handles all benchmark comparisons.
 
-1. **Build** - Compiles the `bench_throughput` tool in release mode
-2. **Run Benchmarks** - Executes benchmarks with multiple input sizes (100, 1K, 10K paths)
-3. **Download Baseline** - Fetches the last benchmark from `main` branch
-4. **Compare** - Runs the comparison script
-5. **Comment on PR** - Posts results as a comment on pull requests
-6. **Upload Artifacts** - Stores results for historical tracking
-7. **Update Baseline** - Saves results as new baseline (main branch only)
-8. **Check Regressions** - Warns if significant regressions detected
+**Triggers:**
+- PR comments starting with `/bench`
+- Owner-only access control
 
-### Artifacts
+**What it does:**
+1. **Validates** user permissions and parameters
+2. **Checks** both refs for benchmark tool existence
+3. **Builds** the benchmark tool for each ref
+4. **Runs** benchmarks with specified parameters
+5. **Compares** results using `compare_benchmarks.py`
+6. **Posts** detailed report to PR
+7. **Uploads** artifacts (results + build logs)
 
-The workflow stores three artifacts:
+**Artifacts:**
 
-1. **benchmark-current** - Current run results (JSON, text, comparison)
-   - Retained for 30 days
-   - Available for download from workflow runs
-
-2. **benchmark-baseline** - Baseline for comparison
-   - Updated only on `main` branch pushes
-   - Retained for 90 days
-   - Used for comparing future PRs
+- **benchmark-comparison-<comment-id>**
+  - Both benchmark JSON files
+  - Comparison markdown report
+  - Build logs for debugging
+  - Retained for 30 days
 
 ## Running Benchmarks Locally
 
@@ -71,8 +140,8 @@ cargo build --release --bin bench_throughput
 
 ./target/release/bench_throughput \
-  --sizes 100,1000,10000 \
-  --iterations 50 \
+  --sizes 1000,5000,10000 \
+  --iterations 100 \
   --format json \
   --output my_benchmark.json
 ```
@@ -91,18 +160,18 @@ cat comparison.md
 
 ### Benchmark Parameters
 
-Default parameters in the CI workflow:
-- **Input sizes:** 100, 1,000, 10,000 paths
-- **Iterations:** 50 (per size)
-- **Output format:** JSON + human-readable text
+Default parameters:
+- **Input sizes:** 1,000, 5,000, 10,000 paths
+- **Iterations:** 100 (per size)
+- **Output format:** JSON
 
-To change these, edit `.github/workflows/benchmark.yml`:
-```yaml
-./target/release/bench_throughput \
-  --sizes 100,1000,10000,100000 \  # Add more sizes
-  --iterations 100 \               # More iterations = more stable results
-  --format json \
-  --output benchmark_results.json
+These can be overridden per command:
+```bash
+# Use different sizes for larger datasets
+/bench main HEAD 100 10000,50000,100000
+
+# More iterations for stable results
+/bench v0.12.0 v0.13.0 500 1000,5000,10000
 ```
 
 ### Regression Thresholds
 
@@ -130,24 +199,42 @@ def calculate_change(baseline: float, current: float):
     ...
 ```
 
-### Failing on Regressions
+## Use Cases
 
-By default, the workflow **warns** about regressions but doesn't fail the build.
+### 1. Compare Feature Branch vs Main
+```bash
+/bench main feature-optimize-parsing
+```
+Use this to see if your optimization actually improves performance.
 
-To fail on regressions, uncomment this line in `.github/workflows/benchmark.yml`:
-```yaml
-- name: Fail if significant performance regression
-  run: |
-    if grep -q "⚠️ PERFORMANCE REGRESSION" comparison.md; then
-      echo "::warning::Performance regression detected."
-      exit 1 # Uncomment this line
-    fi
+### 2. Validate Release Performance
+```bash
+/bench v0.12.0 v0.13.0
 ```
+Compare performance between releases to ensure no regressions.
+
+### 3. Debug Performance Issues
+```bash
+/bench abc123 def456
+```
+Compare two commits to narrow down which change introduced a regression.
+
+### 4. Stress Test with Large Datasets
+```bash
+/bench main HEAD 100 10000,50000,100000,500000
+```
+Test how your code scales with larger input sizes.
+
+### 5. High-Precision Comparison
+```bash
+/bench main feature-branch 1000 1000,5000,10000
+```
+Use more iterations for more stable and reliable results.
 
 ## Troubleshooting
 
-### No baseline found
-On the first run, there's no baseline for comparison. The first successful run on `main` will establish the baseline.
+### No benchmark tool found
+The benchmark tool (`bench_throughput`) was added in commit `d264124`. If you're comparing older commits, you'll get an error. Solution: Only compare refs that include the benchmark tool.
 
 ### Benchmark variance
 Benchmarks can vary due to:
@@ -156,30 +243,42 @@ Benchmarks can vary due to:
 - Network conditions
 
 The 2% noise threshold accounts for normal variance. For more stable results:
-1. Increase iteration count
-2. Run benchmarks multiple times
-3. Use larger input sizes (less affected by noise)
+1. Increase iteration count: `/bench main HEAD 500`
+2. Use larger input sizes (less affected by noise)
+3. Run benchmarks multiple times and compare (see the sketch below)
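+
+The 2%/5% thresholds drive the indicator icons in the report. A hypothetical shell sketch of that classification - the actual logic lives in `scripts/compare_benchmarks.py`:
+
+```bash
+# Hypothetical helper mirroring the documented thresholds
+# (±2% noise, 2-5% minor, >5% significant); not the actual
+# implementation from scripts/compare_benchmarks.py.
+classify_change() {
+  local baseline_ns=$1 current_ns=$2
+  awk -v b="$baseline_ns" -v c="$current_ns" 'BEGIN {
+    pct = (c - b) / b * 100
+    mag = (pct < 0) ? -pct : pct
+    if (mag <= 2)     icon = "➖"                      # within noise
+    else if (mag < 5) icon = (pct < 0) ? "✅" : "⚠️"   # minor change
+    else              icon = (pct < 0) ? "🟢" : "🔴"   # significant change
+    printf "%s %+.1f%%\n", icon, pct
+  }'
+}
+
+classify_change 500 560   # -> 🔴 +12.0%
+classify_change 520 510   # -> ➖ -1.9%
+```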
 
 ### Permission errors
 
-The workflow needs these permissions (already configured):
-```yaml
-permissions:
-  contents: write
-  pull-requests: write
-```
+Only the repository owner can trigger benchmarks. Other users will receive a permission denied message.
+
+### Build failures
+If the code doesn't compile at one of the refs, the workflow will fail. Check the workflow run logs for build errors. Artifacts include `build_baseline.log` and `build_current.log` for debugging.
 
 ## Example Report
 
+When you run `/bench main HEAD`, you'll get a report like this:
+
 ```markdown
+## 🔬 Benchmark Comparison Report
+
+**Requested by:** @username
+
+**Comparison:**
+- **Baseline**: `main` (abc123)
+- **Current**: `HEAD` (def456)
+
+**Parameters:**
+- **Iterations**: 100
+- **Sizes**: 1000,5000,10000
+
+---
+
 # 📊 Benchmark Comparison Report
 
 **Input Size:** 10,000 paths
-**Baseline Timestamp:** 1699123456
-**Current Timestamp:** 1699123789
 
 ## Performance Comparison
 
-| Template | Avg/Path | Change | p99 | Change | Throughput | Change |
+| Template | Avg/Path | Change | p95 | Change | Throughput | Change |
 |----------|----------|--------|-----|--------|------------|--------|
 | Strip ANSI | 304ns | ✅ -3.2% | 327ns | ➖ -1.1% | 3.29M/s | ✅ +3.3% |
 | Split all | 519ns | 🔴 +12.5% | 838ns | ⚠️ +8.2% | 1.93M/s | 🔴 -11.1% |
@@ -194,6 +293,14 @@ permissions:
 
 ### ⚠️ PERFORMANCE REGRESSIONS
 
 - **Split all**: +12.5% slower
+
+### ✨ Performance Improvements
+
+- **Strip ANSI**: 3.2% faster
+
+---
+
+Triggered by [/bench command](https://github.com/...)
 ```
 
 ## Further Reading