diff --git a/.github/workflows/bench-command.yml b/.github/workflows/bench-command.yml
new file mode 100644
index 0000000..6756703
--- /dev/null
+++ b/.github/workflows/bench-command.yml
@@ -0,0 +1,592 @@
+name: Benchmark Command
+# Trigger on-demand benchmarks via PR comments
+# Usage: /bench <ref1> <ref2> [iterations] [sizes]
+# Examples:
+#   /bench main v0.13.0
+#   /bench abc123 def456 100 1000,5000,10000
+# Only the repository owner can trigger this command
+
+on:
+  issue_comment:
+    types: [created]
+
+# Prevent concurrent benchmark runs on the same PR
+concurrency:
+  group: bench-${{ github.event.issue.number }}
+  cancel-in-progress: true
+
+jobs:
+  check-permission:
+    name: Check Command Permission
+    # Only run on PR comments (not regular issues)
+    if: |
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/bench ')
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    outputs:
+      authorized: ${{ steps.check.outputs.authorized }}
+      ref1: ${{ steps.parse.outputs.ref1 }}
+      ref2: ${{ steps.parse.outputs.ref2 }}
+      iterations: ${{ steps.parse.outputs.iterations }}
+      sizes: ${{ steps.parse.outputs.sizes }}
+    steps:
+      - name: Check if commenter is repo owner
+        id: check
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const commenter = context.payload.comment.user.login;
+            const owner = context.payload.repository.owner.login;
+            const isOwner = commenter === owner;
+
+            console.log(`Commenter: ${commenter}`);
+            console.log(`Repository owner: ${owner}`);
+            console.log(`Is owner: ${isOwner}`);
+
+            if (!isOwner) {
+              await github.rest.reactions.createForIssueComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: context.payload.comment.id,
+                content: '-1'
+              });
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: `❌ **Permission denied**: Only @${owner} can trigger benchmark comparisons.`
+              });
+            } else {
+              await github.rest.reactions.createForIssueComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: context.payload.comment.id,
+                content: 'eyes'
+              });
+            }
+
+            core.setOutput('authorized', isOwner);
+
+      - name: Parse benchmark command
+        id: parse
+        if: steps.check.outputs.authorized == 'true'
+        continue-on-error: true
+        run: |
+          set -euo pipefail
+          COMMENT="${{ github.event.comment.body }}"
+
+          # Parse command: /bench ref1 ref2 [iterations] [sizes]
+          # Remove the /bench prefix and extract parameters
+          PARAMS=$(echo "$COMMENT" | sed 's|^/bench[[:space:]]*||')
+
+          # Extract parameters
+          REF1=$(echo "$PARAMS" | awk '{print $1}')
+          REF2=$(echo "$PARAMS" | awk '{print $2}')
+          ITERATIONS=$(echo "$PARAMS" | awk '{print $3}')
+          SIZES=$(echo "$PARAMS" | awk '{print $4}')
+
+          # Validate required parameters
+          if [ -z "$REF1" ] || [ -z "$REF2" ]; then
+            echo "error=Invalid format. Missing required parameters." >> $GITHUB_OUTPUT
+            echo "parse_failed=true" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          # Set defaults for optional parameters
+          if [ -z "$ITERATIONS" ] || ! [[ "$ITERATIONS" =~ ^[0-9]+$ ]]; then
+            ITERATIONS=100
+          fi
+
+          if [ -z "$SIZES" ]; then
+            SIZES="1000,5000,10000"
+          fi
+
+          # Validate sizes format (comma-separated numbers)
+          if ! echo "$SIZES" | grep -qE '^[0-9]+(,[0-9]+)*$'; then
echo "$SIZES" | grep -qE '^[0-9]+(,[0-9]+)*$'; then + echo "error=Invalid sizes format: $SIZES" >> $GITHUB_OUTPUT + echo "parse_failed=true" >> $GITHUB_OUTPUT + exit 1 + fi + + echo "ref1=$REF1" >> $GITHUB_OUTPUT + echo "ref2=$REF2" >> $GITHUB_OUTPUT + echo "iterations=$ITERATIONS" >> $GITHUB_OUTPUT + echo "sizes=$SIZES" >> $GITHUB_OUTPUT + echo "parse_failed=false" >> $GITHUB_OUTPUT + + echo "Parsed parameters:" + echo " ref1: $REF1" + echo " ref2: $REF2" + echo " iterations: $ITERATIONS" + echo " sizes: $SIZES" + + - name: Post parse error + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'failure' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'confused' + }); + + const errorMessage = [ + '❌ **Invalid command format**', + '', + '**Usage:** `/bench [iterations] [sizes]`', + '', + '**Examples:**', + '```', + '/bench main v0.13.0', + '/bench abc123 def456 100 1000,5000,10000', + '```', + '', + '**Parameters:**', + '- `ref1` (required): Baseline git reference', + '- `ref2` (required): Current git reference', + '- `iterations` (optional): Number of iterations (default: 100)', + '- `sizes` (optional): Comma-separated sizes (default: 1000,5000,10000)' + ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: errorMessage + }); + + - name: Post acknowledgment + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'success' + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ steps.parse.outputs.ref1 }}'; + const ref2 = '${{ steps.parse.outputs.ref2 }}'; + const iterations = '${{ steps.parse.outputs.iterations }}'; + const sizes = '${{ steps.parse.outputs.sizes }}'; + + const message = [ + '🚀 **Benchmark comparison started**', + '', + '**Comparing:**', + `- **Baseline**: \`${ref1}\``, + `- **Current**: \`${ref2}\``, + '', + '**Parameters:**', + `- **Iterations**: ${iterations}`, + `- **Sizes**: ${sizes}`, + '', + 'Results will be posted here when complete...' 
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: message
+            });
+
+  run-benchmarks:
+    name: Run Benchmark Comparison
+    needs: check-permission
+    if: needs.check-permission.outputs.authorized == 'true' && needs.check-permission.outputs.ref1 != ''
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Need full history to access all refs
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Rust dependencies
+        uses: Swatinem/rust-cache@v2
+
+      - name: Fetch refs from remote
+        run: |
+          set -euo pipefail
+          REF1="${{ needs.check-permission.outputs.ref1 }}"
+          REF2="${{ needs.check-permission.outputs.ref2 }}"
+
+          echo "Fetching ref1: $REF1"
+          git fetch origin "$REF1" || git fetch origin "refs/tags/$REF1" || git fetch origin "refs/heads/$REF1" || true
+
+          echo "Fetching ref2: $REF2"
+          git fetch origin "$REF2" || git fetch origin "refs/tags/$REF2" || git fetch origin "refs/heads/$REF2" || true
+
+          # Update remote refs
+          git fetch origin --tags
+
+      - name: Validate and order refs
+        id: validate
+        run: |
+          set -euo pipefail
+          REF1="${{ needs.check-permission.outputs.ref1 }}"
+          REF2="${{ needs.check-permission.outputs.ref2 }}"
+
+          # Validate both refs exist
+          if ! git rev-parse --verify "$REF1" >/dev/null 2>&1; then
+            echo "error=Ref '$REF1' not found" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          if ! git rev-parse --verify "$REF2" >/dev/null 2>&1; then
+            echo "error=Ref '$REF2' not found" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          # Resolve to full SHAs
+          SHA1=$(git rev-parse "$REF1")
+          SHA2=$(git rev-parse "$REF2")
+
+          # Check if both refs resolve to the same commit
+          if [ "$SHA1" = "$SHA2" ]; then
+            echo "same_commit=true" >> $GITHUB_OUTPUT
+            echo "ref1_sha=$(git rev-parse --short $REF1)" >> $GITHUB_OUTPUT
+            echo "ref2_sha=$(git rev-parse --short $REF2)" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "same_commit=false" >> $GITHUB_OUTPUT
+
+          # Determine which is older (baseline) and newer (current)
+          # Get commit timestamps
+          TIMESTAMP1=$(git log -1 --format=%ct "$SHA1")
+          TIMESTAMP2=$(git log -1 --format=%ct "$SHA2")
+
+          if [ "$TIMESTAMP1" -le "$TIMESTAMP2" ]; then
+            # REF1 is older or same age -> baseline
+            BASELINE_REF="$REF1"
+            BASELINE_SHA="$SHA1"
+            CURRENT_REF="$REF2"
+            CURRENT_SHA="$SHA2"
+          else
+            # REF2 is older -> baseline
+            BASELINE_REF="$REF2"
+            BASELINE_SHA="$SHA2"
+            CURRENT_REF="$REF1"
+            CURRENT_SHA="$SHA1"
+          fi
+
+          # Output the determined ordering
+          echo "baseline_ref=$BASELINE_REF" >> $GITHUB_OUTPUT
+          echo "baseline_sha=$(git rev-parse --short $BASELINE_SHA)" >> $GITHUB_OUTPUT
+          echo "current_ref=$CURRENT_REF" >> $GITHUB_OUTPUT
+          echo "current_sha=$(git rev-parse --short $CURRENT_SHA)" >> $GITHUB_OUTPUT
+
+          # Keep original refs for display
+          echo "ref1_sha=$(git rev-parse --short $REF1)" >> $GITHUB_OUTPUT
+          echo "ref2_sha=$(git rev-parse --short $REF2)" >> $GITHUB_OUTPUT
+
+          echo "Determined ordering:"
+          echo "  Baseline (older): $BASELINE_REF ($BASELINE_SHA)"
+          echo "  Current (newer):  $CURRENT_REF ($CURRENT_SHA)"
+
+      - name: Handle same commit case
+        if: steps.validate.outputs.same_commit == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const ref1 = '${{ needs.check-permission.outputs.ref1 }}';
+            const ref2 = '${{ needs.check-permission.outputs.ref2 }}';
+            const sha = '${{ steps.validate.outputs.ref1_sha }}';
+
+            const message = [
+              '⚠️ **Same commit detected**',
+              '',
+              `Both \`${ref1}\` and \`${ref2}\` resolve to the same commit: \`${sha}\``,
+              '',
+              'No benchmark comparison needed - the refs are identical.',
+              '',
+              '**Tip:** To compare different versions, use refs that point to different commits.'
+            ].join('\n');
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'eyes'
+            });
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: message
+            });
+
+      - name: Check benchmark tool exists in baseline
+        if: steps.validate.outputs.same_commit == 'false'
+        id: check_baseline_tool
+        run: |
+          set -euo pipefail
+          BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}"
+          echo "Checking out $BASELINE_REF..."
+          git checkout "$BASELINE_REF"
+
+          # Check if the bench_throughput binary is defined in Cargo.toml
+          if ! grep -q 'name = "bench_throughput"' Cargo.toml 2>/dev/null; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool 'bench_throughput' not found in $BASELINE_REF"
+            exit 0
+          fi
+
+          # Check that a source path is declared for the binary in Cargo.toml
+          if ! grep -A 2 'name = "bench_throughput"' Cargo.toml | grep -q 'path.*='; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool source path not declared in $BASELINE_REF"
+            exit 0
+          fi
+
+          echo "exists=true" >> $GITHUB_OUTPUT
+          echo "✓ Benchmark tool found in $BASELINE_REF"
+
+      - name: Check benchmark tool exists in current
+        if: steps.validate.outputs.same_commit == 'false'
+        id: check_current_tool
+        run: |
+          set -euo pipefail
+          CURRENT_REF="${{ steps.validate.outputs.current_ref }}"
+          echo "Checking out $CURRENT_REF..."
+          git checkout "$CURRENT_REF"
+
+          # Check if the bench_throughput binary is defined in Cargo.toml
+          if ! grep -q 'name = "bench_throughput"' Cargo.toml 2>/dev/null; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool 'bench_throughput' not found in $CURRENT_REF"
+            exit 0
+          fi
+
+          # Check that a source path is declared for the binary in Cargo.toml
+          if ! grep -A 2 'name = "bench_throughput"' Cargo.toml | grep -q 'path.*='; then
+            echo "exists=false" >> $GITHUB_OUTPUT
+            echo "❌ Benchmark tool source path not declared in $CURRENT_REF"
+            exit 0
+          fi
+
+          echo "exists=true" >> $GITHUB_OUTPUT
+          echo "✓ Benchmark tool found in $CURRENT_REF"
+
+      - name: Post missing tool error
+        if: steps.validate.outputs.same_commit == 'false' && (steps.check_baseline_tool.outputs.exists == 'false' || steps.check_current_tool.outputs.exists == 'false')
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const baseline_ref = '${{ steps.validate.outputs.baseline_ref }}';
+            const current_ref = '${{ steps.validate.outputs.current_ref }}';
+            const baseline_exists = '${{ steps.check_baseline_tool.outputs.exists }}' === 'true';
+            const current_exists = '${{ steps.check_current_tool.outputs.exists }}' === 'true';
+
+            let message = '❌ **Benchmark comparison failed**\n\n';
+            message += '**Reason**: The benchmark tool (`bench_throughput`) does not exist in ';
+
+            if (!baseline_exists && !current_exists) {
+              message += `both refs:\n- \`${baseline_ref}\` (baseline/older)\n- \`${current_ref}\` (current/newer)`;
+            } else if (!baseline_exists) {
+              message += `baseline ref: \`${baseline_ref}\` (older commit)`;
+            } else {
+              message += `current ref: \`${current_ref}\` (newer commit)`;
+            }
+
+            message += '\n\n**Solution**: The benchmark tool was added in commit `d264124`. Please use refs that include this commit or later.';
+            message += '\n\n**Example**: `/bench main HEAD` (if both include the benchmark tool)';
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'confused'
+            });
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: message
+            });
+
+      - name: Benchmark baseline (older commit)
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        run: |
+          set -euo pipefail
+          BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}"
+          ITERATIONS="${{ needs.check-permission.outputs.iterations }}"
+          SIZES="${{ needs.check-permission.outputs.sizes }}"
+
+          echo "Checking out baseline: $BASELINE_REF..."
+          git checkout "$BASELINE_REF"
+
+          echo "Building benchmark tool..."
+          if ! cargo build --release --bin bench_throughput 2>&1 | tee build_baseline.log; then
+            echo "❌ Failed to build benchmark tool for $BASELINE_REF"
+            exit 1
+          fi
+
+          echo "Running benchmarks on baseline..."
+          ./target/release/bench_throughput \
+            --sizes "$SIZES" \
+            --iterations "$ITERATIONS" \
+            --format json \
+            --output benchmark_baseline.json
+
+      - name: Benchmark current (newer commit)
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        run: |
+          set -euo pipefail
+          CURRENT_REF="${{ steps.validate.outputs.current_ref }}"
+          ITERATIONS="${{ needs.check-permission.outputs.iterations }}"
+          SIZES="${{ needs.check-permission.outputs.sizes }}"
+
+          echo "Checking out current: $CURRENT_REF..."
+          git checkout "$CURRENT_REF"
+
+          # Rebuild in case dependencies changed
+          echo "Building benchmark tool..."
+          if ! cargo build --release --bin bench_throughput 2>&1 | tee build_current.log; then
+            echo "❌ Failed to build benchmark tool for $CURRENT_REF"
+            exit 1
+          fi
+
+          echo "Running benchmarks on current..."
+          ./target/release/bench_throughput \
+            --sizes "$SIZES" \
+            --iterations "$ITERATIONS" \
+            --format json \
+            --output benchmark_current.json
+
+      - name: Compare results
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        run: |
+          set -euo pipefail
+          # Use the comparison script from current (newer) commit
+          if [ -f scripts/compare_benchmarks.py ]; then
+            python3 scripts/compare_benchmarks.py \
+              benchmark_baseline.json \
+              benchmark_current.json > comparison.md
+          else
+            echo "❌ Comparison script not found"
+            exit 1
+          fi
+
+      - name: Post results to PR
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const comparison = fs.readFileSync('comparison.md', 'utf8');
+            const baseline_ref = '${{ steps.validate.outputs.baseline_ref }}';
+            const current_ref = '${{ steps.validate.outputs.current_ref }}';
+            const baseline_sha = '${{ steps.validate.outputs.baseline_sha }}';
+            const current_sha = '${{ steps.validate.outputs.current_sha }}';
+            const iterations = '${{ needs.check-permission.outputs.iterations }}';
+            const sizes = '${{ needs.check-permission.outputs.sizes }}';
+
+            const body = [
+              '## 🔬 Benchmark Comparison Report',
+              '',
+              '**Requested by:** @${{ github.event.comment.user.login }}',
+              '',
+              '**Comparison:**',
+              `- **Baseline** (older): \`${baseline_ref}\` (${baseline_sha})`,
+              `- **Current** (newer): \`${current_ref}\` (${current_sha})`,
+              '',
+              '**Parameters:**',
+              `- **Iterations**: ${iterations}`,
+              `- **Sizes**: ${sizes}`,
+              '',
+              '---',
+              '',
+              comparison,
+              '',
+              '---',
+              '',
+              'Triggered by [/bench command](${{ github.event.comment.html_url }})'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });
+
+      - name: Upload benchmark artifacts
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-comparison-${{ github.event.comment.id }}
+          path: |
+            benchmark_baseline.json
+            benchmark_current.json
+            comparison.md
+            build_baseline.log
+            build_current.log
+          retention-days: 30
+
+      - name: Add success reaction
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'rocket'
+            });
+
+  handle-error:
+    name: Handle Errors
+    needs: [check-permission, run-benchmarks]
+    if: failure() && needs.check-permission.outputs.authorized == 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+    steps:
+      - name: Post error message
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const ref1 = '${{ needs.check-permission.outputs.ref1 }}';
+            const ref2 = '${{ needs.check-permission.outputs.ref2 }}';
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'confused'
+            });
+
+            const errorBody = [
+              '❌ **Benchmark comparison failed**',
+              '',
+              `Failed to compare \`${ref1}\` and \`${ref2}\`.`,
+              '',
+              '**Please check:**',
+              '- Both refs exist and are valid git references (branches, tags, or commits)',
+              '- The benchmark tool exists in both refs',
+              '- The code at those refs compiles successfully',
+              '- Parameters are in the correct format: `/bench <ref1> <ref2> [iterations] [sizes]`',
+              '',
+              '**See the [workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.**'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: errorBody
+            });
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
deleted file mode 100644
index 7f937ae..0000000
--- a/.github/workflows/benchmark.yml
+++ /dev/null
@@ -1,96 +0,0 @@
-name: Performance Benchmarks
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-
-jobs:
-  benchmark:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      pull-requests: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Cache Rust dependencies
-        uses: Swatinem/rust-cache@v2
-
-      - name: Build benchmark tool
-        run: cargo build --release --bin bench_throughput
-
-      - name: Run benchmarks
-        run: |
-          # Run benchmarks with multiple sizes and save to JSON
-          ./target/release/bench_throughput \
-            --sizes 1000,5000,10000 \
-            --iterations 100 \
-            --format json \
-            --output benchmark_results.json
-
-      - name: Download baseline benchmark
-        id: download-baseline
-        continue-on-error: true
-        uses: dawidd6/action-download-artifact@v3
-        with:
-          workflow: benchmark.yml
-          branch: main
-          name: benchmark-baseline
-          path: baseline
-          if_no_artifact_found: warn
-
-      - name: Compare with baseline
-        id: compare
-        run: |
-          if [ -f baseline/benchmark_results.json ]; then
-            echo "Baseline found, comparing results..."
-            python3 scripts/compare_benchmarks.py \
-              baseline/benchmark_results.json \
-              benchmark_results.json > comparison.md
-            echo "comparison_available=true" >> $GITHUB_OUTPUT
-          else
-            echo "No baseline found, this will become the new baseline"
-            echo "comparison_available=false" >> $GITHUB_OUTPUT
-            echo "## Benchmark Results\n\nNo baseline available for comparison. These results will be used as the baseline for future comparisons." > comparison.md
-          fi
-
-      - name: Comment PR with results
-        if: github.event_name == 'pull_request' && steps.compare.outputs.comparison_available == 'true'
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require('fs');
-            const comparison = fs.readFileSync('comparison.md', 'utf8');
-
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: comparison
-            });
-
-      - name: Upload current results as artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-current
-          path: |
-            benchmark_results.json
-            comparison.md
-
-      - name: Fail if significant performance regression
-        if: steps.compare.outputs.comparison_available == 'true'
-        run: |
-          if grep -q "⚠️ PERFORMANCE REGRESSION" comparison.md; then
-            echo "::warning::Performance regression detected. Review comparison.md for details."
-            # Uncomment the next line to fail the build on regression
-            # exit 1
-          fi
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 570e3ca..943063c 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -217,9 +217,8 @@ jobs:
           zsh xz-utils liblz4-tool musl-tools brotli zstd
 
       - name: Install Rust
-        uses: dtolnay/rust-toolchain@master
+        uses: dtolnay/rust-toolchain@stable
         with:
-          toolchain: nightly
           target: ${{ matrix.target }}
 
       # for some reason, the above action doesn't seem to set the target correctly
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c0ce61..67746bc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       - name: Checkout repository
        uses: actions/checkout@v4
       - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@nightly
+        uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Run tests
        run: cargo test --locked --all-features --workspace -- --nocapture
@@ -28,7 +28,7 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@nightly
+        uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - uses: Swatinem/rust-cache@v2
diff --git a/.github/workflows/update-baseline.yml b/.github/workflows/update-baseline.yml
deleted file mode 100644
index 46c1a24..0000000
--- a/.github/workflows/update-baseline.yml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: Update Benchmark Baseline
-
-on:
-  workflow_dispatch:
-    inputs:
-      ref:
-        description: 'Git ref (tag, branch, or commit SHA) to benchmark'
-        required: true
-        default: 'main'
-        type: string
-      iterations:
-        description: 'Number of benchmark iterations'
-        required: false
-        default: '100'
-        type: string
-
-jobs:
-  update-baseline:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-
-    steps:
-      - name: Checkout repository at specified ref
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.ref }}
-          fetch-depth: 0
-
-      - name: Get commit info
-        id: commit-info
-        run: |
-          echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
-          echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
-          echo "date=$(git log -1 --format=%ci)" >> $GITHUB_OUTPUT
-          echo "message=$(git log -1 --format=%s)" >> $GITHUB_OUTPUT
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Cache Rust dependencies
-        uses: Swatinem/rust-cache@v2
-
-      - name: Build benchmark tool
-        run: cargo build --release --bin bench_throughput
-
-      - name: Run benchmarks
-        run: |
-          echo "Running benchmarks for commit ${{ steps.commit-info.outputs.short_sha }}"
-          ./target/release/bench_throughput \
-            --sizes 1000,5000,10000 \
-            --iterations ${{ inputs.iterations }} \
-            --format json \
-            --output benchmark_results.json
-
-      - name: Add workflow summary
-        run: |
-          echo "## Baseline Update Summary" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "**Reference:** \`${{ inputs.ref }}\`" >> $GITHUB_STEP_SUMMARY
-          echo "**Commit:** ${{ steps.commit-info.outputs.sha }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Date:** ${{ steps.commit-info.outputs.date }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Message:** ${{ steps.commit-info.outputs.message }}" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "✅ Baseline has been updated successfully." >> $GITHUB_STEP_SUMMARY
-
-      - name: Upload new baseline
-        uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-baseline
-          path: benchmark_results.json
-          retention-days: 90
-
-      - name: Baseline updated successfully
-        run: |
-          echo "::notice::Baseline successfully updated to commit ${{ steps.commit-info.outputs.short_sha }}"
diff --git a/scripts/README.md b/scripts/README.md
index 4061422..fb338ba 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -4,12 +4,84 @@ This directory contains scripts used by the GitHub Actions CI/CD pipeline to tra
 
 ## Overview
 
-The benchmark CI/CD system automatically:
-1. Runs performance benchmarks on every push to `main` and on pull requests
-2. Compares results against the baseline (last `main` branch results)
-3. Generates a detailed comparison report
-4. Comments on PRs with performance changes
-5. Warns about significant performance regressions
+The benchmark system uses an **on-demand approach** triggered via PR comments. There are no automatic benchmark runs - all comparisons are triggered manually by the repository owner using the `/bench` command.
+
+## The `/bench` Command
+
+### Command Syntax
+
+```bash
+/bench <ref1> <ref2> [iterations] [sizes]
+```
+
+**Parameters:**
+- `ref1` (required): Baseline git reference (commit, branch, or tag)
+- `ref2` (required): Current git reference to compare against the baseline
+- `iterations` (optional): Number of benchmark iterations (default: 100)
+- `sizes` (optional): Comma-separated input sizes (default: 1000,5000,10000)
+
+### Examples
+
+```bash
+# Basic comparison with defaults (100 iterations, sizes: 1000,5000,10000)
+/bench main v0.13.0
+
+# Compare two commits
+/bench abc123 def456
+
+# Custom iterations
+/bench main HEAD 200
+
+# Custom iterations and sizes
+/bench v0.12.0 v0.13.0 100 1000,5000,10000,50000
+
+# Compare feature branch vs main
+/bench feature-branch main
+```
+
+### Security
+
+- ⚠️ **Owner-only**: Only the repository owner can trigger benchmarks
+- ✅ **PR-only**: Works only on pull request comments (not regular issues)
+- ✅ **Safe**: No arbitrary code execution - only validated git refs
+
+### Workflow
+
+1. **Post command** in a PR comment: `/bench main HEAD`
+2. **Bot acknowledges** with a 👀 reaction and a status message
+3. **Validation** checks:
+   - User is the repository owner
+   - Both refs exist
+   - The benchmark tool exists in both refs
+   - Parameters are valid
+4. **Benchmarks run** on both refs
+5. **Results posted** as a PR comment with a detailed comparison
+6. **Success reaction** 🚀 (or 😕 on failure)
+7. **Artifacts uploaded** for 30 days
+
+### Error Handling
+
+The workflow handles several error cases gracefully:
+
+**Missing benchmark tool:**
+```
+❌ Benchmark comparison failed
+
+Reason: The benchmark tool (bench_throughput) does not exist in ref: v0.10.0
+
+Solution: The benchmark tool was added in commit d264124.
+Please use refs that include this commit or later.
+
+Example: /bench main HEAD (if both include the benchmark tool)
+```
+
+**Invalid parameters:**
+```
+❌ Invalid format. Usage: /bench <ref1> <ref2> [iterations] [sizes]
+```
+
+**Build failures:**
+The workflow will report build errors with logs attached as artifacts.
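+
+### Reproducing a Comparison Locally
+
+The following is a minimal local sketch of roughly what the workflow does for `/bench <ref1> <ref2>`; the refs below are placeholders, and a clean working tree plus the default parameters are assumed:
+
+```bash
+REF1=main         # baseline (older) ref - placeholder
+REF2=my-feature   # current (newer) ref - placeholder
+
+# Build and benchmark the baseline ref
+git checkout "$REF1"
+cargo build --release --bin bench_throughput
+./target/release/bench_throughput \
+  --sizes 1000,5000,10000 \
+  --iterations 100 \
+  --format json \
+  --output benchmark_baseline.json
+
+# Build and benchmark the current ref (rebuild in case dependencies changed)
+git checkout "$REF2"
+cargo build --release --bin bench_throughput
+./target/release/bench_throughput \
+  --sizes 1000,5000,10000 \
+  --iterations 100 \
+  --format json \
+  --output benchmark_current.json
+
+# Generate the markdown comparison report
+python3 scripts/compare_benchmarks.py benchmark_baseline.json benchmark_current.json > comparison.md
+```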
 
 ## Files
 
@@ -25,7 +97,7 @@ python3 scripts/compare_benchmarks.py baseline.json current.json > report.md
 
 **Features:**
 - Detects performance regressions (>5% slower)
 - Highlights improvements (>5% faster)
-- Compares avg/path latency, p99, and throughput
+- Compares avg/path latency, p95, p99, and throughput
 - Color-coded indicators:
   - 🟢 Significant improvement (>5% faster)
   - ✅ Improvement (2-5% faster)
@@ -36,33 +108,30 @@ python3 scripts/compare_benchmarks.py baseline.json current.json > report.md
 
 ## GitHub Actions Workflow
 
-The benchmark workflow (`.github/workflows/benchmark.yml`) runs automatically on:
-- Pushes to `main` branch
-- Pull requests
+### Benchmark Command (`.github/workflows/bench-command.yml`)
 
-### Workflow Steps
+The single workflow that handles all benchmark comparisons.
 
-1. **Build** - Compiles the `bench_throughput` tool in release mode
-2. **Run Benchmarks** - Executes benchmarks with multiple input sizes (100, 1K, 10K paths)
-3. **Download Baseline** - Fetches the last benchmark from `main` branch
-4. **Compare** - Runs the comparison script
-5. **Comment on PR** - Posts results as a comment on pull requests
-6. **Upload Artifacts** - Stores results for historical tracking
-7. **Update Baseline** - Saves results as new baseline (main branch only)
-8. **Check Regressions** - Warns if significant regressions detected
+**Triggers:**
+- PR comments starting with `/bench`
+- Owner-only access control
 
-### Artifacts
+**What it does:**
+1. **Validates** user permissions and parameters
+2. **Checks** both refs for benchmark tool existence
+3. **Builds** the benchmark tool for each ref
+4. **Runs** benchmarks with specified parameters
+5. **Compares** results using `compare_benchmarks.py`
+6. **Posts** detailed report to PR
+7. **Uploads** artifacts (results + build logs)
 
-The workflow stores three artifacts:
+**Artifacts:**
 
-1. **benchmark-current** - Current run results (JSON, text, comparison)
-   - Retained for 30 days
-   - Available for download from workflow runs
-
-2. **benchmark-baseline** - Baseline for comparison
-   - Updated only on `main` branch pushes
-   - Retained for 90 days
-   - Used for comparing future PRs
+- **benchmark-comparison-<comment-id>**
+  - Both benchmark JSON files
+  - Comparison markdown report
+  - Build logs for debugging
+  - Retained for 30 days
 
 ## Running Benchmarks Locally
 
@@ -71,8 +140,8 @@ cargo build --release --bin bench_throughput
 
 ./target/release/bench_throughput \
-  --sizes 100,1000,10000 \
-  --iterations 50 \
+  --sizes 1000,5000,10000 \
+  --iterations 100 \
   --format json \
   --output my_benchmark.json
 ```
@@ -91,18 +160,18 @@ cat comparison.md
 
 ### Benchmark Parameters
 
-Default parameters in the CI workflow:
-- **Input sizes:** 100, 1,000, 10,000 paths
-- **Iterations:** 50 (per size)
-- **Output format:** JSON + human-readable text
+Default parameters:
+- **Input sizes:** 1,000, 5,000, 10,000 paths
+- **Iterations:** 100 (per size)
+- **Output format:** JSON
 
-To change these, edit `.github/workflows/benchmark.yml`:
-```yaml
-./target/release/bench_throughput \
-  --sizes 100,1000,10000,100000 \  # Add more sizes
-  --iterations 100 \               # More iterations = more stable results
-  --format json \
-  --output benchmark_results.json
+These can be overridden per command:
+```bash
+# Use different sizes for larger datasets
+/bench main HEAD 100 10000,50000,100000
+
+# More iterations for stable results
+/bench v0.12.0 v0.13.0 500 1000,5000,10000
 ```
 
 ### Regression Thresholds
 
@@ -130,24 +199,42 @@ def calculate_change(baseline: float, current: float):
     ...
 ```
 
-### Failing on Regressions
+## Use Cases
 
-By default, the workflow **warns** about regressions but doesn't fail the build.
+### 1. Compare Feature Branch vs Main
+```bash
+/bench main feature-optimize-parsing
+```
+Use this to see if your optimization actually improves performance.
 
-To fail on regressions, uncomment this line in `.github/workflows/benchmark.yml`:
-```yaml
-- name: Fail if significant performance regression
-  run: |
-    if grep -q "⚠️ PERFORMANCE REGRESSION" comparison.md; then
-      echo "::warning::Performance regression detected."
-      exit 1 # Uncomment this line
-    fi
+### 2. Validate Release Performance
+```bash
+/bench v0.12.0 v0.13.0
 ```
+Compare performance between releases to ensure no regressions.
+
+### 3. Debug Performance Issues
+```bash
+/bench abc123 def456
+```
+Compare two commits to narrow down which change introduced a regression.
+
+### 4. Stress Test with Large Datasets
+```bash
+/bench main HEAD 100 10000,50000,100000,500000
+```
+Test how your code scales with larger input sizes.
+
+### 5. High-Precision Comparison
+```bash
+/bench main feature-branch 1000 1000,5000,10000
+```
+Use more iterations for more stable and reliable results.
 
 ## Troubleshooting
 
-### No baseline found
-On the first run, there's no baseline for comparison. The first successful run on `main` will establish the baseline.
+### No benchmark tool found
+The benchmark tool (`bench_throughput`) was added in commit `d264124`. If you're comparing older commits, you'll get an error. Solution: Only compare refs that include the benchmark tool.
 
 ### Benchmark variance
 Benchmarks can vary due to:
@@ -156,30 +243,42 @@ Benchmarks can vary due to:
 - Network conditions
 
 The 2% noise threshold accounts for normal variance. For more stable results:
-1. Increase iteration count
-2. Run benchmarks multiple times
-3. Use larger input sizes (less affected by noise)
+1. Increase iteration count: `/bench main HEAD 500`
+2. Use larger input sizes (less affected by noise)
+3. Run benchmarks multiple times and compare (see the sketch below)
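+
+The 2%/5% thresholds drive the indicator icons in the report. A hypothetical shell sketch of that classification - the actual logic lives in `scripts/compare_benchmarks.py`:
+
+```bash
+# Hypothetical helper mirroring the documented thresholds
+# (±2% noise, 2-5% minor, >5% significant); not the actual
+# implementation from scripts/compare_benchmarks.py.
+classify_change() {
+  local baseline_ns=$1 current_ns=$2
+  awk -v b="$baseline_ns" -v c="$current_ns" 'BEGIN {
+    pct = (c - b) / b * 100
+    mag = (pct < 0) ? -pct : pct
+    if (mag <= 2)     icon = "➖"                      # within noise
+    else if (mag < 5) icon = (pct < 0) ? "✅" : "⚠️"   # minor change
+    else              icon = (pct < 0) ? "🟢" : "🔴"   # significant change
+    printf "%s %+.1f%%\n", icon, pct
+  }'
+}
+
+classify_change 500 560   # -> 🔴 +12.0%
+classify_change 520 510   # -> ➖ -1.9%
+```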
 
 ### Permission errors
 
-The workflow needs these permissions (already configured):
-```yaml
-permissions:
-  contents: write
-  pull-requests: write
-```
+Only the repository owner can trigger benchmarks. Other users will receive a permission denied message.
+
+### Build failures
+If the code doesn't compile at one of the refs, the workflow will fail. Check the workflow run logs for build errors. Artifacts include `build_baseline.log` and `build_current.log` for debugging.
 
 ## Example Report
 
+When you run `/bench main HEAD`, you'll get a report like this:
+
 ```markdown
+## 🔬 Benchmark Comparison Report
+
+**Requested by:** @username
+
+**Comparison:**
+- **Baseline**: `main` (abc123)
+- **Current**: `HEAD` (def456)
+
+**Parameters:**
+- **Iterations**: 100
+- **Sizes**: 1000,5000,10000
+
+---
+
 # 📊 Benchmark Comparison Report
 
 **Input Size:** 10,000 paths
-**Baseline Timestamp:** 1699123456
-**Current Timestamp:** 1699123789
 
 ## Performance Comparison
 
-| Template | Avg/Path | Change | p99 | Change | Throughput | Change |
+| Template | Avg/Path | Change | p95 | Change | Throughput | Change |
 |----------|----------|--------|-----|--------|------------|--------|
 | Strip ANSI | 304ns | ✅ -3.2% | 327ns | ➖ -1.1% | 3.29M/s | ✅ +3.3% |
 | Split all | 519ns | 🔴 +12.5% | 838ns | ⚠️ +8.2% | 1.93M/s | 🔴 -11.1% |
@@ -194,6 +293,14 @@ permissions:
 
 ### ⚠️ PERFORMANCE REGRESSIONS
 
 - **Split all**: +12.5% slower
+
+### ✨ Performance Improvements
+
+- **Strip ANSI**: 3.2% faster
+
+---
+
+Triggered by [/bench command](https://github.com/...)
 ```
 
 ## Further Reading