diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index 3385ff50a0c1..6217ba3824d3 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -2,30 +2,34 @@ name: 'C-Chain Re-Execution Benchmark' description: 'Run C-Chain re-execution benchmark' inputs: - runner_name: - description: 'The name of the runner to use and include in the Golang Benchmark name.' - required: true + task: + description: 'Task name to execute from Taskfile.yml. Leave empty to use custom inputs below.' + default: '' + # Custom inputs (alternative to task-based approach) config: description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' default: '' start-block: description: 'The start block for the benchmark.' - default: '101' + default: '' end-block: description: 'The end block for the benchmark.' - default: '250000' + default: '' block-dir-src: description: 'The source block directory. Supports S3 directory/zip and local directories.' - default: 's3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**' + default: '' current-state-dir-src: description: 'The current state directory. Supports S3 directory/zip and local directories.' - default: 's3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**' + default: '' + runner_name: + description: 'The name of the runner to use and include in the Golang Benchmark name.' + required: true aws-role: description: 'AWS role to assume for S3 access.' required: true aws-region: description: 'AWS region to use for S3 access.' - required: true + default: 'us-east-2' aws-role-duration-seconds: description: 'The duration of the AWS role to assume for S3 access.' required: true @@ -56,54 +60,126 @@ inputs: push-github-action-benchmark: description: 'Whether to push the benchmark result to GitHub.' 
required: true - default: false push-post-state: description: 'S3 destination to copy the current-state directory after completing re-execution. If empty, this will be skipped.' default: '' + # The following inputs need never be provided by the caller. They + # default to context values that the action's steps are unable to + # access directly. + repository-owner: + default: ${{ github.repository_owner }} + repository-name: + default: ${{ github.event.repository.name }} + workflow: + default: ${{ github.workflow }} + run-id: + default: ${{ github.run_id }} + run-number: + default: ${{ github.run_number }} + run-attempt: + default: ${{ github.run_attempt }} + job: + default: ${{ github.job }} runs: using: composite steps: - - name: Set task env + - uses: cachix/install-nix-action@02a151ada4993995686f9ed4f1be7cfbb229e56f #v31 + with: + github_access_token: ${{ inputs.github-token }} + - run: echo "dependencies installed" + shell: nix develop --command bash {0} + # Cache Go modules (architecture-independent) + - uses: actions/cache@v4 + id: go-mod-cache + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-mod-${{ hashFiles('go.sum') }} + restore-keys: ${{ runner.os }}-go-mod- + # Cache Go build cache (architecture-specific) + - uses: actions/cache@v4 + with: + path: ~/.cache/go-build + key: ${{ runner.os }}-${{ runner.arch }}-go-build-${{ hashFiles('go.sum') }} + restore-keys: ${{ runner.os }}-${{ runner.arch }}-go-build- + # Download modules only on cache miss + - run: go mod download + if: steps.go-mod-cache.outputs.cache-hit != 'true' + shell: nix develop --command bash -x {0} + - name: Notify of metrics availability + if: inputs.prometheus-username != '' shell: bash run: | - { - echo "EXECUTION_DATA_DIR=${{ inputs.workspace }}/reexecution-data" - echo "BENCHMARK_OUTPUT_FILE=output.txt" - echo "START_BLOCK=${{ inputs.start-block }}" - echo "END_BLOCK=${{ inputs.end-block }}" - echo "BLOCK_DIR_SRC=${{ inputs.block-dir-src }}" - echo "CURRENT_STATE_DIR_SRC=${{ 
inputs.current-state-dir-src }}" - } >> $GITHUB_ENV + metrics_url=$($GITHUB_ACTION_PATH/output-metrics-url.sh) + echo "Grafana: ${metrics_url}" + echo "🔗 [View Grafana Dashboard](${metrics_url})" >> "$GITHUB_STEP_SUMMARY" + env: + GRAFANA_URL: https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&refresh=10s&var-filter=is_ephemeral_node%7C%3D%7Cfalse&var-filter=gh_repo%7C%3D%7C${{ inputs.repository-owner }}%2F${{ inputs.repository-name }}&var-filter=gh_run_id%7C%3D%7C${{ inputs.run-id }}&var-filter=gh_run_attempt%7C%3D%7C${{ inputs.run-attempt }} + GH_JOB_ID: ${{ inputs.job }} + - name: Warn that collection of metrics and logs will not be performed + if: inputs.prometheus-username == '' + shell: bash + run: echo "::warning::Monitoring credentials not found. Skipping collector start. Is the PR from a fork branch?" - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ inputs.aws-role }} aws-region: ${{ inputs.aws-region }} role-duration-seconds: ${{ inputs.aws-role-duration-seconds }} - - name: Run C-Chain Re-Execution - uses: ./.github/actions/run-monitored-tmpnet-cmd - with: - run: | + - name: Validate inputs + shell: bash + run: | + if [[ -z "${{ inputs.task }}" ]]; then + # Granular mode - validate required inputs + missing=() + [[ -z "${{ inputs.block-dir-src }}" ]] && missing+=("block-dir-src") + [[ -z "${{ inputs.current-state-dir-src }}" ]] && missing+=("current-state-dir-src") + [[ -z "${{ inputs.start-block }}" ]] && missing+=("start-block") + [[ -z "${{ inputs.end-block }}" ]] && missing+=("end-block") + + if [[ ${#missing[@]} -gt 0 ]]; then + echo "::error::When 'task' is empty, the following inputs are required: ${missing[*]}" + exit 1 + fi + fi + - name: Set task env + shell: bash + run: | + TIMESTAMP=$(date '+%Y%m%d-%H%M%S') + echo "EXECUTION_DATA_DIR=/tmp/reexecution-data-${TIMESTAMP}" >> "$GITHUB_ENV" + echo "BENCHMARK_OUTPUT_FILE=${GITHUB_WORKSPACE}/benchmark-output.txt" >> 
"$GITHUB_ENV" + - name: Run C-Chain Re-execution Benchmark + shell: nix develop --impure --command bash -x {0} + run: | + if [[ -n "${{ inputs.task }}" ]]; then + # Task-based approach + ./scripts/run_task.sh ${{ inputs.task }} \ + BENCHMARK_OUTPUT_FILE="${{ env.BENCHMARK_OUTPUT_FILE }}" \ + EXECUTION_DATA_DIR="${{ env.EXECUTION_DATA_DIR }}" + else + # Granular approach ./scripts/run_task.sh reexecute-cchain-range-with-copied-data \ CONFIG=${{ inputs.config }} \ EXECUTION_DATA_DIR=${{ env.EXECUTION_DATA_DIR }} \ - BLOCK_DIR_SRC=${{ env.BLOCK_DIR_SRC }} \ - CURRENT_STATE_DIR_SRC=${{ env.CURRENT_STATE_DIR_SRC }} \ - START_BLOCK=${{ env.START_BLOCK }} \ - END_BLOCK=${{ env.END_BLOCK }} \ - LABELS=${{ env.LABELS }} \ - BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ - RUNNER_NAME=${{ inputs.runner_name }} \ - METRICS_SERVER_ENABLED=true \ - METRICS_COLLECTOR_ENABLED=true - prometheus_url: ${{ inputs.prometheus-url }} - prometheus_push_url: ${{ inputs.prometheus-push-url }} - prometheus_username: ${{ inputs.prometheus-username }} - prometheus_password: ${{ inputs.prometheus-password }} - grafana_dashboard_id: 'Gl1I20mnk/c-chain' - runtime: "" # Set runtime input to empty string to disable log collection - + BLOCK_DIR_SRC=${{ inputs.block-dir-src }} \ + CURRENT_STATE_DIR_SRC=${{ inputs.current-state-dir-src }} \ + START_BLOCK=${{ inputs.start-block }} \ + END_BLOCK=${{ inputs.end-block }} \ + BENCHMARK_OUTPUT_FILE="${{ env.BENCHMARK_OUTPUT_FILE }}" + fi + env: + RUNNER_NAME: ${{ inputs.runner_name }} + METRICS_COLLECTOR_ENABLED: ${{ inputs.prometheus-username != '' }} + PROMETHEUS_URL: ${{ inputs.prometheus-url }} + PROMETHEUS_PUSH_URL: ${{ inputs.prometheus-push-url }} + PROMETHEUS_USERNAME: ${{ inputs.prometheus-username }} + PROMETHEUS_PASSWORD: ${{ inputs.prometheus-password }} + GH_REPO: ${{ inputs.repository-owner }}/${{ inputs.repository-name }} + GH_WORKFLOW: ${{ inputs.workflow }} + GH_RUN_ID: ${{ inputs.run-id }} + GH_RUN_NUMBER: ${{ inputs.run-number 
}} + GH_RUN_ATTEMPT: ${{ inputs.run-attempt }} + GH_JOB_ID: ${{ inputs.job }} - name: Compare Benchmark Results uses: benchmark-action/github-action-benchmark@v1 with: @@ -112,8 +188,10 @@ runs: summary-always: true github-token: ${{ inputs.github-token }} auto-push: ${{ inputs.push-github-action-benchmark }} - - - name: Push Post-State to S3 (if not exists) - if: ${{ inputs.push-post-state != '' }} - shell: nix develop --command bash -x {0} - run: ./scripts/run_task.sh export-dir-to-s3 SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ DST=${{ inputs.push-post-state }} + - name: Push Post-State to S3 + if: inputs.push-post-state != '' + shell: nix develop --impure --command bash -x {0} + run: | + ./scripts/run_task.sh export-dir-to-s3 \ + SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ \ + DST=${{ inputs.push-post-state }} diff --git a/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh b/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh new file mode 100755 index 000000000000..875f7d7fdb8d --- /dev/null +++ b/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# WARNING: This file is a duplication of: +# - .github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh (source of truth) +# Changes must be made to BOTH files. + +set -euo pipefail + +# Timestamps are in seconds +from_timestamp="$(date '+%s')" +monitoring_period=900 # 15 minutes +to_timestamp="$((from_timestamp + monitoring_period))" + +# Grafana expects microseconds, so pad timestamps with 3 zeros +metrics_url="${GRAFANA_URL}&var-filter=gh_job_id%7C%3D%7C${GH_JOB_ID}&from=${from_timestamp}000&to=${to_timestamp}000" + +# Optionally ensure that the link displays metrics only for the shared +# network rather than mixing it with the results for private networks. 
+if [[ -n "${FILTER_BY_OWNER:-}" ]]; then + metrics_url="${metrics_url}&var-filter=network_owner%7C%3D%7C${FILTER_BY_OWNER}" +fi + +echo "${metrics_url}" diff --git a/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh b/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh index ccecc34ac09c..5d2e8d59e8d1 100755 --- a/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh +++ b/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh @@ -1,5 +1,9 @@ #!/usr/bin/env bash +# WARNING: This file is duplicated at: +# - .github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh (copy) +# Changes must be made to BOTH files. + set -euo pipefail # Timestamps are in seconds diff --git a/.github/workflows/c-chain-reexecution-benchmark-container.json b/.github/workflows/c-chain-reexecution-benchmark-container.json index aa8edb0aac70..2ccfac027111 100644 --- a/.github/workflows/c-chain-reexecution-benchmark-container.json +++ b/.github/workflows/c-chain-reexecution-benchmark-container.json @@ -3,20 +3,12 @@ "include": [ { "runner": "ubuntu-latest", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", + "task": "c-chain-reexecution-hashdb-101-250k", "timeout-minutes": 30 }, { "runner": "avalanche-avalanchego-runner-2ti", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", + "task": "c-chain-reexecution-hashdb-101-250k", "timeout-minutes": 30 } ] @@ -25,20 +17,12 @@ "include": [ { "runner": "avago-runner-m6i-4xlarge-ebs-fast", - "config": "default", - "start-block": 33000001, - "end-block": 33500000, - "block-dir-src": 
"s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-30m-40m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**", + "task": "c-chain-reexecution-hashdb-33m-33m500k", "timeout-minutes": 1440 }, { "runner": "avago-runner-i4i-4xlarge-local-ssd", - "config": "default", - "start-block": 33000001, - "end-block": 33500000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-30m-40m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**", + "task": "c-chain-reexecution-hashdb-33m-33m500k", "timeout-minutes": 1440 } ] diff --git a/.github/workflows/c-chain-reexecution-benchmark-container.yml b/.github/workflows/c-chain-reexecution-benchmark-container.yml index db12a98ad703..83f0d16bd560 100644 --- a/.github/workflows/c-chain-reexecution-benchmark-container.yml +++ b/.github/workflows/c-chain-reexecution-benchmark-container.yml @@ -6,34 +6,30 @@ on: inputs: config: description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' - required: false default: '' start-block: description: 'The start block for the benchmark.' - required: false - default: 101 + default: '' end-block: description: 'The end block for the benchmark.' - required: false - default: 250000 + default: '' block-dir-src: description: 'The source block directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/** + default: '' current-state-dir-src: description: 'The current state directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/** + default: '' + task: + description: 'Taskfile task to execute (e.g., c-chain-reexecution-hashdb-101-250k)' + default: '' runner: description: 'Runner to execute the benchmark. 
Input to the runs-on field of the job.' - required: false - default: ubuntu-latest + required: true push-post-state: description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.' default: '' timeout-minutes: description: 'Timeout in minutes for the job.' - required: false default: 30 # Disabled because scheduled trigger is empty. To enable, uncomment and add at least one vector to the schedule @@ -55,12 +51,8 @@ jobs: if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then { echo "matrix< " - echo "Import from S3 Example: $0 's3://bucket1/path1' /dest/dir" + echo "Import from S3 URL Example: $0 's3://bucket1/path1' /dest/dir" + echo "Import from S3 object key Example: $0 'cchain-mainnet-blocks-1m-ldb' /dest/dir" echo "Export to S3 Example: $0 '/local/path1' 's3://bucket2/path2'" echo "Local Example: $0 '/local/path1' /dest/dir" exit 1 @@ -18,11 +18,18 @@ fi SRC="$1" DST="$2" +# If SRC doesn't start with s3:// or /, assume it's an S3 object key +if [[ "$SRC" != s3://* ]] && [[ "$SRC" != /* ]]; then + echo "Error: SRC must be either an S3 URL (s3://...), a local path (/...), or already expanded" + echo "If using an object key, expand it before calling this script" + exit 1 +fi + # Function to copy from a single source to destination function copy_source() { local source="$1" local dest="$2" - + # Check if source starts with s3:// if [[ "$source" == s3://* || "$dest" == s3://* ]]; then # Use s5cmd to copy from S3 @@ -30,7 +37,7 @@ function copy_source() { time s5cmd cp --show-progress "$source" "$dest" else # Use cp for local filesystem with recursive support - + # Ensure destination directory exists mkdir -p "$dest" diff --git a/scripts/run_polyrepo.sh b/scripts/run_polyrepo.sh new file mode 100755 index 000000000000..23ab4ec7bd1a --- /dev/null +++ b/scripts/run_polyrepo.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -euo pipefail + +POLYREPO_REVISION=0c4c6fcc92 +echo "Running polyrepo@${POLYREPO_REVISION} via go 
run..." +go run github.com/ava-labs/avalanchego/tests/fixture/polyrepo@"${POLYREPO_REVISION}" "${@}" diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 2cb50b91fe21..b396a8eacac2 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -208,6 +208,12 @@ func benchmarkReexecuteRange( ) log.Info("re-executing block range with params", + zap.String("runner", runnerNameArg), + zap.String("config", configNameArg), + zap.String("labels", labelsArg), + zap.String("metrics-server-enabled", strconv.FormatBool(metricsServerEnabled)), + zap.Uint64("metrics-server-port", metricsPort), + zap.String("metrics-collector-enabled", strconv.FormatBool(metricsCollectorEnabled)), zap.String("block-dir", blockDir), zap.String("vm-db-dir", vmDBDir), zap.String("chain-data-dir", chainDataDir),