Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 7 additions & 25 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@ on:
alias:
required: true
type: string
num-shards:
kernels:
required: true
type: number
description: "Number of shards benchmark is running on"
shard:
required: true
type: number
description: "Maximum parallel runners to determine shards"
type: string

jobs:
benchmark:
name: benchmark-${{ inputs.runtime-version }}-shard${{ inputs.shard }}-py${{ inputs.python-version }}-${{ inputs.alias }}
name: benchmark-${{ inputs.runtime-version }}-${{ inputs.kernels }}-py${{ inputs.python-version }}-${{ inputs.alias }}

container:
image: ${{ inputs.image }}
Expand Down Expand Up @@ -120,25 +115,12 @@ jobs:

source .venv/bin/activate

KERNELS=("softmax" "jsd" "welford" "kl_div" "layer_norm" "layer_norm-bwd" "rms_norm" "rms_norm-bwd" "cross_entropy" "flash_attention" "gemm" "grouped_gemm")
NUMSHARDS=${{ inputs.num-shards }}
SHARD=${{ inputs.shard }}

SHARD_KERNELS=()
for ((i=0; i<${#KERNELS[@]}; i++)); do
if [ $((i % NUMSHARDS)) -eq $SHARD ]; then
SHARD_KERNELS+=("${KERNELS[i]}")
fi
done

KERNEL_LIST=$(IFS=','; echo "${SHARD_KERNELS[*]}")
echo "Running shard $SHARD of $NUMSHARDS with kernels: $KERNEL_LIST"

TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
echo "$TEST_REPORTS_DIR"

for kernel in "${SHARD_KERNELS[@]}"; do
KERNEL_LIST="${{ inputs.kernels }}"
for kernel in ${KERNEL_LIST//,/ }; do
echo "=========================================="
echo "Running benchmark for kernel: $kernel"
echo "=========================================="
Expand Down Expand Up @@ -217,7 +199,7 @@ jobs:
- name: Upload the benchmark results to GitHub
uses: actions/upload-artifact@v4
with:
name: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
name: benchmark-results-${{ inputs.alias }}-${{ inputs.kernels }}
path: test/test-reports

upload-benchmark-results:
Expand All @@ -227,7 +209,7 @@ jobs:
id-token: write
contents: read
with:
benchmark-artifact: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
benchmark-artifact: benchmark-results-${{ inputs.alias }}-${{ inputs.kernels }}
benchmark-metadata: ${{ needs.benchmark.outputs.benchmark-metadata }}
runners-info: ${{ needs.benchmark.outputs.runners-info }}
dependencies: ${{ needs.benchmark.outputs.dependencies }}
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/benchmark_dispatch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
uses: ./.github/workflows/compute-benchmark-matrix.yml
with:
max-runners: 12
kernels: "softmax,jsd,welford,kl_div,layer_norm,layer_norm-bwd,rms_norm,rms_norm-bwd,cross_entropy,flash_attention,gemm,grouped_gemm"

run-h100:
needs: gen-matrix-h100
Expand All @@ -44,14 +45,14 @@ jobs:
runtime-version: cu128
container-options: --gpus all
alias: h100
num-shards: ${{ matrix.num_shards }}
shard: ${{ matrix.shard }}
kernels: ${{ matrix.kernels }}

gen-matrix-b200:
uses: ./.github/workflows/compute-benchmark-matrix.yml
if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
with:
max-runners: 12
kernels: "softmax,jsd,welford,kl_div,layer_norm,layer_norm-bwd,rms_norm,rms_norm-bwd,cross_entropy,flash_attention,gemm,grouped_gemm"

run-b200:
needs: gen-matrix-b200
Expand All @@ -69,14 +70,14 @@ jobs:
runtime-version: cu130
container-options: --gpus all
alias: b200
num-shards: ${{ matrix.num_shards }}
shard: ${{ matrix.shard }}
kernels: ${{ matrix.kernels }}

gen-matrix-mi325x:
uses: ./.github/workflows/compute-benchmark-matrix.yml
if: ${{ github.event.inputs.run_mi325x == 'true' || github.event_name == 'schedule' }}
with:
max-runners: 6
kernels: "softmax,jsd,welford,kl_div,layer_norm,layer_norm-bwd,rms_norm,rms_norm-bwd,cross_entropy,flash_attention,gemm,grouped_gemm"

run-mi325x:
needs: gen-matrix-mi325x
Expand All @@ -94,5 +95,4 @@ jobs:
runtime-version: rocm6.4
container-options: --device=/dev/kfd --device=/dev/dri
alias: mi325x
num-shards: ${{ matrix.num_shards }}
shard: ${{ matrix.shard }}
kernels: ${{ matrix.kernels }}
28 changes: 26 additions & 2 deletions .github/workflows/compute-benchmark-matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:
max-runners:
required: true
type: string
kernels:
required: true
type: string
outputs:
matrix:
description: "The generated matrix for sharding"
Expand All @@ -20,5 +23,26 @@ jobs:
- id: gen
run: |
n="${{ inputs.max-runners }}"
shards=$(seq 0 $((n-1)) | paste -sd, -)
echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> $GITHUB_OUTPUT
# Distribute the comma-separated kernel list round-robin over at most
# max-runners buckets and publish the result as the `matrix` step output,
# e.g. matrix={"kernels":["softmax,gemm","jsd"]}. Every bucket is itself a
# comma-separated kernel list.

# Reject zero, negative, non-numeric or zero-prefixed runner counts up front:
# a zero count would otherwise surface as a "division by 0" in the modulo
# below, and a leading zero would be read as octal by bash arithmetic.
if ! [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
  echo "::error::max-runners must be a positive integer, got '${n}'"
  exit 1
fi

IFS=',' read -ra RAW <<< "${{ inputs.kernels }}"

# Drop empty entries (e.g. from "a,,b") so no bucket gets a blank kernel name.
K=()
for name in "${RAW[@]}"; do
  if [[ -n "$name" ]]; then K+=("$name"); fi
done

total_kernels=${#K[@]}
if (( total_kernels == 0 )); then
  # An empty matrix would only fail later, in the job that consumes it.
  echo "::error::kernels input must name at least one kernel"
  exit 1
fi

# Never create more buckets than there are kernels, so no bucket is empty.
jobs=$(( total_kernels < n ? total_kernels : n ))

declare -a BUCKETS=()
for ((i=0; i<total_kernels; i++)); do
  idx=$(( i % jobs ))
  # ${BUCKETS[idx]:+,} expands to a comma only once the bucket has content.
  BUCKETS[idx]+="${BUCKETS[idx]:+,}${K[i]}"
done

# Quote each bucket, join with commas, then trim the trailing comma.
joined=$(printf '"%s",' "${BUCKETS[@]}")
json="{\"kernels\":[${joined%,}]}"

echo "matrix=$json" >> "$GITHUB_OUTPUT"
Loading