59 changes: 52 additions & 7 deletions .github/workflows/third-party-benchmarks.yml
@@ -90,7 +90,13 @@ jobs:
./scripts/test-triton.sh --install-sglang --skip-pip-install --skip-pytorch-install
cd benchmarks/third_party/sglang
python scaled_mm_benchmark.py --reports $REPORTS
python ../vllm/transform_results.py $REPORTS/scaled_mm_benchmark.csv $REPORTS/scaled-mm-int8-report.csv --tag $TAG --benchmark scaled-mm-int8 --param_cols="M,N,K" --bgroup sglang
python ../vllm/transform_results.py \
$REPORTS/scaled_mm_benchmark.csv \
$REPORTS/scaled-mm-int8-report.csv \
--tag $TAG \
--bgroup sglang \
--benchmark scaled-mm-int8 \
--param_cols="M,N,K"

- name: Run sglang benchmark with fp8
if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'sglang')) }}
@@ -99,9 +105,32 @@

cd benchmarks/third_party/sglang
FP8="1" python scaled_mm_benchmark.py --reports $REPORTS
python ../vllm/transform_results.py $REPORTS/scaled_mm_benchmark.csv $REPORTS/scaled-mm-fp8-report.csv --tag $TAG --benchmark scaled-mm-fp8 --param_cols="M,N,K" --bgroup sglang
python ../vllm/transform_results.py \
$REPORTS/scaled_mm_benchmark.csv \
$REPORTS/scaled-mm-fp8-report.csv \
--tag $TAG \
--bgroup sglang \
--benchmark scaled-mm-fp8 \
--param_cols="M,N,K"

- name: Run vllm benchmarks bf16
- name: Run vllm unified attention bf16
if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
run: |
source ./scripts/capture-hw-details.sh

./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install

cd benchmarks/third_party/vllm
python unified_attention_benchmark.py --reports $REPORTS
python transform_results.py \
$REPORTS/unified-attention-performance.csv \
$REPORTS/unified-attention-report.csv \
--tag $TAG \
--bgroup "vllm" \
--benchmark "unified-attn-bf16" \
--param_cols "q_heads,k_heads,head_size,dtype,qdtype,seq_lens,sliding_window,soft_cap,num_blocks,block_size"

- name: Run vllm batched moe bf16
if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
run: |
source ./scripts/capture-hw-details.sh
@@ -111,17 +140,30 @@ jobs:

cd benchmarks/third_party/vllm
python batched_moe_benchmark.py --reports $REPORTS
python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-bf16-benchmark --param_cols="num_experts,max_tokens_per_expert,K,N" --bgroup vllm
python transform_results.py \
$REPORTS/moe-gemm-performance.csv \
$REPORTS/moe-gemm-report.csv \
--tag $TAG \
--bgroup vllm \
--benchmark moe-bf16-benchmark \
--gbps \
--param_cols="num_experts,max_tokens_per_expert,K,N"


- name: Run vllm benchmarks fp8
- name: Run vllm batched moe fp8
if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
run: |
source ./scripts/capture-hw-details.sh

cd benchmarks/third_party/vllm
FP8="1" python batched_moe_benchmark.py --reports $REPORTS
python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-fp8-report.csv --tag $TAG --benchmark moe-fp8-benchmark --param_cols="num_experts,max_tokens_per_expert,K,N" --bgroup vllm
python transform_results.py \
$REPORTS/moe-gemm-performance.csv \
$REPORTS/moe-gemm-fp8-report.csv \
--tag $TAG \
--bgroup vllm \
--benchmark moe-fp8-benchmark \
--param_cols="num_experts,max_tokens_per_expert,K,N"


- name: Run Liger-Kernel benchmarks
@@ -136,7 +178,10 @@
bash benchmarks/third_party/liger/run_benchmarks.sh || RET_CODE=$?

cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
python benchmarks/third_party/liger/transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
python benchmarks/third_party/liger/transform.py \
$REPORTS/liger-raw.csv \
$REPORTS/liger-report.csv \
--tag $TAG

# Return the captured return code at the end
exit "$RET_CODE"
59 changes: 36 additions & 23 deletions benchmarks/third_party/vllm/transform_results.py
@@ -19,11 +19,12 @@ def parse_args():
parser.add_argument('--tag', help='Tag for the benchmark run', default='')
parser.add_argument('--benchmark', help='moe-benchmark', required=True)
parser.add_argument('--bgroup', help='Benchmark group', required=True)
parser.add_argument('--gbps', help='Parse "GB/s" values instead of TFlops', action='store_true')

return parser.parse_args()


def parse_csv(csv_file_path, tag, bench_group, benchmark, param_cols):
def parse_csv(csv_file_path, tag, bench_group, benchmark, param_cols, gbps=False):
"""Parse the benchmark CSV and extract performance metrics."""

df = pd.read_csv(csv_file_path)
@@ -32,32 +33,44 @@ def parse_csv(csv_file_path, tag, bench_group, benchmark, param_cols):
current_datetime = datetime.now().isoformat()

# Create params for all rows vectorized
df['params'] = df.apply(lambda row: json.dumps({p: int(row[p]) for p in param_cols}), axis=1)
def serialize_params(row):
param2val = {}
for p in param_cols:
try:
param2val[p] = int(row[p])
except ValueError:
param2val[p] = str(row[p])
return json.dumps(param2val)

df['params'] = df.apply(serialize_params, axis=1)

# Define compiler columns
compilers = [('triton', 'triton-TFlops'), ('pytorch', 'pytorch-TFlops'), ('triton-td', 'triton-td-TFlops')]
# parse "GB/s", parse "TFLOPS"
compilers = ['pytorch', 'triton', 'triton-td']

# Create list of dataframes for each compiler
dfs = []
for compiler_name, tflops_col in compilers:
if tflops_col in df.columns:
# Filter out NaN values
valid_rows = df[df[tflops_col].notna()].copy()
if len(valid_rows) > 0:
valid_rows['run_uuid'] = run_uuid
valid_rows['ts'] = current_datetime
valid_rows['benchmark_group'] = bench_group
valid_rows['benchmark'] = benchmark
valid_rows['compiler'] = compiler_name
valid_rows['value_name'] = 'tflops'
valid_rows['value'] = valid_rows[tflops_col].astype(float)
valid_rows['tag'] = tag

# Select only needed columns
result_df = valid_rows[[
'run_uuid', 'ts', 'benchmark_group', 'benchmark', 'compiler', 'value_name', 'value', 'params', 'tag'
]]
dfs.append(result_df)
for compiler_name in compilers:
col = f'{compiler_name}-{"GB/s" if gbps else "TFlops"}'
if col not in df.columns:
continue
# Filter out NaN values
valid_rows = df[df[col].notna()].copy()
if len(valid_rows) > 0:
valid_rows['run_uuid'] = run_uuid
valid_rows['ts'] = current_datetime
valid_rows['benchmark_group'] = bench_group
valid_rows['benchmark'] = benchmark
valid_rows['compiler'] = compiler_name
valid_rows['value_name'] = 'tflops' if not gbps else 'gbps'
valid_rows['value'] = valid_rows[col].astype(float)
valid_rows['tag'] = tag

# Select only needed columns
result_df = valid_rows[[
'run_uuid', 'ts', 'benchmark_group', 'benchmark', 'compiler', 'value_name', 'value', 'params', 'tag'
]]
dfs.append(result_df)

# Concatenate all compiler results
df_results = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
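
To illustrate the reworked parse_csv above, here is a minimal sketch of a call with a GB/s metric column and a non-integer param column; it assumes the snippet is run from benchmarks/third_party/vllm so parse_csv can be imported, and the file name and values are made up.

import pandas as pd
from transform_results import parse_csv

# Tiny raw CSV with a string-valued param ("dtype") and a GB/s metric column.
raw = pd.DataFrame({
    "num_experts": [8],
    "dtype": ["bfloat16"],
    "triton-GB/s": [512.3],
})
raw.to_csv("raw_example.csv", index=False)

out = parse_csv("raw_example.csv", tag="example", bench_group="vllm",
                benchmark="moe-bf16-benchmark",
                param_cols=["num_experts", "dtype"], gbps=True)

# Expected: a single row with compiler="triton", value_name="gbps",
# value=512.3, and params='{"num_experts": 8, "dtype": "bfloat16"}'
# (ints stay ints; strings fall back to str via the ValueError branch).
print(out[["compiler", "value_name", "value", "params"]])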
@@ -91,7 +104,7 @@ def main():
raise ValueError(f'Error: CSV file {args.source} not found')

param_cols = args.param_cols.split(',')
df_results = parse_csv(args.source, args.tag, args.bgroup, args.benchmark, param_cols)
df_results = parse_csv(args.source, args.tag, args.bgroup, args.benchmark, param_cols, gbps=args.gbps)
df_results.to_csv(args.target, index=False)

