[HUD] [SGLang benchmarking] Adding bigger models for NVIDIA and implementing AMD Support for SGLang using docker #209

name: SGLang Benchmark
on:
schedule:
# Run every Sunday at midnight UTC
- cron: '0 0 * * 0'
workflow_dispatch:
inputs:
sglang_branch:
description: SGLang branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER)
required: true
type: string
default: main
models:
description: |
A comma-separated list of models from sglang-benchmarks/benchmarks (optional; defaults to running everything)
required: false
type: string
runners:
description: |
A comma-separated list of runners from .github/scripts/generate_vllm_benchmark_matrix.py on which to run the benchmark (defaults to h100,b200,rocm)
required: true
type: string
default: h100,b200,rocm
pull_request:
paths:
- .github/workflows/sglang-benchmark.yml
- sglang-benchmarks/**
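# The trailing event-type flags keep workflow_dispatch and schedule runs in
# concurrency groups separate from pull request runs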
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
jobs:
set-parameters:
runs-on: ubuntu-latest
outputs:
benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@v6
- name: Set parameters
id: set-parameters
shell: bash
env:
MODELS: ${{ inputs.models || '' }}
RUNNERS: ${{ inputs.runners || '' }}
run: |
set -eux
# The generated matrix is grouped by model and runner
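# (reuses the vLLM matrix generator, pointed at the SGLang benchmark configs)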
python .github/scripts/generate_vllm_benchmark_matrix.py \
--benchmark-configs-dir sglang-benchmarks/benchmarks \
--models "${MODELS}" \
--runners "${RUNNERS}"
benchmarks:
name: Run SGLang benchmarks
needs: set-parameters
strategy:
matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
environment: pytorch-x-vllm
permissions:
id-token: write
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install system dependencies
shell: bash
run: |
sudo apt-get update
sudo apt-get install -y libnuma-dev numactl
- name: Checkout SGLang repository
uses: actions/checkout@v4
with:
repository: sgl-project/sglang
path: sglang-benchmarks/sglang
ref: ${{ inputs.sglang_branch || 'main' }}
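# Full clone so the image-selection step below can enumerate release tags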
fetch-depth: 0
- uses: actions/setup-python@v5
# Amazon Linux runners fail on this step, so tolerate the failure
continue-on-error: true
with:
python-version: '3.12'
cache: 'pip'
- name: Check if the device is supported
shell: bash
run: |
set -eux
if command -v nvidia-smi; then
DEVICE_NAME=cuda
nvidia-smi
elif command -v rocm-smi; then
DEVICE_NAME=rocm
rocm-smi
else
DEVICE_NAME=cpu
lscpu
fi
echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
- name: Set GPU name and type
working-directory: sglang-benchmarks
shell: bash
run: |
set -eux
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
CUDA_HOME="/usr/local/cuda"
echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
fi
echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
- name: Install dependencies
shell: bash
run: |
set -eux
if [[ "${DEVICE_NAME}" == "rocm" ]]; then
pip install -r .github/scripts/requirements.txt \
--extra-index-url https://download.pytorch.org/whl/rocm6.3
else
pip install -r .github/scripts/requirements.txt \
--extra-index-url https://download.pytorch.org/whl/cu128
fi
- name: Setup CUDA GPU_FLAG for docker run
if: env.DEVICE_NAME == 'cuda'
run: |
echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
- name: Setup ROCm
if: env.DEVICE_NAME == 'rocm'
uses: pytorch/pytorch/./.github/actions/setup-rocm@main
- name: Authenticate with AWS
# Only needed for DGX hosts
if: contains(env.DEVICE_TYPE, 'B200')
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/public_ecr_read_only
role-duration-seconds: 18000
aws-region: us-east-1
- name: Login to public.ecr.aws
# Only needed for DGX hosts
if: contains(env.DEVICE_TYPE, 'B200')
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
with:
registry-type: public
- name: Select SGLang Docker image
working-directory: sglang-benchmarks/sglang
shell: bash
run: |
set -eux
# Determine image suffix based on device
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
if [[ "${DEVICE_TYPE}" == *"B200"* ]]; then
IMAGE_SUFFIX="-cu128-b200"
else
IMAGE_SUFFIX=""
fi
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
IMAGE_SUFFIX="-rocm630-mi30x"
else
echo "SGLang benchmarks require either CUDA or ROCm devices."
exit 1
fi
# Find the newest tag with available Docker image
SELECTED_TAG=""
for tag in $(git for-each-ref --sort=-creatordate --format '%(refname:short)' refs/tags); do
candidate_image="lmsysorg/sglang:${tag}${IMAGE_SUFFIX}"
echo "Checking: $candidate_image"
if docker manifest inspect "$candidate_image" >/dev/null 2>&1; then
SELECTED_TAG="$tag"
DOCKER_IMAGE="$candidate_image"
HEAD_SHA=$(git rev-list -n 1 "$tag")
echo "Found available image: $candidate_image"
break
fi
done
# Fall back to the latest tag if no tagged image is available
if [[ -z "$SELECTED_TAG" ]]; then
echo "No tagged images found, using latest"
DOCKER_IMAGE="lmsysorg/sglang:latest${IMAGE_SUFFIX}"
HEAD_SHA=$(git rev-parse HEAD)
SELECTED_TAG="latest"
fi
echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
echo "HEAD_SHA=$HEAD_SHA" >> "$GITHUB_ENV"
echo "LATEST_TAG=$SELECTED_TAG" >> "$GITHUB_ENV"
echo "Using: $DOCKER_IMAGE (tag: $SELECTED_TAG)"
- name: Setup benchmark tests
env:
MODELS: ${{ matrix.models }}
run: |
set -eux
# Create benchmarks directory structure
mkdir -p sglang-benchmarks/benchmarks/results
mkdir -p sglang-benchmarks/benchmarks/tests
# Set the list of benchmarks we want to cover in this runner
python3 .github/scripts/setup_vllm_benchmark.py \
--from-benchmark-configs-dir sglang-benchmarks/benchmarks \
--to-benchmark-configs-dir sglang-benchmarks/benchmarks/tests \
--models "${MODELS}" \
--device "${DEVICE_NAME}"
ls -lah sglang-benchmarks/benchmarks/tests || echo "No test files found"
find sglang-benchmarks/benchmarks/tests -type f -exec cat {} \; || echo "No test files to display"
- name: Run SGLang benchmark
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
run: |
set -eux
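# Start the selected image detached with the repo mounted at /tmp/workspace;
# --ipc=host and the large shm size help avoid shared-memory errors in PyTorch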
container_name=$(docker run \
${GPU_FLAG:-} \
-e HF_TOKEN \
-e DEVICE_NAME \
-e DEVICE_TYPE \
-e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
--ipc=host \
--tty \
--detach \
--security-opt seccomp=unconfined \
--shm-size=32g \
-v "${GITHUB_WORKSPACE}:/tmp/workspace" \
-w /tmp/workspace \
"${DOCKER_IMAGE}"
)
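# Run the benchmark harness inside the container from the benchmarks directory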
docker exec -t "${container_name}" bash -c "cd sglang-benchmarks/benchmarks && bash ../../.github/scripts/run-sglang-performance-benchmarks.sh"
- name: Authenticate with AWS
# AWS CUDA runners already have access to the bucket via their runner IAM role
if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The maximum duration enforced on the server side
role-duration-seconds: 18000
aws-region: us-east-1
- name: Upload the benchmark results
if: always()
env:
BENCHMARK_RESULTS: sglang-benchmarks/benchmarks/results
MODELS: ${{ matrix.models }}
run: |
set -eux
sudo chown -R ${UID} "${BENCHMARK_RESULTS}" || true
ls -lah "${BENCHMARK_RESULTS}" || echo "Results directory not found"
SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g")
SANITIZED_MODELS="${MODELS//\//_}"
# Create results summary
if [ -d "${BENCHMARK_RESULTS}" ]; then
echo "## SGLang Benchmark Results Summary" >> $GITHUB_STEP_SUMMARY
echo "- Device: ${DEVICE_TYPE}" >> $GITHUB_STEP_SUMMARY
echo "- Models: ${MODELS}" >> $GITHUB_STEP_SUMMARY
echo "- Runner: ${{ matrix.runner }}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Files Generated:" >> $GITHUB_STEP_SUMMARY
find "${BENCHMARK_RESULTS}" -type f -name "*.json" -exec echo "- {}" \; >> $GITHUB_STEP_SUMMARY || echo "- No JSON files found" >> $GITHUB_STEP_SUMMARY
else
echo "⚠️ No benchmark results found in ${BENCHMARK_RESULTS}" >> $GITHUB_STEP_SUMMARY
fi
python3 .github/scripts/upload_benchmark_results.py \
--repo-name sgl-project/sglang \
--benchmark-name "SGLang benchmark" \
--benchmark-results "${BENCHMARK_RESULTS}" \
--head-sha "${HEAD_SHA}" \
--head-branch main \
--device-name "${DEVICE_NAME}" \
--device-type "${SANITIZED_DEVICE_TYPE}" \
--model "${SANITIZED_MODELS}"
echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> $GITHUB_ENV
echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV
# Keep a copy of the benchmark results on GitHub for reference
- uses: actions/upload-artifact@v4
if: always()
with:
name: sglang-benchmark-results-${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODELS }}
path: sglang-benchmarks/benchmarks/results
retention-days: 30
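# Example of a manual run via the GitHub CLI (a sketch; the model name below is
# an illustrative placeholder, valid values come from sglang-benchmarks/benchmarks):
#
#   gh workflow run sglang-benchmark.yml \
#     -f sglang_branch=main \
#     -f models=meta-llama/Llama-3.1-8B-Instruct \
#     -f runners=h100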