[HUD] [SGLang benchmarking] Adding bigger models for NVIDIA and implementing AMD support for SGLang using Docker #209
name: SGLang Benchmark

on:
  schedule:
    # Run every week on Sunday at midnight (UTC)
    - cron: '0 0 * * 0'
  workflow_dispatch:
    inputs:
      sglang_branch:
        description: SGLang branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER)
        required: true
        type: string
        default: main
      models:
        description: |
          A comma-separated list of models from sglang-benchmarks/benchmarks (optional, defaults to running everything)
        required: false
        type: string
      runners:
        description: |
          A comma-separated list of runners from .github/scripts/generate_vllm_benchmark_matrix.py to run the benchmark (optional, defaults to running everything)
        required: true
        type: string
        default: h100,b200,rocm
  pull_request:
    paths:
      - .github/workflows/sglang-benchmark.yml
      - sglang-benchmarks/**

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
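
# Illustration (not part of the original file; the PR number is hypothetical): for a
# pull_request event the group above evaluates to something like
#   "SGLang Benchmark-209-false-false"
# so repeated pushes to the same PR cancel each other's in-flight runs, while scheduled
# and manually dispatched runs fall into distinct groups and never cancel PR runs.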

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@v6

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          MODELS: ${{ inputs.models || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by model and runner
          python .github/scripts/generate_vllm_benchmark_matrix.py \
            --benchmark-configs-dir sglang-benchmarks/benchmarks \
            --models "${MODELS}" \
            --runners "${RUNNERS}"
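          # Illustrative only (exact fields come from generate_vllm_benchmark_matrix.py; the
          # runner and model names here are hypothetical): the script is expected to write a
          # JSON matrix to $GITHUB_OUTPUT, roughly of the shape
          #   benchmark_matrix={"include": [{"runner": "linux.aws.h100", "models": "meta-llama/Meta-Llama-3.1-8B-Instruct"}, ...]}
          # which the benchmarks job below consumes via fromJson(), reading matrix.runner
          # and matrix.models per entry.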

  benchmarks:
    name: Run SGLang benchmarks
    needs: set-parameters
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libnuma-dev numactl

      - name: Checkout SGLang repository
        uses: actions/checkout@v4
        with:
          repository: sgl-project/sglang
          path: sglang-benchmarks/sglang
          ref: ${{ inputs.sglang_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV

      - name: Set GPU name and type
        working-directory: sglang-benchmarks
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
            CUDA_HOME="/usr/local/cuda"
            echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Install dependencies
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          else
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/cu128
          fi

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Authenticate with AWS
        # Only needed for DGX hosts
        if: contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/public_ecr_read_only
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Login to public.ecr.aws
        # Only needed for DGX hosts
        if: contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
        with:
          registry-type: public

      - name: Select SGLang Docker image
        working-directory: sglang-benchmarks/sglang
        shell: bash
        run: |
          set -eux

          # Determine the image suffix based on the device
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            if [[ "${DEVICE_TYPE}" == *"B200"* ]]; then
              IMAGE_SUFFIX="-cu128-b200"
            else
              IMAGE_SUFFIX=""
            fi
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            IMAGE_SUFFIX="-rocm630-mi30x"
          else
            echo "SGLang benchmarks require either CUDA or ROCm devices."
            exit 1
          fi

          # Find the newest tag with an available Docker image
          SELECTED_TAG=""
          for tag in $(git for-each-ref --sort=-creatordate --format '%(refname:short)' refs/tags); do
            candidate_image="lmsysorg/sglang:${tag}${IMAGE_SUFFIX}"
            echo "Checking: $candidate_image"

            if docker manifest inspect "$candidate_image" >/dev/null 2>&1; then
              SELECTED_TAG="$tag"
              DOCKER_IMAGE="$candidate_image"
              HEAD_SHA=$(git rev-list -n 1 "$tag")
              echo "Found available image: $candidate_image"
              break
            fi
          done

          # Fall back to latest if no tagged image is found
          if [[ -z "$SELECTED_TAG" ]]; then
            echo "No tagged images found, using latest"
            DOCKER_IMAGE="lmsysorg/sglang:latest${IMAGE_SUFFIX}"
            HEAD_SHA=$(git rev-parse HEAD)
            SELECTED_TAG="latest"
          fi

          echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
          echo "HEAD_SHA=$HEAD_SHA" >> "$GITHUB_ENV"
          echo "LATEST_TAG=$SELECTED_TAG" >> "$GITHUB_ENV"
          echo "Using: $DOCKER_IMAGE (tag: $SELECTED_TAG)"

      - name: Setup benchmark tests
        env:
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          # Create the benchmarks directory structure
          mkdir -p sglang-benchmarks/benchmarks/results
          mkdir -p sglang-benchmarks/benchmarks/tests

          # Set the list of benchmarks we want to cover on this runner
          python3 .github/scripts/setup_vllm_benchmark.py \
            --from-benchmark-configs-dir sglang-benchmarks/benchmarks \
            --to-benchmark-configs-dir sglang-benchmarks/benchmarks/tests \
            --models "${MODELS}" \
            --device "${DEVICE_NAME}"

          ls -lah sglang-benchmarks/benchmarks/tests || echo "No test files found"
          find sglang-benchmarks/benchmarks/tests -type f -exec cat {} \; || echo "No test files to display"

      - name: Run SGLang benchmark
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
        run: |
          set -eux

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=32g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "cd sglang-benchmarks/benchmarks && bash ../../.github/scripts/run-sglang-performance-benchmarks.sh"
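          # For reference (expansion illustrative, image tag hypothetical): on a CUDA runner
          # the earlier "Setup CUDA GPU_FLAG" step set GPU_FLAG, so the command resolves to roughly
          #   docker run --gpus all -e NVIDIA_DRIVER_CAPABILITIES=all -e HF_TOKEN ... lmsysorg/sglang:v0.4.6-cu128-b200
          # On ROCm runners that step is skipped, GPU_FLAG is unset, and the ${GPU_FLAG:-}
          # default keeps the expansion empty.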

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Upload the benchmark results
        if: always()
        env:
          BENCHMARK_RESULTS: sglang-benchmarks/benchmarks/results
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          sudo chown -R ${UID} "${BENCHMARK_RESULTS}" || true
          ls -lah "${BENCHMARK_RESULTS}" || echo "Results directory not found"

          SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g")
          SANITIZED_MODELS="${MODELS//\//_}"
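          # Worked example (values hypothetical): DEVICE_TYPE="AMD Instinct MI300X" becomes
          # SANITIZED_DEVICE_TYPE="AMD_Instinct_MI300X", and MODELS="meta-llama/Meta-Llama-3.1-70B"
          # becomes SANITIZED_MODELS="meta-llama_Meta-Llama-3.1-70B", keeping the artifact
          # name below free of spaces and path separators.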

          # Create the results summary
          if [ -d "${BENCHMARK_RESULTS}" ]; then
            echo "## SGLang Benchmark Results Summary" >> $GITHUB_STEP_SUMMARY
            echo "- Device: ${DEVICE_TYPE}" >> $GITHUB_STEP_SUMMARY
            echo "- Models: ${MODELS}" >> $GITHUB_STEP_SUMMARY
            echo "- Runner: ${{ matrix.runner }}" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### Files Generated:" >> $GITHUB_STEP_SUMMARY
            find "${BENCHMARK_RESULTS}" -type f -name "*.json" -exec echo "- {}" \; >> $GITHUB_STEP_SUMMARY || echo "- No JSON files found" >> $GITHUB_STEP_SUMMARY
          else
            echo "⚠️ No benchmark results found in ${BENCHMARK_RESULTS}" >> $GITHUB_STEP_SUMMARY
          fi

          python3 .github/scripts/upload_benchmark_results.py \
            --repo-name sgl-project/sglang \
            --benchmark-name "SGLang benchmark" \
            --benchmark-results "${BENCHMARK_RESULTS}" \
            --head-sha "${HEAD_SHA}" \
            --head-branch main \
            --device-name "${DEVICE_NAME}" \
            --device-type "${SANITIZED_DEVICE_TYPE}" \
            --model "${SANITIZED_MODELS}"

          echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> $GITHUB_ENV
          echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: sglang-benchmark-results-${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODELS }}
          path: sglang-benchmarks/benchmarks/results
          retention-days: 30
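
To exercise the workflow_dispatch path by hand, something like the following should work (a sketch, assuming the file is saved as .github/workflows/sglang-benchmark.yml and the gh CLI is authenticated against the repository; the runner list is only an example):

    gh workflow run sglang-benchmark.yml \
      -f sglang_branch=main \
      -f runners=h100,rocm

Omitting -f models=... leaves the models input empty, which per the descriptions above runs every model config under sglang-benchmarks/benchmarks.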