[HUD] [SGLang benchmarking] Adding bigger models for NVIDIA and implementing AMD support for SGLang using Docker #209
name: SGLang Benchmark

on:
  schedule:
    # Run every week on Sunday at midnight (UTC)
    - cron: '0 0 * * 0'
  workflow_dispatch:
    inputs:
      sglang_branch:
        description: SGLang branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER)
        required: true
        type: string
        default: main
      models:
        description: |
          A comma-separated list of models from sglang-benchmarks/benchmarks (optional, defaults to running everything)
        required: false
        type: string
      runners:
        description: |
          A comma-separated list of runners from .github/scripts/generate_vllm_benchmark_matrix.py to run the benchmark (optional, defaults to running everything)
        required: true
        type: string
        default: h100,b200,rocm
  pull_request:
    paths:
      - .github/workflows/sglang-benchmark.yml
      - sglang-benchmarks/**

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
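
# Illustration (not part of the original file; the PR number is hypothetical): for a
# pull_request event the group above evaluates to something like
#   "SGLang Benchmark-209-false-false"
# so repeated pushes to the same PR cancel each other's in-flight runs, while scheduled
# and manually dispatched runs fall into distinct groups and never cancel PR runs.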

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@v6

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          MODELS: ${{ inputs.models || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by model and runner
          python .github/scripts/generate_vllm_benchmark_matrix.py \
            --benchmark-configs-dir sglang-benchmarks/benchmarks \
            --models "${MODELS}" \
            --runners "${RUNNERS}"
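          # Illustrative only (exact fields come from generate_vllm_benchmark_matrix.py; the
          # runner and model names here are hypothetical): the script is expected to write a
          # JSON matrix to $GITHUB_OUTPUT, roughly of the shape
          #   benchmark_matrix={"include": [{"runner": "linux.aws.h100", "models": "meta-llama/Meta-Llama-3.1-8B-Instruct"}, ...]}
          # which the benchmarks job below consumes via fromJson(), reading matrix.runner
          # and matrix.models per entry.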

  benchmarks:
    name: Run SGLang benchmarks
    needs: set-parameters
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libnuma-dev numactl

      - name: Checkout SGLang repository
        uses: actions/checkout@v4
        with:
          repository: sgl-project/sglang
          path: sglang-benchmarks/sglang
          ref: ${{ inputs.sglang_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV

      - name: Set GPU name and type
        working-directory: sglang-benchmarks
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
            CUDA_HOME="/usr/local/cuda"
            echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Install dependencies
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          else
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/cu128
          fi

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Authenticate with AWS
        # Only needed for DGX hosts
        if: contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/public_ecr_read_only
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Login to public.ecr.aws
        # Only needed for DGX hosts
        if: contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
        with:
          registry-type: public

      - name: Select SGLang Docker image
        working-directory: sglang-benchmarks/sglang
        shell: bash
        run: |
          set -eux

          # Determine the image suffix based on the device
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            if [[ "${DEVICE_TYPE}" == *"B200"* ]]; then
              IMAGE_SUFFIX="-cu128-b200"
            else
              IMAGE_SUFFIX=""
            fi
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            IMAGE_SUFFIX="-rocm630-mi30x"
          else
            echo "SGLang benchmarks require either CUDA or ROCm devices."
            exit 1
          fi

          # Find the newest tag with an available Docker image
          SELECTED_TAG=""
          for tag in $(git for-each-ref --sort=-creatordate --format '%(refname:short)' refs/tags); do
            candidate_image="lmsysorg/sglang:${tag}${IMAGE_SUFFIX}"
            echo "Checking: $candidate_image"

            if docker manifest inspect "$candidate_image" >/dev/null 2>&1; then
              SELECTED_TAG="$tag"
              DOCKER_IMAGE="$candidate_image"
              HEAD_SHA=$(git rev-list -n 1 "$tag")
              echo "Found available image: $candidate_image"
              break
            fi
          done

          # Fall back to latest if no tagged image is found
          if [[ -z "$SELECTED_TAG" ]]; then
            echo "No tagged images found, using latest"
            DOCKER_IMAGE="lmsysorg/sglang:latest${IMAGE_SUFFIX}"
            HEAD_SHA=$(git rev-parse HEAD)
            SELECTED_TAG="latest"
          fi

          echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
          echo "HEAD_SHA=$HEAD_SHA" >> "$GITHUB_ENV"
          echo "LATEST_TAG=$SELECTED_TAG" >> "$GITHUB_ENV"
          echo "Using: $DOCKER_IMAGE (tag: $SELECTED_TAG)"

      - name: Setup benchmark tests
        env:
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          # Create the benchmarks directory structure
          mkdir -p sglang-benchmarks/benchmarks/results
          mkdir -p sglang-benchmarks/benchmarks/tests

          # Set the list of benchmarks we want to cover on this runner
          python3 .github/scripts/setup_vllm_benchmark.py \
            --from-benchmark-configs-dir sglang-benchmarks/benchmarks \
            --to-benchmark-configs-dir sglang-benchmarks/benchmarks/tests \
            --models "${MODELS}" \
            --device "${DEVICE_NAME}"

          ls -lah sglang-benchmarks/benchmarks/tests || echo "No test files found"
          find sglang-benchmarks/benchmarks/tests -type f -exec cat {} \; || echo "No test files to display"

      - name: Run SGLang benchmark
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
        run: |
          set -eux

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=32g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "cd sglang-benchmarks/benchmarks && bash ../../.github/scripts/run-sglang-performance-benchmarks.sh"
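          # For reference (expansion illustrative, image tag hypothetical): on a CUDA runner
          # the earlier "Setup CUDA GPU_FLAG" step set GPU_FLAG, so the command resolves to roughly
          #   docker run --gpus all -e NVIDIA_DRIVER_CAPABILITIES=all -e HF_TOKEN ... lmsysorg/sglang:v0.4.6-cu128-b200
          # On ROCm runners that step is skipped, GPU_FLAG is unset, and the ${GPU_FLAG:-}
          # default keeps the expansion empty.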

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Upload the benchmark results
        if: always()
        env:
          BENCHMARK_RESULTS: sglang-benchmarks/benchmarks/results
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          sudo chown -R ${UID} "${BENCHMARK_RESULTS}" || true
          ls -lah "${BENCHMARK_RESULTS}" || echo "Results directory not found"

          SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g")
          SANITIZED_MODELS="${MODELS//\//_}"
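          # Worked example (values hypothetical): DEVICE_TYPE="AMD Instinct MI300X" becomes
          # SANITIZED_DEVICE_TYPE="AMD_Instinct_MI300X", and MODELS="meta-llama/Meta-Llama-3.1-70B"
          # becomes SANITIZED_MODELS="meta-llama_Meta-Llama-3.1-70B", keeping the artifact
          # name below free of spaces and path separators.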

          # Create the results summary
          if [ -d "${BENCHMARK_RESULTS}" ]; then
            echo "## SGLang Benchmark Results Summary" >> $GITHUB_STEP_SUMMARY
            echo "- Device: ${DEVICE_TYPE}" >> $GITHUB_STEP_SUMMARY
            echo "- Models: ${MODELS}" >> $GITHUB_STEP_SUMMARY
            echo "- Runner: ${{ matrix.runner }}" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### Files Generated:" >> $GITHUB_STEP_SUMMARY
            find "${BENCHMARK_RESULTS}" -type f -name "*.json" -exec echo "- {}" \; >> $GITHUB_STEP_SUMMARY || echo "- No JSON files found" >> $GITHUB_STEP_SUMMARY
          else
            echo "⚠️ No benchmark results found in ${BENCHMARK_RESULTS}" >> $GITHUB_STEP_SUMMARY
          fi

          python3 .github/scripts/upload_benchmark_results.py \
            --repo-name sgl-project/sglang \
            --benchmark-name "SGLang benchmark" \
            --benchmark-results "${BENCHMARK_RESULTS}" \
            --head-sha "${HEAD_SHA}" \
            --head-branch main \
            --device-name "${DEVICE_NAME}" \
            --device-type "${SANITIZED_DEVICE_TYPE}" \
            --model "${SANITIZED_MODELS}"

          echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> $GITHUB_ENV
          echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: sglang-benchmark-results-${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODELS }}
          path: sglang-benchmarks/benchmarks/results
          retention-days: 30
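
To exercise the workflow_dispatch path by hand, something like the following should work (a sketch, assuming the file is saved as .github/workflows/sglang-benchmark.yml and the gh CLI is authenticated against the repository; the runner list is only an example):

    gh workflow run sglang-benchmark.yml \
      -f sglang_branch=main \
      -f runners=h100,rocm

Omitting -f models=... leaves the models input empty, which per the descriptions above runs every model config under sglang-benchmarks/benchmarks.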