vLLM Benchmark #98

Workflow file for this run

.github/workflows/vllm-benchmark.yml at ab6a9b7

	name: vLLM Benchmark

	on:
	schedule:
	# Run every 2 hours
	- cron: '0 /2 * *'
	workflow_dispatch:
	inputs:
	vllm_branch:
	description: vLLM branch
	required: true
	type: string
	default: main
	vllm_commit:
	description: vLLM commit
	required: false
	type: string
	models:
	description: \|
	A comma-separated list of models to benchmark, leave empty to run everything
	required: false
	type: string
	pull_request:
	paths:
	- .github/workflows/vllm-benchmark.yml

	concurrency:
	group: ${{ github.workflow }}-${{ github.event.pull_request.number \|\| github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	cancel-in-progress: true

	jobs:
	set-parameters:
	runs-on: ubuntu-latest
	outputs:
	benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
	steps:
	- name: Checkout repository
	uses: actions/checkout@v4

	- uses: actions/setup-python@v5
	with:
	python-version: '3.12'

	- name: Set parameters
	id: set-parameters
	shell: bash
	env:
	MODELS: ${{ inputs.models \|\| '' }}
	run: \|
	set -eux

	# The generated matrix is grouped by model and runner
	python .github/scripts/generate_vllm_benchmark_matrix.py \
	--benchmark-configs-dir vllm-benchmarks/benchmarks \
	--models "${MODELS}"

	benchmarks:
	name: Run vLLM benchmarks
	needs: set-parameters
	strategy:
	matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
	fail-fast: false
	runs-on: ${{ matrix.runner }}
	environment: pytorch-x-vllm
	steps:
	- name: Checkout repository
	uses: actions/checkout@v4

	- name: Checkout vLLM repository
	uses: actions/checkout@v4
	with:
	repository: vllm-project/vllm
	path: vllm-benchmarks/vllm
	ref: ${{ inputs.vllm_branch \|\| 'main' }}
	fetch-depth: 0

	- uses: actions/setup-python@v5
	with:
	python-version: '3.12'
	cache: 'pip'

	- name: Set GPU device name
	working-directory: vllm-benchmarks
	run: \|
	export GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader \| awk '{print $2}')
	echo "GPU_DEVICE=$GPU_DEVICE" >> $GITHUB_ENV

	- name: Install dependencies
	working-directory: vllm-benchmarks
	run: \|
	set -eux
	pip install -r requirements.txt

	- name: Check for last benchmark commit
	working-directory: vllm-benchmarks
	env:
	HEAD_BRANCH: ${{ inputs.vllm_branch \|\| 'main' }}
	HEAD_SHA: ${{ inputs.vllm_commit \|\| '' }}
	DOCKER_IMAGE_PREFIX: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
	MODELS: ${{ matrix.models }}
	run: \|
	set -eux

	if [[ -z "${HEAD_SHA}" ]]; then
	pushd vllm
	# Looking back the latest 100 commits is enough
	for i in {0..99}
	do
	# Check if the image is there, if it doesn't then check an older one
	# because the commit is too recent
	HEAD_SHA=$(git rev-parse --verify HEAD~${i})
	DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}"

	# No Docker image available yet because the commit is too recent
	if ! docker manifest inspect "${DOCKER_IMAGE}"; then
	continue
	fi

	NOT_EXIST=0
	S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results_${MODELS//\//_}.json"
	aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} \|\| NOT_EXIST=1

	if [[ ${NOT_EXIST} == "1" ]]; then
	echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
	break
	fi
	done
	popd
	fi

	echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV

	- name: Setup GPU_FLAG for docker run
	run: \|
	echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

	- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
	run: \|
	echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"

	- name: Setup benchmark tests
	env:
	MODELS: ${{ matrix.models }}
	run: \|
	pushd vllm-benchmarks/vllm
	git checkout "${HEAD_SHA}"
	rm .buildkite/nightly-benchmarks/tests/*.json
	popd

	# Set the list of benchmarks we want to cover in this runner
	python .github/scripts/setup_vllm_benchmark.py \
	--from-benchmark-configs-dir vllm-benchmarks/benchmarks \
	--to-benchmark-configs-dir vllm-benchmarks/vllm/.buildkite/nightly-benchmarks/tests \
	--models "${MODELS}"

	pushd vllm-benchmarks/vllm
	ls -lah .buildkite/nightly-benchmarks/tests
	find .buildkite/nightly-benchmarks/tests -type f -exec cat {} \;
	popd

	- name: Run vLLM benchmark
	env:
	SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
	SCCACHE_REGION: us-east-1
	HF_TOKEN: ${{ secrets.HF_TOKEN }}
	DOCKER_IMAGE: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:${{ env.HEAD_SHA }}
	# vLLM-related environment variables
	ENGINE_VERSION: v1
	SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
	run: \|
	set -x

	docker run \
	${GPU_FLAG:-} \
	${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
	-e SCCACHE_BUCKET \
	-e SCCACHE_REGION \
	-e GPU_DEVICE \
	-e HF_TOKEN \
	-e ENGINE_VERSION \
	-e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
	--ipc=host \
	--tty \
	--security-opt seccomp=unconfined \
	-v "${GITHUB_WORKSPACE}:/tmp/workspace" \
	-w /tmp/workspace \
	"${DOCKER_IMAGE}" \
	bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"

	- name: Upload the benchmark results
	working-directory: vllm-benchmarks
	env:
	BENCHMARK_RESULTS: vllm/benchmarks/results
	MODELS: ${{ matrix.models }}
	run: \|
	set -eux

	sudo chown -R ${UID} "${BENCHMARK_RESULTS}"
	ls -lah "${BENCHMARK_RESULTS}"

	python upload_benchmark_results.py \
	--vllm vllm \
	--benchmark-results "${BENCHMARK_RESULTS}" \
	--device "${GPU_DEVICE}" \
	--model "${MODELS//\//_}"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

vLLM Benchmark #98

Workflow file

vLLM Benchmark #98

Uh oh!

Jobs

Run details

Workflow file for this run