vLLM Benchmark #98
name: vLLM Benchmark
on:
  schedule:
    # Run every 2 hours
    - cron: '0 */2 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit
        required: false
        type: string
      models:
        description: |
          A comma-separated list of models to benchmark, leave empty to run everything
        required: false
        type: string
  pull_request:
    paths:
      - .github/workflows/vllm-benchmark.yml
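# Group runs by workflow, PR number (or commit SHA), and trigger type so that, for
# example, a new push to a pull request cancels that PR's previous benchmark run
# without interfering with scheduled or manually dispatched runs.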
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          MODELS: ${{ inputs.models || '' }}
        run: |
          set -eux
          # The generated matrix is grouped by model and runner
          python .github/scripts/generate_vllm_benchmark_matrix.py \
            --benchmark-configs-dir vllm-benchmarks/benchmarks \
            --models "${MODELS}"
  benchmarks:
    name: Run vLLM benchmarks
    needs: set-parameters
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-benchmarks/vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'
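      # GPU_DEVICE holds the second token of the nvidia-smi device name (e.g. "H100"
      # from "NVIDIA H100 80GB HBM3"); it is later used in the S3 key and passed to
      # the upload script as --device.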
      - name: Set GPU device name
        working-directory: vllm-benchmarks
        run: |
          export GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
          echo "GPU_DEVICE=$GPU_DEVICE" >> $GITHUB_ENV
      - name: Install dependencies
        working-directory: vllm-benchmarks
        run: |
          set -eux
          pip install -r requirements.txt
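      # Walk back from HEAD until we find a commit that already has a published CI
      # Docker image but no benchmark results uploaded to S3 for this device and
      # model group, i.e. the newest commit that still needs to be benchmarked.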
      - name: Check for last benchmark commit
        working-directory: vllm-benchmarks
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
          DOCKER_IMAGE_PREFIX: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          MODELS: ${{ matrix.models }}
        run: |
          set -eux
          if [[ -z "${HEAD_SHA}" ]]; then
            pushd vllm
            # Looking back at the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there; if it isn't, move on to an older commit
              # because this one is too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}"
              # No Docker image available yet because the commit is too recent
              if ! docker manifest inspect "${DOCKER_IMAGE}"; then
                continue
              fi
              NOT_EXIST=0
              S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results_${MODELS//\//_}.json"
              aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
              if [[ ${NOT_EXIST} == "1" ]]; then
                echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
                break
              fi
            done
            popd
          fi
          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
      - name: Setup GPU_FLAG for docker run
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
      - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
        run: |
          echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
      - name: Setup benchmark tests
        env:
          MODELS: ${{ matrix.models }}
        run: |
          pushd vllm-benchmarks/vllm
          git checkout "${HEAD_SHA}"
          rm .buildkite/nightly-benchmarks/tests/*.json
          popd
          # Set the list of benchmarks we want to cover in this runner
          python .github/scripts/setup_vllm_benchmark.py \
            --from-benchmark-configs-dir vllm-benchmarks/benchmarks \
            --to-benchmark-configs-dir vllm-benchmarks/vllm/.buildkite/nightly-benchmarks/tests \
            --models "${MODELS}"
          pushd vllm-benchmarks/vllm
          ls -lah .buildkite/nightly-benchmarks/tests
          find .buildkite/nightly-benchmarks/tests -type f -exec cat {} \;
          popd
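      # Run the upstream benchmark script inside the prebuilt vLLM CI image that was
      # published for the selected commit.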
      - name: Run vLLM benchmark
        env:
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:${{ env.HEAD_SHA }}
          # vLLM-related environment variables
          ENGINE_VERSION: v1
          SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
        run: |
          set -x
          docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e GPU_DEVICE \
            -e HF_TOKEN \
            -e ENGINE_VERSION \
            -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
            --ipc=host \
            --tty \
            --security-opt seccomp=unconfined \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}" \
            bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
      - name: Upload the benchmark results
        working-directory: vllm-benchmarks
        env:
          BENCHMARK_RESULTS: vllm/benchmarks/results
          MODELS: ${{ matrix.models }}
        run: |
          set -eux
          sudo chown -R ${UID} "${BENCHMARK_RESULTS}"
          ls -lah "${BENCHMARK_RESULTS}"
          python upload_benchmark_results.py \
            --vllm vllm \
            --benchmark-results "${BENCHMARK_RESULTS}" \
            --device "${GPU_DEVICE}" \
            --model "${MODELS//\//_}"