# Run a one-off benchmark for gpt-oss (#28)
#
# NOTE(review): the web view this file was recovered from displayed a "hidden
# or bidirectional Unicode text" warning. Open the committed file in an editor
# that reveals hidden Unicode characters before trusting its content.
#
# For every (runner, model, docker image) combination in the matrix this
# workflow:
#   1. checks out this repository, vllm-project/vllm and openai/gpt-oss,
#   2. detects the accelerator on the runner (cuda / rocm / cpu),
#   3. generates the benchmark configs for the selected model and device,
#   4. runs .github/scripts/gpt-oss/run_benchmarks.sh inside the docker image,
#   5. uploads the results with upload_benchmark_results.py and as artifacts.
name: gpt-oss benchmark

on:
  # Only runs for pull requests that modify this workflow file itself.
  pull_request:
    paths:
      - .github/workflows/gpt-oss-benchmark.yml

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  benchmarks:
    name: Run gpt-oss benchmarks
    strategy:
      matrix:
        include:
          # gpt-oss-120b
          - runner: linux.aws.h100.8
            model: openai/gpt-oss-120b
            docker-image: 'public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:6d8d0a24c02bfd84d46b3016b865a44f048ae84b'
          - runner: linux.dgx.b200.8
            model: openai/gpt-oss-120b
            docker-image: 'public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:6d8d0a24c02bfd84d46b3016b865a44f048ae84b'
          - runner: linux.rocm.gpu.gfx942.8
            model: openai/gpt-oss-120b
            docker-image: rocm/vllm-dev:open-mi300-08052025
          # gpt-oss-20b
          - runner: linux.aws.h100.4
            model: openai/gpt-oss-20b
            docker-image: 'public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:6d8d0a24c02bfd84d46b3016b865a44f048ae84b'
          - runner: linux.dgx.b200.8
            model: openai/gpt-oss-20b
            docker-image: 'public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:6d8d0a24c02bfd84d46b3016b865a44f048ae84b'
          - runner: linux.rocm.gpu.gfx942.8
            model: openai/gpt-oss-20b
            docker-image: rocm/vllm-dev:open-mi300-08052025
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      # id-token is needed by the role-to-assume AWS authentication step below.
      id-token: write
      contents: read
    timeout-minutes: 720
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-benchmarks/vllm

      - name: Checkout gpt-oss repository
        uses: actions/checkout@v4
        with:
          repository: openai/gpt-oss
          path: vllm-benchmarks/gpt-oss

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      # Exports DEVICE_NAME (cuda, rocm or cpu) for all later steps.
      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux
          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> "${GITHUB_ENV}"

      # Exports DEVICE_TYPE, the hardware model string reported by the
      # vendor tool that matches DEVICE_NAME.
      - name: Set GPU name and type
        working-directory: vllm-benchmarks
        shell: bash
        run: |
          set -eux
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> "${GITHUB_ENV}"

      - name: Install dependencies
        shell: bash
        run: |
          set -eux
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          else
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/cu128
          fi

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      # NOTE(review): presumably this action exports the ROCm equivalent of
      # GPU_FLAG consumed by `docker run` below; confirm against the action.
      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Setup benchmark tests
        # Explicit bash so the step runs with pipefail, like the steps above.
        shell: bash
        env:
          MODEL: ${{ matrix.model }}
        run: |
          set -eux

          # Drop the upstream benchmark configs; they are replaced below.
          pushd vllm-benchmarks/vllm
          rm .buildkite/nightly-benchmarks/tests/*.json
          popd

          # Set the list of benchmarks we want to cover in this runner
          python3 .github/scripts/setup_vllm_benchmark.py \
            --from-benchmark-configs-dir vllm-benchmarks/benchmarks \
            --to-benchmark-configs-dir vllm-benchmarks/vllm/.buildkite/nightly-benchmarks/tests \
            --models "${MODEL}" \
            --device "${DEVICE_NAME}"

          # Print the generated configs to the job log for debugging.
          pushd vllm-benchmarks/vllm
          ls -lah .buildkite/nightly-benchmarks/tests
          find .buildkite/nightly-benchmarks/tests -type f -exec cat {} \;

      - name: Run vLLM gpt-oss benchmark
        # Explicit bash enables pipefail, so a failing
        # `aws ecr-public get-login-password` is not hidden by the pipe into
        # `docker login`.
        shell: bash
        env:
          # To login to public.ecr.aws
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ matrix.docker-image }}
          # vLLM-related environment variables
          ENGINE_VERSION: v1
          SAVE_TO_PYTORCH_BENCHMARK_FORMAT: '1'
          MODEL: ${{ matrix.model }}
        run: |
          set -eux

          if [[ "${DEVICE_TYPE}" == *B200* ]]; then
            # Just to unblock this change on B200
            # Disable xtrace while the credentials appear on the command line.
            set +x
            aws configure set aws_access_key_id "${AWS_ACCESS_KEY_ID}"
            aws configure set aws_secret_access_key "${AWS_SECRET_ACCESS_KEY}"
            set -x
            aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws
          fi

          # Leaving 1GB for the runner and other things
          TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
          # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
          # comes from https://github.com/pytorch/test-infra/pull/6058
          TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))

          # `docker run --detach` prints the container ID, which is what
          # container_name holds. "-e NAME" without a value forwards the
          # variable from this step's environment into the container.
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e MODEL \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e HF_TOKEN \
            -e ENGINE_VERSION \
            -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
            --memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
            --memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )

          # Run perf tests
          docker exec -t "${container_name}" bash .github/scripts/gpt-oss/run_benchmarks.sh

          # Run accuracy check (turning this on later if needed)
          # docker exec -t "${container_name}" bash .github/scripts/gpt-oss/run_accuracy_checks.sh

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via its runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Upload the benchmark results
        # Explicit bash so the `echo | sed` pipeline runs with pipefail.
        shell: bash
        env:
          BENCHMARK_RESULTS: vllm-benchmarks/vllm/benchmarks/results
          MODEL: ${{ matrix.model }}
        run: |
          set -eux

          # NOTE(review): presumably the results are owned by the container's
          # user after the docker run; take ownership back before reading.
          sudo chown -R "${UID}" "${BENCHMARK_RESULTS}"
          ls -lah "${BENCHMARK_RESULTS}"

          # Spaces and any character outside [alnum . -] become "_"; "/" in
          # the model name becomes "_".
          SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g")
          SANITIZED_MODEL="${MODEL//\//_}"

          python3 .github/scripts/upload_benchmark_results.py \
            --repo vllm-benchmarks/vllm \
            --benchmark-name "vLLM benchmark" \
            --benchmark-results "${BENCHMARK_RESULTS}" \
            --device-name "${DEVICE_NAME}" \
            --device-type "${SANITIZED_DEVICE_TYPE}" \
            --model "${SANITIZED_MODEL}"

          # Exported for the artifact names in the upload steps below.
          echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> "${GITHUB_ENV}"
          echo "SANITIZED_MODEL=$SANITIZED_MODEL" >> "${GITHUB_ENV}"

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        with:
          name: benchmark-results--${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODEL }}
          path: vllm-benchmarks/vllm/benchmarks/results

      # Keep a copy of the accuracy results on GitHub for reference
      - uses: actions/upload-artifact@v4
        with:
          name: accuracy-results--${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODEL }}
          path: |
            vllm-benchmarks/gpt-oss/gpqa_openai