Skip to content

Commit b7e8cca

Browse files
Add Intel Gaudi3 HPU benchmark support with version compatibility
1 parent bd1e80c commit b7e8cca

File tree

1 file changed

+71
-22
lines changed

1 file changed

+71
-22
lines changed

.github/workflows/vllm-benchmark.yml

Lines changed: 71 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -198,34 +198,83 @@ jobs:
198198
199199
if [[ -z "${HEAD_SHA}" ]]; then
200200
pushd vllm
201-
# Looking back the latest 100 commits is enough
202-
for i in {0..99}
203-
do
204-
# Check if the image is there, if it doesn't then check an older one
205-
# because the commit is too recent
206-
HEAD_SHA=$(git rev-parse --verify HEAD~${i})
207-
DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
208-
209-
# No Docker image available yet because the commit is too recent
210-
if ! docker manifest inspect "${DOCKER_IMAGE}"; then
211-
continue
201+
202+
# Special handling for HPU: use vllm-gaudi compatible commit
203+
#
204+
# Problem: VLLM_STABLE_COMMIT might change between when CI builds the image
205+
# and when this benchmark runs (every 12 hours), causing image tag mismatches.
206+
#
207+
# Solution: Query git history of VLLM_STABLE_COMMIT file to find the most recent
208+
# compatible vLLM commit that has an actual Docker image built by CI.
209+
if [[ "${DEVICE_NAME}" == "hpu" ]]; then
210+
echo "HPU device detected - finding compatible vLLM commit from vllm-gaudi history"
211+
212+
# Clone only the last-good-commit-for-vllm-gaudi branch (lightweight, single file)
213+
git clone --depth 50 --single-branch --branch vllm/last-good-commit-for-vllm-gaudi \
214+
https://github.com/vllm-project/vllm-gaudi.git /tmp/vllm-gaudi
215+
pushd /tmp/vllm-gaudi
216+
217+
# Get the last 30 commits - each commit represents a VLLM_STABLE_COMMIT update
218+
# This gives us a history of compatible vLLM versions
219+
CANDIDATE_COMMITS=$(git log -30 --pretty=format:"%H")
220+
popd
221+
222+
# Try each candidate commit (newest to oldest) until we find an existing image
223+
FOUND_IMAGE=0
224+
for VLLM_GAUDI_COMMIT in ${CANDIDATE_COMMITS}; do
225+
# Get the vLLM commit from this version of the branch
226+
# Read the vLLM commit pinned by this revision of the vllm-gaudi branch.
# -f makes curl fail on HTTP errors (e.g. 404) instead of printing the error page,
# so an error body is never mistaken for a commit hash. `|| true` keeps a failed
# fetch non-fatal: the value is then empty and the candidate is skipped below.
CANDIDATE_VLLM_COMMIT=$(curl -fsS "https://raw.githubusercontent.com/vllm-project/vllm-gaudi/${VLLM_GAUDI_COMMIT}/VLLM_STABLE_COMMIT" | tr -d '[:space:]') || true
227+
228+
if [[ -z "${CANDIDATE_VLLM_COMMIT}" ]]; then
229+
continue
230+
fi
231+
232+
DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${CANDIDATE_VLLM_COMMIT}${DOCKER_IMAGE_SUFFIX}"
233+
echo "Checking if image exists: ${DOCKER_IMAGE}"
234+
235+
if docker manifest inspect "${DOCKER_IMAGE}" > /dev/null 2>&1; then
236+
echo "Found existing HPU image for vLLM commit: ${CANDIDATE_VLLM_COMMIT}"
237+
HEAD_SHA="${CANDIDATE_VLLM_COMMIT}"
238+
FOUND_IMAGE=1
239+
break
240+
fi
241+
done
242+
243+
if [[ ${FOUND_IMAGE} == 0 ]]; then
244+
# The candidate list is built with `git log -30`, so report the same count here.
echo "ERROR: No HPU Docker image found in the last 30 versions of VLLM_STABLE_COMMIT"
245+
echo "This likely means ci-infra hasn't successfully built any HPU images yet"
246+
exit 1
212247
fi
213-
214-
NOT_EXIST=0
215-
S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
216-
aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
217-
218-
if [[ ${NOT_EXIST} == "1" ]]; then
219-
echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
220-
break
221-
fi
222-
done
248+
else
249+
# For non-HPU devices: Looking back the latest 100 commits
250+
for i in {0..99}
251+
do
252+
# Check if the image is there; if it isn't, check an older one
253+
# because the commit is too recent
254+
HEAD_SHA=$(git rev-parse --verify HEAD~${i})
255+
DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
256+
257+
# No Docker image available yet because the commit is too recent
258+
if ! docker manifest inspect "${DOCKER_IMAGE}"; then
259+
continue
260+
fi
261+
262+
NOT_EXIST=0
263+
S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
264+
aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
265+
266+
if [[ ${NOT_EXIST} == "1" ]]; then
267+
echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
268+
break
269+
fi
270+
done
271+
fi
223272
popd
224273
fi
225274
226275
echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
227276
228-
# Print the benchmark commit for rereference
277+
# Print the benchmark commit for reference
229278
echo "### Run benchmark on [${HEAD_SHA}](https://github.com/vllm-project/vllm/commit/${HEAD_SHA})" >> "${GITHUB_STEP_SUMMARY}"
230279
231280
- name: Setup CUDA GPU_FLAG for docker run

0 commit comments

Comments
 (0)