Skip to content

Commit e0579d0

Browse files
committed
Install torchao
1 parent 2bc5a58 commit e0579d0

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

.github/workflows/vllm-benchmark.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,9 @@ jobs:
292292
-w /tmp/workspace \
293293
"${DOCKER_IMAGE}"
294294
)
295+
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
296+
docker exec -t "${container_name}" bash -c "uv pip install --system --pre torchao --index-url https://download.pytorch.org/whl/nightly/cu128"
297+
fi
295298
docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
296299
297300
- name: Authenticate with AWS

vllm-benchmarks/benchmarks/cuda/latency-tests.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140
}
141141
},
142142
{
143-
"test_name": "latency_gemma3_12b_it_fp8",
143+
"test_name": "latency_gemma3_12b_it_fp8_torchao",
144144
"parameters": {
145145
"model": "pytorch/gemma-3-12b-it-FP8",
146146
"load_format": "dummy",
@@ -149,7 +149,7 @@
149149
}
150150
},
151151
{
152-
"test_name": "latency_gemma3_12b_it_int4",
152+
"test_name": "latency_gemma3_12b_it_int4_torchao",
153153
"parameters": {
154154
"model": "pytorch/gemma-3-12b-it-INT4",
155155
"load_format": "dummy",
@@ -158,7 +158,7 @@
158158
}
159159
},
160160
{
161-
"test_name": "latency_gemma3_12b_it_awq_int4",
161+
"test_name": "latency_gemma3_12b_it_awq_int4_torchao",
162162
"parameters": {
163163
"model": "pytorch/gemma-3-12b-it-AWQ-INT4",
164164
"load_format": "dummy",
@@ -167,7 +167,7 @@
167167
}
168168
},
169169
{
170-
"test_name": "latency_gemma3_27b_it_fp8",
170+
"test_name": "latency_gemma3_27b_it_fp8_torchao",
171171
"parameters": {
172172
"model": "pytorch/gemma-3-27b-it-FP8",
173173
"load_format": "dummy",
@@ -176,7 +176,7 @@
176176
}
177177
},
178178
{
179-
"test_name": "latency_gemma3_27b_it_int4",
179+
"test_name": "latency_gemma3_27b_it_int4_torchao",
180180
"parameters": {
181181
"model": "pytorch/gemma-3-27b-it-INT4",
182182
"load_format": "dummy",
@@ -185,7 +185,7 @@
185185
}
186186
},
187187
{
188-
"test_name": "latency_gemma3_27b_it_awq_int4",
188+
"test_name": "latency_gemma3_27b_it_awq_int4_torchao",
189189
"parameters": {
190190
"model": "pytorch/gemma-3-27b-it-AWQ-INT4",
191191
"load_format": "dummy",

0 commit comments

Comments
 (0)