Skip to content

Commit 70ca29e

Browse files
authored
Distribute jobs to multiple private device pools (#12832)
### Summary We want to distribute jobs to multiple private device pools now that we have more private devices on AWS, mainly: * 5 Samsung Galaxy S22 5G * 4 Apple iPhone 15 Pro Max A simple round-robin distribution algorithm will allocate different benchmark configs to devices of the same type from different pools. To achieve this, I refactor `.ci/scripts/gather_benchmark_configs.py` to introduce the concept of a device variant, in which the device name will be in the format `DEVICE_NAME+VARIANT`, for example `samsung_galaxy_s22+private` or `apple_iphone_15+ios_18_public`. Each variant can have more than one device pool. I also re-enable the benchmark jobs on private iOS devices now that we have more of them to use. ### Test plan * Models are distributed round-robin to different device pools: * `samsung_galaxy_s22+private`: https://github.com/pytorch/executorch/actions/runs/16509295068 * https://github.com/pytorch/executorch/actions/runs/16509295068/job/46688761353#step:1:25 * https://github.com/pytorch/executorch/actions/runs/16509295068/job/46688761345#step:1:25 * `apple_iphone_15+pro_private`: https://github.com/pytorch/executorch/actions/runs/16513651804 * `pytest .ci/script/test_generate_vllm_benchmark_matrix.py` --------- Signed-off-by: Huy Do <huydhn@gmail.com>
1 parent 8b2ddb2 commit 70ca29e

File tree

6 files changed

+145
-39
lines changed

6 files changed

+145
-39
lines changed

.ci/scripts/gather_benchmark_configs.py

Lines changed: 98 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,65 @@
1616
from examples.models import MODEL_NAME_TO_MODEL
1717

1818

19-
# Device pools for AWS Device Farm
19+
DEVICE_POOLS_REGEX = re.compile(r"(?P<device_name>[^\+]+)\+(?P<variant>[^\+]+)")
20+
# Device pools for AWS Device Farm. Initially, I choose to distribute models to these pools
21+
# round-robin for simplicity. For the public pool, only one per device type is needed because
22+
# AWS will scale the number of devices there for us. However, for the private pools, we need to
23+
# manually maintain multiple pools of the same device to evenly distribute models there.
24+
# The pool ARNs are extracted from the output of the following command:
25+
# aws devicefarm list-device-pools \
26+
# --arn arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 \
27+
# --region us-west-2
2028
DEVICE_POOLS = {
21-
"apple_iphone_15": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d",
22-
"apple_iphone_15+ios_18": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/12c8b15c-8d03-4e07-950d-0a627e7595b4",
23-
"samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
24-
"samsung_galaxy_s22_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/ea6b049d-1508-4233-9a56-5d9eacbe1078",
25-
"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
26-
"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
27-
"google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
28-
"apple_iphone_15_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",
29+
"apple_iphone_15": {
30+
"public": [
31+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d",
32+
],
33+
"ios_18_public": [
34+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/12c8b15c-8d03-4e07-950d-0a627e7595b4",
35+
],
36+
"private": [
37+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",
38+
],
39+
"plus_private": [
40+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/767bfb3e-a00e-4d92-998b-4eafdcf7213b",
41+
],
42+
"pro_private": [
43+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/1394f34c-2981-4c55-aaa2-246871ac713b",
44+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/099e8def-4609-4383-8787-76b88e500c1d",
45+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d6707270-b009-479e-a83a-7bdb255f9de5",
46+
],
47+
},
48+
"samsung_galaxy_s22": {
49+
"public": [
50+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
51+
],
52+
"private": [
53+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/ea6b049d-1508-4233-9a56-5d9eacbe1078",
54+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/1fa924a1-5aff-475b-8f4d-f7c6d8de4fe9",
55+
],
56+
"ultra_private": [
57+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/5f79d72e-e229-4f9c-962f-5d37196fcfe7",
58+
],
59+
},
60+
"samsung_galaxy_s24": {
61+
"public": [
62+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
63+
],
64+
"ultra_private": [
65+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/5f79d72e-e229-4f9c-962f-5d37196fcfe7",
66+
],
67+
},
68+
"google_pixel_8": {
69+
"pro_public": [
70+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
71+
],
72+
},
73+
"google_pixel_3": {
74+
"rooted_private": [
75+
"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
76+
],
77+
},
2978
}
3079

3180
# Predefined benchmark configurations
@@ -318,25 +367,56 @@ def get_benchmark_configs() -> Dict[str, Dict]: # noqa: C901
318367

319368
# Add configurations for each valid device
320369
for device in devices:
370+
# Parse the device name
371+
m = re.match(DEVICE_POOLS_REGEX, device)
372+
if not m:
373+
logging.warning(
374+
f"Invalid device name: {device} is not in DEVICE_NAME+VARIANT format. Skipping."
375+
)
376+
continue
377+
378+
device_name = m.group("device_name")
379+
variant = m.group("variant")
380+
381+
if device_name not in DEVICE_POOLS:
382+
logging.warning(f"Unsupported device '{device}'. Skipping.")
383+
continue
384+
385+
if variant not in DEVICE_POOLS[device_name]:
386+
logging.warning(
387+
f"Unsupported {device}'s variant '{variant}'. Skipping."
388+
)
389+
continue
390+
391+
device_pool_count = len(DEVICE_POOLS[device_name][variant])
392+
if not device_pool_count:
393+
logging.warning(
394+
f"No device pool defined for {device}'s variant '{variant}'. Skipping."
395+
)
396+
continue
397+
398+
device_pool_index = 0
321399
for config in configs:
322-
if config == "llama3_coreml_ane" and not device.endswith("+ios_18"):
323-
device = f"{device}+ios_18"
400+
if config == "llama3_coreml_ane" and "ios_18" not in variant:
401+
variant = "ios_18_public"
324402
logging.info(
325-
f"Benchmark config '{config}' only works on iOS 18+, auto-upgraded device pool to '{device}'"
403+
f"Benchmark config '{config}' only works on iOS 18+, auto-upgraded device variant to '{variant}'"
326404
)
327405

328-
if device not in DEVICE_POOLS:
329-
logging.warning(f"Unsupported device '{device}'. Skipping.")
330-
continue
331-
332406
record = {
333407
"model": model_name,
334408
"config": config,
335-
"device_name": device,
336-
"device_arn": DEVICE_POOLS[device],
409+
"device_name": device_name,
410+
"variant": variant,
411+
"device_arn": DEVICE_POOLS[device_name][variant][
412+
device_pool_index % device_pool_count
413+
],
337414
}
338415
benchmark_configs["include"].append(record)
339416

417+
# Distribute configs to pools of the same device round-robin
418+
device_pool_index += 1
419+
340420
set_output("benchmark_configs", json.dumps(benchmark_configs))
341421

342422

.ci/scripts/tests/test_gather_benchmark_configs.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -192,20 +192,28 @@ def test_set_output_no_github_env(self, mock_getenv, mock_file):
192192

193193
def test_device_pools_contains_all_devices(self):
194194
expected_devices = [
195-
"apple_iphone_15",
196-
"apple_iphone_15+ios_18",
197-
"samsung_galaxy_s22",
198-
"samsung_galaxy_s24",
199-
"google_pixel_8_pro",
195+
"apple_iphone_15+public",
196+
"apple_iphone_15+ios_18_public",
197+
"samsung_galaxy_s22+public",
198+
"samsung_galaxy_s24+ultra_private",
199+
"google_pixel_8+pro_public",
200200
]
201201
for device in expected_devices:
202-
self.assertIn(device, self.gather_benchmark_configs.DEVICE_POOLS)
202+
m = re.match(self.gather_benchmark_configs.DEVICE_POOLS_REGEX, device)
203+
204+
device_name = m.group("device_name")
205+
variant = m.group("variant")
206+
207+
self.assertIn(device_name, self.gather_benchmark_configs.DEVICE_POOLS)
208+
self.assertIn(
209+
variant, self.gather_benchmark_configs.DEVICE_POOLS[device_name]
210+
)
203211

204212
def test_gather_benchmark_configs_cli(self):
205213
args = {
206214
"models": "mv2,dl3",
207215
"os": "ios",
208-
"devices": "apple_iphone_15",
216+
"devices": "apple_iphone_15+pro_private",
209217
"configs": None,
210218
}
211219

@@ -223,11 +231,29 @@ def test_gather_benchmark_configs_cli(self):
223231
self.assertIn('"config": "xnnpack_q8"', result.stdout)
224232
self.assertIn('"config": "mps"', result.stdout)
225233

226-
def test_gather_benchmark_configs_cli_specified_configs(self):
234+
def test_gather_benchmark_configs_cli_invalid_device(self):
227235
args = {
228236
"models": "mv2,dl3",
229237
"os": "ios",
230238
"devices": "apple_iphone_15",
239+
"configs": None,
240+
}
241+
242+
cmd = ["python", ".ci/scripts/gather_benchmark_configs.py"]
243+
for key, value in args.items():
244+
if value is not None:
245+
cmd.append(f"--{key}")
246+
cmd.append(value)
247+
248+
result = subprocess.run(cmd, capture_output=True, text=True)
249+
self.assertEqual(result.returncode, 0, f"Error: {result.stderr}")
250+
self.assertIn('{"include": []}', result.stdout)
251+
252+
def test_gather_benchmark_configs_cli_specified_configs(self):
253+
args = {
254+
"models": "mv2,dl3",
255+
"os": "ios",
256+
"devices": "apple_iphone_15+private",
231257
"configs": "coreml_fp16,xnnpack_q8",
232258
}
233259

@@ -249,7 +275,7 @@ def test_gather_benchmark_configs_cli_specified_configs_raise(self):
249275
args = {
250276
"models": "mv2,dl3",
251277
"os": "ios",
252-
"devices": "apple_iphone_15",
278+
"devices": "apple_iphone_15+public",
253279
"configs": "qnn_q8",
254280
}
255281

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
description: Target devices to run benchmark
2424
required: false
2525
type: string
26-
default: samsung_galaxy_s22_private
26+
default: samsung_galaxy_s22+private
2727
benchmark_configs:
2828
description: The list of configs used the benchmark
2929
required: false
@@ -39,7 +39,7 @@ on:
3939
description: Target devices to run benchmark
4040
required: false
4141
type: string
42-
default: samsung_galaxy_s22_private
42+
default: samsung_galaxy_s22+private
4343
benchmark_configs:
4444
description: The list of configs used the benchmark
4545
required: false
@@ -58,5 +58,5 @@ jobs:
5858
contents: read
5959
with:
6060
models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
61-
devices: samsung_galaxy_s22_private
61+
devices: samsung_galaxy_s22+private
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/android-perf.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ on:
2727
description: Target devices to run benchmark
2828
required: false
2929
type: string
30-
default: samsung_galaxy_s22
30+
default: samsung_galaxy_s22+public
3131
benchmark_configs:
3232
description: The list of configs used the benchmark
3333
required: false
@@ -43,7 +43,7 @@ on:
4343
description: Target devices to run benchmark
4444
required: false
4545
type: string
46-
default: samsung_galaxy_s22
46+
default: samsung_galaxy_s22+public
4747
benchmark_configs:
4848
description: The list of configs used the benchmark
4949
required: false
@@ -73,7 +73,7 @@ jobs:
7373
# during scheduled runs and to provide flexibility for different defaults between
7474
# on-demand and periodic benchmarking.
7575
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
76-
CRON_DEFAULT_DEVICES: samsung_galaxy_s22
76+
CRON_DEFAULT_DEVICES: samsung_galaxy_s22+public
7777
run: |
7878
set -eux
7979

.github/workflows/apple-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
description: Target devices to run benchmark
2424
required: false
2525
type: string
26-
default: apple_iphone_15_private
26+
default: apple_iphone_15+pro_private
2727
benchmark_configs:
2828
description: The list of configs used the benchmark
2929
required: false
@@ -39,7 +39,7 @@ on:
3939
description: Target devices to run benchmark
4040
required: false
4141
type: string
42-
default: apple_iphone_15_private
42+
default: apple_iphone_15+pro_private
4343
benchmark_configs:
4444
description: The list of configs used the benchmark
4545
required: false
@@ -58,5 +58,5 @@ jobs:
5858
contents: read
5959
with:
6060
models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
61-
devices: apple_iphone_15_private
61+
devices: apple_iphone_15+pro_private
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/apple-perf.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ on:
2727
description: Target devices to run benchmark
2828
required: false
2929
type: string
30-
default: apple_iphone_15
30+
default: apple_iphone_15+public
3131
benchmark_configs:
3232
description: The list of configs used the benchmark
3333
required: false
@@ -43,7 +43,7 @@ on:
4343
description: Target devices to run benchmark
4444
required: false
4545
type: string
46-
default: apple_iphone_15
46+
default: apple_iphone_15+public
4747
benchmark_configs:
4848
description: The list of configs used the benchmark
4949
required: false
@@ -73,7 +73,7 @@ jobs:
7373
# during scheduled runs and to provide flexibility for different defaults between
7474
# on-demand and periodic benchmarking.
7575
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
76-
CRON_DEFAULT_DEVICES: apple_iphone_15
76+
CRON_DEFAULT_DEVICES: apple_iphone_15+public
7777
run: |
7878
set -eux
7979

0 commit comments

Comments
 (0)