File tree Expand file tree Collapse file tree 6 files changed +44
-18
lines changed Expand file tree Collapse file tree 6 files changed +44
-18
lines changed Original file line number Diff line number Diff line change @@ -280,11 +280,11 @@ def cluster_create(args) -> None:
280
280
xpk_print ('Installation of RayCluster failed.' )
281
281
xpk_exit (return_code )
282
282
283
- if hasattr (args , 'enable_mtc' ) and args .enable_mtc :
284
- return_code = install_mtc_on_cluster (args , system )
285
- if return_code != 0 :
286
- xpk_print ('Installation of MTC failed.' )
287
- xpk_exit (return_code )
283
+ # if hasattr(args, 'enable_mtc') and args.enable_mtc:
284
+ # return_code = install_mtc_on_cluster(args, system)
285
+ # if return_code != 0:
286
+ # xpk_print('Installation of MTC failed.')
287
+ # xpk_exit(return_code)
288
288
289
289
xpk_print ('GKE commands done! Resources are created.' )
290
290
xpk_print (
Original file line number Diff line number Diff line change @@ -617,14 +617,14 @@ def update_cluster_with_workload_identity_if_necessary(args) -> int:
617
617
618
618
if is_workload_identity_enabled_on_cluster (args ):
619
619
return 0
620
- cluster_update_return_code = (
621
- update_gke_cluster_with_workload_identity_enabled (args )
622
- )
623
- if cluster_update_return_code > 0 :
624
- xpk_print (
625
- 'Updating GKE cluster to enable Workload Identity Federation failed!'
626
- )
627
- return cluster_update_return_code
620
+ # cluster_update_return_code = (
621
+ # update_gke_cluster_with_workload_identity_enabled(args)
622
+ # )
623
+ # if cluster_update_return_code > 0:
624
+ # xpk_print(
625
+ # 'Updating GKE cluster to enable Workload Identity Federation failed!'
626
+ # )
627
+ # return cluster_update_return_code
628
628
629
629
return 0
630
630
Original file line number Diff line number Diff line change @@ -34,7 +34,7 @@ def get_main_container_resources(
34
34
Workload resources port as a YAML string
35
35
"""
36
36
# Resource requirements for Pathways workload containers are known.
37
- resources_yaml = """cpu: "24"
37
+ resources_yaml = f """cpu: "24"
38
38
memory: 100G"""
39
39
if args .use_pathways :
40
40
return resources_yaml
Original file line number Diff line number Diff line change @@ -310,18 +310,26 @@ def run_gke_node_pool_create_command(
310
310
create_commands .append (command )
311
311
create_task_names .append (task )
312
312
313
- desired_pw_cpu_node_pools = ['cpu-np' ]
313
+ desired_pw_cpu_node_pools = ['cpu-np' ] #, 'highmem-cpu-np']
314
314
if args .enable_pathways :
315
315
# Pathways needs CPU nodepools in addition to TPU nodepools
316
316
for node_pool_name in desired_pw_cpu_node_pools :
317
317
if node_pool_name in existing_node_pool_names :
318
318
continue
319
+ # if node_pool_name == 'cpu-np':
319
320
command = (
320
321
'gcloud beta container node-pools create'
321
322
f' { node_pool_name } --node-version={ gke_node_pool_version } --cluster={ args .cluster } --project={ args .project } --node-locations={ args .zone } --region={ zone_to_region (args .zone )} --num-nodes=1'
322
323
f' --machine-type={ args .pathways_gce_machine_type } --scopes=storage-full,gke-default,{ CLOUD_PLATFORM_AUTH_SCOPE_URL } --enable-autoscaling'
323
324
' --min-nodes=1 --max-nodes=20'
324
325
)
326
+ # else:
327
+ # command = (
328
+ # 'gcloud beta container node-pools create'
329
+ # f' {node_pool_name} --node-version={gke_node_pool_version} --cluster={args.cluster} --project={args.project} --node-locations={args.zone} --region={zone_to_region(args.zone)} --num-nodes=1'
330
+ # f' --machine-type={args.pathways_highmem_gce_machine_type} --scopes=storage-full,gke-default,{CLOUD_PLATFORM_AUTH_SCOPE_URL} --enable-autoscaling'
331
+ # ' --min-nodes=1 --max-nodes=20'
332
+ # )
325
333
task = f'NodepoolCreate-{ node_pool_name } '
326
334
create_commands .append (command )
327
335
create_task_names .append (task )
Original file line number Diff line number Diff line change @@ -33,6 +33,14 @@ def add_pw_resource_flavors(args):
33
33
nodeLabels:
34
34
cloud.google.com/gke-nodepool: cpu-np
35
35
---"""
36
+ # apiVersion: kueue.x-k8s.io/v1beta1
37
+ # kind: ResourceFlavor
38
+ # metadata:
39
+ # name: highmem-cpu-user
40
+ # spec:
41
+ # nodeLabels:
42
+ # cloud.google.com/gke-nodepool: highmem-cpu-np
43
+ # ---"""
36
44
if args .enable_pathways :
37
45
return resource_flavor_yaml
38
46
return ''
@@ -48,6 +56,12 @@ def add_pw_resources_to_kueue(args):
48
56
nominalQuota: 480
49
57
- name: "memory"
50
58
nominalQuota: 2000G"""
59
+ # - name: highmem-cpu-user
60
+ # resources:
61
+ # - name: "cpu"
62
+ # nominalQuota: 480
63
+ # - name: "memory"
64
+ # nominalQuota: 4000G"""
51
65
if args .enable_pathways :
52
66
return resources_yaml
53
67
return ''
@@ -79,7 +93,7 @@ def ensure_pathways_workload_prerequisites(args, system) -> bool:
79
93
80
94
# Ensure the cluster and CPU nodepools were created with create-pathways
81
95
all_node_pools = get_all_nodepools_programmatic (args )
82
- desired_pw_cpu_node_pools = {'cpu-np' }
96
+ desired_pw_cpu_node_pools = {'cpu-np' } #, 'highmem-cpu-np'}
83
97
if not desired_pw_cpu_node_pools .issubset (set (all_node_pools [0 ])):
84
98
xpk_print (
85
99
'Cluster needs to be created with `xpk create-pathways` to run'
@@ -263,8 +277,6 @@ def get_user_workload_for_pathways(
263
277
spec:
264
278
containers:
265
279
{container}
266
- nodeSelector:
267
- cloud.google.com/gke-nodepool: cpu-np
268
280
hostNetwork: true
269
281
dnsPolicy: ClusterFirstWithHostNet
270
282
restartPolicy: Never
Original file line number Diff line number Diff line change @@ -528,6 +528,12 @@ def add_shared_cluster_create_optional_arguments(parser: ArgumentParser):
528
528
default = 'n2-standard-64' ,
529
529
help = 'The CPU type for Pathways CPU nodepools' ,
530
530
)
531
+ # parser.add_argument(
532
+ # '--pathways-highmem-gce-machine-type',
533
+ # type=str,
534
+ # default='c4-highmem-192',
535
+ # help='The highmem CPU type for Pathways CPU nodepools',
536
+ # )
531
537
parser .add_argument (
532
538
'--default-pool-cpu-machine-type' ,
533
539
type = str ,
You can’t perform that action at this time.
0 commit comments