@@ -19,9 +19,9 @@ Role Variables
 
 `openhpc_packages`: additional OpenHPC packages to install
 
-`openhpc_enable`: 
+`openhpc_enable`:
 * `control`: whether to enable control host
-* `batch`: whether to enable compute nodes 
+* `batch`: whether to enable compute nodes
 * `runtime`: whether to enable OpenHPC runtime
 * `drain`: whether to drain compute nodes
 * `resume`: whether to resume compute nodes
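
Taken together, these flags select which parts of the role run on a given host. As a rough sketch (the host group and flag values here are illustrative, not taken from this diff), enabling only the control and runtime parts on the control host might look like:

    ---
    - hosts: cluster_control
      become: yes
      roles:
        - role: stackhpc.openhpc
          openhpc_enable:
            control: true
            runtime: true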
@@ -38,6 +38,9 @@ And an Ansible inventory as this:
     openhpc-compute-0 ansible_host=10.60.253.31 ansible_user=centos
     openhpc-compute-1 ansible_host=10.60.253.32 ansible_user=centos
 
+    [cluster_login:children]
+    openhpc_login
+
     [cluster_control:children]
     openhpc_login
 
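
Assembling the fragments shown across the hunks, a plausible complete inventory is sketched below (the login host's address is illustrative, and `cluster_batch` is inferred from the drain playbook further down):

    [openhpc_login]
    openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos

    [openhpc_compute]
    openhpc-compute-0 ansible_host=10.60.253.31 ansible_user=centos
    openhpc-compute-1 ansible_host=10.60.253.32 ansible_user=centos

    [cluster_login:children]
    openhpc_login

    [cluster_control:children]
    openhpc_login

    [cluster_batch:children]
    openhpc_compute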
@@ -46,7 +49,7 @@ And an Ansible inventory as this:
 
 Example Playbooks
 ----------------
- 
+
 To deploy, create a playbook which looks like this:
 
     ---
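
The hunk cuts the deploy playbook off after the YAML document marker. A minimal sketch of what such a playbook might contain, assuming only the role variables documented above, is:

    ---
    - hosts: cluster_control
      become: yes
      roles:
        - role: stackhpc.openhpc
          openhpc_enable:
            control: true
            runtime: true
          openhpc_slurm_control_host: "{{ groups['cluster_control'] | first }}"
          openhpc_cluster_name: openhpc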
@@ -80,24 +83,28 @@ To drain nodes, for example, before scaling down the cluster to 6 nodes:
     ---
     - hosts: openstack
       gather_facts: false
-
+      vars:
+        partition: "{{ cluster_group.output_value | selectattr('group', 'equalto', item.name) | list }}"
+        openhpc_slurm_partitions:
+          - name: "compute"
+            flavor: "compute-A"
+            image: "CentOS7.5-OpenHPC"
+            num_nodes: 6
+            user: "centos"
+        openhpc_cluster_name: openhpc
       roles:
+        # Our stackhpc.cluster-infra role can be invoked in `query` mode, which
+        # looks up the state of the cluster by querying the Heat API.
         - role: stackhpc.cluster-infra
           cluster_name: "{{ cluster_name }}"
           cluster_state: query
           cluster_params:
             cluster_groups: "{{ cluster_groups }}"
       tasks:
+        # The original cluster was created with 8 nodes and we now want 6, so
+        # the computed desired_state variable stores the list of instances to
+        # leave untouched.
         - name: Count the number of compute nodes per slurm partition
-          vars:
-            partition: "{{ cluster_group.output_value | selectattr('group', 'equalto', item.name) | list }}"
-            openhpc_slurm_partitions:
-              - name: "compute"
-                flavor: "compute-A"
-                image: "CentOS7.5-OpenHPC"
-                num_nodes: 6
-                user: "centos"
-            openhpc_cluster_name: openhpc
           set_fact:
             desired_state: "{{ ((partition | first).nodes | map(attribute='name') | list)[:item.num_nodes] + desired_state | default([]) }}"
           when: partition | length > 0
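
To make the slicing in that `set_fact` concrete: with eight existing instances and `num_nodes: 6`, only the first six instance names survive into `desired_state`. A hypothetical standalone task illustrating just the slice (node names are made up):

    - name: Illustrate the desired_state slice (hypothetical)
      vars:
        node_names:
          - openhpc-compute-0
          - openhpc-compute-1
          - openhpc-compute-2
          - openhpc-compute-3
          - openhpc-compute-4
          - openhpc-compute-5
          - openhpc-compute-6
          - openhpc-compute-7
      debug:
        # Keeps the first six names; openhpc-compute-6 and -7 fall outside
        # desired_state and will be drained by the play below.
        msg: "{{ node_names[:6] }}"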
@@ -106,9 +113,13 @@ To drain nodes, for example, before scaling down the cluster to 6 nodes:
 
     - hosts: cluster_batch
       become: yes
+      vars:
+        desired_state: "{{ hostvars['localhost']['desired_state'] | default([]) }}"
       roles:
+        # Now the stackhpc.openhpc role is invoked in drain/resume mode:
+        # instances listed in desired_state are resumed (if currently drained),
+        # and instances not listed are drained.
         - role: stackhpc.openhpc
-          desired_state: "{{ hostvars['localhost']['desired_state'] | default([]) }}"
           openhpc_slurm_control_host: "{{ groups['cluster_control'] | first }}"
           openhpc_enable:
             drain: "{{ inventory_hostname not in desired_state }}"
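
The hunk ends at the `drain` flag. Given the `resume` variable documented under `openhpc_enable` above, the complementary flag would plausibly be the inverse membership test (a sketch, not shown in this diff):

    openhpc_enable:
      drain: "{{ inventory_hostname not in desired_state }}"
      resume: "{{ inventory_hostname in desired_state }}"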