Skip to content

Commit 664e08e

Browse files
authored
Image update - OpenHPC v3.1 for RL9 (#394)
* bump Packer source image to RL9.4
* downgrade OFED to LTS to get stable download url
* bump OOD role, now ondemand dnf package installed will be latest
* Revert Packer source image to RL9.3 to avoid hanging after post-update reboot. This reverts commit 851c494.
* bump OFED to get RL9.4-supported version
* bump leafcloud packer vm to 8GB RAM
* DEBUG: disable (working) OFED build
* Revert "DEBUG: disable (working) OFED build" This reverts commit 45a48c3.
* DEBUG: output builder hostname
* Revert "DEBUG: output builder hostname" This reverts commit 3f95f8e.
* fix build workflow concurrency
* DEBUG: disable updates
* Revert "DEBUG: disable updates" This reverts commit 3581a35.
* bump packer build volume size for non-ofed to avoid RL8 build running out of root space
* try to prevent stackhpc env image build connection drops
* bump packer source image to fixed RL9.4 image
* run test CI workflow on RL8 image if PR labeled with 'RL8'
* bump CI images
* bump openhpc role to fix munge checks on key path
1 parent 4433026 commit 664e08e

File tree

10 files changed

+18
-14
lines changed

10 files changed

+18
-14
lines changed

.github/workflows/fatimage.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ name: Build fat image
88
description: Include RL8 image build
99
type: boolean
1010
default: false
11+
concurrency:
12+
group: ${{ github.ref }}-{{ matrix.os_version }} # to branch/PR + OS
13+
cancel-in-progress: true
1114
jobs:
1215
openstack:
1316
name: openstack-imagebuild
1417
runs-on: ubuntu-20.04
15-
concurrency: ${{ github.ref }}-{{ matrix.os_version }} # to branch/PR + OS
1618
strategy:
1719
matrix:
1820
os_version: [RL8, RL9]

.github/workflows/stackhpc.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@ jobs:
2424
- ${{ inputs.use_RL8 == true }} # only potentially true for workflow_dispatch
2525
rl8_branch:
2626
- ${{ startsWith(github.head_ref, 'rl8') == true }} # only potentially true for pull_request, always false on merge
27+
rl8_label:
28+
- ${{ contains(github.event.pull_request.labels.*.name, 'RL8') }} # NB: needs a new commit if added after PR created
2729
exclude:
2830
- os_version: RL8
2931
rl8_selected: false
3032
rl8_branch: false
33+
rl8_label: false
3134
env:
3235
ANSIBLE_FORCE_COLOR: True
3336
OS_CLOUD: openstack

ansible/roles/ofed/defaults/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ofed_version: 24.01-0.3.3.1
1+
ofed_version: '24.04-0.6.6.0' # LTS version 23.10-2.1.3.1 does not support RL9.4
22
ofed_download_url: https://content.mellanox.com/ofed/MLNX_OFED-{{ ofed_version }}/MLNX_OFED_LINUX-{{ ofed_version }}-{{ ofed_distro }}{{ ofed_distro_version }}-{{ ofed_arch }}.tgz
33
ofed_distro: rhel # NB: not expected to work on other distros due to installation differences
44
ofed_distro_version: "{{ ansible_distribution_version }}" # e.g. '8.9'

environments/.stackhpc/ARCUS.pkrvars.hcl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
flavor = "vm.ska.cpu.general.small"
22
use_blockstorage_volume = true
3-
volume_size = 12 # GB. Compatible with SMS-lab's general.v1.tiny
3+
volume_size = 15 # GB
4+
volume_size_ofed = 15 # GB
45
image_disk_format = "qcow2"
56
networks = ["4b6b2722-ee5b-40ec-8e52-a6610e14cc51"] # portal-internal (DNS broken on ilab-60)
67
ssh_keypair_name = "slurm-app-ci"

environments/.stackhpc/LEAFCLOUD.pkrvars.hcl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
flavor = "ec1.medium"
1+
flavor = "ec1.large"
22
use_blockstorage_volume = true
3-
volume_size = 12 # GB. Compatible with SMS-lab's general.v1.tiny
3+
volume_size = 15 # GB
44
volume_size_ofed = 15 # GB
55
volume_type = "unencrypted"
66
image_disk_format = "qcow2"

environments/.stackhpc/ansible.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ roles_path = ../../ansible/roles
1212
filter_plugins = ../../ansible/filter_plugins
1313

1414
[ssh_connection]
15-
ssh_args = -o ControlMaster=auto -o ControlPath=~/.ssh/%r@%h-%p -o ControlPersist=240s -o PreferredAuthentications=publickey -o UserKnownHostsFile=/dev/null
15+
ssh_args = -o ServerAliveInterval=10 -o ControlMaster=auto -o ControlPath=~/.ssh/%r@%h-%p -o ControlPersist=240s -o PreferredAuthentications=publickey -o UserKnownHostsFile=/dev/null
1616
pipelining = True

environments/.stackhpc/terraform/main.tf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ variable "cluster_image" {
2929
description = "single image for all cluster nodes, keyed by os_version - a convenience for CI"
3030
type = map(string)
3131
default = {
32-
# https://github.com/stackhpc/ansible-slurm-appliance/pull/353
33-
RL8: "openhpc-RL8-240423-1002-4b09ba85"
34-
RL9: "openhpc-ofed-RL9-240423-1059-4b09ba85"
32+
# https://github.com/stackhpc/ansible-slurm-appliance/pull/394
33+
RL8: "openhpc-RL8-240605-1205-a3002d19"
34+
RL9: "openhpc-ofed-RL9-240605-1204-a3002d19"
3535
}
3636
}
3737

environments/common/inventory/group_vars/all/openondemand.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
# or include regex special characters.
1414
openondemand_host_regex: "{{ (groups['compute'] + groups['grafana']) | to_ood_regex }}"
1515

16-
ondemand_package: ondemand-3.0.3
17-
1816
# Add grafana to dashboard links to OOD only if grafana group is available
1917
openondemand_dashboard_links_grafana:
2018
- name: Grafana

packer/openstack.pkr.hcl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ variable "fatimage_source_image_name" {
4949
type = map(string)
5050
default = {
5151
RL8: "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
52-
RL9: "Rocky-9-GenericCloud-Base-9.3-20231113.0.x86_64.qcow2"
52+
RL9: "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2"
5353
}
5454
}
5555

requirements.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ roles:
33
- src: stackhpc.nfs
44
version: v23.12.1 # Tolerate stale NFS file handles
55
- src: https://github.com/stackhpc/ansible-role-openhpc.git
6-
version: v0.25.0 # https://github.com/stackhpc/ansible-role-openhpc/pull/167
6+
version: v0.26.0 # https://github.com/stackhpc/ansible-role-openhpc/pull/168
77
name: stackhpc.openhpc
88
- src: https://github.com/stackhpc/ansible-node-exporter.git
99
version: stackhpc
@@ -19,7 +19,7 @@ roles:
1919
# No versions available
2020
- src: https://github.com/OSC/ood-ansible.git
2121
name: osc.ood
22-
version: v3.0.6
22+
version: v3.1.5
2323
- src: https://github.com/stackhpc/ansible-role-os-manila-mount.git
2424
name: stackhpc.os-manila-mount
2525
version: v24.2.0 # Support RockyLinux 9

0 commit comments

Comments
 (0)