From eff158f7f949910cd5637446cfa26510b9882b0a Mon Sep 17 00:00:00 2001 From: Bartosz Bezak Date: Tue, 7 Oct 2025 19:14:33 +0200 Subject: [PATCH 1/3] rewrite ovn-fix-chassis-priorities playbook It is now aligning the HA chassis priorities based on gateway chassis ones. Signed-off-by: Bartosz Bezak (cherry picked from commit 2be3fd2ebac705fda66fa9c5720ccf3d7eeda048) --- .../ansible/ovn-fix-chassis-priorities.yml | 159 ++++++++++++++---- 1 file changed, 122 insertions(+), 37 deletions(-) diff --git a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml index f5c7197fc0..c92b7a5fc4 100644 --- a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml +++ b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml @@ -5,23 +5,23 @@ # metal/SR-IOV) ports. # This playbook can be used to fix the issue by realigning the priorities of -# the table entries. It does so by assigning the highest priority to the -# "first" (sorted alphabetically) OVN NB DB host. This results in all gateways -# being scheduled to a single host, but is less complicated than trying to -# balance them (and it's also not clear to me how to map between individual -# ha_chassis and gateway_chassis entries). +# the table entries. It executes a small inline shell script against the +# OVN northbound database to ensure that, for each router, the HA chassis +# backing its internal networks is aligned with the chassis currently hosting +# the router's external gateway interface. # The playbook can be run as follows: -# kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ovn-fix-chassis-priorities.yml +# kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ovn-fix-chassis-priorities.yml +# By default this runs in dry-run mode; pass '-e apply=yes' to perform the updates. 
# If the 'controllers' group does not align with the group used to deploy the # OVN NB DB, this can be overridden by passing the following: # '-e ovn_nb_db_group=some_other_group' -- name: Find OVN DB DB Leader +- name: Find OVN NB DB Leader hosts: "{{ ovn_nb_db_group | default('controllers') }}" tasks: - - name: Find OVN DB Leader + - name: Find OVN NB DB Leader when: kolla_enable_ovn | bool block: - name: Find the OVN NB DB leader @@ -43,34 +43,119 @@ - name: Fix OVN chassis priorities hosts: ovn_nb_leader + gather_facts: false vars: - ovn_nb_db_group: controllers - ovn_nb_db_hosts_sorted: "{{ query('inventory_hostnames', ovn_nb_db_group) | sort | list }}" - ha_chassis_max_priority: 32767 - gateway_chassis_max_priority: "{{ ovn_nb_db_hosts_sorted | length }}" + apply_updates: "{{ apply | default(false) | bool }}" tasks: - - name: Fix ha_chassis priorities - ansible.builtin.command: >- - docker exec ovn_nb_db - bash -c ' - ovn-nbctl find ha_chassis chassis_name={{ item }} | - awk '\''$1 == "_uuid" { print $3 }'\'' | - while read uuid; do ovn-nbctl set ha_chassis $uuid priority={{ priority }}; done' - loop: "{{ ovn_nb_db_hosts_sorted }}" - vars: - priority: "{{ ha_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}" - register: ha_chassis_command - changed_when: ha_chassis_command.rc == 0 - - - name: Fix gateway_chassis priorities - ansible.builtin.command: >- - docker exec ovn_nb_db - bash -c ' - ovn-nbctl find gateway_chassis chassis_name={{ item }} | - awk '\''$1 == "_uuid" { print $3 }'\'' | - while read uuid; do ovn-nbctl set gateway_chassis $uuid priority={{ priority }}; done' - loop: "{{ ovn_nb_db_hosts_sorted }}" - vars: - priority: "{{ gateway_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}" - register: gateway_chassis_command - changed_when: gateway_chassis_command.rc == 0 + - name: Realign HA chassis priorities with active gateways + when: kolla_enable_ovn | bool + ansible.builtin.shell: | + docker exec -i ovn_nb_db bash 
-s <<'EOF' + set -euo pipefail + + MAX_PRIORITY=32767 + APPLY="{{ 'yes' if apply_updates else 'no' }}" + + if [ "$APPLY" = "yes" ]; then + echo "APPLY MODE: Updating OVN HA priorities" + else + echo "DRY-RUN MODE: Showing proposed changes only" + echo "Re-run with -e apply=yes to apply changes" + fi + echo "" + + # Get all external gateway ports + ext_ports=$(ovn-nbctl --data=bare --no-headings --columns=name find logical_router_port 'external_ids:"neutron:is_ext_gw"="True"') + + for ext_port in $ext_ports; do + # Get router name + router=$(ovn-nbctl --data=bare --no-headings get logical_router_port "$ext_port" 'external_ids:"neutron:router_name"' | tr -d '"') + + if [ -z "$router" ]; then + echo "Skipping $ext_port: no router name found" + continue + fi + + # Get gateway chassis list (ordered by priority) + gateway_chassis="" + gateway_info=$(ovn-nbctl lrp-get-gateway-chassis "$ext_port" 2>/dev/null || true) + + while IFS= read -r line; do + # Strip prefix + chassis=$(echo "$line" | awk '{print $1}' | cut -d'_' -f2-) + gateway_chassis="$gateway_chassis $chassis" + done <<< "$gateway_info" + + gateway_chassis=${gateway_chassis# } + + if [ -z "$gateway_chassis" ]; then + echo "Router $router: no gateway chassis configured" + continue + fi + + # The first chassis in the list is the active gateway + active_gateway=$(echo "$gateway_chassis" | awk '{print $1}') + echo "Router: $router | Port: $ext_port | Active Gateway: $active_gateway" + + # Process all internal ports on this router + router_ports=$(ovn-nbctl --data=bare --no-headings --columns=name \ + find logical_router_port "external_ids:\"neutron:router_name\"=\"$router\"") + + for port in $router_ports; do + + # Skip external gateway ports + is_external=$(ovn-nbctl --data=bare --no-headings get logical_router_port "$port" 'external_ids:"neutron:is_ext_gw"' 2>/dev/null) + [ "$is_external" = "True" ] && continue + + # Get network name and HA chassis group + network=$(ovn-nbctl --data=bare --no-headings get 
logical_router_port "$port" 'external_ids:"neutron:network_name"' 2>/dev/null) + ha_group=$(ovn-nbctl --data=bare --no-headings --columns=_uuid find ha_chassis_group name="$network") + + if [ -z "$ha_group" ]; then + echo " Port $port: no HA group found for network '$network'" + continue + fi + + echo " Port: $port | Network: $network" + + # Update priorities for each chassis in the HA group + ha_chassis_list=$(ovn-nbctl --data=bare --no-headings get ha_chassis_group "$ha_group" ha_chassis | tr -d '[],') + + for uuid in $ha_chassis_list; do + chassis_name=$(ovn-nbctl --data=bare --no-headings get ha_chassis "$uuid" chassis_name) + current_priority=$(ovn-nbctl --data=bare --no-headings get ha_chassis "$uuid" priority) + + # Calculate desired priority + desired_priority="" + index=0 + for gw in $gateway_chassis; do + if [ "$chassis_name" = "$gw" ]; then + desired_priority=$((MAX_PRIORITY - index)) + break + fi + index=$((index + 1)) + done + [ -z "$desired_priority" ] && continue + + # Apply or report change + if [ "$current_priority" -ne "$desired_priority" ]; then + if [ "$APPLY" = "yes" ]; then + ovn-nbctl set ha_chassis "$uuid" priority=$desired_priority + echo " $chassis_name: updated priority $current_priority to $desired_priority" + else + echo " $chassis_name: would update priority $current_priority to $desired_priority" + fi + else + echo " $chassis_name: priority $current_priority (no change needed)" + fi + done + done + echo "" + done + EOF + register: fix_output + changed_when: apply_updates and ('updated priority' in (fix_output.stdout | default(''))) + + - name: Display results + ansible.builtin.debug: + msg: "{{ fix_output.stdout }}" From 717a04416125646c39d242489d5f09fde87c30cf Mon Sep 17 00:00:00 2001 From: Bartosz Bezak Date: Fri, 10 Oct 2025 16:48:46 +0200 Subject: [PATCH 2/3] Fix gateway chassis prefix stripping (#1928) (cherry picked from commit e46c18c17837bbcff00c69b0c2af525ae8383a96) --- etc/kayobe/ansible/ovn-fix-chassis-priorities.yml | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml index c92b7a5fc4..0589ba7614 100644 --- a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml +++ b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml @@ -81,8 +81,8 @@ gateway_info=$(ovn-nbctl lrp-get-gateway-chassis "$ext_port" 2>/dev/null || true) while IFS= read -r line; do - # Strip prefix - chassis=$(echo "$line" | awk '{print $1}' | cut -d'_' -f2-) + # Strip prefix, allowing '-' or '_' separator + chassis=$(echo "$line" | awk '{print $1}' | sed "s/^${ext_port}[-_]//") gateway_chassis="$gateway_chassis $chassis" done <<< "$gateway_info" From 0c161bccd71781c264220662784ee046e0dc9fac Mon Sep 17 00:00:00 2001 From: Bartosz Bezak Date: Thu, 6 Nov 2025 12:15:32 +0100 Subject: [PATCH 3/3] Fix OVN leader detection in chassis priority fix (#1970) After kolla-ansible patch [1], OVN commands can run on all nodes, so a successful 'ovn-nbctl get-connection' no longer identifies the leader. Detect the leader from the 'ovs-appctl cluster/status' output instead. 
[1] https://review.opendev.org/c/openstack/kolla-ansible/+/963412 Signed-off-by: Bartosz Bezak (cherry picked from commit f8f9b24f828665f738b8bb62499324268accb78e) --- etc/kayobe/ansible/ovn-fix-chassis-priorities.yml | 9 ++++++--- .../ovn-fix-chassis-leader-check-551d0e94cbb94ac4.yaml | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/ovn-fix-chassis-leader-check-551d0e94cbb94ac4.yaml diff --git a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml index 0589ba7614..0b83b472cb 100644 --- a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml +++ b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml @@ -25,15 +25,18 @@ when: kolla_enable_ovn | bool block: - name: Find the OVN NB DB leader - ansible.builtin.command: docker exec ovn_nb_db ovn-nbctl get-connection + ansible.builtin.command: >- + docker exec ovn_nb_db + ovs-appctl -t /var/run/ovn/ovnnb_db.ctl + cluster/status OVN_Northbound changed_when: false failed_when: false - register: ovn_check_result + register: ovn_cluster_status check_mode: false - name: Group hosts by leader/follower role ansible.builtin.group_by: - key: ovn_nb_{{ 'leader' if ovn_check_result.rc == 0 else 'follower' }} + key: "{{ 'ovn_nb_leader' if 'Role: leader' in ovn_cluster_status.stdout else 'ovn_nb_follower' }}" changed_when: false - name: Assert one leader exists diff --git a/releasenotes/notes/ovn-fix-chassis-leader-check-551d0e94cbb94ac4.yaml b/releasenotes/notes/ovn-fix-chassis-leader-check-551d0e94cbb94ac4.yaml new file mode 100644 index 0000000000..8d47f49c63 --- /dev/null +++ b/releasenotes/notes/ovn-fix-chassis-leader-check-551d0e94cbb94ac4.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Updated the OVN chassis priority fix playbook to detect the northbound + database leader via ``ovs-appctl cluster/status``, ensuring only the true + leader runs the priority alignment.