From d3e9a828228767a24c21ee4b191eefd2f2a9111f Mon Sep 17 00:00:00 2001
From: Matt Crees
Date: Thu, 25 Sep 2025 13:02:18 +0100
Subject: [PATCH 1/2] Add alerting for RadosGW Usage Exporter metrics

---
Review notes (below the `---` marker, so not part of the commit message):
  * ceph.rules gains a rule group named `rgws`. It is wrapped in
    `{% if stackhpc_enable_radosgw_usage_exporter | bool %}`, so it is only
    rendered when that variable is truthy.
  * The group holds one alert, RadosGWUsageExporterNotServingMetrics, with
    expression `absent(radosgw_user_metadata)`, `for: 5m` and
    `severity: critical`.
  * The new block is placed after `{% endraw %}`, so its Jinja tags are
    evaluated rather than passed through literally.
  * NOTE(review): leading whitespace on the hunk lines below was
    reconstructed; confirm it matches the tree before applying.

 etc/kayobe/kolla/config/prometheus/ceph.rules        | 12 ++++++++++++
 ...t-on-radosgw-usage-exporter-8f03f7013204ea22.yaml |  6 ++++++
 2 files changed, 18 insertions(+)
 create mode 100644 releasenotes/notes/alert-on-radosgw-usage-exporter-8f03f7013204ea22.yaml

diff --git a/etc/kayobe/kolla/config/prometheus/ceph.rules b/etc/kayobe/kolla/config/prometheus/ceph.rules
index 88b04f1e62..b510916438 100644
--- a/etc/kayobe/kolla/config/prometheus/ceph.rules
+++ b/etc/kayobe/kolla/config/prometheus/ceph.rules
@@ -205,3 +205,15 @@ groups:
       description: "{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)"
 
 {% endraw %}
+
+{% if stackhpc_enable_radosgw_usage_exporter | bool %}
+- name: rgws
+  rules:
+  - alert: RadosGWUsageExporterNotServingMetrics
+    expr: absent(radosgw_user_metadata)
+    for: 5m
+    labels:
+      severity: critical
+    annotations:
+      description: "The RadosGW Usage Exporter has not been serving RGW metrics for more than 5 minutes. The exporter may be unable to reach the RGWs."
+{% endif %}
diff --git a/releasenotes/notes/alert-on-radosgw-usage-exporter-8f03f7013204ea22.yaml b/releasenotes/notes/alert-on-radosgw-usage-exporter-8f03f7013204ea22.yaml
new file mode 100644
index 0000000000..5abd35a050
--- /dev/null
+++ b/releasenotes/notes/alert-on-radosgw-usage-exporter-8f03f7013204ea22.yaml
@@ -0,0 +1,6 @@
+---
+features:
+  - |
+    Added a new alert ``RadosGWUsageExporterNotServingMetrics``, which will
+    fire when the RadosGW Usage Exporter fails to serve RGW metrics for more
+    than 5 minutes.
From 82323bb80592bff6fc46b960948c1bdb2a02aac8 Mon Sep 17 00:00:00 2001
From: Matt Crees
Date: Thu, 25 Sep 2025 13:02:18 +0100
Subject: [PATCH 2/2] Remove blank line before RadosGW Usage Exporter rules

---
Review notes (below the `---` marker, so not part of the commit message):
  * The only change is deleting the empty line between `{% endraw %}` and
    the `{% if stackhpc_enable_radosgw_usage_exporter | bool %}` guard in
    ceph.rules (1 deletion, no additions).
  * That empty line is the one added by PATCH 1/2, so this commit is a
    fixup of it; consider squashing the two before merging.
  * NOTE(review): leading whitespace on the hunk lines below was
    reconstructed; confirm it matches the tree before applying.

 etc/kayobe/kolla/config/prometheus/ceph.rules | 1 -
 1 file changed, 1 deletion(-)

diff --git a/etc/kayobe/kolla/config/prometheus/ceph.rules b/etc/kayobe/kolla/config/prometheus/ceph.rules
index b510916438..6477e0feb2 100644
--- a/etc/kayobe/kolla/config/prometheus/ceph.rules
+++ b/etc/kayobe/kolla/config/prometheus/ceph.rules
@@ -205,7 +205,6 @@ groups:
       description: "{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)"
 
 {% endraw %}
-
 {% if stackhpc_enable_radosgw_usage_exporter | bool %}
 - name: rgws
   rules: