
Commit c3adc9e

Merge pull request #1676 from qJkee/OCPEDGE-902
OCPEDGE-902: add SNO control plane high cpu usage alert
2 parents 58d79ab + 5b7bf4c commit c3adc9e

9 files changed, +837 -3 lines changed

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cpu-utilization
  namespace: openshift-kube-apiserver
spec:
  groups:
    - name: control-plane-cpu-utilization
      rules:
        - alert: HighOverallControlPlaneCPU
          annotations:
            summary: >-
              CPU utilization across control plane pods is more than 60% of total CPU. High CPU usage usually means that something is going wrong.
            runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
            description: >-
              This level of CPU utilization of a control plane is probably not a problem under most circumstances, but high levels of utilization may indicate
              problems with the cluster or control plane pods. To manage this alert or modify its threshold in case of false positives, see the following link:
              https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
          expr: |
            sum(rate(container_cpu_usage_seconds_total{namespace=~"openshift-.*",image!=""}[4m])) / ${CPU-COUNT} * 100 > 60
          for: 10m
          labels:
            namespace: openshift-kube-apiserver
            severity: warning
        - alert: ExtremelyHighIndividualControlPlaneCPU
          annotations:
            summary: >-
              CPU utilization across control plane pods is more than 90% of total CPU. High CPU usage usually means that something is going wrong.
            runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
            description: >-
              This level of CPU utilization of a control plane is probably not a problem under most circumstances, but high levels of utilization may indicate
              problems with the cluster or control plane pods. When workload partitioning is enabled,
              extreme CPU pressure can cause slow serialization and poor performance from the kube-apiserver and etcd.
              When this happens, there is a risk of clients seeing non-responsive API requests which are issued again,
              causing even more CPU pressure.
              It can also cause failing liveness probes due to slow etcd responsiveness on the backend.
              If one kube-apiserver fails under this condition, chances are you will experience a cascade as the remaining
              kube-apiservers are also under-provisioned.
              To fix this, increase the CPU and memory on your control plane nodes.
              To manage this alert or modify its threshold in case of false positives, see the following link:
              https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
          expr: |
            sum(rate(container_cpu_usage_seconds_total{namespace=~"openshift-.*",image!=""}[4m])) / ${CPU-COUNT} * 100 > 90
          for: 1h
          labels:
            namespace: openshift-kube-apiserver
            severity: critical
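
Note that the ${CPU-COUNT} token in both expr fields is not valid PromQL on its own; the controller added in this commit substitutes the number of reserved control plane cores before the rule is applied. A minimal standalone sketch of that substitution, assuming the default of 8 cores (the expr line below is copied from the rule above; everything else is illustrative, not part of the commit):

	package main

	import (
		"bytes"
		"fmt"
		"strconv"
	)

	func main() {
		// illustrative stand-in for the templated expr line of the rule above
		expr := []byte(`sum(rate(container_cpu_usage_seconds_total{namespace=~"openshift-.*",image!=""}[4m])) / ${CPU-COUNT} * 100 > 60`)

		// defaultCoresNum used by the controller when no PerformanceProfile is found
		cores := 8
		rendered := bytes.ReplaceAll(expr, []byte("${CPU-COUNT}"), []byte(strconv.Itoa(cores)))

		// the divisor becomes 8, so the query reads as a percentage of the reserved CPU
		fmt.Println(string(rendered))
	}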
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
package highcpuusagealertcontroller

import (
	"bytes"
	"context"
	"strconv"
	"time"

	configv1 "github.com/openshift/api/config/v1"
	configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1"
	configlistersv1 "github.com/openshift/client-go/config/listers/config/v1"
	"github.com/openshift/cluster-kube-apiserver-operator/bindata"
	"github.com/openshift/library-go/pkg/controller/factory"
	"github.com/openshift/library-go/pkg/operator/events"
	"github.com/openshift/library-go/pkg/operator/resource/resourceapply"
	"github.com/openshift/library-go/pkg/operator/resource/resourceread"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/dynamic/dynamicinformer"
	"k8s.io/utils/cpuset"
)

// default number of control plane cores, taken from the docs
const defaultCoresNum = 8

var performanceGroup = schema.GroupVersionResource{Group: "performance.openshift.io", Version: "v2", Resource: "performanceprofiles"}

type highCPUUsageAlertController struct {
	client               dynamic.Interface
	infraLister          configlistersv1.InfrastructureLister
	clusterVersionLister configlistersv1.ClusterVersionLister
}

func NewHighCPUUsageAlertController(
	configInformer configv1informers.Interface,
	dynamicInformersForTargetNamespace dynamicinformer.DynamicSharedInformerFactory,
	client dynamic.Interface,
	recorder events.Recorder,
) factory.Controller {
	c := &highCPUUsageAlertController{
		client:               client,
		infraLister:          configInformer.Infrastructures().Lister(),
		clusterVersionLister: configInformer.ClusterVersions().Lister(),
	}

	prometheusAlertInformerForTargetNamespace := dynamicInformersForTargetNamespace.ForResource(schema.GroupVersionResource{
		Group:    "monitoring.coreos.com",
		Version:  "v1",
		Resource: "prometheusrules",
	})

	return factory.New().
		WithInformers(configInformer.Infrastructures().Informer(), configInformer.ClusterVersions().Informer(), prometheusAlertInformerForTargetNamespace.Informer()).
		WithSync(c.sync).ResyncEvery(10*time.Minute).
		ToController("highCPUUsageAlertController", recorder.WithComponentSuffix("high-cpu-usage-alert-controller"))
}

func (c *highCPUUsageAlertController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
	infra, err := c.infraLister.Get("cluster")
	if err != nil {
		return err
	}

	var alertRaw []byte

	if infra.Status.InfrastructureTopology != configv1.SingleReplicaTopologyMode {
		// we moved creation of the alert here because the static resource controller was constantly
		// deleting the alert and fighting with this controller
		alertRaw, err = bindata.Asset("assets/alerts/cpu-utilization.yaml")
		if err != nil {
			return err
		}
	} else {
		clusterVersion, err := c.clusterVersionLister.Get("version")
		if err != nil {
			return err
		}

		alertRaw, err = snoAlert(ctx, c.client, clusterVersion.Status.Capabilities.EnabledCapabilities, infra.Status.CPUPartitioning)
		if err != nil {
			return err
		}
	}

	alertObj, err := resourceread.ReadGenericWithUnstructured(alertRaw)
	if err != nil {
		return err
	}

	_, _, err = resourceapply.ApplyPrometheusRule(ctx, c.client, syncCtx.Recorder(), alertObj.(*unstructured.Unstructured))
	return err
}

func snoAlert(ctx context.Context, client dynamic.Interface, enabledCapabilities []configv1.ClusterVersionCapability, cpuMode configv1.CPUPartitioningMode) ([]byte, error) {
	cores := defaultCoresNum

	// if the NodeTuning capability is disabled, there is no PerformanceProfile, so we proceed
	// with the default value.
	if sets.New(enabledCapabilities...).Has(configv1.ClusterVersionCapabilityNodeTuning) && cpuMode == configv1.CPUPartitioningAllNodes {
		foundCores, found, err := performanceProfileControlPlaneCores(ctx, client)
		if err != nil {
			return nil, err
		}
		// take the core count from the PerformanceProfile if one was found;
		// if not, proceed with the default value
		if found {
			cores = foundCores
		}
	}

	fileData, err := bindata.Asset("assets/alerts/cpu-utilization-sno.yaml")
	if err != nil {
		return nil, err
	}
	fileData = bytes.ReplaceAll(fileData, []byte(`${CPU-COUNT}`), []byte(strconv.Itoa(cores)))

	return fileData, nil
}

// performanceProfileControlPlaneCores returns the cores allocated for control plane pods via
// the PerformanceProfile object. The bool value indicates whether a PerformanceProfile was found for the master nodes.
func performanceProfileControlPlaneCores(ctx context.Context, client dynamic.Interface) (int, bool, error) {
	// fetch the resource directly instead of using an informer because
	// the NodeTuning capability can be disabled at start and enabled later
	obj, err := client.Resource(performanceGroup).List(ctx, metav1.ListOptions{})
	if err != nil {
		return 0, false, err
	}

	for _, pf := range obj.Items {
		nodeSelector, found, err := unstructured.NestedStringMap(pf.Object, "spec", "nodeSelector")
		if err != nil {
			return 0, false, err
		}
		if !found {
			continue
		}
		if _, ok := nodeSelector["node-role.kubernetes.io/master"]; !ok {
			continue
		}

		reservedCPU, found, err := unstructured.NestedString(pf.Object, "spec", "cpu", "reserved")
		if err != nil {
			return 0, false, err
		}
		if !found {
			continue
		}

		cores, err := coresInCPUSet(reservedCPU)
		if err != nil {
			return 0, false, err
		}
		return cores, true, nil
	}

	return 0, false, nil
}

func coresInCPUSet(set string) (int, error) {
	cpuMap, err := cpuset.Parse(set)
	return cpuMap.Size(), err
}
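
For reference, coresInCPUSet relies on k8s.io/utils/cpuset to turn a reserved-CPU string (the spec.cpu.reserved field of a PerformanceProfile) into a core count. A small standalone sketch, using a hypothetical reserved set rather than a real cluster object:

	package main

	import (
		"fmt"

		"k8s.io/utils/cpuset"
	)

	func main() {
		// hypothetical spec.cpu.reserved value from a PerformanceProfile
		reserved := "0-3,52-55"

		set, err := cpuset.Parse(reserved)
		if err != nil {
			panic(err)
		}

		// prints 8: the value the controller would substitute for ${CPU-COUNT} in the SNO alert rule
		fmt.Println(set.Size())
	}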

0 commit comments
