From d006aae7c987e2f04af7d4ddb39925eb921105a7 Mon Sep 17 00:00:00 2001 From: David Collom Date: Tue, 1 Apr 2025 15:36:55 +0100 Subject: [PATCH 1/6] Track Kubernetes Channels for latest versions --- cmd/app/app.go | 16 +++- cmd/app/options.go | 18 +++- go.mod | 1 + go.sum | 2 + pkg/client/docker/docker.go | 2 + pkg/client/fallback/fallback.go | 4 +- pkg/client/util/http_backoff.go | 19 ++++ pkg/controller/kube_controller.go | 140 ++++++++++++++++++++++++++++++ pkg/metrics/kubernetes.go | 21 +++++ pkg/metrics/metrics.go | 48 +++++----- 10 files changed, 244 insertions(+), 27 deletions(-) create mode 100644 pkg/client/util/http_backoff.go create mode 100644 pkg/controller/kube_controller.go create mode 100644 pkg/metrics/kubernetes.go diff --git a/cmd/app/app.go b/cmd/app/app.go index e18d85f0..af61316f 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -110,15 +110,27 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup image registry clients: %s", err) } - c := controller.NewPodReconciler(opts.CacheTimeout, + _ = client + + podController := controller.NewPodReconciler(opts.CacheTimeout, metricsServer, client, mgr.GetClient(), log, opts.DefaultTestAll, ) + if err := podController.SetupWithManager(mgr); err != nil { + return err + } - if err := c.SetupWithManager(mgr); err != nil { + kubeController := controller.NewKubeReconciler( + log, + mgr.GetConfig(), + metricsServer, + opts.KubeInterval, + opts.KubeChannel, + ) + if err := mgr.Add(kubeController); err != nil { return err } diff --git a/cmd/app/options.go b/cmd/app/options.go index b4bb191c..4e2a6b80 100644 --- a/cmd/app/options.go +++ b/cmd/app/options.go @@ -72,10 +72,16 @@ type Options struct { GracefulShutdownTimeout time.Duration CacheSyncPeriod time.Duration + KubeChannel string + KubeInterval time.Duration + + // kubeConfigFlags holds the flags for the kubernetes client kubeConfigFlags *genericclioptions.ConfigFlags - selfhosted selfhosted.Options + // Client holds the options for the image client(s) Client client.Options + // selfhosted holds the options for the selfhosted registry + selfhosted selfhosted.Options } func (o *Options) addFlags(cmd *cobra.Command) { @@ -133,7 +139,15 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.DurationVarP(&o.CacheSyncPeriod, "cache-sync-period", "", 5*time.Hour, - "The time in which all resources should be updated.") + "The duration in which all resources should be updated.") + + fs.DurationVarP(&o.KubeInterval, + "kube-interval", "", o.CacheSyncPeriod, + "The time in which kubernetes channels updates are checked.") + + fs.StringVarP(&o.KubeChannel, + "kube-channel", "", "stable", + "The Kubernetes channel to check against for cluster updates.") } func (o *Options) addAuthFlags(fs *pflag.FlagSet) { diff --git a/go.mod b/go.mod index 14584f84..391ad2fe 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( ) require ( + github.com/Masterminds/semver/v3 v3.3.1 github.com/aws/aws-sdk-go-v2/config v1.29.12 github.com/aws/aws-sdk-go-v2/credentials v1.17.65 github.com/aws/aws-sdk-go-v2/service/ecr v1.43.0 diff --git a/go.sum b/go.sum index 1d17bdfe..52b36d6d 100644 --- a/go.sum +++ b/go.sum @@ -19,6 +19,8 @@ github.com/Azure/go-autorest/logger v0.2.2/go.mod h1:I5fg9K52o+iuydlWfa9T5K6WFos github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/Azure/go-autorest/tracing v0.6.1 h1:YUMSrC/CeD1ZnnXcNYU4a/fzsO35u2Fsful9L/2nyR0= github.com/Azure/go-autorest/tracing v0.6.1/go.mod h1:/3EgjbsjraOqiicERAeu3m7/z0x1TzjQGAwDrJrXGkc= +github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= +github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= github.com/aws/aws-sdk-go-v2/config v1.29.12 h1:Y/2a+jLPrPbHpFkpAAYkVEtJmxORlXoo5k2g1fa2sUo= diff --git a/pkg/client/docker/docker.go b/pkg/client/docker/docker.go index 12e34628..93dc0913 100644 --- a/pkg/client/docker/docker.go +++ b/pkg/client/docker/docker.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/go-retryablehttp" "github.com/jetstack/version-checker/pkg/api" + "github.com/jetstack/version-checker/pkg/client/util" ) const ( @@ -43,6 +44,7 @@ func New(opts Options, log *logrus.Entry) (*Client, error) { retryclient.RetryMax = 10 retryclient.RetryWaitMax = 2 * time.Minute retryclient.RetryWaitMin = 1 * time.Second + retryclient.Backoff = util.HTTPBackOff retryclient.Logger = log.WithField("client", "docker") client := retryclient.StandardClient() diff --git a/pkg/client/fallback/fallback.go b/pkg/client/fallback/fallback.go index 12fc0c52..b273c5bc 100644 --- a/pkg/client/fallback/fallback.go +++ b/pkg/client/fallback/fallback.go @@ -54,9 +54,9 @@ func (c *Client) Tags(ctx context.Context, host, repo, image string) (tags []api remaining := len(c.clients) - i - 1 if remaining == 0 { - c.log.Debugf("failed to lookup via %q, Giving up, no more clients", client.Name()) + c.log.Infof("failed to lookup via %q, Giving up, no more clients", client.Name()) } else { - c.log.Debugf("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) + c.log.Infof("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) } } diff --git a/pkg/client/util/http_backoff.go b/pkg/client/util/http_backoff.go new file mode 100644 index 00000000..ab65ed1c --- /dev/null +++ b/pkg/client/util/http_backoff.go @@ -0,0 +1,19 @@ +package util + +import ( + "net/http" + "time" + + "github.com/hashicorp/go-retryablehttp" +) + +// This is a custom Backoff that enforces the Max wait duration. +// If the sleep is greater we refuse to sleep at all +func HTTPBackOff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { + sleep := retryablehttp.DefaultBackoff(min, max, attemptNum, resp) + if sleep <= max { + return sleep + } + + return 0 +} diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go new file mode 100644 index 00000000..eb812280 --- /dev/null +++ b/pkg/controller/kube_controller.go @@ -0,0 +1,140 @@ +package controller + +import ( + "context" + "fmt" + "io" + "strings" + "time" + + "github.com/sirupsen/logrus" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/hashicorp/go-retryablehttp" + "github.com/jetstack/version-checker/pkg/metrics" + + "github.com/Masterminds/semver/v3" +) + +const channelURLSuffix = "https://dl.k8s.io/release/" + +type ClusterVersionScheduler struct { + client kubernetes.Interface + log *logrus.Entry + metrics *metrics.Metrics + interval time.Duration + channel string +} + +func NewKubeReconciler( + log *logrus.Entry, + config *rest.Config, + metrics *metrics.Metrics, + interval time.Duration, + channel string, +) *ClusterVersionScheduler { + + return &ClusterVersionScheduler{ + log: log, + client: kubernetes.NewForConfigOrDie(config), + interval: interval, + metrics: metrics, + channel: channel, + } +} + +func (s *ClusterVersionScheduler) Start(ctx context.Context) error { + go s.runScheduler(ctx) + return s.reconcile(ctx) +} + +func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + s.log.WithField("interval", s.interval).WithField("channel", s.channel). + Info("ClusterVersionScheduler started") + + for { + select { + case <-ctx.Done(): + s.log.Info("ClusterVersionScheduler stopping") + return + case <-ticker.C: + if err := s.reconcile(ctx); err != nil { + s.log.Error(err, "Failed to reconcile cluster version") + } + } + } +} + +func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { + // Get current cluster version + current, err := s.client.Discovery().ServerVersion() + if err != nil { + return fmt.Errorf("getting cluster version: %w", err) + } + + // Get latest stable version + latest, err := getLatestStableVersion(s.channel) + if err != nil { + return fmt.Errorf("fetching latest stable version: %w", err) + } + + latestSemVer, err := semver.NewVersion(latest) + if err != nil { + return err + } + currentSemVer, err := semver.NewVersion(current.GitVersion) + if err != nil { + return err + } + // Strip metadata from the versions + currentSemVerNoMeta, _ := currentSemVer.SetMetadata("") + latestSemVerNoMeta, _ := latestSemVer.SetMetadata("") + + // Register metrics! + s.metrics.RegisterKubeVersion(!currentSemVerNoMeta.LessThan(&latestSemVerNoMeta), + currentSemVerNoMeta.String(), latestSemVerNoMeta.String(), + s.channel, + ) + + s.log.WithFields(logrus.Fields{ + "currentVersion": currentSemVerNoMeta, + "latestStable": latestSemVerNoMeta, + "channel": s.channel, + }).Info("Cluster version check complete") + + return nil +} + +func getLatestStableVersion(channel string) (string, error) { + if !strings.HasSuffix(channel, ".txt") { + channel += ".txt" + } + + // We don't need a `/` here as its should be in the channelURLSuffix + channelURL := fmt.Sprintf("%s%s", channelURLSuffix, channel) + + client := retryablehttp.NewClient() + client.RetryMax = 3 + client.RetryWaitMin = 1 * time.Second + client.RetryWaitMax = 30 * time.Second + // Optional: Log using your own logrus/logr logger + client.Logger = nil + + resp, err := client.Get(channelURL) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(body)), nil +} diff --git a/pkg/metrics/kubernetes.go b/pkg/metrics/kubernetes.go new file mode 100644 index 00000000..c4f326d8 --- /dev/null +++ b/pkg/metrics/kubernetes.go @@ -0,0 +1,21 @@ +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +func (m *Metrics) RegisterKubeVersion(isLatest bool, currentVersion, latestVersion, channel string) { + m.mu.Lock() + defer m.mu.Unlock() + + isLatestF := 0.0 + if isLatest { + isLatestF = 1.0 + } + + m.kubernetesVersion.With( + prometheus.Labels{ + "current_version": currentVersion, + "latest_version": latestVersion, + "channel": channel, + }, + ).Set(isLatestF) +} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 09aa10ee..c6690a90 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -28,6 +28,9 @@ type Metrics struct { containerImageDuration *prometheus.GaugeVec containerImageErrors *prometheus.CounterVec + // Kubernetes version metric + kubernetesVersion *prometheus.GaugeVec + cache k8sclient.Reader // Contains all metrics for the roundtripper @@ -80,6 +83,16 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R "namespace", "pod", "container", "image", }, ) + kubernetesVersion := promauto.With(reg).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "version_checker", + Name: "is_latest_kube_version", + Help: "Where the current cluster is using the latest release channel version", + }, + []string{ + "current_version", "latest_version", "channel", + }, + ) return &Metrics{ log: log.WithField("module", "metrics"), @@ -90,6 +103,7 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R containerImageDuration: containerImageDuration, containerImageChecked: containerImageChecked, containerImageErrors: containerImageErrors, + kubernetesVersion: kubernetesVersion, roundTripper: NewRoundTripper(reg), } } @@ -113,15 +127,11 @@ func (m *Metrics) AddImage(namespace, pod, container, containerType, imageURL st ).Set(float64(time.Now().Unix())) } -func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { - m.mu.Lock() - defer m.mu.Unlock() - total := 0 - - total += m.containerImageVersion.DeletePartialMatch( +func (m *Metrics) CleanUpMetrics(namespace, pod string) (total int) { + total += m.containerImageDuration.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) - total += m.containerImageDuration.DeletePartialMatch( + total += m.containerImageChecked.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) @@ -131,6 +141,15 @@ func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { total += m.containerImageErrors.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) + return total +} + +func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { + m.mu.Lock() + defer m.mu.Unlock() + + total := m.CleanUpMetrics(namespace, pod) + m.log.Infof("Removed %d metrics for image %s/%s/%s", total, namespace, pod, container) } @@ -138,20 +157,7 @@ func (m *Metrics) RemovePod(namespace, pod string) { m.mu.Lock() defer m.mu.Unlock() - total := 0 - total += m.containerImageVersion.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageDuration.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageChecked.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageErrors.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - + total := m.CleanUpMetrics(namespace, pod) m.log.Infof("Removed %d metrics for pod %s/%s", total, namespace, pod) } From c574db552f0ba0c556c963ff5ab08bdfc396d262 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Mon, 21 Jul 2025 17:25:59 +0200 Subject: [PATCH 2/6] Fix semVer --- pkg/controller/kube_controller.go | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go index eb812280..95dd1269 100644 --- a/pkg/controller/kube_controller.go +++ b/pkg/controller/kube_controller.go @@ -14,8 +14,7 @@ import ( "github.com/hashicorp/go-retryablehttp" "github.com/jetstack/version-checker/pkg/metrics" - - "github.com/Masterminds/semver/v3" + "github.com/jetstack/version-checker/pkg/version/semver" ) const channelURLSuffix = "https://dl.k8s.io/release/" @@ -83,21 +82,20 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { return fmt.Errorf("fetching latest stable version: %w", err) } - latestSemVer, err := semver.NewVersion(latest) - if err != nil { - return err - } - currentSemVer, err := semver.NewVersion(current.GitVersion) - if err != nil { - return err - } - // Strip metadata from the versions - currentSemVerNoMeta, _ := currentSemVer.SetMetadata("") - latestSemVerNoMeta, _ := latestSemVer.SetMetadata("") + latestSemVer := semver.Parse(latest) + currentSemVer := semver.Parse(current.GitVersion) + + // Create version strings without metadata for comparison + currentSemVerNoMeta := fmt.Sprintf("%d.%d.%d", currentSemVer.Major(), currentSemVer.Minor(), currentSemVer.Patch()) + latestSemVerNoMeta := fmt.Sprintf("%d.%d.%d", latestSemVer.Major(), latestSemVer.Minor(), latestSemVer.Patch()) + + // Parse the versions without metadata for comparison + currentComparable := semver.Parse(currentSemVerNoMeta) + latestComparable := semver.Parse(latestSemVerNoMeta) // Register metrics! - s.metrics.RegisterKubeVersion(!currentSemVerNoMeta.LessThan(&latestSemVerNoMeta), - currentSemVerNoMeta.String(), latestSemVerNoMeta.String(), + s.metrics.RegisterKubeVersion(!currentComparable.LessThan(latestComparable), + currentSemVerNoMeta, latestSemVerNoMeta, s.channel, ) From bcca8e3797132aff86401553ab69363619be6975 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Tue, 22 Jul 2025 16:29:46 +0200 Subject: [PATCH 3/6] update docs --- README.md | 24 +++++++++++++++++------- docs/metrics.md | 20 ++++++++++++++++++-- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 48216780..ca1cfe89 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,29 @@ ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/jetstack/version-checker) version-checker is a Kubernetes utility for observing the current versions of -images running in the cluster, as well as the latest available upstream. These -checks get exposed as Prometheus metrics to be viewed on a dashboard, or _soft_ -alert cluster operators. +images running in the cluster, as well as the latest available upstream. Additionally, +it monitors the Kubernetes cluster version against the latest available releases +using official Kubernetes release channels. These checks get exposed as Prometheus +metrics to be viewed on a dashboard, or _soft_ alert cluster operators. + +## Features + +- **Container Image Version Checking**: Monitor and compare container image versions running in the cluster against their latest upstream versions +- **Kubernetes Version Monitoring**: Track your cluster's Kubernetes version against the latest available releases from official Kubernetes channels +- **Prometheus Metrics Integration**: Export all version information as Prometheus metrics for monitoring and alerting +- **Flexible Channel Selection**: Configure which Kubernetes release channel to track (stable, latest, etc.) --- ## Why Use version-checker? -- **Improved Security**: Ensures images are up-to-date, reducing the risk of using vulnerable or compromised versions. -- **Enhanced Visibility**: Provides a clear overview of all running container versions across clusters. -- **Operational Efficiency**: Automates image tracking and reduces manual intervention in version management. -- **Compliance and Policy**: Enforcement: Helps maintain version consistency and adherence to organizational policies. +- **Improved Security**: Ensures images and Kubernetes clusters are up-to-date, reducing the risk of using vulnerable or compromised versions. +- **Enhanced Visibility**: Provides a clear overview of all running container versions and cluster versions across clusters. +- **Operational Efficiency**: Automates image and Kubernetes version tracking and reduces manual intervention in version management. +- **Compliance and Policy Enforcement**: Helps maintain version consistency and adherence to organizational policies for both applications and infrastructure. - **Incremental Upgrades**: Facilitates frequent, incremental updates to reduce the risk of large, disruptive upgrades. - **Add-On Compatibility**: Ensures compatibility with the latest versions of Kubernetes add-ons and dependencies. +- **Proactive Cluster Management**: Stay informed about Kubernetes security updates and new features through automated version monitoring. --- @@ -45,6 +54,7 @@ These registries support authentication. - [Installation Guide](docs/installation.md) - [Metrics](docs/metrics.md) +- [New Features](docs/new_features.md) --- diff --git a/docs/metrics.md b/docs/metrics.md index 8393f8b6..49405e1d 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -2,16 +2,32 @@ By default, version-checker exposes the following Prometheus metrics on `0.0.0.0:8080/metrics`: +## Container Image Metrics + - `version_checker_is_latest_version`: Indicates whether the container in use is using the latest upstream registry version. - `version_checker_last_checked`: Timestamp when the image was last checked. - `version_checker_image_lookup_duration`: Duration of the image version check. - `version_checker_image_failures_total`: Total of errors encountered during image version checks. +## Kubernetes Version Metrics + +- `version_checker_is_latest_kube_version`: Indicates whether the cluster is running the latest version from the configured Kubernetes release channel. + - Labels: `current_version`, `latest_version`, `channel` + - Value `1`: Cluster is up-to-date + - Value `0`: Update available + --- -## Example Prometheus Query +## Example Prometheus Queries +### Check container image versions ```sh QUERY="version_checker_is_latest_version" curl -s --get --data-urlencode query=$QUERY -``` \ No newline at end of file +``` + +### Check Kubernetes cluster version +```sh +QUERY="version_checker_is_latest_kube_version" +curl -s --get --data-urlencode query=$QUERY +``` From e33bb7e0628413dddbf8e60725221738132ad145 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Fri, 25 Jul 2025 15:19:40 +0200 Subject: [PATCH 4/6] add tests --- pkg/metrics/kubernetes_test.go | 140 +++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 pkg/metrics/kubernetes_test.go diff --git a/pkg/metrics/kubernetes_test.go b/pkg/metrics/kubernetes_test.go new file mode 100644 index 00000000..509e3061 --- /dev/null +++ b/pkg/metrics/kubernetes_test.go @@ -0,0 +1,140 @@ +package metrics + +import ( + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var fakeK8sClient = fake.NewFakeClient() + +func TestRegisterKubeVersion(t *testing.T) { + tests := []struct { + name string + isLatest bool + currentVersion string + latestVersion string + channel string + expectedValue float64 + }{ + { + name: "cluster is up to date", + isLatest: true, + currentVersion: "1.28.2", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 1.0, + }, + { + name: "cluster needs update", + isLatest: false, + currentVersion: "1.27.1", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 0.0, + }, + { + name: "cluster is ahead of stable", + isLatest: true, + currentVersion: "1.29.0", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 1.0, + }, + { + name: "latest channel with pre-release", + isLatest: false, + currentVersion: "1.28.1", + latestVersion: "1.29.0-alpha.1", + channel: "latest", + expectedValue: 0.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a new metrics instance for each test to avoid interference + registry := prometheus.NewRegistry() + m := New(logrus.NewEntry(logrus.New()), registry, fakeK8sClient) + + // Register the Kubernetes version + m.RegisterKubeVersion(tt.isLatest, tt.currentVersion, tt.latestVersion, tt.channel) + + // Gather metrics + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 1, "Should have exactly one metric value") + + metric := kubeMetric.GetMetric()[0] + assert.Equal(t, tt.expectedValue, metric.GetGauge().GetValue()) + + // Check labels + labels := metric.GetLabel() + assert.Len(t, labels, 3, "Should have 3 labels: current_version, latest_version, channel") + + labelMap := make(map[string]string) + for _, label := range labels { + labelMap[label.GetName()] = label.GetValue() + } + + assert.Equal(t, tt.currentVersion, labelMap["current_version"]) + assert.Equal(t, tt.latestVersion, labelMap["latest_version"]) + assert.Equal(t, tt.channel, labelMap["channel"]) + }) + } +} + +func TestRegisterKubeVersion_MultipleChannels(t *testing.T) { + registry := prometheus.NewRegistry() + m := New(logrus.NewEntry(logrus.New()), registry, fakeK8sClient) + + // Register metrics for different channels + m.RegisterKubeVersion(true, "1.28.2", "1.28.2", "stable") + m.RegisterKubeVersion(false, "1.28.2", "1.29.0-alpha.1", "latest") + + // Gather metrics + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 2, "Should have exactly two metric values for different channels") + + // Check that both metrics are present + channels := make(map[string]float64) + for _, metric := range kubeMetric.GetMetric() { + labelMap := make(map[string]string) + for _, label := range metric.GetLabel() { + labelMap[label.GetName()] = label.GetValue() + } + channels[labelMap["channel"]] = metric.GetGauge().GetValue() + } + + assert.Equal(t, 1.0, channels["stable"], "Stable channel should be up to date") + assert.Equal(t, 0.0, channels["latest"], "Latest channel should need update") +} From 09f0a0f4bad2a98bde2ecb423e060cecab010954 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Tue, 29 Jul 2025 16:22:16 +0200 Subject: [PATCH 5/6] fixes --- docs/new_features.md | 43 +++++++++++++++++++++++++++++++ pkg/controller/kube_controller.go | 39 ++++++++++++++++++++-------- 2 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 docs/new_features.md diff --git a/docs/new_features.md b/docs/new_features.md new file mode 100644 index 00000000..60641435 --- /dev/null +++ b/docs/new_features.md @@ -0,0 +1,43 @@ +# Kubernetes Version Monitoring + +version-checker now includes built-in Kubernetes cluster version monitoring capabilities. This feature automatically compares your cluster's current Kubernetes version against the latest available versions from official Kubernetes release channels. + +### How It Works + +The Kubernetes version checker: +- Fetches the current cluster version using the Kubernetes Discovery API +- Compares it against the latest version from the configured Kubernetes release channel (using official `https://dl.k8s.io/release/` endpoints) +- Exposes the comparison as Prometheus metrics for monitoring and alerting +- Strips metadata from versions for accurate semantic version comparison (e.g., `v1.28.2-gke.1` becomes `v1.28.2`) + +### Configuration + +You can configure the Kubernetes version checking behavior using the following CLI flags: + +- `--kube-channel`: Specifies which Kubernetes release channel to check against (default: `"stable"`) + - Examples: `stable`, `latest`, `stable-1.28`, `latest-1.29` +- `--kube-interval`: How often to check for Kubernetes version updates (default: same as `--cache-sync-period`, 5 hours) + +### Metrics + +The Kubernetes version monitoring exposes the following Prometheus metric: + +``` +version_checker_is_latest_kube_version{current_version="1.28.2", latest_version="1.29.1", channel="stable"} 0 +``` + +- Value `1`: Cluster is running the latest version from the specified channel +- Value `0`: Cluster is not running the latest version (update available) + +### Examples + +```bash +# Use the default stable channel +./version-checker + +# Monitor against the latest channel +./version-checker --kube-channel=latest + +# Monitor against a specific version channel with custom interval +./version-checker --kube-channel=stable-1.28 --kube-interval=1h +``` \ No newline at end of file diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go index 95dd1269..2d79d509 100644 --- a/pkg/controller/kube_controller.go +++ b/pkg/controller/kube_controller.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net/url" "strings" "time" @@ -76,10 +77,10 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { return fmt.Errorf("getting cluster version: %w", err) } - // Get latest stable version - latest, err := getLatestStableVersion(s.channel) + // Get latest version from specified channel + latest, err := getLatestVersion(s.channel) if err != nil { - return fmt.Errorf("fetching latest stable version: %w", err) + return fmt.Errorf("fetching latest version from channel %s: %w", s.channel, err) } latestSemVer := semver.Parse(latest) @@ -101,20 +102,23 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { s.log.WithFields(logrus.Fields{ "currentVersion": currentSemVerNoMeta, - "latestStable": latestSemVerNoMeta, + "latestVersion": latestSemVerNoMeta, "channel": s.channel, }).Info("Cluster version check complete") return nil } -func getLatestStableVersion(channel string) (string, error) { +func getLatestVersion(channel string) (string, error) { if !strings.HasSuffix(channel, ".txt") { channel += ".txt" } - // We don't need a `/` here as its should be in the channelURLSuffix - channelURL := fmt.Sprintf("%s%s", channelURLSuffix, channel) + // Use url.JoinPath to safely join the base URL and channel path + channelURL, err := url.JoinPath(channelURLSuffix, channel) + if err != nil { + return "", fmt.Errorf("failed to join channel URL: %w", err) + } client := retryablehttp.NewClient() client.RetryMax = 3 @@ -125,14 +129,27 @@ func getLatestStableVersion(channel string) (string, error) { resp, err := client.Get(channelURL) if err != nil { - return "", err + return "", fmt.Errorf("failed to fetch from channel URL %s: %w", channelURL, err) + } + defer func() { + if cerr := resp.Body.Close(); cerr != nil { + fmt.Printf("warning: failed to close response body: %v\n", cerr) + } + }() + + if resp.StatusCode != 200 { + return "", fmt.Errorf("unexpected status code %d when fetching channel %s", resp.StatusCode, channel) } - defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - return "", err + return "", fmt.Errorf("failed to read response body: %w", err) + } + + version := strings.TrimSpace(string(body)) + if version == "" { + return "", fmt.Errorf("empty version returned from channel %s", channel) } - return strings.TrimSpace(string(body)), nil + return version, nil } From ae1ad29e4de349887ea2a32e33d5a950a75f5881 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Fri, 1 Aug 2025 11:35:32 +0200 Subject: [PATCH 6/6] some improvements --- cmd/app/app.go | 9 ++++-- docs/new_features.md | 29 ++++++++++++++---- pkg/controller/kube_controller.go | 49 ++++++++++++++++++++++++------- 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/cmd/app/app.go b/cmd/app/app.go index 9ba174c5..9cced0d9 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -131,8 +131,13 @@ func NewCommand(ctx context.Context) *cobra.Command { opts.KubeInterval, opts.KubeChannel, ) - if err := mgr.Add(kubeController); err != nil { - return err + + // Only add to manager if controller was created (channel was specified) + if kubeController != nil { + if err := mgr.Add(kubeController); err != nil { + return err + } + log.WithField("channel", opts.KubeChannel).Info("Kubernetes version checking enabled") } // Start the manager and all controllers diff --git a/docs/new_features.md b/docs/new_features.md index 60641435..e6560d7c 100644 --- a/docs/new_features.md +++ b/docs/new_features.md @@ -29,15 +29,34 @@ version_checker_is_latest_kube_version{current_version="1.28.2", latest_version= - Value `1`: Cluster is running the latest version from the specified channel - Value `0`: Cluster is not running the latest version (update available) +### Supported Channels + +version-checker uses official Kubernetes release channels: + +- `stable` - Latest stable Kubernetes release (recommended) +- `latest` - Latest Kubernetes release (including pre-releases) +- `latest-1.28` - Latest patch for Kubernetes 1.28.x +- `latest-1.27` - Latest patch for Kubernetes 1.27.x + ### Examples ```bash -# Use the default stable channel -./version-checker +# Check against latest stable Kubernetes +version-checker --kube-version-channel=stable + +# Check against latest Kubernetes (including alpha/beta) +version-checker --kube-version-channel=latest -# Monitor against the latest channel -./version-checker --kube-channel=latest +# Check against latest 1.28.x patch +version-checker --kube-version-channel=latest-1.28 # Monitor against a specific version channel with custom interval ./version-checker --kube-channel=stable-1.28 --kube-interval=1h -``` \ No newline at end of file +``` + +### Managed Kubernetes Support + +Works with all managed Kubernetes services: +- **Amazon EKS**: Compares `v1.28.2-eks-abc123` against upstream `v1.28.2` +- **Google GKE**: Compares `v1.28.2-gke.1034000` against upstream `v1.28.2` +- **Azure AKS**: Compares `v1.28.2-aks-xyz789` against upstream `v1.28.2` \ No newline at end of file diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go index 2d79d509..5246458e 100644 --- a/pkg/controller/kube_controller.go +++ b/pkg/controller/kube_controller.go @@ -35,9 +35,14 @@ func NewKubeReconciler( interval time.Duration, channel string, ) *ClusterVersionScheduler { + // If no channel is specified, return nil to indicate disabled + if channel == "" { + log.Info("Kubernetes version checking disabled (no channel specified)") + return nil + } return &ClusterVersionScheduler{ - log: log, + log: log.WithField("channel", channel), client: kubernetes.NewForConfigOrDie(config), interval: interval, metrics: metrics, @@ -47,7 +52,7 @@ func NewKubeReconciler( func (s *ClusterVersionScheduler) Start(ctx context.Context) error { go s.runScheduler(ctx) - return s.reconcile(ctx) + return s.reconcile() } func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { @@ -63,14 +68,14 @@ func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { s.log.Info("ClusterVersionScheduler stopping") return case <-ticker.C: - if err := s.reconcile(ctx); err != nil { + if err := s.reconcile(); err != nil { s.log.Error(err, "Failed to reconcile cluster version") } } } } -func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { +func (s *ClusterVersionScheduler) reconcile() error { // Get current cluster version current, err := s.client.Discovery().ServerVersion() if err != nil { @@ -110,11 +115,21 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { } func getLatestVersion(channel string) (string, error) { + // Always use upstream Kubernetes channels - this is the authoritative source + // Platform detection is kept for logging purposes only + return getLatestVersionFromUpstream(channel) +} + +func getLatestVersionFromUpstream(channel string) (string, error) { + // Validate channel - only allow known Kubernetes channels + if !isValidKubernetesChannel(channel) { + return "", fmt.Errorf("unsupported channel: %s. Valid channels: stable, latest, latest-1.xx", channel) + } + if !strings.HasSuffix(channel, ".txt") { channel += ".txt" } - // Use url.JoinPath to safely join the base URL and channel path channelURL, err := url.JoinPath(channelURLSuffix, channel) if err != nil { return "", fmt.Errorf("failed to join channel URL: %w", err) @@ -124,18 +139,13 @@ func getLatestVersion(channel string) (string, error) { client.RetryMax = 3 client.RetryWaitMin = 1 * time.Second client.RetryWaitMax = 30 * time.Second - // Optional: Log using your own logrus/logr logger client.Logger = nil resp, err := client.Get(channelURL) if err != nil { return "", fmt.Errorf("failed to fetch from channel URL %s: %w", channelURL, err) } - defer func() { - if cerr := resp.Body.Close(); cerr != nil { - fmt.Printf("warning: failed to close response body: %v\n", cerr) - } - }() + defer resp.Body.Close() if resp.StatusCode != 200 { return "", fmt.Errorf("unexpected status code %d when fetching channel %s", resp.StatusCode, channel) @@ -153,3 +163,20 @@ func getLatestVersion(channel string) (string, error) { return version, nil } + +func isValidKubernetesChannel(channel string) bool { + // Only allow official Kubernetes channels + validChannels := []string{"stable", "latest"} + + // Allow latest-X.Y format + if strings.HasPrefix(channel, "latest-1.") { + return true + } + + for _, valid := range validChannels { + if channel == valid { + return true + } + } + return false +}