From 0402ae95d9a02b41afc0e8c22ead4038f5620df0 Mon Sep 17 00:00:00 2001 From: Ciprian Hacman Date: Tue, 22 Jul 2025 09:28:32 +0300 Subject: [PATCH] kubelet: Wait less for control-plane pods to restart --- k8s/crds/kops.k8s.io_clusters.yaml | 12 ++++++++++++ k8s/crds/kops.k8s.io_instancegroups.yaml | 6 ++++++ nodeup/pkg/model/kubelet.go | 16 ++++++++++++++++ pkg/apis/kops/componentconfig.go | 2 ++ pkg/apis/kops/v1alpha2/componentconfig.go | 2 ++ .../kops/v1alpha2/zz_generated.conversion.go | 2 ++ pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go | 5 +++++ pkg/apis/kops/v1alpha3/componentconfig.go | 2 ++ .../kops/v1alpha3/zz_generated.conversion.go | 2 ++ pkg/apis/kops/v1alpha3/zz_generated.deepcopy.go | 5 +++++ pkg/apis/kops/validation/validation.go | 8 ++++++++ pkg/apis/kops/zz_generated.deepcopy.go | 5 +++++ 12 files changed, 67 insertions(+) diff --git a/k8s/crds/kops.k8s.io_clusters.yaml b/k8s/crds/kops.k8s.io_clusters.yaml index 1aff75953996f..82cfb2f67a15a 100644 --- a/k8s/crds/kops.k8s.io_clusters.yaml +++ b/k8s/crds/kops.k8s.io_clusters.yaml @@ -4115,6 +4115,12 @@ spec: description: CpuManagerPolicy allows for changing the default policy of None to static type: string + crashLoopBackOffMaxContainerRestartPeriod: + description: CrashLoopBackOffMaxContainerRestartPeriod is the + maximum duration the backoff delay can accrue to for container + restarts, minimum 1 second, maximum 300 seconds. If not set, + defaults to the internal crashloopbackoff maximum (300s). + type: string dockerDisableSharedPID: description: DockerDisableSharedPID was removed. type: boolean @@ -4568,6 +4574,12 @@ spec: description: CpuManagerPolicy allows for changing the default policy of None to static type: string + crashLoopBackOffMaxContainerRestartPeriod: + description: CrashLoopBackOffMaxContainerRestartPeriod is the + maximum duration the backoff delay can accrue to for container + restarts, minimum 1 second, maximum 300 seconds. If not set, + defaults to the internal crashloopbackoff maximum (300s). + type: string dockerDisableSharedPID: description: DockerDisableSharedPID was removed. type: boolean diff --git a/k8s/crds/kops.k8s.io_instancegroups.yaml b/k8s/crds/kops.k8s.io_instancegroups.yaml index c90e8426a4cdb..43067fe06b398 100644 --- a/k8s/crds/kops.k8s.io_instancegroups.yaml +++ b/k8s/crds/kops.k8s.io_instancegroups.yaml @@ -506,6 +506,12 @@ spec: description: CpuManagerPolicy allows for changing the default policy of None to static type: string + crashLoopBackOffMaxContainerRestartPeriod: + description: CrashLoopBackOffMaxContainerRestartPeriod is the + maximum duration the backoff delay can accrue to for container + restarts, minimum 1 second, maximum 300 seconds. If not set, + defaults to the internal crashloopbackoff maximum (300s). + type: string dockerDisableSharedPID: description: DockerDisableSharedPID was removed. type: boolean diff --git a/nodeup/pkg/model/kubelet.go b/nodeup/pkg/model/kubelet.go index a42d538f92233..43e46f996e7bf 100644 --- a/nodeup/pkg/model/kubelet.go +++ b/nodeup/pkg/model/kubelet.go @@ -26,10 +26,12 @@ import ( "path" "path/filepath" "strings" + "time" awsconfig "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/klog/v2" @@ -242,6 +244,9 @@ func buildKubeletComponentConfig(kubeletConfig *kops.KubeletConfigSpec, provider if providerID != "" { componentConfig.ProviderID = providerID } + componentConfig.CrashLoopBackOff = kubelet.CrashLoopBackOffConfig{ + MaxContainerRestartPeriod: kubeletConfig.CrashLoopBackOffMaxContainerRestartPeriod, + } if kubeletConfig.ShutdownGracePeriod != nil { componentConfig.ShutdownGracePeriod = *kubeletConfig.ShutdownGracePeriod } @@ -648,6 +653,17 @@ func (b *KubeletBuilder) buildKubeletConfigSpec(ctx context.Context) (*kops.Kube c.ClientCAFile = filepath.Join(b.PathSrvKubernetes(), "ca.crt") + // Wait less for pods to restart, especially during the bootstrap sequence + if b.IsMaster && c.CrashLoopBackOffMaxContainerRestartPeriod == nil { + c.CrashLoopBackOffMaxContainerRestartPeriod = &metav1.Duration{Duration: time.Minute} + } + if c.CrashLoopBackOffMaxContainerRestartPeriod != nil { + if c.FeatureGates == nil { + c.FeatureGates = make(map[string]string) + } + c.FeatureGates["KubeletCrashLoopBackOffMax"] = "true" + } + // Respect any MaxPods value the user sets explicitly. if (b.NodeupConfig.Networking.AmazonVPC != nil || (b.NodeupConfig.Networking.Cilium != nil && b.NodeupConfig.Networking.Cilium.IPAM == kops.CiliumIpamEni)) && c.MaxPods == nil { config, err := awsconfig.LoadDefaultConfig(ctx) diff --git a/pkg/apis/kops/componentconfig.go b/pkg/apis/kops/componentconfig.go index 974c4e5948ec4..7044ae08fc0ef 100644 --- a/pkg/apis/kops/componentconfig.go +++ b/pkg/apis/kops/componentconfig.go @@ -243,6 +243,8 @@ type KubeletConfigSpec struct { // MemorySwapBehavior defines how swap is used by container workloads. // Supported values: LimitedSwap, "UnlimitedSwap. MemorySwapBehavior string `json:"memorySwapBehavior,omitempty"` + // CrashLoopBackOffMaxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s). + CrashLoopBackOffMaxContainerRestartPeriod *metav1.Duration `json:"crashLoopBackOffMaxContainerRestartPeriod,omitempty"` } // KubeProxyConfig defines the configuration for a proxy diff --git a/pkg/apis/kops/v1alpha2/componentconfig.go b/pkg/apis/kops/v1alpha2/componentconfig.go index cd6d87f50a114..f651242e7e329 100644 --- a/pkg/apis/kops/v1alpha2/componentconfig.go +++ b/pkg/apis/kops/v1alpha2/componentconfig.go @@ -243,6 +243,8 @@ type KubeletConfigSpec struct { // MemorySwapBehavior defines how swap is used by container workloads. // Supported values: LimitedSwap, "UnlimitedSwap. MemorySwapBehavior string `json:"memorySwapBehavior,omitempty"` + // CrashLoopBackOffMaxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s). + CrashLoopBackOffMaxContainerRestartPeriod *metav1.Duration `json:"crashLoopBackOffMaxContainerRestartPeriod,omitempty"` } // KubeProxyConfig defines the configuration for a proxy diff --git a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go index 0365638fe7b18..801b704ec95f0 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go @@ -5714,6 +5714,7 @@ func autoConvert_v1alpha2_KubeletConfigSpec_To_kops_KubeletConfigSpec(in *Kubele out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods out.MemorySwapBehavior = in.MemorySwapBehavior + out.CrashLoopBackOffMaxContainerRestartPeriod = in.CrashLoopBackOffMaxContainerRestartPeriod return nil } @@ -5818,6 +5819,7 @@ func autoConvert_kops_KubeletConfigSpec_To_v1alpha2_KubeletConfigSpec(in *kops.K out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods out.MemorySwapBehavior = in.MemorySwapBehavior + out.CrashLoopBackOffMaxContainerRestartPeriod = in.CrashLoopBackOffMaxContainerRestartPeriod return nil } diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go index dab1ba487501b..a811db1c860bf 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go @@ -4388,6 +4388,11 @@ func (in *KubeletConfigSpec) DeepCopyInto(out *KubeletConfigSpec) { *out = new(v1.Duration) **out = **in } + if in.CrashLoopBackOffMaxContainerRestartPeriod != nil { + in, out := &in.CrashLoopBackOffMaxContainerRestartPeriod, &out.CrashLoopBackOffMaxContainerRestartPeriod + *out = new(v1.Duration) + **out = **in + } return } diff --git a/pkg/apis/kops/v1alpha3/componentconfig.go b/pkg/apis/kops/v1alpha3/componentconfig.go index ea79995cc6343..a5b5875cc7f7b 100644 --- a/pkg/apis/kops/v1alpha3/componentconfig.go +++ b/pkg/apis/kops/v1alpha3/componentconfig.go @@ -241,6 +241,8 @@ type KubeletConfigSpec struct { // MemorySwapBehavior defines how swap is used by container workloads. // Supported values: LimitedSwap, "UnlimitedSwap. MemorySwapBehavior string `json:"memorySwapBehavior,omitempty"` + // CrashLoopBackOffMaxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s). + CrashLoopBackOffMaxContainerRestartPeriod *metav1.Duration `json:"crashLoopBackOffMaxContainerRestartPeriod,omitempty"` } // KubeProxyConfig defines the configuration for a proxy diff --git a/pkg/apis/kops/v1alpha3/zz_generated.conversion.go b/pkg/apis/kops/v1alpha3/zz_generated.conversion.go index cbf6974d27981..b69d627c5396d 100644 --- a/pkg/apis/kops/v1alpha3/zz_generated.conversion.go +++ b/pkg/apis/kops/v1alpha3/zz_generated.conversion.go @@ -6109,6 +6109,7 @@ func autoConvert_v1alpha3_KubeletConfigSpec_To_kops_KubeletConfigSpec(in *Kubele out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods out.MemorySwapBehavior = in.MemorySwapBehavior + out.CrashLoopBackOffMaxContainerRestartPeriod = in.CrashLoopBackOffMaxContainerRestartPeriod return nil } @@ -6213,6 +6214,7 @@ func autoConvert_kops_KubeletConfigSpec_To_v1alpha3_KubeletConfigSpec(in *kops.K out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods out.MemorySwapBehavior = in.MemorySwapBehavior + out.CrashLoopBackOffMaxContainerRestartPeriod = in.CrashLoopBackOffMaxContainerRestartPeriod return nil } diff --git a/pkg/apis/kops/v1alpha3/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha3/zz_generated.deepcopy.go index bf40f83e0ea6b..c69b553aab672 100644 --- a/pkg/apis/kops/v1alpha3/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha3/zz_generated.deepcopy.go @@ -4367,6 +4367,11 @@ func (in *KubeletConfigSpec) DeepCopyInto(out *KubeletConfigSpec) { *out = new(v1.Duration) **out = **in } + if in.CrashLoopBackOffMaxContainerRestartPeriod != nil { + in, out := &in.CrashLoopBackOffMaxContainerRestartPeriod, &out.CrashLoopBackOffMaxContainerRestartPeriod + *out = new(v1.Duration) + **out = **in + } return } diff --git a/pkg/apis/kops/validation/validation.go b/pkg/apis/kops/validation/validation.go index f9855f2eb4649..adc4fbafd7611 100644 --- a/pkg/apis/kops/validation/validation.go +++ b/pkg/apis/kops/validation/validation.go @@ -25,6 +25,7 @@ import ( "path/filepath" "regexp" "strings" + "time" "github.com/aws/aws-sdk-go-v2/aws/arn" "github.com/blang/semver/v4" @@ -966,6 +967,13 @@ func validateKubelet(k *kops.KubeletConfigSpec, c *kops.Cluster, kubeletPath *fi } } + containerRestartPeriod := k.CrashLoopBackOffMaxContainerRestartPeriod + if containerRestartPeriod != nil { + if containerRestartPeriod.Duration < time.Second || containerRestartPeriod.Duration > 300*time.Second { + allErrs = append(allErrs, field.Invalid(kubeletPath.Child("crashLoopBackOffMaxContainerRestartPeriod"), containerRestartPeriod.String(), "crashLoopBackOffMaxContainerRestartPeriod must be a value between 1s and 300s")) + } + } + if k.MemorySwapBehavior != "" { allErrs = append(allErrs, IsValidValue(kubeletPath.Child("memorySwapBehavior"), &k.MemorySwapBehavior, []string{"LimitedSwap", "UnlimitedSwap"})...) } diff --git a/pkg/apis/kops/zz_generated.deepcopy.go b/pkg/apis/kops/zz_generated.deepcopy.go index eec0a833b88a7..f46a917237381 100644 --- a/pkg/apis/kops/zz_generated.deepcopy.go +++ b/pkg/apis/kops/zz_generated.deepcopy.go @@ -4470,6 +4470,11 @@ func (in *KubeletConfigSpec) DeepCopyInto(out *KubeletConfigSpec) { *out = new(v1.Duration) **out = **in } + if in.CrashLoopBackOffMaxContainerRestartPeriod != nil { + in, out := &in.CrashLoopBackOffMaxContainerRestartPeriod, &out.CrashLoopBackOffMaxContainerRestartPeriod + *out = new(v1.Duration) + **out = **in + } return }