Skip to content

kubelet: Wait less for control-plane pods to restart #17510

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions k8s/crds/kops.k8s.io_clusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4115,6 +4115,12 @@ spec:
description: CpuManagerPolicy allows for changing the default
policy of None to static
type: string
crashLoopBackOffMaxContainerRestartPeriod:
description: CrashLoopBackOffMaxContainerRestartPeriod is the
maximum duration the backoff delay can accrue to for container
restarts, minimum 1 second, maximum 300 seconds. If not set,
defaults to the internal crashloopbackoff maximum (300s).
type: string
dockerDisableSharedPID:
description: DockerDisableSharedPID was removed.
type: boolean
Expand Down Expand Up @@ -4568,6 +4574,12 @@ spec:
description: CpuManagerPolicy allows for changing the default
policy of None to static
type: string
crashLoopBackOffMaxContainerRestartPeriod:
description: CrashLoopBackOffMaxContainerRestartPeriod is the
maximum duration the backoff delay can accrue to for container
restarts, minimum 1 second, maximum 300 seconds. If not set,
defaults to the internal crashloopbackoff maximum (300s).
type: string
dockerDisableSharedPID:
description: DockerDisableSharedPID was removed.
type: boolean
Expand Down
6 changes: 6 additions & 0 deletions k8s/crds/kops.k8s.io_instancegroups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,12 @@ spec:
description: CpuManagerPolicy allows for changing the default
policy of None to static
type: string
crashLoopBackOffMaxContainerRestartPeriod:
description: CrashLoopBackOffMaxContainerRestartPeriod is the
maximum duration the backoff delay can accrue to for container
restarts, minimum 1 second, maximum 300 seconds. If not set,
defaults to the internal crashloopbackoff maximum (300s).
type: string
dockerDisableSharedPID:
description: DockerDisableSharedPID was removed.
type: boolean
Expand Down
16 changes: 16 additions & 0 deletions nodeup/pkg/model/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ import (
"path"
"path/filepath"
"strings"
"time"

awsconfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/klog/v2"
Expand Down Expand Up @@ -242,6 +244,9 @@ func buildKubeletComponentConfig(kubeletConfig *kops.KubeletConfigSpec, provider
if providerID != "" {
componentConfig.ProviderID = providerID
}
componentConfig.CrashLoopBackOff = kubelet.CrashLoopBackOffConfig{
MaxContainerRestartPeriod: kubeletConfig.CrashLoopBackOffMaxContainerRestartPeriod,
}
if kubeletConfig.ShutdownGracePeriod != nil {
componentConfig.ShutdownGracePeriod = *kubeletConfig.ShutdownGracePeriod
}
Expand Down Expand Up @@ -648,6 +653,17 @@ func (b *KubeletBuilder) buildKubeletConfigSpec(ctx context.Context) (*kops.Kube

c.ClientCAFile = filepath.Join(b.PathSrvKubernetes(), "ca.crt")

// On control-plane nodes, default to a 1-minute maximum crash-loop backoff so pods restart sooner, especially during the bootstrap sequence.
if b.IsMaster && c.CrashLoopBackOffMaxContainerRestartPeriod == nil {
c.CrashLoopBackOffMaxContainerRestartPeriod = &metav1.Duration{Duration: time.Minute}
}
if c.CrashLoopBackOffMaxContainerRestartPeriod != nil {
if c.FeatureGates == nil {
c.FeatureGates = make(map[string]string)
}
c.FeatureGates["KubeletCrashLoopBackOffMax"] = "true"
}

// Respect any MaxPods value the user sets explicitly.
if (b.NodeupConfig.Networking.AmazonVPC != nil || (b.NodeupConfig.Networking.Cilium != nil && b.NodeupConfig.Networking.Cilium.IPAM == kops.CiliumIpamEni)) && c.MaxPods == nil {
config, err := awsconfig.LoadDefaultConfig(ctx)
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/kops/componentconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ type KubeletConfigSpec struct {
// MemorySwapBehavior defines how swap is used by container workloads.
// Supported values: LimitedSwap, UnlimitedSwap.
MemorySwapBehavior string `json:"memorySwapBehavior,omitempty"`
// CrashLoopBackOffMaxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s).
CrashLoopBackOffMaxContainerRestartPeriod *metav1.Duration `json:"crashLoopBackOffMaxContainerRestartPeriod,omitempty"`
}

// KubeProxyConfig defines the configuration for a proxy
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/kops/v1alpha2/componentconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ type KubeletConfigSpec struct {
// MemorySwapBehavior defines how swap is used by container workloads.
// Supported values: LimitedSwap, UnlimitedSwap.
MemorySwapBehavior string `json:"memorySwapBehavior,omitempty"`
// CrashLoopBackOffMaxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s).
CrashLoopBackOffMaxContainerRestartPeriod *metav1.Duration `json:"crashLoopBackOffMaxContainerRestartPeriod,omitempty"`
}

// KubeProxyConfig defines the configuration for a proxy
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/kops/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/apis/kops/v1alpha3/componentconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ type KubeletConfigSpec struct {
// MemorySwapBehavior defines how swap is used by container workloads.
// Supported values: LimitedSwap, UnlimitedSwap.
MemorySwapBehavior string `json:"memorySwapBehavior,omitempty"`
// CrashLoopBackOffMaxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s).
CrashLoopBackOffMaxContainerRestartPeriod *metav1.Duration `json:"crashLoopBackOffMaxContainerRestartPeriod,omitempty"`
}

// KubeProxyConfig defines the configuration for a proxy
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/kops/v1alpha3/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/apis/kops/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions pkg/apis/kops/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"regexp"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws/arn"
"github.com/blang/semver/v4"
Expand Down Expand Up @@ -966,6 +967,13 @@ func validateKubelet(k *kops.KubeletConfigSpec, c *kops.Cluster, kubeletPath *fi
}
}

containerRestartPeriod := k.CrashLoopBackOffMaxContainerRestartPeriod
if containerRestartPeriod != nil {
if containerRestartPeriod.Duration < time.Second || containerRestartPeriod.Duration > 300*time.Second {
allErrs = append(allErrs, field.Invalid(kubeletPath.Child("crashLoopBackOffMaxContainerRestartPeriod"), containerRestartPeriod.String(), "crashLoopBackOffMaxContainerRestartPeriod must be a value between 1s and 300s"))
}
}

if k.MemorySwapBehavior != "" {
allErrs = append(allErrs, IsValidValue(kubeletPath.Child("memorySwapBehavior"), &k.MemorySwapBehavior, []string{"LimitedSwap", "UnlimitedSwap"})...)
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/kops/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading