From 5cd90ae490cd24ca7f7a95363bde9fc62455398b Mon Sep 17 00:00:00 2001 From: Johann Wagner Date: Wed, 19 Jun 2019 12:28:17 +0200 Subject: [PATCH] update-agent: Added reboot-wait parameter This adds a reboot-wait parameter, which waits, after the last pod was terminated, a fixed amount of time to finalize operations before reboot. This solves some problems with storage provisioners like rook. --- cmd/update-agent/main.go | 6 +++++- pkg/agent/agent.go | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cmd/update-agent/main.go b/cmd/update-agent/main.go index 252c6fa3..5ba41060 100644 --- a/cmd/update-agent/main.go +++ b/cmd/update-agent/main.go @@ -17,6 +17,7 @@ var ( node = flag.String("node", "", "Kubernetes node name") printVersion = flag.Bool("version", false, "Print version and exit") reapTimeout = flag.Int("grace-period", 600, "Period of time in seconds given to a pod to terminate when rebooting for an update") + rebootWait = flag.Int("reboot-wait", 0, "Period of time in seconds waiting after last pod deletion for reboot") ) func main() { @@ -37,7 +38,10 @@ func main() { } rt := time.Duration(*reapTimeout) * time.Second - a, err := agent.New(*node, rt) + rw := time.Duration(*rebootWait) * time.Second + + glog.Infof("Waiting %v for reboot", rw) + a, err := agent.New(*node, rt, rw) if err != nil { glog.Fatalf("Failed to initialize %s: %v", os.Args[0], err) } diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index ae0f7801..f2654621 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -29,6 +29,7 @@ type Klocksmith struct { ue *updateengine.Client lc *login1.Conn reapTimeout time.Duration + rebootWait time.Duration } const defaultPollInterval = 10 * time.Second @@ -40,7 +41,7 @@ var ( }).AsSelector() ) -func New(node string, reapTimeout time.Duration) (*Klocksmith, error) { +func New(node string, reapTimeout time.Duration, rebootWait time.Duration) (*Klocksmith, error) { // set up kubernetes in-cluster client kc, err := 
k8sutil.GetClient("") if err != nil { @@ -62,7 +63,7 @@ func New(node string, reapTimeout time.Duration) (*Klocksmith, error) { return nil, fmt.Errorf("error establishing connection to logind dbus: %v", err) } - return &Klocksmith{node, kc, nc, ue, lc, reapTimeout}, nil + return &Klocksmith{node, kc, nc, ue, lc, reapTimeout, rebootWait}, nil } // Run starts the agent to listen for an update_engine reboot signal and react @@ -220,6 +221,11 @@ func (k *Klocksmith) process(stop <-chan struct{}) error { } wg.Wait() + // We wait a little bit more time to perform finalizing operations + // This solves problems with some storage provisioners like rook. + glog.Infof("Waiting for finalizing operations, waiting %v", k.rebootWait) + time.Sleep(k.rebootWait) + glog.Info("Node drained, rebooting") // reboot