From 26931816c42425c4408a113a19a88db7439470e7 Mon Sep 17 00:00:00 2001
From: Dong Chen
Date: Tue, 15 Mar 2016 16:48:33 -0700
Subject: [PATCH] Manager should retry EventMonitoring on failure.

Signed-off-by: Dong Chen
---
 cluster/engine.go | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/cluster/engine.go b/cluster/engine.go
index 645804b174..6e72273100 100644
--- a/cluster/engine.go
+++ b/cluster/engine.go
@@ -183,10 +183,13 @@ func (e *Engine) StartMonitorEvents() {
 	e.client.StartMonitorEvents(e.handler, ec)
 
 	go func() {
-		if err := <-ec; err != nil && !strings.Contains(err.Error(), "EOF") {
-			log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Error monitoring events: %s", err)
-		} else if err != nil {
-			log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Debug("EOF monitoring events, restarting")
+		if err := <-ec; err != nil {
+			log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Error monitoring events: %s.", err)
+			if !strings.Contains(err.Error(), "EOF") {
+				// failing node reconnect should use back-off strategy
+				<-e.refreshDelayer.Wait(e.failureCount)
+			}
+			log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Restart event monitoring.")
 			e.StartMonitorEvents()
 		}
 		close(ec)