From 4ecc09d3948173bd76ce7d6f05212289289213e2 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 22 Jan 2016 10:46:08 -0500 Subject: [PATCH] lib: Allow zclient do-over of connect on initial attempt When a protocol is attempting to connect to the zebra daemon through its socket, if the initial attempt fails, give it a few more attempts before giving up and leaving the daemon in a bizarre state. This problem was found by Ashley Penney, and Ashley was of immense help in debugging and testing the fix for this issue. Signed-off-by: Donald Sharp Tested-by: Ashley Penney --- lib/zclient.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/zclient.c b/lib/zclient.c index 351bfa99d1..8279fc70a1 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -202,6 +202,7 @@ zclient_socket(void) ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv)); if (ret < 0) { + zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno); close (sock); return -1; } @@ -237,6 +238,7 @@ zclient_socket_un (const char *path) ret = connect (sock, (struct sockaddr *) &addr, len); if (ret < 0) { + zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno); close (sock); return -1; } @@ -443,11 +445,23 @@ zclient_start (struct zclient *zclient) if (zclient->t_connect) return 0; - if (zclient_socket_connect(zclient) < 0) + /* + * If we fail to connect to the socket on initialization, + * Let's wait a second and see if we can reconnect. + * Cause if we don't connect, we never attempt to + * reconnect. On startup if zebra is slow we + * can get into this situation. + */ + while (zclient_socket_connect(zclient) < 0 && zclient->fail < 5) { if (zclient_debug) zlog_debug ("zclient connection fail"); zclient->fail++; + sleep (1); + } + + if (zclient->sock < 0) + { zclient_event (ZCLIENT_CONNECT, zclient); return -1; } -- 2.39.5