summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/user/index.rst1
-rw-r--r--doc/user/subdir.am1
-rw-r--r--doc/user/watchfrr.rst30
-rw-r--r--watchfrr/subdir.am3
-rw-r--r--watchfrr/watchfrr.c37
-rw-r--r--watchfrr/watchfrr.h2
-rw-r--r--watchfrr/watchfrr_vty.c20
7 files changed, 92 insertions, 2 deletions
diff --git a/doc/user/index.rst b/doc/user/index.rst
index 6c3b14e062..416c51f13b 100644
--- a/doc/user/index.rst
+++ b/doc/user/index.rst
@@ -58,6 +58,7 @@ Protocols
vnc
vrrp
bmp
+ watchfrr
########
Appendix
diff --git a/doc/user/subdir.am b/doc/user/subdir.am
index 0f0a8a0774..ce519fbfbf 100644
--- a/doc/user/subdir.am
+++ b/doc/user/subdir.am
@@ -43,6 +43,7 @@ user_RSTFILES = \
doc/user/zebra.rst \
doc/user/bfd.rst \
doc/user/flowspec.rst \
+ doc/user/watchfrr.rst \
# end
EXTRA_DIST += \
diff --git a/doc/user/watchfrr.rst b/doc/user/watchfrr.rst
new file mode 100644
index 0000000000..df04a1e375
--- /dev/null
+++ b/doc/user/watchfrr.rst
@@ -0,0 +1,30 @@
+.. _watchfrr:
+
+********
+WATCHFRR
+********
+
+:abbr:`WATCHFRR` is a daemon that handles failed daemon processes and
+intelligently restarts them as needed.
+
+Starting WATCHFRR
+=================
+
+WATCHFRR is started as part of the normal systemd startup and typically
+does not require any end-user management.
+
+WATCHFRR commands
+=================
+
+.. index:: show watchfrr
+.. clicmd:: show watchfrr
+
+ Give status information about the state of the different daemons being
+ watched by WATCHFRR.
+
+.. index:: [no] watchfrr ignore DAEMON
+.. clicmd:: [no] watchfrr ignore DAEMON
+
+ Tell WATCHFRR to ignore a particular DAEMON if it becomes unresponsive.
+ This is particularly useful when you are a developer and need to debug
+ a working system, without WATCHFRR pulling the rug out from under you.
diff --git a/watchfrr/subdir.am b/watchfrr/subdir.am
index c27491e55c..30f606c202 100644
--- a/watchfrr/subdir.am
+++ b/watchfrr/subdir.am
@@ -19,3 +19,6 @@ watchfrr_watchfrr_SOURCES = \
watchfrr/watchfrr_errors.c \
watchfrr/watchfrr_vty.c \
# end
+
+watchfrr/watchfrr_vty_clippy.c: $(CLIPPY_DEPS)
+watchfrr/watchfrr_vty.$(OBJEXT): watchfrr/watchfrr_vty_clippy.c
diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c
index c17d381730..a6a910a1db 100644
--- a/watchfrr/watchfrr.c
+++ b/watchfrr/watchfrr.c
@@ -159,6 +159,15 @@ struct daemon {
struct thread *t_write;
struct daemon *next;
struct restart_info restart;
+
+ /*
+ * For a given daemon, if ignore_timeout has been turned on,
+ * ignore the timeout value and assume everything is ok.
+ * This is for debugging a daemon w/ gdb after we have started
+ * FRR and realize we have something that needs to be looked
+ * at.
+ */
+ bool ignore_timeout;
};
#define OPTION_MINRESTART 2000
@@ -191,6 +200,25 @@ static void phase_check(void);
static void restart_done(struct daemon *dmn);
static const char *progname;
+
+/*
+ * Set or clear the ignore_timeout flag of the watched daemon named dname
+ * and report the outcome on the given vty.
+ *
+ * vty    - terminal that receives the result message
+ * dname  - daemon name as entered by the operator
+ * ignore - true to set ignore_timeout, false to clear it
+ *
+ * The name has to match a configured daemon exactly.  A comparison limited
+ * to strlen(dmn->name) would accept any input that merely starts with a
+ * daemon name (e.g. "bgpdjunk" would select "bgpd").
+ */
+void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore)
+{
+	struct daemon *dmn;
+
+	for (dmn = gs.daemons; dmn; dmn = dmn->next) {
+		if (strcmp(dmn->name, dname) == 0)
+			break;
+	}
+
+	if (!dmn) {
+		/* Terminate the line like every other vty message does. */
+		vty_out(vty, "%s is not configured for running at the moment\n",
+			dname);
+		return;
+	}
+
+	dmn->ignore_timeout = ignore;
+	vty_out(vty, "%s switching to %s\n", dmn->name,
+		ignore ? "ignore" : "watch");
+}
+
static void printhelp(FILE *target)
{
fprintf(target,
@@ -533,7 +561,9 @@ static int wakeup_init(struct thread *t_wakeup)
static void restart_done(struct daemon *dmn)
{
if (dmn->state != DAEMON_DOWN) {
- zlog_warn("wtf?");
+ zlog_warn(
+ "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
+ dmn->name, state_str[dmn->state]);
return;
}
if (dmn->t_wakeup)
@@ -961,6 +991,8 @@ static int wakeup_no_answer(struct thread *t_wakeup)
dmn->t_wakeup = NULL;
dmn->state = DAEMON_UNRESPONSIVE;
+ if (dmn->ignore_timeout)
+ return 0;
flog_err(EC_WATCHFRR_CONNECTION,
"%s state -> unresponsive : no response yet to ping "
"sent %ld seconds ago",
@@ -1014,7 +1046,8 @@ void watchfrr_status(struct vty *vty)
(long)gs.restart.pid);
for (dmn = gs.daemons; dmn; dmn = dmn->next) {
- vty_out(vty, " %-20s %s\n", dmn->name, state_str[dmn->state]);
+ vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state],
+ dmn->ignore_timeout ? "/Ignoring Timeout\n" : "\n");
if (dmn->restart.pid)
vty_out(vty, " restart running, pid %ld\n",
(long)dmn->restart.pid);
diff --git a/watchfrr/watchfrr.h b/watchfrr/watchfrr.h
index c5f54769bd..ba6e94960f 100644
--- a/watchfrr/watchfrr.h
+++ b/watchfrr/watchfrr.h
@@ -41,4 +41,6 @@ extern void watchfrr_status(struct vty *vty);
*/
extern bool check_all_up(void);
+/*
+ * Look up the watched daemon called dname and set (ignore == true) or
+ * clear (ignore == false) its ignore_timeout flag; the result is
+ * reported on vty.
+ */
+extern void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname,
+ bool ignore);
#endif /* FRR_WATCHFRR_H */
diff --git a/watchfrr/watchfrr_vty.c b/watchfrr/watchfrr_vty.c
index 9b844d67f2..c06cb89382 100644
--- a/watchfrr/watchfrr_vty.c
+++ b/watchfrr/watchfrr_vty.c
@@ -134,6 +134,23 @@ DEFUN (show_watchfrr,
return CMD_SUCCESS;
}
+#ifndef VTYSH_EXTRACT_PL
+#include "watchfrr/watchfrr_vty_clippy.c"
+#endif
+
+/*
+ * CLI: "[no] watchfrr ignore DAEMON"
+ *
+ * Passes the daemon name on to watchfrr_set_ignore_daemon().  The plain
+ * form asks for the daemon to be ignored, the "no" form asks for it to
+ * be watched again.
+ */
+DEFPY (watchfrr_ignore_daemon,
+       watchfrr_ignore_daemon_cmd,
+       "[no] watchfrr ignore DAEMON$dname",
+       NO_STR
+       "Watchfrr Specific sub-command\n"
+       "Ignore a specified daemon when it does not respond to echo request\n"
+       "The daemon to ignore\n")
+{
+	bool ignore = !no;
+
+	watchfrr_set_ignore_daemon(vty, dname, ignore);
+	return CMD_SUCCESS;
+}
+
void integrated_write_sigchld(int status)
{
uint8_t reply[4] = {0, 0, 0, CMD_WARNING};
@@ -168,6 +185,9 @@ void watchfrr_vty_init(void)
integrated_write_pid = -1;
install_element(ENABLE_NODE, &config_write_integrated_cmd);
install_element(ENABLE_NODE, &show_debugging_watchfrr_cmd);
+
+ install_element(ENABLE_NODE, &watchfrr_ignore_daemon_cmd);
+
install_element(CONFIG_NODE, &show_debugging_watchfrr_cmd);
install_element(VIEW_NODE, &show_watchfrr_cmd);
}