From cc53b605e6b200caa0654344771e532cb880d693 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Mon, 16 Sep 2019 13:47:50 -0400 Subject: [PATCH] watchfrr: Allow end users to turn off watchfrr for a particular daemon Allow an end user who is debugging behavior, with, say, gdb, to turn off watchfrr and its attempts to keep control of a daemon's up/responsiveness With code change: donna.cumulusnetworks.com# show watchfrr watchfrr global phase: Idle zebra Up bgpd Up/Ignoring Timeout staticd Up Now grab bgpd with gdb: sharpd@donna ~/frr4> date ; sudo gdb -p 27893 Mon 16 Sep 2019 01:44:57 PM EDT GNU gdb (GDB) Fedora 8.3-6.fc30 Copyright (C) 2019 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "x86_64-redhat-linux-gnu". Type "show configuration" for configuration details. For bug reporting instructions, please see: . Find the GDB manual and other documentation resources online at: . For help, type "help". Type "apropos word" to search for commands related to "word". Attaching to process 27893 [New LWP 27894] [New LWP 27895] [New LWP 27896] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". 
0x00007f1787a3e5c7 in poll () from /lib64/libc.so.6 Missing separate debuginfos, use: dnf debuginfo-install glibc-2.29-15.fc30.x86_64 gperftools-libs-2.7-5.fc30.x86_64 json-c-0.13.1-4.fc30.x86_64 libcap-2.26-5.fc30.x86_64 libgcc-9.1.1-1.fc30.x86_64 libgcrypt-1.8.4-3.fc30.x86_64 libgpg-error-1.33-2.fc30.x86_64 libstdc++-9.1.1-1.fc30.x86_64 libxcrypt-4.4.6-2.fc30.x86_64 libyang-0.16.105-1.fc30.x86_64 lua-libs-5.3.5-5.fc30.x86_64 lz4-libs-1.8.3-2.fc30.x86_64 pcre-8.43-2.fc30.x86_64 xz-libs-5.2.4-5.fc30.x86_64 (gdb) In another window we can see when watchfrr thinks it's not responding: donna.cumulusnetworks.com# show watchfrr watchfrr global phase: Idle zebra Up bgpd Unresponsive/Ignoring Timeout staticd Up Finally exit gdb and watchfrr now believes bgpd is good to go again: donna.cumulusnetworks.com# show watchfrr watchfrr global phase: Idle zebra Up bgpd Up/Ignoring Timeout staticd Up Signed-off-by: Donald Sharp --- watchfrr/subdir.am | 3 +++ watchfrr/watchfrr.c | 33 ++++++++++++++++++++++++++++++++- watchfrr/watchfrr.h | 2 ++ watchfrr/watchfrr_vty.c | 20 ++++++++++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/watchfrr/subdir.am b/watchfrr/subdir.am index c27491e55c..30f606c202 100644 --- a/watchfrr/subdir.am +++ b/watchfrr/subdir.am @@ -19,3 +19,6 @@ watchfrr_watchfrr_SOURCES = \ watchfrr/watchfrr_errors.c \ watchfrr/watchfrr_vty.c \ # end + +watchfrr/watchfrr_vty_clippy.c: $(CLIPPY_DEPS) +watchfrr/watchfrr_vty.$(OBJEXT): watchfrr/watchfrr_vty_clippy.c diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index c17d381730..7586718cd4 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -159,6 +159,15 @@ struct daemon { struct thread *t_write; struct daemon *next; struct restart_info restart; + + /* + * For a given daemon, if we've turned on ignore timeouts + * ignore the timeout value and assume everything is ok + * This is for daemon debugging w/ gdb after we have started + * FRR and realize we have something that needs to be 
looked + * at + */ + bool ignore_timeout; }; #define OPTION_MINRESTART 2000 @@ -191,6 +200,25 @@ static void phase_check(void); static void restart_done(struct daemon *dmn); static const char *progname; + +void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore) +{ + struct daemon *dmn; + + for (dmn = gs.daemons; dmn; dmn = dmn->next) { + if (strncmp(dmn->name, dname, strlen(dmn->name)) == 0) + break; + } + + if (dmn) { + dmn->ignore_timeout = ignore; + vty_out(vty, "%s switching to %s\n", dmn->name, + ignore ? "ignore" : "watch"); + } else + vty_out(vty, "%s is not configured for running at the moment", + dname); +} + static void printhelp(FILE *target) { fprintf(target, @@ -961,6 +989,8 @@ static int wakeup_no_answer(struct thread *t_wakeup) dmn->t_wakeup = NULL; dmn->state = DAEMON_UNRESPONSIVE; + if (dmn->ignore_timeout) + return 0; flog_err(EC_WATCHFRR_CONNECTION, "%s state -> unresponsive : no response yet to ping " "sent %ld seconds ago", @@ -1014,7 +1044,8 @@ void watchfrr_status(struct vty *vty) (long)gs.restart.pid); for (dmn = gs.daemons; dmn; dmn = dmn->next) { - vty_out(vty, " %-20s %s\n", dmn->name, state_str[dmn->state]); + vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state], + dmn->ignore_timeout ? 
"/Ignoring Timeout\n" : "\n"); if (dmn->restart.pid) vty_out(vty, " restart running, pid %ld\n", (long)dmn->restart.pid); diff --git a/watchfrr/watchfrr.h b/watchfrr/watchfrr.h index c5f54769bd..ba6e94960f 100644 --- a/watchfrr/watchfrr.h +++ b/watchfrr/watchfrr.h @@ -41,4 +41,6 @@ extern void watchfrr_status(struct vty *vty); */ extern bool check_all_up(void); +extern void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, + bool ignore); #endif /* FRR_WATCHFRR_H */ diff --git a/watchfrr/watchfrr_vty.c b/watchfrr/watchfrr_vty.c index 9b844d67f2..c06cb89382 100644 --- a/watchfrr/watchfrr_vty.c +++ b/watchfrr/watchfrr_vty.c @@ -134,6 +134,23 @@ DEFUN (show_watchfrr, return CMD_SUCCESS; } +#ifndef VTYSH_EXTRACT_PL +#include "watchfrr/watchfrr_vty_clippy.c" +#endif + +DEFPY (watchfrr_ignore_daemon, + watchfrr_ignore_daemon_cmd, + "[no] watchfrr ignore DAEMON$dname", + NO_STR + "Watchfrr Specific sub-command\n" + "Ignore a specified daemon when it does not respond to echo request\n" + "The daemon to ignore\n") +{ + watchfrr_set_ignore_daemon(vty, dname, no ? false : true ); + + return CMD_SUCCESS; +} + void integrated_write_sigchld(int status) { uint8_t reply[4] = {0, 0, 0, CMD_WARNING}; @@ -168,6 +185,9 @@ void watchfrr_vty_init(void) integrated_write_pid = -1; install_element(ENABLE_NODE, &config_write_integrated_cmd); install_element(ENABLE_NODE, &show_debugging_watchfrr_cmd); + + install_element(ENABLE_NODE, &watchfrr_ignore_daemon_cmd); + install_element(CONFIG_NODE, &show_debugging_watchfrr_cmd); install_element(VIEW_NODE, &show_watchfrr_cmd); } -- 2.39.5