From f168b713002ed079a7d00f880567e68fe5ab547b Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Thu, 1 Jun 2017 14:16:26 +0200 Subject: watchfrr: remove abundance of modes This leaves what were previously modes 0 (monitor-only) and 3 (restart daemons individually, but restart everything if zebra is restarted). Signed-off-by: David Lamparter --- watchfrr/watchfrr.c | 273 ++++++++++++---------------------------------------- 1 file changed, 63 insertions(+), 210 deletions(-) (limited to 'watchfrr') diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index 1aa25b41d0..4708059c7d 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -66,21 +66,7 @@ /* Needs to be global, referenced somewhere inside libfrr. */ struct thread_master *master; -typedef enum { - MODE_MONITOR = 0, - MODE_GLOBAL_RESTART, - MODE_SEPARATE_RESTART, - MODE_PHASED_ZEBRA_RESTART, - MODE_PHASED_ALL_RESTART -} watch_mode_t; - -static const char *mode_str[] = { - "monitor", - "global restart", - "individual daemon restart", - "phased zebra restart", - "phased global restart for any failure", -}; +static bool watch_only = false; typedef enum { PHASE_NONE = 0, @@ -112,7 +98,6 @@ struct restart_info { }; static struct global_state { - watch_mode_t mode; restart_phase_t phase; struct thread *t_phase_hanging; const char *vtydir; @@ -134,7 +119,6 @@ static struct global_state { int numpids; int numdown; /* # of daemons that are not UP or UNRESPONSIVE */ } gs = { - .mode = MODE_MONITOR, .phase = PHASE_NONE, .vtydir = VTYDIR, .period = 1000 * DEFAULT_PERIOD, @@ -176,6 +160,7 @@ struct daemon { #define OPTION_MINRESTART 2000 #define OPTION_MAXRESTART 2001 +#define OPTION_DRY 2002 static const struct option longopts[] = { {"daemon", no_argument, NULL, 'd'}, @@ -188,10 +173,8 @@ static const struct option longopts[] = { {"restart", required_argument, NULL, 'r'}, {"start-command", required_argument, NULL, 's'}, {"kill-command", required_argument, NULL, 'k'}, - {"restart-all", required_argument, NULL, 
'R'}, - {"all-restart", no_argument, NULL, 'a'}, - {"always-all-restart", no_argument, NULL, 'A'}, {"unresponsive-restart", no_argument, NULL, 'z'}, + {"dry", no_argument, NULL, OPTION_DRY}, {"min-restart-interval", required_argument, NULL, OPTION_MINRESTART}, {"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART}, {"pid-file", required_argument, NULL, 'p'}, @@ -217,46 +200,13 @@ It then repeatedly sends echo commands over that socket to determine whether\n\ the daemon is responsive. If the daemon crashes, we will receive an EOF\n\ on the socket connection and know immediately that the daemon is down.\n\n\ The daemons to be monitored should be listed on the command line.\n\n\ -This program can run in one of 5 modes:\n\n\ -0. Mode: %s.\n\ - Just monitor and report on status changes. Example:\n\ - %s -d zebra ospfd bgpd\n\n\ -1. Mode: %s.\n\ - Whenever any daemon hangs or crashes, use the given command to restart\n\ - them all. Example:\n\ - %s -dz \\\n\ - -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\ - zebra ospfd\n\n\ -2. Mode: %s.\n\ - When any single daemon hangs or crashes, restart only the daemon that's\n\ - in trouble using the supplied restart command. Example:\n\ - %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\ -3. Mode: %s.\n\ - The same as the previous mode, except that there is special treatment when\n\ - the zebra daemon is in trouble. In that case, a phased restart approach\n\ - is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\ - daemons. Example:\n\ - %s -adz -r '/sbin/service %%s restart' \\\n\ - -s '/sbin/service %%s start' \\\n\ - -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\ -4. Mode: %s.\n\ - This is the same as the previous mode, except that the phased restart\n\ - procedure is used whenever any of the daemons hangs or crashes. 
Example:\n\ - %s -Adz -r '/sbin/service %%s restart' \\\n\ - -s '/sbin/service %%s start' \\\n\ - -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\ -As of this writing, it is believed that mode 2 [%s]\n\ -is not safe, and mode 3 [%s] may not be safe with some of the\n\ -routing daemons.\n\n\ In order to avoid attempting to restart the daemons in a fast loop,\n\ the -m and -M options allow you to control the minimum delay between\n\ restart commands. The minimum restart delay is recalculated each time\n\ a restart is attempted: if the time since the last restart attempt exceeds\n\ twice the -M value, then the restart delay is set to the -m value.\n\ Otherwise, the interval is doubled (but capped at the -M value).\n\n", - progname, mode_str[0], progname, mode_str[1], progname, - mode_str[2], progname, mode_str[3], progname, mode_str[4], - progname, mode_str[2], mode_str[3]); + progname); fprintf(target, "Options:\n\ @@ -285,7 +235,6 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n", -r, --restart Supply a Bourne shell command to use to restart a single\n\ daemon. The command string should include '%%s' where the\n\ name of the daemon should be substituted.\n\ - Note that -r and -R are incompatible.\n\ -s, --start-command\n\ Supply a Bourne shell to command to use to start a single\n\ daemon. The command string should include '%%s' where the\n\ @@ -294,26 +243,15 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n", Supply a Bourne shell to command to use to stop a single\n\ daemon. 
The command string should include '%%s' where the\n\ name of the daemon should be substituted.\n\ --R, --restart-all\n\ - When one or more daemons is down, try to restart everything\n\ - using the Bourne shell command supplied as the argument.\n\ - Note that -r and -R are incompatible.\n\ -z, --unresponsive-restart\n\ When a daemon is unresponsive, treat it as being down for\n\ restart purposes.\n\ --a, --all-restart\n\ - When zebra hangs or crashes, restart all daemons using\n\ - this phased approach: 1. stop all other daemons; 2. restart\n\ - zebra; 3. start other daemons. Requires -r, -s, and -k.\n\ --A, --always-all-restart\n\ - When any daemon (not just zebra) hangs or crashes, use the\n\ - same phased restart mechanism described above for -a.\n\ - Requires -r, -s, and -k.\n\ + --dry Do not start or restart anything, just log.\n\ -p, --pid-file Set process identifier file name\n\ (default is %s).\n\ -b, --blank-string\n\ When the supplied argument string is found in any of the\n\ - various shell command arguments (-r, -s, -k, or -R), replace\n\ + various shell command arguments (-r, -s, or -k), replace\n\ it with a space. 
This is an ugly hack to circumvent problems\n\ passing command-line arguments with embedded spaces.\n\ -v, --version Print program version\n\ @@ -390,15 +328,10 @@ static int restart_kill(struct thread *t_kill) static struct restart_info *find_child(pid_t child) { - if (gs.mode == MODE_GLOBAL_RESTART) { - if (gs.restart.pid == child) - return &gs.restart; - } else { - struct daemon *dmn; - for (dmn = gs.daemons; dmn; dmn = dmn->next) { - if (dmn->restart.pid == child) - return &dmn->restart; - } + struct daemon *dmn; + for (dmn = gs.daemons; dmn; dmn = dmn->next) { + if (dmn->restart.pid == child) + return &dmn->restart; } return NULL; } @@ -887,61 +820,46 @@ static void phase_check(void) static void try_restart(struct daemon *dmn) { - switch (gs.mode) { - case MODE_MONITOR: + if (watch_only) return; - case MODE_GLOBAL_RESTART: - run_job(&gs.restart, "restart", gs.restart_command, 0, 1); - break; - case MODE_SEPARATE_RESTART: - run_job(&dmn->restart, "restart", gs.restart_command, 0, 1); - break; - case MODE_PHASED_ZEBRA_RESTART: - if (dmn != gs.special) { - if ((gs.special->state == DAEMON_UP) - && (gs.phase == PHASE_NONE)) - run_job(&dmn->restart, "restart", - gs.restart_command, 0, 1); - else - zlog_debug( - "%s: postponing restart attempt because master %s daemon " - "not up [%s], or phased restart in progress", - dmn->name, gs.special->name, - state_str[gs.special->state]); - break; - } - /*FALLTHRU*/ - case MODE_PHASED_ALL_RESTART: - if ((gs.phase != PHASE_NONE) || gs.numpids) { + if (dmn != gs.special) { + if ((gs.special->state == DAEMON_UP) + && (gs.phase == PHASE_NONE)) + run_job(&dmn->restart, "restart", gs.restart_command, 0, + 1); + else + zlog_debug( + "%s: postponing restart attempt because master %s daemon " + "not up [%s], or phased restart in progress", + dmn->name, gs.special->name, + state_str[gs.special->state]); + return; + } + + if ((gs.phase != PHASE_NONE) || gs.numpids) { + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug( + "postponing phased 
global restart: restart already in " + "progress [%s], or outstanding child processes [%d]", + phase_str[gs.phase], gs.numpids); + return; + } + /* Is it too soon for a restart? */ + { + struct timeval delay; + if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec + < gs.special->restart.interval) { if (gs.loglevel > LOG_DEBUG + 1) zlog_debug( - "postponing phased global restart: restart already in " - "progress [%s], or outstanding child processes [%d]", - phase_str[gs.phase], gs.numpids); - break; - } - /* Is it too soon for a restart? */ - { - struct timeval delay; - if (time_elapsed(&delay, &gs.special->restart.time) - ->tv_sec - < gs.special->restart.interval) { - if (gs.loglevel > LOG_DEBUG + 1) - zlog_debug( - "postponing phased global restart: " - "elapsed time %ld < retry interval %ld", - (long)delay.tv_sec, - gs.special->restart.interval); - break; - } + "postponing phased global restart: " + "elapsed time %ld < retry interval %ld", + (long)delay.tv_sec, + gs.special->restart.interval); + return; } - run_job(&gs.restart, "restart", gs.restart_command, 0, 1); - break; - default: - zlog_err("error: unknown restart mode %d", gs.mode); - break; } + run_job(&gs.restart, "restart", gs.restart_command, 0, 1); } static int wakeup_unresponsive(struct thread *t_wakeup) @@ -1079,34 +997,19 @@ int main(int argc, char **argv) frr_preinit(&watchfrr_di, argc, argv); progname = watchfrr_di.progname; - frr_opt_add("aAb:dek:l:i:p:r:R:S:s:t:T:z", longopts, ""); + frr_opt_add("b:dek:l:i:p:r:S:s:t:T:z", longopts, ""); gs.restart.name = "all"; while ((opt = frr_getopt(argc, argv, NULL)) != EOF) { switch (opt) { case 0: break; - case 'a': - if ((gs.mode != MODE_MONITOR) - && (gs.mode != MODE_SEPARATE_RESTART)) { - fputs("Ambiguous operating mode selected.\n", - stderr); - frr_help_exit(1); - } - gs.mode = MODE_PHASED_ZEBRA_RESTART; - break; - case 'A': - if ((gs.mode != MODE_MONITOR) - && (gs.mode != MODE_SEPARATE_RESTART)) { - fputs("Ambiguous operating mode 
selected.\n", - stderr); - frr_help_exit(1); - } - gs.mode = MODE_PHASED_ALL_RESTART; - break; case 'b': blankstr = optarg; break; + case OPTION_DRY: + watch_only = true; + break; case 'e': gs.do_ping = 0; break; @@ -1170,12 +1073,6 @@ int main(int argc, char **argv) pidfile = optarg; break; case 'r': - if ((gs.mode == MODE_GLOBAL_RESTART) - || (gs.mode == MODE_SEPARATE_RESTART)) { - fputs("Ambiguous operating mode selected.\n", - stderr); - frr_help_exit(1); - } if (!valid_command(optarg)) { fprintf(stderr, "Invalid restart command, must contain '%%s': %s\n", @@ -1183,23 +1080,6 @@ int main(int argc, char **argv) frr_help_exit(1); } gs.restart_command = optarg; - if (gs.mode == MODE_MONITOR) - gs.mode = MODE_SEPARATE_RESTART; - break; - case 'R': - if (gs.mode != MODE_MONITOR) { - fputs("Ambiguous operating mode selected.\n", - stderr); - frr_help_exit(1); - } - if (strchr(optarg, '%')) { - fprintf(stderr, - "Invalid restart-all arg, must not contain '%%s': %s\n", - optarg); - frr_help_exit(1); - } - gs.restart_command = optarg; - gs.mode = MODE_GLOBAL_RESTART; break; case 's': if (!valid_command(optarg)) { @@ -1245,40 +1125,17 @@ int main(int argc, char **argv) } } - if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR)) { - fputs("Option -z requires a -r or -R restart option.\n", + if (watch_only && (gs.unresponsive_restart || gs.start_command + || gs.stop_command || gs.restart_command)) { + fputs("Options -z/-r/-s/-k make no sense combined with -D.\n", stderr); frr_help_exit(1); } - switch (gs.mode) { - case MODE_MONITOR: - if (gs.restart_command || gs.start_command || gs.stop_command) { - fprintf(stderr, - "No kill/(re)start commands needed for %s mode.\n", - mode_str[gs.mode]); - frr_help_exit(1); - } - break; - case MODE_GLOBAL_RESTART: - case MODE_SEPARATE_RESTART: - if (!gs.restart_command || gs.start_command - || gs.stop_command) { - fprintf(stderr, - "No start/kill commands needed in [%s] mode.\n", - mode_str[gs.mode]); - frr_help_exit(1); - } - 
break; - case MODE_PHASED_ZEBRA_RESTART: - case MODE_PHASED_ALL_RESTART: - if (!gs.restart_command || !gs.start_command - || !gs.stop_command) { - fprintf(stderr, - "Need start, kill, and restart commands in [%s] mode.\n", - mode_str[gs.mode]); - frr_help_exit(1); - } - break; + if (!watch_only + && (!gs.restart_command || !gs.start_command || !gs.stop_command)) { + fprintf(stderr, + "Options -s (start), -k (kill), and -r (restart) are required.\n"); + frr_help_exit(1); } if (blankstr) { @@ -1341,9 +1198,7 @@ int main(int argc, char **argv) gs.daemons = dmn; tail = dmn; - if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) - || (gs.mode == MODE_PHASED_ALL_RESTART)) - && !strcmp(dmn->name, special)) + if (!strcmp(dmn->name, special)) gs.special = dmn; } } @@ -1351,12 +1206,9 @@ int main(int argc, char **argv) fputs("Must specify one or more daemons to monitor.\n", stderr); frr_help_exit(1); } - if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) - || (gs.mode == MODE_PHASED_ALL_RESTART)) - && !gs.special) { - fprintf(stderr, - "In mode [%s], but cannot find master daemon %s\n", - mode_str[gs.mode], special); + if (!watch_only && !gs.special) { + fprintf(stderr, "\"%s\" daemon must be in daemon list\n", + special); frr_help_exit(1); } @@ -1381,8 +1233,9 @@ int main(int argc, char **argv) strcpy(p, dmn->name); p += strlen(p); } - zlog_notice("%s %s watching [%s], mode [%s]", progname, - FRR_VERSION, buf, mode_str[gs.mode]); + zlog_notice("%s %s watching [%s]%s", progname, + FRR_VERSION, buf, + watch_only ? ", monitor mode" : ""); } } -- cgit v1.2.3 From a8cbb8b383ae03fc62997de7060535df813bb9ea Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Thu, 1 Jun 2017 14:17:55 +0200 Subject: watchfrr: remove -e option Why would we not want to PING? 
Signed-off-by: David Lamparter --- watchfrr/watchfrr.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'watchfrr') diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index 4708059c7d..a910600229 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -106,7 +106,6 @@ static struct global_state { long restart_timeout; long min_restart_interval; long max_restart_interval; - int do_ping; struct daemon *daemons; const char *restart_command; const char *start_command; @@ -127,7 +126,6 @@ static struct global_state { .loglevel = DEFAULT_LOGLEVEL, .min_restart_interval = DEFAULT_MIN_RESTART, .max_restart_interval = DEFAULT_MAX_RESTART, - .do_ping = 1, }; typedef enum { @@ -165,7 +163,6 @@ struct daemon { static const struct option longopts[] = { {"daemon", no_argument, NULL, 'd'}, {"statedir", required_argument, NULL, 'S'}, - {"no-echo", no_argument, NULL, 'e'}, {"loglevel", required_argument, NULL, 'l'}, {"interval", required_argument, NULL, 'i'}, {"timeout", required_argument, NULL, 't'}, @@ -213,9 +210,6 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n", -d, --daemon Run in daemon mode. 
In this mode, error messages are sent\n\ to syslog instead of stdout.\n\ -S, --statedir Set the vty socket directory (default is %s)\n\ --e, --no-echo Do not ping the daemons to test responsiveness (this\n\ - option is necessary if the daemons do not support the\n\ - echo command)\n\ -l, --loglevel Set the logging level (default is %d).\n\ The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\ but it can be set higher than %d if extra-verbose debugging\n\ @@ -631,8 +625,7 @@ static void daemon_up(struct daemon *dmn, const char *why) dmn->connect_tries = 0; zlog_notice("%s state -> up : %s", dmn->name, why); daemon_send_ready(); - if (gs.do_ping) - SET_WAKEUP_ECHO(dmn); + SET_WAKEUP_ECHO(dmn); phase_check(); } @@ -997,7 +990,7 @@ int main(int argc, char **argv) frr_preinit(&watchfrr_di, argc, argv); progname = watchfrr_di.progname; - frr_opt_add("b:dek:l:i:p:r:S:s:t:T:z", longopts, ""); + frr_opt_add("b:dk:l:i:p:r:S:s:t:T:z", longopts, ""); gs.restart.name = "all"; while ((opt = frr_getopt(argc, argv, NULL)) != EOF) { @@ -1010,9 +1003,6 @@ int main(int argc, char **argv) case OPTION_DRY: watch_only = true; break; - case 'e': - gs.do_ping = 0; - break; case 'k': if (!valid_command(optarg)) { fprintf(stderr, -- cgit v1.2.3 From 71e7975aaf6af5fdafe6786c1962995390646ee5 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Thu, 1 Jun 2017 14:19:09 +0200 Subject: watchfrr: remove -z option Why would we not want to restart a daemon that's hanging? 
Signed-off-by: David Lamparter --- watchfrr/watchfrr.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'watchfrr') diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index a910600229..be36b2a5db 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -111,7 +111,6 @@ static struct global_state { const char *start_command; const char *stop_command; struct restart_info restart; - int unresponsive_restart; int loglevel; struct daemon *special; /* points to zebra when doing phased restart */ int numdaemons; @@ -170,7 +169,6 @@ static const struct option longopts[] = { {"restart", required_argument, NULL, 'r'}, {"start-command", required_argument, NULL, 's'}, {"kill-command", required_argument, NULL, 'k'}, - {"unresponsive-restart", no_argument, NULL, 'z'}, {"dry", no_argument, NULL, OPTION_DRY}, {"min-restart-interval", required_argument, NULL, OPTION_MINRESTART}, {"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART}, @@ -237,9 +235,6 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n", Supply a Bourne shell to command to use to stop a single\n\ daemon. 
The command string should include '%%s' where the\n\ name of the daemon should be substituted.\n\ --z, --unresponsive-restart\n\ - When a daemon is unresponsive, treat it as being down for\n\ - restart purposes.\n\ --dry Do not start or restart anything, just log.\n\ -p, --pid-file Set process identifier file name\n\ (default is %s).\n\ @@ -882,10 +877,8 @@ static int wakeup_no_answer(struct thread *t_wakeup) "%s state -> unresponsive : no response yet to ping " "sent %ld seconds ago", dmn->name, gs.timeout); - if (gs.unresponsive_restart) { - SET_WAKEUP_UNRESPONSIVE(dmn); - try_restart(dmn); - } + SET_WAKEUP_UNRESPONSIVE(dmn); + try_restart(dmn); return 0; } @@ -990,7 +983,7 @@ int main(int argc, char **argv) frr_preinit(&watchfrr_di, argc, argv); progname = watchfrr_di.progname; - frr_opt_add("b:dk:l:i:p:r:S:s:t:T:z", longopts, ""); + frr_opt_add("b:dk:l:i:p:r:S:s:t:T:", longopts, ""); gs.restart.name = "all"; while ((opt = frr_getopt(argc, argv, NULL)) != EOF) { @@ -1106,18 +1099,15 @@ int main(int argc, char **argv) frr_help_exit(1); } } break; - case 'z': - gs.unresponsive_restart = 1; - break; default: fputs("Invalid option.\n", stderr); frr_help_exit(1); } } - if (watch_only && (gs.unresponsive_restart || gs.start_command - || gs.stop_command || gs.restart_command)) { - fputs("Options -z/-r/-s/-k make no sense combined with -D.\n", + if (watch_only + && (gs.start_command || gs.stop_command || gs.restart_command)) { + fputs("Options -r/-s/-k make no sense combined with -D.\n", stderr); frr_help_exit(1); } -- cgit v1.2.3 From d87ae5cc1a233b12591eff5687f89be281ba0aa8 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 2 Aug 2017 09:10:47 +0200 Subject: doc: update watchfrr manpage Remove -R, -a, -A, -e and -z options. Also remove blocker in the code that refuses to start if --dry is given together with -k / -s / -r. 
Signed-off-by: David Lamparter --- doc/watchfrr.8.in | 152 +++++++++++----------------------------------------- watchfrr/watchfrr.c | 3 +- 2 files changed, 32 insertions(+), 123 deletions(-) (limited to 'watchfrr') diff --git a/doc/watchfrr.8.in b/doc/watchfrr.8.in index 82098e1b0d..033f1efb0f 100644 --- a/doc/watchfrr.8.in +++ b/doc/watchfrr.8.in @@ -20,59 +20,6 @@ daemon's VTY UNIX stream socket, and send echo commands to ensure the daemon responds. When the daemon crashes, EOF is received from the socket, so that watchfrr can react immediately. .PP -This program can run in one of the following 5 modes: -.TP -.B Mode 0: monitor -In this mode, the program serves as a monitor and reports status changes. -.IP -Example usage: watchfrr \-d zebra ospfd bgpd -.TP -.B Mode 1: global restart -In this mode, whenever a daemon hangs or crashes, the given command is used -to restart all watched daemons. -.IP -Example usage: watchfrr \-dz \e -.br --R '/sbin/service zebra restart; /sbin/service ospfd restart' \e -.br -zebra ospfd -.TP -.B Mode 2: individual daemon restart -In this mode, whenever a single daemon hangs or crashes, the given command -is used to restart this daemon only. -.IP -Example usage: watchfrr \-dz \-r '/sbin/service %s restart' \e -.br -zebra ospfd bgpd -.TP -.B Mode 3: phased zebra restart -In this mode, whenever a single daemon hangs or crashes, the given command -is used to restart this daemon only. The only exception is the zebra -daemon; in this case, the following steps are taken: (1) all other daemons -are stopped, (2) zebra is restarted, and (3) other daemons are started -again. 
-.IP -Example usage: watchfrr \-adz \-r '/sbin/service %s restart' \e -.br -\-s '/sbin/service %s start' \e -.br -\-k '/sbin/service %s stop' zebra ospfd bgpd -.TP -.B Mode 4: phased global restart for any failure -In this mode, whenever a single daemon hangs or crashes, the following -steps are taken: (1) all other daemons are stopped, (2) zebra is restarted, -and (3) other daemons are started again. -.IP -Example usage: watchfrr \-Adz \-r '/sbin/service %s restart' \e -.br -\-s '/sbin/service %s start' \e -.br -\-k '/sbin/service %s stop' zebra ospfd bgpd -.PP -Important: It is believed that mode 2 (individual daemon restart) is not -safe, and mode 3 (phased zebra restart) may not be safe with certain -routing daemons. -.PP In order to avoid restarting the daemons in quick succession, you can supply the .B \-m @@ -87,6 +34,36 @@ the restart delay is set to the value of otherwise the interval is doubled (but capped at the value of .BR \-M ). .SH OPTIONS +The following 3 options specify scripts that +.B watchfrr +uses to perform start/stop/restart actions. These options are mandatory +unless the +.B --dry +option is used: +.TP +.BI \-s " command" "\fR, \fB\-\-start\-command " command +Supply a Bourne shell +.I command +to start a single daemon. The command string should contain the '%s' +placeholder to be substituted with the daemon name. +.TP +.BI \-k " command" "\fR, \fB\-\-kill\-command " command +Supply a Bourne shell +.I command +to stop a single daemon. The command string should contain the '%s' +placeholder to be substituted with the daemon name. +.TP +.BI \-r " command" "\fR, \fB\-\-restart " command +Supply a Bourne shell +.I command +to restart a single daemon. The command string should contain the '%s' +placeholder to be substituted with the daemon name. +.PP +Other options: +.TP +.BI \-\-dry +Run watchfrr in "dry-run" mode, only monitoring the specified daemons but not +performing any start/stop/restart actions. 
.TP .BR \-d ", " \-\-daemon Run in daemon mode. When supplied, error messages are sent to Syslog @@ -97,10 +74,6 @@ Set the VTY socket .I directory (the default value is "/var/run/frr"). .TP -.BR \-e ", " \-\-no\-echo -Do not ping the daemons to test whether they respond. This option is -necessary if one or more daemons do not support the echo command. -.TP .BI \-l " level" "\fR, \fB\-\-loglevel " level Set the logging .I level @@ -131,68 +104,6 @@ Set the restart (kill) timeout in seconds (the default value is "20"). If any background jobs are still running after this period has elapsed, they will be killed. .TP -.BI \-r " command" "\fR, \fB\-\-restart " command -Supply a Bourne shell -.I command -to restart a single daemon. The command string should contain the '%s' -placeholder to be substituted with the daemon name. -.IP -Note that -.B \-r -and -.B \-R -options are not compatible. -.TP -.BI \-s " command" "\fR, \fB\-\-start\-command " command -Supply a Bourne shell -.I command -to start a single daemon. The command string should contain the '%s' -placeholder to be substituted with the daemon name. -.TP -.BI \-k " command" "\fR, \fB\-\-kill\-command " command -Supply a Bourne shell -.I command -to stop a single daemon. The command string should contain the '%s' -placeholder to be substituted with the daemon name. -.TP -.BR \-R ", " \-\-restart\-all -When one or more daemons are shut down, try to restart them using the -Bourne shell command supplied on the command line. -.IP -Note that -.B \-r -and -.B \-R -options are not compatible. -.TP -.BR \-z ", " \-\-unresponsive\-restart -When a daemon is in an unresponsive state, treat it as being shut down for -the restart purposes. -.TP -.BR \-a ", " \-\-all\-restart -When zebra hangs or crashes, restart all daemons taking the following -steps: (1) stop all other daemons, (2) restart zebra, and (3) start other -daemons again. 
-.IP -Note that this option also requires -.BR \-r , -.BR \-s , -and -.B \-k -options to be specified. -.TP -.BR \-A ", " \-\-always\-all\-restart -When any daemon (i.e., not just zebra) hangs or crashes, restart all -daemons taking the following steps: (1) stop all other daemons, (2) restart -zebra, and (3) start other daemons again. -.IP -Note that this option also requires -.BR \-r , -.BR \-s , -and -.B \-k -options to be specified. -.TP .BI \-p " filename" "\fR, \fB\-\-pid\-file " filename Set the process identifier .I filename @@ -204,9 +115,8 @@ When the supplied is found in any of the command line option arguments (i.e., .BR \-r , .BR \-s , -.BR \-k , or -.BR \-R ), +.BR \-k ), replace it with a space. .IP This is an ugly hack to circumvent problems with passing the command line diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index be36b2a5db..6926154552 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -1107,9 +1107,8 @@ int main(int argc, char **argv) if (watch_only && (gs.start_command || gs.stop_command || gs.restart_command)) { - fputs("Options -r/-s/-k make no sense combined with -D.\n", + fputs("Options -r/-s/-k are not used when --dry is active.\n", stderr); - frr_help_exit(1); } if (!watch_only && (!gs.restart_command || !gs.start_command || !gs.stop_command)) { -- cgit v1.2.3 From 64a249ad9e25a4613d6c7994e94e5616c1e10288 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 2 Aug 2017 10:46:01 +0200 Subject: watchfrr: remove STATEDIR preprocessor define use frr_vtydir from libfrr instead. 
Signed-off-by: David Lamparter --- watchfrr/Makefile.am | 2 -- watchfrr/watchfrr.c | 22 ++++++++-------------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'watchfrr') diff --git a/watchfrr/Makefile.am b/watchfrr/Makefile.am index abe2266f20..bb53641eff 100644 --- a/watchfrr/Makefile.am +++ b/watchfrr/Makefile.am @@ -1,8 +1,6 @@ ## Process this file with Automake to create Makefile.in AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib -I$(top_builddir)/lib -DEFS = @DEFS@ -DSTATEDIR=\"$(localstatedir)/\" - AM_CFLAGS = $(WERROR) sbin_PROGRAMS = watchfrr diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index 6926154552..37f6fb53dc 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -50,21 +50,12 @@ #define DEFAULT_LOGLEVEL LOG_INFO #define DEFAULT_MIN_RESTART 60 #define DEFAULT_MAX_RESTART 600 -#ifdef PATH_WATCHFRR_PID -#define DEFAULT_PIDFILE PATH_WATCHFRR_PID -#else -#define DEFAULT_PIDFILE STATEDIR "/watchfrr.pid" -#endif -#ifdef DAEMON_VTY_DIR -#define VTYDIR DAEMON_VTY_DIR -#else -#define VTYDIR STATEDIR -#endif #define PING_TOKEN "PING" /* Needs to be global, referenced somewhere inside libfrr. 
*/ struct thread_master *master; +static char pidfile_default[256]; static bool watch_only = false; @@ -118,7 +109,7 @@ static struct global_state { int numdown; /* # of daemons that are not UP or UNRESPONSIVE */ } gs = { .phase = PHASE_NONE, - .vtydir = VTYDIR, + .vtydir = frr_vtydir, .period = 1000 * DEFAULT_PERIOD, .timeout = DEFAULT_TIMEOUT, .restart_timeout = DEFAULT_RESTART_TIMEOUT, @@ -245,9 +236,9 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n", passing command-line arguments with embedded spaces.\n\ -v, --version Print program version\n\ -h, --help Display this help and exit\n", - VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG, + frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG, DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD, - DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE); + DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, pidfile_default); } static pid_t run_background(char *shell_cmd) @@ -976,10 +967,13 @@ FRR_DAEMON_INFO(watchfrr, WATCHFRR, int main(int argc, char **argv) { int opt; - const char *pidfile = DEFAULT_PIDFILE; + const char *pidfile = pidfile_default; const char *special = "zebra"; const char *blankstr = NULL; + snprintf(pidfile_default, sizeof(pidfile_default), "%s/watchfrr.pid", + frr_vtydir); + frr_preinit(&watchfrr_di, argc, argv); progname = watchfrr_di.progname; -- cgit v1.2.3 From 999f153ecea7eccb6fd541f7b38d88b7eb8dfd40 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 9 Aug 2017 11:28:22 +0200 Subject: watchfrr: print specific error for removed options ... and document them in the man page. 
Signed-off-by: David Lamparter --- doc/watchfrr.8.in | 14 ++++++++++++++ watchfrr/watchfrr.c | 12 +++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'watchfrr') diff --git a/doc/watchfrr.8.in b/doc/watchfrr.8.in index 033f1efb0f..782ac7b46e 100644 --- a/doc/watchfrr.8.in +++ b/doc/watchfrr.8.in @@ -127,6 +127,20 @@ Display the version information and exit. .TP .BR \-h ", " \-\-help Display the usage information and exit. +.SH PREVIOUS OPTIONS +Prior versions of \fBwatchfrr\fR supported some additional options that no +longer exist: +.IP +.BR \-a ,\ \-A ,\ \-e ,\ \-R ,\ \-z +.PP +The \fB-a\fR, \fB-A\fR and \fB-R\fR options were used to select alternate +monitoring modes that offered different patterns of restarting daemons. The +"correct" mode (phased restart) is now the default. The \fB-e\fR and \fB-z\fR +options used to disable some monitoring aspects; watchfrr now always has all +monitoring features enabled. +.PP +Removing these options should result in correct operation; if it does not, +please file a bug report. .SH SEE ALSO .BR zebra (8), .BR bgpd (8), diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index 37f6fb53dc..efdba4b7e6 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -964,6 +964,8 @@ FRR_DAEMON_INFO(watchfrr, WATCHFRR, .privs = &watchfrr_privs, ) +#define DEPRECATED_OPTIONS "aAezR:" + int main(int argc, char **argv) { int opt; @@ -977,10 +979,18 @@ int main(int argc, char **argv) frr_preinit(&watchfrr_di, argc, argv); progname = watchfrr_di.progname; - frr_opt_add("b:dk:l:i:p:r:S:s:t:T:", longopts, ""); + frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, ""); gs.restart.name = "all"; while ((opt = frr_getopt(argc, argv, NULL)) != EOF) { + if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) { + fprintf(stderr, + "The -%c option no longer exists.\n" + "Please refer to the watchfrr(8) man page.\n", + opt); + exit(1); + } + switch (opt) { case 0: break; -- cgit v1.2.3