diff options
| -rw-r--r-- | watchfrr/watchfrr.c | 2168 | ||||
| -rw-r--r-- | watchfrr/watchfrr.h | 2 | ||||
| -rw-r--r-- | watchfrr/watchfrr_vty.c | 38 | 
3 files changed, 1079 insertions, 1129 deletions
diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index 903e2bbe91..122d6db6d7 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -65,149 +65,136 @@  /* Needs to be global, referenced somewhere inside libzebra. */  struct thread_master *master; -typedef enum -{ -  MODE_MONITOR = 0, -  MODE_GLOBAL_RESTART, -  MODE_SEPARATE_RESTART, -  MODE_PHASED_ZEBRA_RESTART, -  MODE_PHASED_ALL_RESTART +typedef enum { +	MODE_MONITOR = 0, +	MODE_GLOBAL_RESTART, +	MODE_SEPARATE_RESTART, +	MODE_PHASED_ZEBRA_RESTART, +	MODE_PHASED_ALL_RESTART  } watch_mode_t; -static const char *mode_str[] = -{ -  "monitor", -  "global restart", -  "individual daemon restart", -  "phased zebra restart", -  "phased global restart for any failure", +static const char *mode_str[] = { +	"monitor", +	"global restart", +	"individual daemon restart", +	"phased zebra restart", +	"phased global restart for any failure",  }; -typedef enum -{ -  PHASE_NONE = 0, -  PHASE_STOPS_PENDING, -  PHASE_WAITING_DOWN, -  PHASE_ZEBRA_RESTART_PENDING, -  PHASE_WAITING_ZEBRA_UP +typedef enum { +	PHASE_NONE = 0, +	PHASE_STOPS_PENDING, +	PHASE_WAITING_DOWN, +	PHASE_ZEBRA_RESTART_PENDING, +	PHASE_WAITING_ZEBRA_UP  } restart_phase_t; -static const char *phase_str[] = -{ -  "None", -  "Stop jobs running", -  "Waiting for other daemons to come down", -  "Zebra restart job running", -  "Waiting for zebra to come up", -  "Start jobs running", +static const char *phase_str[] = { +	"None", +	"Stop jobs running", +	"Waiting for other daemons to come down", +	"Zebra restart job running", +	"Waiting for zebra to come up", +	"Start jobs running",  };  #define PHASE_TIMEOUT (3*gs.restart_timeout) -struct restart_info -{ -  const char *name; -  const char *what; -  pid_t pid; -  struct timeval time; -  long interval; -  struct thread *t_kill; -  int kills; +struct restart_info { +	const char *name; +	const char *what; +	pid_t pid; +	struct timeval time; +	long interval; +	struct thread *t_kill; +	int kills;  }; -static struct global_state -{ -  watch_mode_t mode; -  restart_phase_t phase; -  struct thread *t_phase_hanging; -  const char *vtydir; -  long period; -  long timeout; -  long restart_timeout; -  long min_restart_interval; -  long max_restart_interval; -  int do_ping; -  struct daemon *daemons; -  const char *restart_command; -  const char *start_command; -  const char *stop_command; -  struct restart_info restart; -  int unresponsive_restart; -  int loglevel; -  struct daemon *special;	/* points to zebra when doing phased restart */ -  int numdaemons; -  int numpids; -  int numdown;		/* # of daemons that are not UP or UNRESPONSIVE */ +static struct global_state { +	watch_mode_t mode; +	restart_phase_t phase; +	struct thread *t_phase_hanging; +	const char *vtydir; +	long period; +	long timeout; +	long restart_timeout; +	long min_restart_interval; +	long max_restart_interval; +	int do_ping; +	struct daemon *daemons; +	const char *restart_command; +	const char *start_command; +	const char *stop_command; +	struct restart_info restart; +	int unresponsive_restart; +	int loglevel; +	struct daemon *special;	/* points to zebra when doing phased restart */ +	int numdaemons; +	int numpids; +	int numdown;		/* # of daemons that are not UP or UNRESPONSIVE */  } gs = { -  .mode = MODE_MONITOR, -  .phase = PHASE_NONE, -  .vtydir = VTYDIR, -  .period = 1000*DEFAULT_PERIOD, -  .timeout = DEFAULT_TIMEOUT, -  .restart_timeout = DEFAULT_RESTART_TIMEOUT, -  .loglevel = DEFAULT_LOGLEVEL, -  .min_restart_interval = DEFAULT_MIN_RESTART, -  .max_restart_interval = DEFAULT_MAX_RESTART, -  .do_ping = 1, -}; - -typedef enum -{ -  DAEMON_INIT, -  DAEMON_DOWN, -  DAEMON_CONNECTING, -  DAEMON_UP, -  DAEMON_UNRESPONSIVE +.mode = MODE_MONITOR,.phase = PHASE_NONE,.vtydir = VTYDIR,.period = +	    1000 * DEFAULT_PERIOD,.timeout = +	    DEFAULT_TIMEOUT,.restart_timeout = +	    DEFAULT_RESTART_TIMEOUT,.loglevel = +	    DEFAULT_LOGLEVEL,.min_restart_interval = +	    DEFAULT_MIN_RESTART,.max_restart_interval = +	    DEFAULT_MAX_RESTART,.do_ping = 1,}; + +typedef enum { +	DAEMON_INIT, +	DAEMON_DOWN, +	DAEMON_CONNECTING, +	DAEMON_UP, +	DAEMON_UNRESPONSIVE  } daemon_state_t;  #define IS_UP(DMN) \    (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE)) -static const char *state_str[] = -{ -  "Init", -  "Down", -  "Connecting", -  "Up", -  "Unresponsive", +static const char *state_str[] = { +	"Init", +	"Down", +	"Connecting", +	"Up", +	"Unresponsive",  };  struct daemon { -  const char *name; -  daemon_state_t state; -  int fd; -  struct timeval echo_sent; -  u_int connect_tries; -  struct thread *t_wakeup; -  struct thread *t_read; -  struct thread *t_write; -  struct daemon *next; -  struct restart_info restart; +	const char *name; +	daemon_state_t state; +	int fd; +	struct timeval echo_sent; +	u_int connect_tries; +	struct thread *t_wakeup; +	struct thread *t_read; +	struct thread *t_write; +	struct daemon *next; +	struct restart_info restart;  }; -static const struct option longopts[] =  -{ -  { "daemon", no_argument, NULL, 'd'}, -  { "statedir", required_argument, NULL, 'S'}, -  { "no-echo", no_argument, NULL, 'e'}, -  { "loglevel", required_argument, NULL, 'l'}, -  { "interval", required_argument, NULL, 'i'}, -  { "timeout", required_argument, NULL, 't'}, -  { "restart-timeout", required_argument, NULL, 'T'}, -  { "restart", required_argument, NULL, 'r'}, -  { "start-command", required_argument, NULL, 's'}, -  { "kill-command", required_argument, NULL, 'k'}, -  { "restart-all", required_argument, NULL, 'R'}, -  { "all-restart", no_argument, NULL, 'a'}, -  { "always-all-restart", no_argument, NULL, 'A'}, -  { "unresponsive-restart", no_argument, NULL, 'z'}, -  { "min-restart-interval", required_argument, NULL, 'm'}, -  { "max-restart-interval", required_argument, NULL, 'M'}, -  { "pid-file", required_argument, NULL, 'p'}, -  { "blank-string", required_argument, NULL, 'b'}, -  { "help", no_argument, NULL, 'h'}, -  { "version", no_argument, NULL, 'v'}, -  { NULL, 0, NULL, 0 } +static const struct option longopts[] = { +	{"daemon", no_argument, NULL, 'd'}, +	{"statedir", required_argument, NULL, 'S'}, +	{"no-echo", no_argument, NULL, 'e'}, +	{"loglevel", required_argument, NULL, 'l'}, +	{"interval", required_argument, NULL, 'i'}, +	{"timeout", required_argument, NULL, 't'}, +	{"restart-timeout", required_argument, NULL, 'T'}, +	{"restart", required_argument, NULL, 'r'}, +	{"start-command", required_argument, NULL, 's'}, +	{"kill-command", required_argument, NULL, 'k'}, +	{"restart-all", required_argument, NULL, 'R'}, +	{"all-restart", no_argument, NULL, 'a'}, +	{"always-all-restart", no_argument, NULL, 'A'}, +	{"unresponsive-restart", no_argument, NULL, 'z'}, +	{"min-restart-interval", required_argument, NULL, 'm'}, +	{"max-restart-interval", required_argument, NULL, 'M'}, +	{"pid-file", required_argument, NULL, 'p'}, +	{"blank-string", required_argument, NULL, 'b'}, +	{"help", no_argument, NULL, 'h'}, +	{"version", no_argument, NULL, 'v'}, +	{NULL, 0, NULL, 0}  };  static int try_connect(struct daemon *dmn); @@ -215,14 +202,13 @@ static int wakeup_send_echo(struct thread *t_wakeup);  static void try_restart(struct daemon *dmn);  static void phase_check(void); -static int -usage(const char *progname, int status) +static int usage(const char *progname, int status)  { -  if (status != 0) -    fprintf(stderr, "Try `%s --help' for more information.\n", progname); -  else -    { -      printf("Usage : %s [OPTION...] <daemon name> ...\n\n\ +	if (status != 0) +		fprintf(stderr, "Try `%s --help' for more information.\n", +			progname); +	else { +		printf("Usage : %s [OPTION...] <daemon name> ...\n\n\  Watchdog program to monitor status of frr daemons and try to restart\n\  them if they are down or unresponsive.  It determines whether a daemon is\n\  up based on whether it can connect to the daemon's vty unix stream socket.\n\ @@ -266,12 +252,9 @@ the -m and -M options allow you to control the minimum delay between\n\  restart commands.  The minimum restart delay is recalculated each time\n\  a restart is attempted: if the time since the last restart attempt exceeds\n\  twice the -M value, then the restart delay is set to the -m value.\n\ -Otherwise, the interval is doubled (but capped at the -M value).\n\n", -        progname,mode_str[0],progname,mode_str[1],progname,mode_str[2], -        progname,mode_str[3],progname,mode_str[4],progname,mode_str[2], -        mode_str[3]); +Otherwise, the interval is doubled (but capped at the -M value).\n\n", progname, mode_str[0], progname, mode_str[1], progname, mode_str[2], progname, mode_str[3], progname, mode_str[4], progname, mode_str[2], mode_str[3]); -      printf("Options:\n\ +		printf("Options:\n\  -d, --daemon	Run in daemon mode.  In this mode, error messages are sent\n\  		to syslog instead of stdout.\n\  -S, --statedir	Set the vty socket directory (default is %s)\n\ @@ -329,221 +312,203 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n",  		it with a space.  This is an ugly hack to circumvent problems\n\  		passing command-line arguments with embedded spaces.\n\  -v, --version	Print program version\n\ --h, --help	Display this help and exit\n", -        VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG, -        DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART, -        DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT, -        DEFAULT_PIDFILE); -    } - -  return status; +-h, --help	Display this help and exit\n", VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG, DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD, DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE); +	} + +	return status;  } -static pid_t -run_background(char *shell_cmd) +static pid_t run_background(char *shell_cmd)  { -  pid_t child; - -  switch (child = fork()) -    { -    case -1: -      zlog_err("fork failed, cannot run command [%s]: %s", -	       shell_cmd,safe_strerror(errno)); -      return -1; -    case 0: -      /* Child process. */ -      /* Use separate process group so child processes can be killed easily. */ -      if (setpgid(0,0) < 0) -        zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno)); -      { -	char shell[] = "sh"; -	char dashc[] = "-c"; -	char * const argv[4] = { shell, dashc, shell_cmd, NULL}; -	execv("/bin/sh", argv); -	zlog_err("execv(/bin/sh -c '%s') failed: %s", -		 shell_cmd,safe_strerror(errno)); -	_exit(127); -      } -    default: -      /* Parent process: we will reap the child later. */ -      zlog_err("Forked background command [pid %d]: %s",(int)child,shell_cmd); -      return child; -    } +	pid_t child; + +	switch (child = fork()) { +	case -1: +		zlog_err("fork failed, cannot run command [%s]: %s", +			 shell_cmd, safe_strerror(errno)); +		return -1; +	case 0: +		/* Child process. */ +		/* Use separate process group so child processes can be killed easily. */ +		if (setpgid(0, 0) < 0) +			zlog_warn("warning: setpgid(0,0) failed: %s", +				  safe_strerror(errno)); +		{ +			char shell[] = "sh"; +			char dashc[] = "-c"; +			char *const argv[4] = { shell, dashc, shell_cmd, NULL }; +			execv("/bin/sh", argv); +			zlog_err("execv(/bin/sh -c '%s') failed: %s", +				 shell_cmd, safe_strerror(errno)); +			_exit(127); +		} +	default: +		/* Parent process: we will reap the child later. */ +		zlog_err("Forked background command [pid %d]: %s", (int)child, +			 shell_cmd); +		return child; +	}  } -static struct timeval * -time_elapsed(struct timeval *result, const struct timeval *start_time) +static struct timeval *time_elapsed(struct timeval *result, +				    const struct timeval *start_time)  { -  gettimeofday(result,NULL); -  result->tv_sec -= start_time->tv_sec; -  result->tv_usec -= start_time->tv_usec; -  while (result->tv_usec < 0) -    { -      result->tv_usec += 1000000L; -      result->tv_sec--; -    } -  return result; +	gettimeofday(result, NULL); +	result->tv_sec -= start_time->tv_sec; +	result->tv_usec -= start_time->tv_usec; +	while (result->tv_usec < 0) { +		result->tv_usec += 1000000L; +		result->tv_sec--; +	} +	return result;  } -static int -restart_kill(struct thread *t_kill) +static int restart_kill(struct thread *t_kill)  { -  struct restart_info *restart = THREAD_ARG(t_kill); -  struct timeval delay; - -  time_elapsed(&delay,&restart->time); -  zlog_warn("Warning: %s %s child process %d still running after " -	    "%ld seconds, sending signal %d", -	    restart->what,restart->name,(int)restart->pid, (long)delay.tv_sec, -	    (restart->kills ? SIGKILL : SIGTERM)); -  kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM)); -  restart->kills++; -  restart->t_kill = thread_add_timer(master,restart_kill,restart, -				     gs.restart_timeout); -  return 0; +	struct restart_info *restart = THREAD_ARG(t_kill); +	struct timeval delay; + +	time_elapsed(&delay, &restart->time); +	zlog_warn("Warning: %s %s child process %d still running after " +		  "%ld seconds, sending signal %d", +		  restart->what, restart->name, (int)restart->pid, +		  (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM)); +	kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM)); +	restart->kills++; +	restart->t_kill = thread_add_timer(master, restart_kill, restart, +					   gs.restart_timeout); +	return 0;  } -static struct restart_info * -find_child(pid_t child) +static struct restart_info *find_child(pid_t child)  { -  if (gs.mode == MODE_GLOBAL_RESTART) -    { -      if (gs.restart.pid == child) -        return &gs.restart; -    } -  else -    { -      struct daemon *dmn; -      for (dmn = gs.daemons; dmn; dmn = dmn->next) -        { -	  if (dmn->restart.pid == child) -	    return &dmn->restart; -        } -    } -  return NULL; +	if (gs.mode == MODE_GLOBAL_RESTART) { +		if (gs.restart.pid == child) +			return &gs.restart; +	} else { +		struct daemon *dmn; +		for (dmn = gs.daemons; dmn; dmn = dmn->next) { +			if (dmn->restart.pid == child) +				return &dmn->restart; +		} +	} +	return NULL;  } -static void -sigchild(void) +static void sigchild(void)  { -  pid_t child; -  int status; -  const char *name; -  const char *what; -  struct restart_info *restart; - -  switch (child = waitpid(-1,&status,WNOHANG))  -    { -    case -1: -      zlog_err("waitpid failed: %s",safe_strerror(errno)); -      return; -    case 0: -      zlog_warn("SIGCHLD received, but waitpid did not reap a child"); -      return; -    } - -  if (child == integrated_write_pid) -    { -      integrated_write_sigchld(status); -      return; -    } - -  if ((restart = find_child(child)) != NULL) -    { -      name = restart->name; -      what = restart->what; -      restart->pid = 0; -      gs.numpids--; -      thread_cancel(restart->t_kill); -      restart->t_kill = NULL; -      /* Update restart time to reflect the time the command completed. */ -      gettimeofday(&restart->time,NULL); -    } -  else -    { -      zlog_err("waitpid returned status for an unknown child process %d", -	       (int)child); -      name = "(unknown)"; -      what = "background"; -    } -  if (WIFSTOPPED(status)) -      zlog_warn("warning: %s %s process %d is stopped", -		what,name,(int)child); -  else if (WIFSIGNALED(status)) -    zlog_warn("%s %s process %d terminated due to signal %d", -	      what,name,(int)child,WTERMSIG(status)); -  else if (WIFEXITED(status)) -    { -      if (WEXITSTATUS(status) != 0) -	zlog_warn("%s %s process %d exited with non-zero status %d", -		  what,name,(int)child,WEXITSTATUS(status)); -      else -	zlog_debug("%s %s process %d exited normally",what,name,(int)child); -    } -  else -    zlog_err("cannot interpret %s %s process %d wait status 0x%x", -	     what,name,(int)child,status); -  phase_check(); +	pid_t child; +	int status; +	const char *name; +	const char *what; +	struct restart_info *restart; + +	switch (child = waitpid(-1, &status, WNOHANG)) { +	case -1: +		zlog_err("waitpid failed: %s", safe_strerror(errno)); +		return; +	case 0: +		zlog_warn("SIGCHLD received, but waitpid did not reap a child"); +		return; +	} + +	if (child == integrated_write_pid) { +		integrated_write_sigchld(status); +		return; +	} + +	if ((restart = find_child(child)) != NULL) { +		name = restart->name; +		what = restart->what; +		restart->pid = 0; +		gs.numpids--; +		thread_cancel(restart->t_kill); +		restart->t_kill = NULL; +		/* Update restart time to reflect the time the command completed. */ +		gettimeofday(&restart->time, NULL); +	} else { +		zlog_err +		    ("waitpid returned status for an unknown child process %d", +		     (int)child); +		name = "(unknown)"; +		what = "background"; +	} +	if (WIFSTOPPED(status)) +		zlog_warn("warning: %s %s process %d is stopped", +			  what, name, (int)child); +	else if (WIFSIGNALED(status)) +		zlog_warn("%s %s process %d terminated due to signal %d", +			  what, name, (int)child, WTERMSIG(status)); +	else if (WIFEXITED(status)) { +		if (WEXITSTATUS(status) != 0) +			zlog_warn +			    ("%s %s process %d exited with non-zero status %d", +			     what, name, (int)child, WEXITSTATUS(status)); +		else +			zlog_debug("%s %s process %d exited normally", what, +				   name, (int)child); +	} else +		zlog_err("cannot interpret %s %s process %d wait status 0x%x", +			 what, name, (int)child, status); +	phase_check();  }  static int  run_job(struct restart_info *restart, const char *cmdtype, const char *command,  	int force, int update_interval)  { -  struct timeval delay; - -  if (gs.loglevel > LOG_DEBUG+1) -    zlog_debug("attempting to %s %s",cmdtype,restart->name); - -  if (restart->pid) -    { -      if (gs.loglevel > LOG_DEBUG+1) -        zlog_debug("cannot %s %s, previous pid %d still running", -		   cmdtype,restart->name,(int)restart->pid); -      return -1; -    } - -  /* Note: time_elapsed test must come before the force test, since we need -     to make sure that delay is initialized for use below in updating the -     restart interval. */ -  if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) && -      !force) -    { -      if (gs.loglevel > LOG_DEBUG+1) -        zlog_debug("postponing %s %s: " -		   "elapsed time %ld < retry interval %ld", -		   cmdtype,restart->name,(long)delay.tv_sec,restart->interval); -      return -1; -    } - -  gettimeofday(&restart->time,NULL); -  restart->kills = 0; -  { -    char cmd[strlen(command)+strlen(restart->name)+1]; -    snprintf(cmd,sizeof(cmd),command,restart->name); -    if ((restart->pid = run_background(cmd)) > 0) -      { -	restart->t_kill = thread_add_timer(master,restart_kill,restart, -					   gs.restart_timeout); -	restart->what = cmdtype; -	gs.numpids++; -      } -    else -      restart->pid = 0; -  } - -  /* Calculate the new restart interval. */ -  if (update_interval) -    { -      if (delay.tv_sec > 2*gs.max_restart_interval) -	restart->interval = gs.min_restart_interval; -      else if ((restart->interval *= 2) > gs.max_restart_interval) -	restart->interval = gs.max_restart_interval; -      if (gs.loglevel > LOG_DEBUG+1) -	zlog_debug("restart %s interval is now %ld", -		   restart->name,restart->interval); -    } -  return restart->pid; +	struct timeval delay; + +	if (gs.loglevel > LOG_DEBUG + 1) +		zlog_debug("attempting to %s %s", cmdtype, restart->name); + +	if (restart->pid) { +		if (gs.loglevel > LOG_DEBUG + 1) +			zlog_debug +			    ("cannot %s %s, previous pid %d still running", +			     cmdtype, restart->name, (int)restart->pid); +		return -1; +	} + +	/* Note: time_elapsed test must come before the force test, since we need +	   to make sure that delay is initialized for use below in updating the +	   restart interval. */ +	if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval) +	    && !force) { +		if (gs.loglevel > LOG_DEBUG + 1) +			zlog_debug("postponing %s %s: " +				   "elapsed time %ld < retry interval %ld", +				   cmdtype, restart->name, (long)delay.tv_sec, +				   restart->interval); +		return -1; +	} + +	gettimeofday(&restart->time, NULL); +	restart->kills = 0; +	{ +		char cmd[strlen(command) + strlen(restart->name) + 1]; +		snprintf(cmd, sizeof(cmd), command, restart->name); +		if ((restart->pid = run_background(cmd)) > 0) { +			restart->t_kill = +			    thread_add_timer(master, restart_kill, restart, +					     gs.restart_timeout); +			restart->what = cmdtype; +			gs.numpids++; +		} else +			restart->pid = 0; +	} + +	/* Calculate the new restart interval. */ +	if (update_interval) { +		if (delay.tv_sec > 2 * gs.max_restart_interval) +			restart->interval = gs.min_restart_interval; +		else if ((restart->interval *= 2) > gs.max_restart_interval) +			restart->interval = gs.max_restart_interval; +		if (gs.loglevel > LOG_DEBUG + 1) +			zlog_debug("restart %s interval is now %ld", +				   restart->name, restart->interval); +	} +	return restart->pid;  }  #define SET_READ_HANDLER(DMN) \ @@ -551,889 +516,874 @@ run_job(struct restart_info *restart, const char *cmdtype, const char *command,  #define SET_WAKEUP_DOWN(DMN)	\    (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN),	\ -    					  FUZZY(gs.period)) +					  FUZZY(gs.period))  #define SET_WAKEUP_UNRESPONSIVE(DMN)	\    (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \ -    					  FUZZY(gs.period)) +					  FUZZY(gs.period))  #define SET_WAKEUP_ECHO(DMN) \    (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \  					  FUZZY(gs.period)) -static int -wakeup_down(struct thread *t_wakeup) +static int wakeup_down(struct thread *t_wakeup)  { -  struct daemon *dmn = THREAD_ARG(t_wakeup); - -  dmn->t_wakeup = NULL; -  if (try_connect(dmn) < 0) -    SET_WAKEUP_DOWN(dmn); -  if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP)) -    try_restart(dmn); -  return 0; +	struct daemon *dmn = THREAD_ARG(t_wakeup); + +	dmn->t_wakeup = NULL; +	if (try_connect(dmn) < 0) +		SET_WAKEUP_DOWN(dmn); +	if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP)) +		try_restart(dmn); +	return 0;  } -static int -wakeup_init(struct thread *t_wakeup) +static int wakeup_init(struct thread *t_wakeup)  { -  struct daemon *dmn = THREAD_ARG(t_wakeup); - -  dmn->t_wakeup = NULL; -  if (try_connect(dmn) < 0) -    { -      SET_WAKEUP_DOWN(dmn); -      zlog_err("%s state -> down : initial connection attempt failed", -	       dmn->name); -      dmn->state = DAEMON_DOWN; -    } -  return 0; +	struct daemon *dmn = THREAD_ARG(t_wakeup); + +	dmn->t_wakeup = NULL; +	if (try_connect(dmn) < 0) { +		SET_WAKEUP_DOWN(dmn); +		zlog_err("%s state -> down : initial connection attempt failed", +			 dmn->name); +		dmn->state = DAEMON_DOWN; +	} +	return 0;  } -static void -daemon_down(struct daemon *dmn, const char *why) +static void daemon_down(struct daemon *dmn, const char *why)  { -  if (IS_UP(dmn) || (dmn->state == DAEMON_INIT)) -    zlog_err("%s state -> down : %s",dmn->name,why); -  else if (gs.loglevel > LOG_DEBUG) -    zlog_debug("%s still down : %s",dmn->name,why); -  if (IS_UP(dmn)) -    gs.numdown++; -  dmn->state = DAEMON_DOWN; -  if (dmn->fd >= 0) -    { -      close(dmn->fd); -      dmn->fd = -1; -    } -  THREAD_OFF(dmn->t_read); -  THREAD_OFF(dmn->t_write); -  THREAD_OFF(dmn->t_wakeup); -  if (try_connect(dmn) < 0) -    SET_WAKEUP_DOWN(dmn); -  phase_check(); +	if (IS_UP(dmn) || (dmn->state == DAEMON_INIT)) +		zlog_err("%s state -> down : %s", dmn->name, why); +	else if (gs.loglevel > LOG_DEBUG) +		zlog_debug("%s still down : %s", dmn->name, why); +	if (IS_UP(dmn)) +		gs.numdown++; +	dmn->state = DAEMON_DOWN; +	if (dmn->fd >= 0) { +		close(dmn->fd); +		dmn->fd = -1; +	} +	THREAD_OFF(dmn->t_read); +	THREAD_OFF(dmn->t_write); +	THREAD_OFF(dmn->t_wakeup); +	if (try_connect(dmn) < 0) +		SET_WAKEUP_DOWN(dmn); +	phase_check();  } -static int -handle_read(struct thread *t_read) +static int handle_read(struct thread *t_read)  { -  struct daemon *dmn = THREAD_ARG(t_read); -  static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n"; -  char buf[sizeof(resp)+100]; -  ssize_t rc; -  struct timeval delay; - -  dmn->t_read = NULL; -  if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0) -    { -      char why[100]; - -      if (ERRNO_IO_RETRY(errno)) -	{ -	  /* Pretend it never happened. */ -	  SET_READ_HANDLER(dmn); -	  return 0; +	struct daemon *dmn = THREAD_ARG(t_read); +	static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n"; +	char buf[sizeof(resp) + 100]; +	ssize_t rc; +	struct timeval delay; + +	dmn->t_read = NULL; +	if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) { +		char why[100]; + +		if (ERRNO_IO_RETRY(errno)) { +			/* Pretend it never happened. */ +			SET_READ_HANDLER(dmn); +			return 0; +		} +		snprintf(why, sizeof(why), "unexpected read error: %s", +			 safe_strerror(errno)); +		daemon_down(dmn, why); +		return 0;  	} -      snprintf(why,sizeof(why),"unexpected read error: %s", -	       safe_strerror(errno)); -      daemon_down(dmn,why); -      return 0; -    } -  if (rc == 0) -    { -      daemon_down(dmn,"read returned EOF"); -      return 0; -    } -  if (!dmn->echo_sent.tv_sec) -    { -      char why[sizeof(buf)+100]; -      snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s", -	       (int)rc,(int)rc,buf); -      daemon_down(dmn,why); -      return 0; -    } - -  /* We are expecting an echo response: is there any chance that the -     response would not be returned entirely in the first read?  That -     seems inconceivable... */ -  if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp))) -    { -      char why[100+sizeof(buf)]; -      snprintf(why,sizeof(why),"read returned bad echo response of %d bytes " -			       "(expecting %u): %.*s", -	       (int)rc,(u_int)sizeof(resp),(int)rc,buf); -      daemon_down(dmn,why); -      return 0; -    } - -  time_elapsed(&delay,&dmn->echo_sent); -  dmn->echo_sent.tv_sec = 0; -  if (dmn->state == DAEMON_UNRESPONSIVE) -    { -      if (delay.tv_sec < gs.timeout) -	{ -	  dmn->state = DAEMON_UP; -	  zlog_warn("%s state -> up : echo response received after %ld.%06ld " -		    "seconds", dmn->name, -		    (long)delay.tv_sec, (long)delay.tv_usec); +	if (rc == 0) { +		daemon_down(dmn, "read returned EOF"); +		return 0; +	} +	if (!dmn->echo_sent.tv_sec) { +		char why[sizeof(buf) + 100]; +		snprintf(why, sizeof(why), +			 "unexpected read returns %d bytes: %.*s", (int)rc, +			 (int)rc, buf); +		daemon_down(dmn, why); +		return 0;  	} -      else -	zlog_warn("%s: slow echo response finally received after %ld.%06ld " -		  "seconds", dmn->name, -		  (long)delay.tv_sec, (long)delay.tv_usec); -    } -  else if (gs.loglevel > LOG_DEBUG+1) -    zlog_debug("%s: echo response received after %ld.%06ld seconds", -	       dmn->name, (long)delay.tv_sec, (long)delay.tv_usec); - -  SET_READ_HANDLER(dmn); -  if (dmn->t_wakeup) -    thread_cancel(dmn->t_wakeup); -  SET_WAKEUP_ECHO(dmn); - -  return 0; + +	/* We are expecting an echo response: is there any chance that the +	   response would not be returned entirely in the first read?  That +	   seems inconceivable... */ +	if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) { +		char why[100 + sizeof(buf)]; +		snprintf(why, sizeof(why), +			 "read returned bad echo response of %d bytes " +			 "(expecting %u): %.*s", (int)rc, (u_int) sizeof(resp), +			 (int)rc, buf); +		daemon_down(dmn, why); +		return 0; +	} + +	time_elapsed(&delay, &dmn->echo_sent); +	dmn->echo_sent.tv_sec = 0; +	if (dmn->state == DAEMON_UNRESPONSIVE) { +		if (delay.tv_sec < gs.timeout) { +			dmn->state = DAEMON_UP; +			zlog_warn +			    ("%s state -> up : echo response received after %ld.%06ld " +			     "seconds", dmn->name, (long)delay.tv_sec, +			     (long)delay.tv_usec); +		} else +			zlog_warn +			    ("%s: slow echo response finally received after %ld.%06ld " +			     "seconds", dmn->name, (long)delay.tv_sec, +			     (long)delay.tv_usec); +	} else if (gs.loglevel > LOG_DEBUG + 1) +		zlog_debug("%s: echo response received after %ld.%06ld seconds", +			   dmn->name, (long)delay.tv_sec, (long)delay.tv_usec); + +	SET_READ_HANDLER(dmn); +	if (dmn->t_wakeup) +		thread_cancel(dmn->t_wakeup); +	SET_WAKEUP_ECHO(dmn); + +	return 0;  }  /*   * Wait till we notice that all daemons are ready before   * we send we are ready to systemd   */ -static void -daemon_send_ready (void) +static void daemon_send_ready(void)  { -  static int sent = 0; -  if (!sent && gs.numdown == 0) -    { +	static int sent = 0; +	if (!sent && gs.numdown == 0) {  #if defined (HAVE_CUMULUS) -      FILE *fp; +		FILE *fp; -      fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w"); -      fclose(fp); +		fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w"); +		fclose(fp);  #endif -      zlog_notice ("Watchfrr: Notifying Systemd we are up and running"); -      systemd_send_started(master, 0); -      sent = 1; -    } +		zlog_notice +		    ("Watchfrr: Notifying Systemd we are up and running"); +		systemd_send_started(master, 0); +		sent = 1; +	}  } -static void -daemon_up(struct daemon *dmn, const char *why) +static void daemon_up(struct daemon *dmn, const char *why)  { -  dmn->state = DAEMON_UP; -  gs.numdown--; -  dmn->connect_tries = 0; -  zlog_notice("%s state -> up : %s",dmn->name,why); -  daemon_send_ready(); -  if (gs.do_ping) -    SET_WAKEUP_ECHO(dmn); -  phase_check(); +	dmn->state = DAEMON_UP; +	gs.numdown--; +	dmn->connect_tries = 0; +	zlog_notice("%s state -> up : %s", dmn->name, why); +	daemon_send_ready(); +	if (gs.do_ping) +		SET_WAKEUP_ECHO(dmn); +	phase_check();  } -static int -check_connect(struct thread *t_write) +static int check_connect(struct thread *t_write)  { -  struct daemon *dmn = THREAD_ARG(t_write); -  int sockerr; -  socklen_t reslen = sizeof(sockerr); - -  dmn->t_write = NULL; -  if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0) -    { -      zlog_warn("%s: check_connect: getsockopt failed: %s", -	        dmn->name,safe_strerror(errno)); -      daemon_down(dmn,"getsockopt failed checking connection success"); -      return 0; -    } -  if ((reslen == sizeof(sockerr)) && sockerr) -    { -      char why[100]; -      snprintf(why,sizeof(why), -	       "getsockopt reports that connection attempt failed: %s", -	       safe_strerror(sockerr)); -      daemon_down(dmn,why); -      return 0; -    } - -  daemon_up(dmn,"delayed connect succeeded"); -  return 0; +	struct daemon *dmn = THREAD_ARG(t_write); +	int sockerr; +	socklen_t reslen = sizeof(sockerr); + +	dmn->t_write = NULL; +	if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen) +	    < 0) { +		zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name, +			  safe_strerror(errno)); +		daemon_down(dmn, +			    "getsockopt failed checking connection success"); +		return 0; +	} +	if ((reslen == sizeof(sockerr)) && sockerr) { +		char why[100]; +		snprintf(why, sizeof(why), +			 "getsockopt reports that connection attempt failed: %s", +			 safe_strerror(sockerr)); +		daemon_down(dmn, why); +		return 0; +	} + +	daemon_up(dmn, "delayed connect succeeded"); +	return 0;  } -static int -wakeup_connect_hanging(struct thread *t_wakeup) +static int wakeup_connect_hanging(struct thread *t_wakeup)  { -  struct daemon *dmn = THREAD_ARG(t_wakeup); -  char why[100]; - -  dmn->t_wakeup = NULL; -  snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds", -	   gs.timeout); -  daemon_down(dmn,why); -  return 0; +	struct daemon *dmn = THREAD_ARG(t_wakeup); +	char why[100]; + +	dmn->t_wakeup = NULL; +	snprintf(why, sizeof(why), +		 "connection attempt timed out after %ld seconds", gs.timeout); +	daemon_down(dmn, why); +	return 0;  }  /* Making connection to protocol daemon. */ -static int -try_connect(struct daemon *dmn) +static int try_connect(struct daemon *dmn)  { -  int sock; -  struct sockaddr_un addr; -  socklen_t len; - -  if (gs.loglevel > LOG_DEBUG+1) -    zlog_debug("%s: attempting to connect",dmn->name); -  dmn->connect_tries++; - -  memset (&addr, 0, sizeof (struct sockaddr_un)); -  addr.sun_family = AF_UNIX; -  snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", -	   gs.vtydir,dmn->name); +	int sock; +	struct sockaddr_un addr; +	socklen_t len; + +	if (gs.loglevel > LOG_DEBUG + 1) +		zlog_debug("%s: attempting to connect", dmn->name); +	dmn->connect_tries++; + +	memset(&addr, 0, sizeof(struct sockaddr_un)); +	addr.sun_family = AF_UNIX; +	snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", +		 gs.vtydir, dmn->name);  #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN -  len = addr.sun_len = SUN_LEN(&addr); +	len = addr.sun_len = SUN_LEN(&addr);  #else -  len = sizeof (addr.sun_family) + strlen (addr.sun_path); -#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */ - -  /* Quick check to see if we might succeed before we go to the trouble -     of creating a socket. */ -  if (access(addr.sun_path, W_OK) < 0) -    { -      if (errno != ENOENT) -        zlog_err("%s: access to socket %s denied: %s", -		dmn->name,addr.sun_path,safe_strerror(errno)); -      return -1; -    } - -  if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) -    { -      zlog_err("%s(%s): cannot make socket: %s", -	       __func__,addr.sun_path, safe_strerror(errno)); -      return -1; -    } - -  if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) -    { -      zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", -	       __func__, addr.sun_path, sock); -      close(sock); -      return -1; -    } - -  if (connect (sock, (struct sockaddr *) &addr, len) < 0) -    { -      if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) -	{ -	  if (gs.loglevel > LOG_DEBUG) -	    zlog_debug("%s(%s): connect failed: %s", -		       __func__,addr.sun_path, safe_strerror(errno)); -	  close (sock); -	  return -1; +	len = sizeof(addr.sun_family) + strlen(addr.sun_path); +#endif				/* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */ + +	/* Quick check to see if we might succeed before we go to the trouble +	   of creating a socket. */ +	if (access(addr.sun_path, W_OK) < 0) { +		if (errno != ENOENT) +			zlog_err("%s: access to socket %s denied: %s", +				 dmn->name, addr.sun_path, +				 safe_strerror(errno)); +		return -1; +	} + +	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { +		zlog_err("%s(%s): cannot make socket: %s", +			 __func__, addr.sun_path, safe_strerror(errno)); +		return -1; +	} + +	if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) { +		zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", +			 __func__, addr.sun_path, sock); +		close(sock); +		return -1;  	} -      if (gs.loglevel > LOG_DEBUG) -	zlog_debug("%s: connection in progress",dmn->name); -      dmn->state = DAEMON_CONNECTING; -      dmn->fd = sock; -      dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd); -      dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn, -				       gs.timeout); -      SET_READ_HANDLER(dmn); -      return 0; -    } - -  dmn->fd = sock; -  SET_READ_HANDLER(dmn); -  daemon_up(dmn,"connect succeeded"); -  return 1; + +	if (connect(sock, (struct sockaddr *)&addr, len) < 0) { +		if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { +			if (gs.loglevel > LOG_DEBUG) +				zlog_debug("%s(%s): connect failed: %s", +					   __func__, addr.sun_path, +					   safe_strerror(errno)); +			close(sock); +			return -1; +		} +		if (gs.loglevel > LOG_DEBUG) +			zlog_debug("%s: connection in progress", dmn->name); +		dmn->state = DAEMON_CONNECTING; +		dmn->fd = sock; +		dmn->t_write = +		    thread_add_write(master, check_connect, dmn, dmn->fd); +		dmn->t_wakeup = +		    thread_add_timer(master, wakeup_connect_hanging, dmn, +				     gs.timeout); +		SET_READ_HANDLER(dmn); +		return 0; +	} + +	dmn->fd = sock; +	SET_READ_HANDLER(dmn); +	daemon_up(dmn, "connect succeeded"); +	return 1;  } -static int -phase_hanging(struct thread *t_hanging) +static int phase_hanging(struct thread *t_hanging)  { -  gs.t_phase_hanging = NULL; -  zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart", -           phase_str[gs.phase],PHASE_TIMEOUT); -  gs.phase = PHASE_NONE; -  return 0; +	gs.t_phase_hanging = NULL; +	zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart", +		 phase_str[gs.phase], PHASE_TIMEOUT); +	gs.phase = PHASE_NONE; +	return 0;  } -static void -set_phase(restart_phase_t new_phase) +static void set_phase(restart_phase_t new_phase)  { -  gs.phase = new_phase; -  if (gs.t_phase_hanging) -    thread_cancel(gs.t_phase_hanging); -  gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL, -  					PHASE_TIMEOUT); +	gs.phase = new_phase; +	if (gs.t_phase_hanging) +		thread_cancel(gs.t_phase_hanging); +	gs.t_phase_hanging = thread_add_timer(master, phase_hanging, NULL, +					      PHASE_TIMEOUT);  } -static void -phase_check(void) +static void phase_check(void)  { -  switch (gs.phase) -    { -    case PHASE_NONE: -      break; -    case PHASE_STOPS_PENDING: -      if (gs.numpids) -	break; -      zlog_info("Phased restart: all routing daemon stop jobs have completed."); -      set_phase(PHASE_WAITING_DOWN); -      /*FALLTHRU*/ -    case PHASE_WAITING_DOWN: -      if (gs.numdown+IS_UP(gs.special) < gs.numdaemons) -        break; -      zlog_info("Phased restart: all routing daemons now down."); -      run_job(&gs.special->restart,"restart",gs.restart_command,1,1); -      set_phase(PHASE_ZEBRA_RESTART_PENDING); -      /*FALLTHRU*/ -    case PHASE_ZEBRA_RESTART_PENDING: -      if (gs.special->restart.pid) -	break; -      zlog_info("Phased restart: %s restart job completed.",gs.special->name); -      set_phase(PHASE_WAITING_ZEBRA_UP); -      /*FALLTHRU*/ -    case PHASE_WAITING_ZEBRA_UP: -      if (!IS_UP(gs.special)) -        break; -      zlog_info("Phased restart: %s is now up.",gs.special->name); -      { -        struct daemon *dmn; -	for (dmn = gs.daemons; dmn; dmn = dmn->next) -	  { -	    if (dmn != gs.special) -	      run_job(&dmn->restart,"start",gs.start_command,1,0); -	  } -      } -      gs.phase = PHASE_NONE; -      THREAD_OFF(gs.t_phase_hanging); -      zlog_notice("Phased global restart has completed."); -      break; -    } +	switch (gs.phase) { +	case PHASE_NONE: +		break; +	case PHASE_STOPS_PENDING: +		if (gs.numpids) +			break; +		zlog_info +		    ("Phased restart: all routing daemon stop jobs have completed."); +		set_phase(PHASE_WAITING_DOWN); + +		/*FALLTHRU*/ +	case PHASE_WAITING_DOWN: +		if (gs.numdown + IS_UP(gs.special) < gs.numdaemons) +			break; +		zlog_info("Phased restart: all routing daemons now down."); +		run_job(&gs.special->restart, "restart", gs.restart_command, 1, +			1); +		set_phase(PHASE_ZEBRA_RESTART_PENDING); + +		/*FALLTHRU*/ +	case PHASE_ZEBRA_RESTART_PENDING: +		if (gs.special->restart.pid) +			break; +		zlog_info("Phased restart: %s restart job completed.", +			  gs.special->name); +		set_phase(PHASE_WAITING_ZEBRA_UP); + +		/*FALLTHRU*/ +	case PHASE_WAITING_ZEBRA_UP: +		if (!IS_UP(gs.special)) +			break; +		zlog_info("Phased restart: %s is now up.", gs.special->name); +		{ +			struct daemon *dmn; +			for (dmn = gs.daemons; dmn; dmn = dmn->next) { +				if (dmn != gs.special) +					run_job(&dmn->restart, "start", +						gs.start_command, 1, 0); +			} +		} +		gs.phase = PHASE_NONE; +		THREAD_OFF(gs.t_phase_hanging); +		zlog_notice("Phased global restart has completed."); +		break; +	}  } -static void -try_restart(struct daemon *dmn) +static void try_restart(struct daemon *dmn)  { -  switch (gs.mode) -  { -  case MODE_MONITOR: -    return; -  case MODE_GLOBAL_RESTART: -    run_job(&gs.restart,"restart",gs.restart_command,0,1); -    break; -  case MODE_SEPARATE_RESTART: -    run_job(&dmn->restart,"restart",gs.restart_command,0,1); -    break; -  case MODE_PHASED_ZEBRA_RESTART: -    if (dmn != gs.special) -      { -        if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE)) -	  run_job(&dmn->restart,"restart",gs.restart_command,0,1); -	else -	  zlog_debug("%s: postponing restart attempt because master %s daemon " -		     "not up [%s], or phased restart in progress", -		     dmn->name,gs.special->name,state_str[gs.special->state]); -	break; -      } -    /*FALLTHRU*/ -  case MODE_PHASED_ALL_RESTART: -    if ((gs.phase != PHASE_NONE) || gs.numpids) -      { -	if (gs.loglevel > LOG_DEBUG+1) -	  zlog_debug("postponing phased global restart: restart already in " -		     "progress [%s], or outstanding child processes [%d]", -		     phase_str[gs.phase],gs.numpids); -        break; -      } -    /* Is it too soon for a restart? */ -    { -      struct timeval delay; -      if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec < -      	  gs.special->restart.interval) -	{ -	  if (gs.loglevel > LOG_DEBUG+1) -	    zlog_debug("postponing phased global restart: " -		       "elapsed time %ld < retry interval %ld", -		       (long)delay.tv_sec,gs.special->restart.interval); -	  break; +	switch (gs.mode) { +	case MODE_MONITOR: +		return; +	case MODE_GLOBAL_RESTART: +		run_job(&gs.restart, "restart", gs.restart_command, 0, 1); +		break; +	case MODE_SEPARATE_RESTART: +		run_job(&dmn->restart, "restart", gs.restart_command, 0, 1); +		break; +	case MODE_PHASED_ZEBRA_RESTART: +		if (dmn != gs.special) { +			if ((gs.special->state == DAEMON_UP) +			    && (gs.phase == PHASE_NONE)) +				run_job(&dmn->restart, "restart", +					gs.restart_command, 0, 1); +			else +				zlog_debug +				    ("%s: postponing restart attempt because master %s daemon " +				     "not up [%s], or phased restart in progress", +				     dmn->name, gs.special->name, +				     state_str[gs.special->state]); +			break; +		} + +		/*FALLTHRU*/ +	case MODE_PHASED_ALL_RESTART: +		if ((gs.phase != PHASE_NONE) || gs.numpids) { +			if (gs.loglevel > LOG_DEBUG + 1) +				zlog_debug +				    ("postponing phased global restart: restart already in " +				     "progress [%s], or outstanding child processes [%d]", +				     phase_str[gs.phase], gs.numpids); +			break; +		} +		/* Is it too soon for a restart? */ +		{ +			struct timeval delay; +			if (time_elapsed(&delay, &gs.special->restart.time)-> +			    tv_sec < gs.special->restart.interval) { +				if (gs.loglevel > LOG_DEBUG + 1) +					zlog_debug +					    ("postponing phased global restart: " +					     "elapsed time %ld < retry interval %ld", +					     (long)delay.tv_sec, +					     gs.special->restart.interval); +				break; +			} +		} +		run_job(&gs.restart, "restart", gs.restart_command, 0, 1); +		break; +	default: +		zlog_err("error: unknown restart mode %d", gs.mode); +		break;  	} -    } -    run_job(&gs.restart,"restart",gs.restart_command,0,1); -    break; -  default: -    zlog_err("error: unknown restart mode %d",gs.mode); -    break; -  }  } -static int -wakeup_unresponsive(struct thread *t_wakeup) +static int wakeup_unresponsive(struct thread *t_wakeup)  { -  struct daemon *dmn = THREAD_ARG(t_wakeup); - -  dmn->t_wakeup = NULL; -  if (dmn->state != DAEMON_UNRESPONSIVE) -    zlog_err("%s: no longer unresponsive (now %s), " -	     "wakeup should have been cancelled!", -	     dmn->name,state_str[dmn->state]); -  else -    { -      SET_WAKEUP_UNRESPONSIVE(dmn); -      try_restart(dmn); -    } -  return 0; +	struct daemon *dmn = THREAD_ARG(t_wakeup); + +	dmn->t_wakeup = NULL; +	if (dmn->state != DAEMON_UNRESPONSIVE) +		zlog_err("%s: no longer unresponsive (now %s), " +			 "wakeup should have been cancelled!", +			 dmn->name, state_str[dmn->state]); +	else { +		SET_WAKEUP_UNRESPONSIVE(dmn); +		try_restart(dmn); +	} +	return 0;  } -static int -wakeup_no_answer(struct thread *t_wakeup) +static int wakeup_no_answer(struct thread *t_wakeup)  { -  struct daemon *dmn = THREAD_ARG(t_wakeup); - -  dmn->t_wakeup = NULL; -  dmn->state = DAEMON_UNRESPONSIVE; -  zlog_err("%s state -> unresponsive : no response yet to ping " -	   "sent %ld seconds ago",dmn->name,gs.timeout); -  if (gs.unresponsive_restart) -    { -      SET_WAKEUP_UNRESPONSIVE(dmn); -      try_restart(dmn); -    } -  return 0; +	struct daemon *dmn = THREAD_ARG(t_wakeup); + +	dmn->t_wakeup = NULL; +	dmn->state = DAEMON_UNRESPONSIVE; +	zlog_err("%s state -> unresponsive : no response yet to ping " +		 "sent %ld seconds ago", dmn->name, gs.timeout); +	if (gs.unresponsive_restart) { +		SET_WAKEUP_UNRESPONSIVE(dmn); +		try_restart(dmn); +	} +	return 0;  } -static int -wakeup_send_echo(struct thread *t_wakeup) +static int wakeup_send_echo(struct thread *t_wakeup)  { -  static const char echocmd[] = "echo " PING_TOKEN; -  ssize_t rc; -  struct daemon *dmn = THREAD_ARG(t_wakeup); - -  dmn->t_wakeup = NULL; -  if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) || -      ((size_t)rc != sizeof(echocmd))) -    { -      char why[100+sizeof(echocmd)]; -      snprintf(why,sizeof(why),"write '%s' returned %d instead of %u", -               echocmd,(int)rc,(u_int)sizeof(echocmd)); -      daemon_down(dmn,why); -    } -  else -    { -      gettimeofday(&dmn->echo_sent,NULL); -      dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout); -    } -  return 0; +	static const char echocmd[] = "echo " PING_TOKEN; +	ssize_t rc; +	struct daemon *dmn = THREAD_ARG(t_wakeup); + +	dmn->t_wakeup = NULL; +	if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0) || +	    ((size_t) rc != sizeof(echocmd))) { +		char why[100 + sizeof(echocmd)]; +		snprintf(why, sizeof(why), +			 "write '%s' returned %d instead of %u", echocmd, +			 (int)rc, (u_int) sizeof(echocmd)); +		daemon_down(dmn, why); +	} else { +		gettimeofday(&dmn->echo_sent, NULL); +		dmn->t_wakeup = +		    thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout); +	} +	return 0;  } -static void -sigint(void) +static void sigint(void)  { -  zlog_notice("Terminating on signal"); -  systemd_send_stopping (); -  exit(0); +	zlog_notice("Terminating on signal"); +	systemd_send_stopping(); +	exit(0);  } -static int -valid_command(const char *cmd) +static int valid_command(const char *cmd)  { -  char *p; +	char *p; -  return ((p = strchr(cmd,'%')) != NULL) && (*(p+1) == 's') && !strchr(p+1,'%'); +	return ((p = strchr(cmd, '%')) != NULL) && (*(p + 1) == 's') +	    && !strchr(p + 1, '%');  }  /* This is an ugly hack to circumvent problems with passing command-line     arguments that contain spaces.  The fix is to use a configuration file. */ -static char * -translate_blanks(const char *cmd, const char *blankstr) +static char *translate_blanks(const char *cmd, const char *blankstr)  { -  char *res; -  char *p; -  size_t bslen = strlen(blankstr); - -  if (!(res = strdup(cmd))) -    { -      perror("strdup"); -      exit(1); -    } -  while ((p = strstr(res,blankstr)) != NULL) -    { -      *p = ' '; -      if (bslen != 1) -	 memmove(p+1,p+bslen,strlen(p+bslen)+1); -    } -  return res; +	char *res; +	char *p; +	size_t bslen = strlen(blankstr); + +	if (!(res = strdup(cmd))) { +		perror("strdup"); +		exit(1); +	} +	while ((p = strstr(res, blankstr)) != NULL) { +		*p = ' '; +		if (bslen != 1) +			memmove(p + 1, p + bslen, strlen(p + bslen) + 1); +	} +	return res;  } -struct zebra_privs_t watchfrr_privs = -{ +struct zebra_privs_t watchfrr_privs = {  #ifdef VTY_GROUP -  .vty_group = VTY_GROUP, +	.vty_group = VTY_GROUP,  #endif  }; -int -main(int argc, char **argv) +int main(int argc, char **argv)  { -  const char *progname; -  int opt; -  int daemon_mode = 0; -  const char *pidfile = DEFAULT_PIDFILE; -  const char *special = "zebra"; -  const char *blankstr = NULL; -  static struct quagga_signal_t my_signals[] = -  { -    { -      .signal = SIGINT, -      .handler = sigint, -    }, -    { -      .signal = SIGTERM, -      .handler = sigint, -    }, -    { -      .signal = SIGCHLD, -      .handler = sigchild, -    }, -  }; - -  if ((progname = strrchr (argv[0], '/')) != NULL) -    progname++; -  else -    progname = argv[0]; - -  gs.restart.name = "all"; -  while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh", -			    longopts, 0)) != EOF) -    { -      switch (opt) -        { -	case 0: -	  break; -        case 'a': -	  if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART)) -	    { -	      fputs("Ambiguous operating mode selected.\n",stderr); -	      return usage(progname,1); -	    } -	  gs.mode = MODE_PHASED_ZEBRA_RESTART; -	  break; -        case 'A': -	  if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART)) -	    { -	      fputs("Ambiguous operating mode selected.\n",stderr); -	      return usage(progname,1); -	    } -	  gs.mode = MODE_PHASED_ALL_RESTART; -	  break; -	case 'b': -	  blankstr = optarg; -	  break; -        case 'd': -	  daemon_mode = 1; -	  break; -        case 'e': -	  gs.do_ping = 0; -	  break; -        case 'k': -	  if (!valid_command(optarg)) -	  { -	    fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n", -		    optarg); -	    return usage(progname,1); -	  } -	  gs.stop_command = optarg; -	  break; -	case 'l': -	  { -	    char garbage[3]; -	    if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) || -	        (gs.loglevel < LOG_EMERG)) -	      { -	        fprintf(stderr,"Invalid loglevel argument: %s\n",optarg); -		return usage(progname,1); -	      } -	  } -	  break; -	case 'm': -	  { -	    char garbage[3]; -	    if ((sscanf(optarg,"%ld%1s", -	    		&gs.min_restart_interval,garbage) != 1) || -	        (gs.min_restart_interval < 0)) -	      { -	        fprintf(stderr,"Invalid min_restart_interval argument: %s\n", -		        optarg); -		return usage(progname,1); -	      } -	  } -	  break; -	case 'M': -	  { -	    char garbage[3]; -	    if ((sscanf(optarg,"%ld%1s", -	    		&gs.max_restart_interval,garbage) != 1) || -	        (gs.max_restart_interval < 0)) -	      { -	        fprintf(stderr,"Invalid max_restart_interval argument: %s\n", -		        optarg); -		return usage(progname,1); -	      } -	  } -	  break; -	case 'i': -	  { -	    char garbage[3]; -	    int period; -	    if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) || -	        (gs.period < 1)) -	      { -	        fprintf(stderr,"Invalid interval argument: %s\n",optarg); -		return usage(progname,1); -	      } -	    gs.period = 1000*period; -	  } -	  break; -        case 'p': -	  pidfile = optarg; -	  break; -        case 'r': -	  if ((gs.mode == MODE_GLOBAL_RESTART) || -	      (gs.mode == MODE_SEPARATE_RESTART)) -	    { -	      fputs("Ambiguous operating mode selected.\n",stderr); -	      return usage(progname,1); -	    } -	  if (!valid_command(optarg)) -	  { -	    fprintf(stderr, -		    "Invalid restart command, must contain '%%s': %s\n", -		    optarg); -	    return usage(progname,1); -	  } -	  gs.restart_command = optarg; -	  if (gs.mode == MODE_MONITOR) -	    gs.mode = MODE_SEPARATE_RESTART; -	  break; -        case 'R': -	  if (gs.mode != MODE_MONITOR) -	    { -	      fputs("Ambiguous operating mode selected.\n",stderr); -	      return usage(progname,1); -	    } -	  if (strchr(optarg,'%')) -	    { -	      fprintf(stderr, -		      "Invalid restart-all arg, must not contain '%%s': %s\n", -		      optarg); -	      return usage(progname,1); -	    } -	  gs.restart_command = optarg; -	  gs.mode = MODE_GLOBAL_RESTART; -	  break; -        case 's': -	  if (!valid_command(optarg)) -	  { -	    fprintf(stderr,"Invalid start command, must contain '%%s': %s\n", -		    optarg); -	    return usage(progname,1); -	  } -	  gs.start_command = optarg; -	  break; -	case 'S': -	  gs.vtydir = optarg; -	  break; -	case 't': -	  { -	    char garbage[3]; -	    if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) || -	        (gs.timeout < 1)) -	      { -	        fprintf(stderr,"Invalid timeout argument: %s\n",optarg); -		return usage(progname,1); -	      } -	  } -	  break; -	case 'T': -	  { -	    char garbage[3]; -	    if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) || -	        (gs.restart_timeout < 1)) -	      { -	        fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg); -		return usage(progname,1); -	      } -	  } -	  break; -        case 'z': -	  gs.unresponsive_restart = 1; -	  break; -	case 'v': -	  printf ("%s version %s\n", progname, FRR_VERSION); -	  puts("Copyright 2004 Andrew J. Schorr"); -	  return 0; -        case 'h': -	  return usage(progname,0); -        default: -	  fputs("Invalid option.\n",stderr); -	  return usage(progname,1); -        } -    } -   -  if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR)) -    { -      fputs("Option -z requires a -r or -R restart option.\n",stderr); -      return usage(progname,1); -    } -  switch (gs.mode) -    { -    case MODE_MONITOR: -      if (gs.restart_command || gs.start_command || gs.stop_command) -        { -	  fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n", -		  mode_str[gs.mode]); -	  return usage(progname,1); +	const char *progname; +	int opt; +	int daemon_mode = 0; +	const char *pidfile = DEFAULT_PIDFILE; +	const char *special = "zebra"; +	const char *blankstr = NULL; +	static struct quagga_signal_t my_signals[] = { +		{ +			.signal = SIGINT, +			.handler = sigint, +		}, +		{ +			.signal = SIGTERM, +			.handler = sigint, +		}, +		{ +			.signal = SIGCHLD, +			.handler = sigchild, +		}, +	}; + +	if ((progname = strrchr(argv[0], '/')) != NULL) +		progname++; +	else +		progname = argv[0]; + +	gs.restart.name = "all"; +	while ((opt = +		getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh", +			    longopts, 0)) != EOF) { +		switch (opt) { +		case 0: +			break; +		case 'a': +			if ((gs.mode != MODE_MONITOR) +			    && (gs.mode != MODE_SEPARATE_RESTART)) { +				fputs("Ambiguous operating mode selected.\n", +				      stderr); +				return usage(progname, 1); +			} +			gs.mode = MODE_PHASED_ZEBRA_RESTART; +			break; +		case 'A': +			if ((gs.mode != MODE_MONITOR) +			    && (gs.mode != MODE_SEPARATE_RESTART)) { +				fputs("Ambiguous operating mode selected.\n", +				      stderr); +				return usage(progname, 1); +			} +			gs.mode = MODE_PHASED_ALL_RESTART; +			break; +		case 'b': +			blankstr = optarg; +			break; +		case 'd': +			daemon_mode = 1; +			break; +		case 'e': +			gs.do_ping = 0; +			break; +		case 'k': +			if (!valid_command(optarg)) { +				fprintf(stderr, +					"Invalid kill command, must contain '%%s': %s\n", +					optarg); +				return usage(progname, 1); +			} +			gs.stop_command = optarg; +			break; +		case 'l': +			{ +				char garbage[3]; +				if ((sscanf +				     (optarg, "%d%1s", &gs.loglevel, +				      garbage) != 1) +				    || (gs.loglevel < LOG_EMERG)) { +					fprintf(stderr, +						"Invalid loglevel argument: %s\n", +						optarg); +					return usage(progname, 1); +				} +			} +			break; +		case 'm': +			{ +				char garbage[3]; +				if ((sscanf(optarg, "%ld%1s", +					    &gs.min_restart_interval, +					    garbage) != 1) +				    || (gs.min_restart_interval < 0)) { +					fprintf(stderr, +						"Invalid min_restart_interval argument: %s\n", +						optarg); +					return usage(progname, 1); +				} +			} +			break; +		case 'M': +			{ +				char garbage[3]; +				if ((sscanf(optarg, "%ld%1s", +					    &gs.max_restart_interval, +					    garbage) != 1) +				    || (gs.max_restart_interval < 0)) { +					fprintf(stderr, +						"Invalid max_restart_interval argument: %s\n", +						optarg); +					return usage(progname, 1); +				} +			} +			break; +		case 'i': +			{ +				char garbage[3]; +				int period; +				if ((sscanf(optarg, "%d%1s", &period, garbage) +				     != 1) || (gs.period < 1)) { +					fprintf(stderr, +						"Invalid interval argument: %s\n", +						optarg); +					return usage(progname, 1); +				} +				gs.period = 1000 * period; +			} +			break; +		case 'p': +			pidfile = optarg; +			break; +		case 'r': +			if ((gs.mode == MODE_GLOBAL_RESTART) || +			    (gs.mode == MODE_SEPARATE_RESTART)) { +				fputs("Ambiguous operating mode selected.\n", +				      stderr); +				return usage(progname, 1); +			} +			if (!valid_command(optarg)) { +				fprintf(stderr, +					"Invalid restart command, must contain '%%s': %s\n", +					optarg); +				return usage(progname, 1); +			} +			gs.restart_command = optarg; +			if (gs.mode == MODE_MONITOR) +				gs.mode = MODE_SEPARATE_RESTART; +			break; +		case 'R': +			if (gs.mode != MODE_MONITOR) { +				fputs("Ambiguous operating mode selected.\n", +				      stderr); +				return usage(progname, 1); +			} +			if (strchr(optarg, '%')) { +				fprintf(stderr, +					"Invalid restart-all arg, must not contain '%%s': %s\n", +					optarg); +				return usage(progname, 1); +			} +			gs.restart_command = optarg; +			gs.mode = MODE_GLOBAL_RESTART; +			break; +		case 's': +			if (!valid_command(optarg)) { +				fprintf(stderr, +					"Invalid start command, must contain '%%s': %s\n", +					optarg); +				return usage(progname, 1); +			} +			gs.start_command = optarg; +			break; +		case 'S': +			gs.vtydir = optarg; +			break; +		case 't': +			{ +				char garbage[3]; +				if ((sscanf +				     (optarg, "%ld%1s", &gs.timeout, +				      garbage) != 1) || (gs.timeout < 1)) { +					fprintf(stderr, +						"Invalid timeout argument: %s\n", +						optarg); +					return usage(progname, 1); +				} +			} +			break; +		case 'T': +			{ +				char garbage[3]; +				if ((sscanf +				     (optarg, "%ld%1s", &gs.restart_timeout, +				      garbage) != 1) +				    || (gs.restart_timeout < 1)) { +					fprintf(stderr, +						"Invalid restart timeout argument: %s\n", +						optarg); +					return usage(progname, 1); +				} +			} +			break; +		case 'z': +			gs.unresponsive_restart = 1; +			break; +		case 'v': +			printf("%s version %s\n", progname, FRR_VERSION); +			puts("Copyright 2004 Andrew J. Schorr"); +			return 0; +		case 'h': +			return usage(progname, 0); +		default: +			fputs("Invalid option.\n", stderr); +			return usage(progname, 1); +		}  	} -      break; -    case MODE_GLOBAL_RESTART: -    case MODE_SEPARATE_RESTART: -      if (!gs.restart_command || gs.start_command || gs.stop_command) -        { -	  fprintf(stderr,"No start/kill commands needed in [%s] mode.\n", -		  mode_str[gs.mode]); -	  return usage(progname,1); + +	if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR)) { +		fputs("Option -z requires a -r or -R restart option.\n", +		      stderr); +		return usage(progname, 1);  	} -      break; -    case MODE_PHASED_ZEBRA_RESTART: -    case MODE_PHASED_ALL_RESTART: -      if (!gs.restart_command || !gs.start_command || !gs.stop_command) -        { -	  fprintf(stderr, -	  	  "Need start, kill, and restart commands in [%s] mode.\n", -		  mode_str[gs.mode]); -	  return usage(progname,1); +	switch (gs.mode) { +	case MODE_MONITOR: +		if (gs.restart_command || gs.start_command || gs.stop_command) { +			fprintf(stderr, +				"No kill/(re)start commands needed for %s mode.\n", +				mode_str[gs.mode]); +			return usage(progname, 1); +		} +		break; +	case MODE_GLOBAL_RESTART: +	case MODE_SEPARATE_RESTART: +		if (!gs.restart_command || gs.start_command || gs.stop_command) { +			fprintf(stderr, +				"No start/kill commands needed in [%s] mode.\n", +				mode_str[gs.mode]); +			return usage(progname, 1); +		} +		break; +	case MODE_PHASED_ZEBRA_RESTART: +	case MODE_PHASED_ALL_RESTART: +		if (!gs.restart_command || !gs.start_command +		    || !gs.stop_command) { +			fprintf(stderr, +				"Need start, kill, and restart commands in [%s] mode.\n", +				mode_str[gs.mode]); +			return usage(progname, 1); +		} +		break;  	} -      break; -    } - -  if (blankstr) -    { -      if (gs.restart_command) -        gs.restart_command = translate_blanks(gs.restart_command,blankstr); -      if (gs.start_command) -        gs.start_command = translate_blanks(gs.start_command,blankstr); -      if (gs.stop_command) -        gs.stop_command = translate_blanks(gs.stop_command,blankstr); -    } -       -  gs.restart.interval = gs.min_restart_interval; - -  zprivs_init (&watchfrr_privs); - -  master = thread_master_create(); -  cmd_init(-1); -  memory_init(); -  vty_init(master); -  watchfrr_vty_init(); -  vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH); - -  signal_init (master, array_size(my_signals), my_signals); -  srandom(time(NULL)); - -  { -    int i; -    struct daemon *tail = NULL; - -    for (i = optind; i < argc; i++) -      { -	struct daemon *dmn; - -	if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn)))) -	  { -	    fprintf(stderr,"calloc(1,%u) failed: %s\n", -		    (u_int)sizeof(*dmn), safe_strerror(errno)); -	    return 1; -	  } -	dmn->name = dmn->restart.name = argv[i]; -	dmn->state = DAEMON_INIT; -	gs.numdaemons++; -	gs.numdown++; -	dmn->fd = -1; -	dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn, -					      100+(random() % 900)); -	dmn->restart.interval = gs.min_restart_interval; -	if (tail) -	  tail->next = dmn; -	else -	  gs.daemons = dmn; -	tail = dmn; -	if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || -	     (gs.mode == MODE_PHASED_ALL_RESTART)) && -	    !strcmp(dmn->name,special)) -	  gs.special = dmn; -      } -  } -  if (!gs.daemons) -    { -      fputs("Must specify one or more daemons to monitor.\n",stderr); -      return usage(progname,1); -    } -  if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || -      (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special) -    { -      fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n", -	      mode_str[gs.mode],special); -      return usage(progname,1); -    } - -  zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0, -			  LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON); -  zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED); -  if (daemon_mode) -    { -      zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG)); -      if (daemon (0, 0) < 0) -	{ -	  fprintf(stderr, "Watchfrr daemon failed: %s", strerror(errno)); -	  exit (1); +	if (blankstr) { +		if (gs.restart_command) +			gs.restart_command = +			    translate_blanks(gs.restart_command, blankstr); +		if (gs.start_command) +			gs.start_command = +			    translate_blanks(gs.start_command, blankstr); +		if (gs.stop_command) +			gs.stop_command = +			    translate_blanks(gs.stop_command, blankstr);  	} -    } -  else -    zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG)); -  /* Make sure we're not already running. */ -  pid_output (pidfile); +	gs.restart.interval = gs.min_restart_interval; -  /* Announce which daemons are being monitored. */ -  { -    struct daemon *dmn; -    size_t len = 0; +	zprivs_init(&watchfrr_privs); -    for (dmn = gs.daemons; dmn; dmn = dmn->next) -      len += strlen(dmn->name)+1; +	master = thread_master_create(); +	cmd_init(-1); +	memory_init(); +	vty_init(master); +	watchfrr_vty_init(); +	vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH); -    { -      char buf[len+1]; -      char *p = buf; +	signal_init(master, array_size(my_signals), my_signals); +	srandom(time(NULL)); -      for (dmn = gs.daemons; dmn; dmn = dmn->next)  	{ -	  if (p != buf) -	    *p++ = ' '; -	  strcpy(p,dmn->name); -	  p += strlen(p); +		int i; +		struct daemon *tail = NULL; + +		for (i = optind; i < argc; i++) { +			struct daemon *dmn; + +			if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) { +				fprintf(stderr, "calloc(1,%u) failed: %s\n", +					(u_int) sizeof(*dmn), +					safe_strerror(errno)); +				return 1; +			} +			dmn->name = dmn->restart.name = argv[i]; +			dmn->state = DAEMON_INIT; +			gs.numdaemons++; +			gs.numdown++; +			dmn->fd = -1; +			dmn->t_wakeup = +			    thread_add_timer_msec(master, wakeup_init, dmn, +						  100 + (random() % 900)); +			dmn->restart.interval = gs.min_restart_interval; +			if (tail) +				tail->next = dmn; +			else +				gs.daemons = dmn; +			tail = dmn; + +			if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || +			     (gs.mode == MODE_PHASED_ALL_RESTART)) && +			    !strcmp(dmn->name, special)) +				gs.special = dmn; +		} +	} +	if (!gs.daemons) { +		fputs("Must specify one or more daemons to monitor.\n", stderr); +		return usage(progname, 1); +	} +	if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || +	     (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special) { +		fprintf(stderr, +			"In mode [%s], but cannot find master daemon %s\n", +			mode_str[gs.mode], special); +		return usage(progname, 1);  	} -      zlog_notice("%s %s watching [%s], mode [%s]", -		  progname, FRR_VERSION, buf, mode_str[gs.mode]); -    } -  } -  { -    struct thread thread; +	zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0, +				LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON); +	zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED); +	if (daemon_mode) { +		zlog_set_level(NULL, ZLOG_DEST_SYSLOG, +			       MIN(gs.loglevel, LOG_DEBUG)); +		if (daemon(0, 0) < 0) { +			fprintf(stderr, "Watchfrr daemon failed: %s", +				strerror(errno)); +			exit(1); +		} +	} else +		zlog_set_level(NULL, ZLOG_DEST_STDOUT, +			       MIN(gs.loglevel, LOG_DEBUG)); + +	/* Make sure we're not already running. */ +	pid_output(pidfile); + +	/* Announce which daemons are being monitored. */ +	{ +		struct daemon *dmn; +		size_t len = 0; + +		for (dmn = gs.daemons; dmn; dmn = dmn->next) +			len += strlen(dmn->name) + 1; + +		{ +			char buf[len + 1]; +			char *p = buf; + +			for (dmn = gs.daemons; dmn; dmn = dmn->next) { +				if (p != buf) +					*p++ = ' '; +				strcpy(p, dmn->name); +				p += strlen(p); +			} +			zlog_notice("%s %s watching [%s], mode [%s]", +				    progname, FRR_VERSION, buf, +				    mode_str[gs.mode]); +		} +	} -    while (thread_fetch (master, &thread)) -      thread_call (&thread); -  } +	{ +		struct thread thread; + +		while (thread_fetch(master, &thread)) +			thread_call(&thread); +	} -  systemd_send_stopping (); -  /* Not reached. */ -  return 0; +	systemd_send_stopping(); +	/* Not reached. */ +	return 0;  } diff --git a/watchfrr/watchfrr.h b/watchfrr/watchfrr.h index 4a479c72e6..719ad4dfd8 100644 --- a/watchfrr/watchfrr.h +++ b/watchfrr/watchfrr.h @@ -26,4 +26,4 @@ extern void watchfrr_vty_init(void);  extern pid_t integrated_write_pid;  extern void integrated_write_sigchld(int status); -#endif /* FRR_WATCHFRR_H */ +#endif				/* FRR_WATCHFRR_H */ diff --git a/watchfrr/watchfrr_vty.c b/watchfrr/watchfrr_vty.c index 4fffb020d7..bf3e1510a7 100644 --- a/watchfrr/watchfrr_vty.c +++ b/watchfrr/watchfrr_vty.c @@ -31,18 +31,18 @@  pid_t integrated_write_pid;  static int integrated_result_fd; -DEFUN (config_write_integrated, -	config_write_integrated_cmd, -	"write integrated", -	"Write running configuration to memory, network, or terminal\n" -	"Write integrated all-daemon Frr.conf file\n") +DEFUN(config_write_integrated, +      config_write_integrated_cmd, +      "write integrated", +      "Write running configuration to memory, network, or terminal\n" +      "Write integrated all-daemon Frr.conf file\n")  {  	pid_t child;  	sigset_t oldmask, sigmask;  	if (integrated_write_pid != -1) {  		vty_out(vty, "%% configuration write already in progress.%s", -				VTY_NEWLINE); +			VTY_NEWLINE);  		return CMD_WARNING;  	} @@ -61,20 +61,20 @@ DEFUN (config_write_integrated,  	child = fork();  	if (child == -1) {  		vty_out(vty, "%% configuration write fork() failed: %s.%s", -				safe_strerror(errno), VTY_NEWLINE); +			safe_strerror(errno), VTY_NEWLINE);  		sigprocmask(SIG_SETMASK, &oldmask, NULL);  		return CMD_WARNING;  	}  	if (child != 0) { -	/* note: the VTY won't write a command return value to vtysh;  the -	 * session temporarily enters an intentional "hang" state.  This is -	 * to make sure latency in vtysh doing the config write (several -	 * seconds is not rare to see) does not interfere with watchfrr's -	 * supervisor job. -	 * -	 * The fd is duplicated here so we don't need to hold a vty pointer -	 * (which could become invalid in the meantime). -	 */ +		/* note: the VTY won't write a command return value to vtysh;  the +		 * session temporarily enters an intentional "hang" state.  This is +		 * to make sure latency in vtysh doing the config write (several +		 * seconds is not rare to see) does not interfere with watchfrr's +		 * supervisor job. +		 * +		 * The fd is duplicated here so we don't need to hold a vty pointer +		 * (which could become invalid in the meantime). +		 */  		integrated_write_pid = child;  		integrated_result_fd = dup(vty->wfd);  		sigprocmask(SIG_SETMASK, &oldmask, NULL); @@ -93,7 +93,7 @@ DEFUN (config_write_integrated,  	/* unbuffered write; we just messed with stdout... */  	char msg[512];  	snprintf(msg, sizeof(msg), "error executing %s: %s\n", -			VTYSH_BIN_PATH, safe_strerror(errno)); +		 VTYSH_BIN_PATH, safe_strerror(errno));  	write(1, msg, strlen(msg));  	exit(1);  } @@ -104,11 +104,11 @@ void integrated_write_sigchld(int status)  	if (WIFEXITED(status)) {  		zlog_info("configuration write completed with exit code %d", -				WEXITSTATUS(status)); +			  WEXITSTATUS(status));  		reply[3] = WEXITSTATUS(status);  	} else if (WIFSIGNALED(status)) {  		zlog_warn("configuration write terminated by signal %d", -				WTERMSIG(status)); +			  WTERMSIG(status));  	} else {  		zlog_warn("configuration write terminated");  	}  | 
