SUBDIRS = lib qpb fpm @ZEBRA@ @LIBRFP@ @RFPTEST@ \
@BGPD@ @RIPD@ @RIPNGD@ @OSPFD@ @OSPF6D@ @LDPD@ \
- @ISISD@ @PIMD@ @WATCHQUAGGA@ @VTYSH@ @OSPFCLIENT@ @DOC@ m4 @pkgsrcdir@ \
+ @ISISD@ @PIMD@ @WATCHFRR@ @VTYSH@ @OSPFCLIENT@ @DOC@ m4 @pkgsrcdir@ \
redhat @SOLARIS@ tests tools cumulus
DIST_SUBDIRS = lib qpb fpm zebra bgpd ripd ripngd ospfd ospf6d ldpd \
- isisd watchquagga vtysh ospfclient doc m4 pkgsrc redhat tests \
+ isisd watchfrr vtysh ospfclient doc m4 pkgsrc redhat tests \
solaris pimd @LIBRFP@ @RFPTEST@ tools cumulus
EXTRA_DIST = aclocal.m4 SERVICES REPORTING-BUGS \
AS_HELP_STRING([--disable-ospf6d], [do not build ospf6d]))
AC_ARG_ENABLE(ldpd,
AS_HELP_STRING([--enable-ldpd], [build ldpd]))
-AC_ARG_ENABLE(watchquagga,
- AS_HELP_STRING([--disable-watchquagga], [do not build watchquagga]))
+AC_ARG_ENABLE(watchfrr,
+ AS_HELP_STRING([--disable-watchfrr], [do not build watchfrr]))
AC_ARG_ENABLE(isisd,
AS_HELP_STRING([--disable-isisd], [do not build isisd]))
AC_ARG_ENABLE(pimd,
fi
AM_CONDITIONAL(LDPD, test "x$LDPD" = "xldpd")
-if test "${enable_watchquagga}" = "no";then
- WATCHQUAGGA=""
+if test "${enable_watchfrr}" = "no";then
+ WATCHFRR=""
else
- WATCHQUAGGA="watchquagga"
+ WATCHFRR="watchfrr"
fi
-AM_CONDITIONAL(WATCHQUAGGA, test "x$WATCHQUAGGA" = "xwatchquagga")
+AM_CONDITIONAL(WATCHFRR, test "x$WATCHFRR" = "xwatchfrr")
OSPFCLIENT=""
if test "${enable_ospfapi}" != "no";then
AC_SUBST(OSPFD)
AC_SUBST(OSPF6D)
AC_SUBST(LDPD)
-AC_SUBST(WATCHQUAGGA)
+AC_SUBST(WATCHFRR)
AC_SUBST(ISISD)
AC_SUBST(PIMD)
AC_SUBST(SOLARIS)
AC_DEFINE_UNQUOTED(LDPD_SOCKET, "$frr_statedir/ldpd.sock",ldpd control socket)
AC_DEFINE_UNQUOTED(PATH_ISISD_PID, "$frr_statedir/isisd.pid",isisd PID)
AC_DEFINE_UNQUOTED(PATH_PIMD_PID, "$frr_statedir/pimd.pid",pimd PID)
-AC_DEFINE_UNQUOTED(PATH_WATCHQUAGGA_PID, "$frr_statedir/watchquagga.pid",watchquagga PID)
+AC_DEFINE_UNQUOTED(PATH_WATCHFRR_PID, "$frr_statedir/watchfrr.pid",watchfrr PID)
AC_DEFINE_UNQUOTED(ZEBRA_SERV_PATH, "$frr_statedir/zserv.api",zebra api socket)
AC_DEFINE_UNQUOTED(ZEBRA_VTYSH_PATH, "$frr_statedir/zebra.vty",zebra vty socket)
AC_DEFINE_UNQUOTED(RIP_VTYSH_PATH, "$frr_statedir/ripd.vty",rip vty socket)
AC_DEFINE_UNQUOTED(LDP_VTYSH_PATH, "$frr_statedir/ldpd.vty",ldpd vty socket)
AC_DEFINE_UNQUOTED(ISIS_VTYSH_PATH, "$frr_statedir/isisd.vty",isisd vty socket)
AC_DEFINE_UNQUOTED(PIM_VTYSH_PATH, "$frr_statedir/pimd.vty",pimd vty socket)
-AC_DEFINE_UNQUOTED(WATCHQUAGGA_VTYSH_PATH, "$frr_statedir/watchquagga.vty",watchquagga vty socket)
+AC_DEFINE_UNQUOTED(WATCHFRR_VTYSH_PATH, "$frr_statedir/watchfrr.vty",watchfrr vty socket)
AC_DEFINE_UNQUOTED(DAEMON_VTY_DIR, "$frr_statedir",daemon vty directory)
dnl autoconf does this, but it does it too late...
AC_MSG_RESULT($ac_cv_htonl_works)
AC_CONFIG_FILES([Makefile lib/Makefile qpb/Makefile zebra/Makefile ripd/Makefile
- ripngd/Makefile bgpd/Makefile ospfd/Makefile watchquagga/Makefile
+ ripngd/Makefile bgpd/Makefile ospfd/Makefile watchfrr/Makefile
ospf6d/Makefile ldpd/Makefile isisd/Makefile vtysh/Makefile
doc/Makefile ospfclient/Makefile tests/Makefile m4/Makefile
pimd/Makefile
doc/ripngd.8
doc/pimd.8
doc/vtysh.1
- doc/watchquagga.8
+ doc/watchfrr.8
doc/zebra.8
doc/quagga.1
pkgsrc/bgpd.sh pkgsrc/ospf6d.sh pkgsrc/ospfd.sh
# When using "vtysh" such a config file is also needed. It should be owned by
# group "quaggavty" and set to ug=rw,o= though. Check /etc/pam.d/quagga, too.
#
-# The watchquagga daemon is always started. Per default in monitoring-only but
+# The watchfrr daemon is always started. By default it runs in monitoring-only mode, but
# that can be changed via /etc/quagga/debian.conf.
#
zebra=no
pimd_options=" --daemon -A 127.0.0.1"
# The list of daemons to watch is automatically generated by the init script.
-watchquagga_enable=yes
-watchquagga_options=(-adz -r /usr/sbin/servicebBquaggabBrestartbB%s -s /usr/sbin/servicebBquaggabBstartbB%s -k /usr/sbin/servicebBquaggabBstopbB%s -b bB -t 30)
+watchfrr_enable=yes
+watchfrr_options=(-adz -r /usr/sbin/servicebBquaggabBrestartbB%s -s /usr/sbin/servicebBquaggabBstartbB%s -k /usr/sbin/servicebBquaggabBstopbB%s -b bB -t 30)
usr/share/man/man8/ripngd.8
usr/share/man/man8/zebra.8
usr/share/man/man8/isisd.8
-usr/share/man/man8/watchquagga.8
+usr/share/man/man8/watchfrr.8
usr/share/snmp/mibs/
cumulus/etc/* etc/
tools/*.service lib/systemd/system
doc/vtysh.1
doc/zebra.8
doc/isisd.8
-doc/watchquagga.8
+doc/watchfrr.8
-check process watchquagga with pidfile /var/run/quagga/watchquagga.pid
- start program = "/etc/init.d/quagga start watchquagga" with timeout 120 seconds
- stop program = "/etc/init.d/quagga stop watchquagga"
+check process watchfrr with pidfile /var/run/quagga/watchfrr.pid
+ start program = "/etc/init.d/quagga start watchfrr" with timeout 120 seconds
+ stop program = "/etc/init.d/quagga stop watchfrr"
if 3 restarts within 10 cycles then timeout
man_MANS += vtysh.1
endif
-if WATCHQUAGGA
-man_MANS += watchquagga.8
+if WATCHFRR
+man_MANS += watchfrr.8
endif
if ZEBRA
ripngd.8.in \
pimd.8.in \
vtysh.1.in \
- watchquagga.8.in \
+ watchfrr.8.in \
zebra.8.in \
quagga.1.in \
\
@end quotation
Since the @command{vtysh} command may be running as ordinary user on the
-system, configuration writes will be tried through @command{watchquagga},
+system, configuration writes will be tried through @command{watchfrr},
using the @command{write integrated} command internally. Since
-@command{watchquagga} is running as superuser, @command{vtysh} is able to
+@command{watchfrr} is running as superuser, @command{vtysh} is able to
ensure correct ownership and permissions on @file{Quagga.conf}.
-If @command{watchquagga} is not running or the configuration write fails,
+If @command{watchfrr} is not running or the configuration write fails,
@command{vtysh} will attempt to directly write to the file. This is likely
to fail if running as unprivileged user; alternatively it may leave the
file with incorrect owner or permissions.
Unconditionally (regardless of @command{service integrated-vtysh-config}
setting) write out integrated @file{Quagga.conf} file through
-@command{watchquagga}. If @command{watchquagga} is not running, this command
+@command{watchfrr}. If @command{watchfrr} is not running, this command
is unavailable.
@end deffn
--- /dev/null
+.\" This file was originally generated by help2man 1.36.
+.TH WATCHFRR 8 "July 2010"
+.SH NAME
+watchfrr \- a program to monitor the status of frr daemons
+.SH SYNOPSIS
+.B watchfrr
+.RI [ option ...]
+.IR daemon ...
+.br
+.B watchfrr
+.BR \-h " | " \-v
+.SH DESCRIPTION
+.B watchfrr
+is a watchdog program that monitors the status of supplied frr
+.IR daemon s
+and tries to restart them in case they become unresponsive or shut down.
+.PP
+To determine whether a daemon is running, it tries to connect to the
+daemon's VTY UNIX stream socket, and send echo commands to ensure the
+daemon responds. When the daemon crashes, EOF is received from the socket,
+so that watchfrr can react immediately.
+.PP
+This program can run in one of the following 5 modes:
+.TP
+.B Mode 0: monitor
+In this mode, the program serves as a monitor and reports status changes.
+.IP
+Example usage: watchfrr \-d zebra ospfd bgpd
+.TP
+.B Mode 1: global restart
+In this mode, whenever a daemon hangs or crashes, the given command is used
+to restart all watched daemons.
+.IP
+Example usage: watchfrr \-dz \e
+.br
+-R '/sbin/service zebra restart; /sbin/service ospfd restart' \e
+.br
+zebra ospfd
+.TP
+.B Mode 2: individual daemon restart
+In this mode, whenever a single daemon hangs or crashes, the given command
+is used to restart this daemon only.
+.IP
+Example usage: watchfrr \-dz \-r '/sbin/service %s restart' \e
+.br
+zebra ospfd bgpd
+.TP
+.B Mode 3: phased zebra restart
+In this mode, whenever a single daemon hangs or crashes, the given command
+is used to restart this daemon only. The only exception is the zebra
+daemon; in this case, the following steps are taken: (1) all other daemons
+are stopped, (2) zebra is restarted, and (3) other daemons are started
+again.
+.IP
+Example usage: watchfrr \-adz \-r '/sbin/service %s restart' \e
+.br
+\-s '/sbin/service %s start' \e
+.br
+\-k '/sbin/service %s stop' zebra ospfd bgpd
+.TP
+.B Mode 4: phased global restart for any failure
+In this mode, whenever a single daemon hangs or crashes, the following
+steps are taken: (1) all other daemons are stopped, (2) zebra is restarted,
+and (3) other daemons are started again.
+.IP
+Example usage: watchfrr \-Adz \-r '/sbin/service %s restart' \e
+.br
+\-s '/sbin/service %s start' \e
+.br
+\-k '/sbin/service %s stop' zebra ospfd bgpd
+.PP
+Important: It is believed that mode 2 (individual daemon restart) is not
+safe, and mode 3 (phased zebra restart) may not be safe with certain
+routing daemons.
+.PP
+In order to avoid restarting the daemons in quick succession, you can
+supply the
+.B \-m
+and
+.B \-M
+options to set the minimum and maximum delay between the restart commands.
+The minimum restart delay is recalculated each time a restart is attempted.
+If the time since the last restart attempt exceeds twice the value of
+.BR \-M ,
+the restart delay is set to the value of
+.BR \-m ,
+otherwise the interval is doubled (but capped at the value of
+.BR \-M ).
+.SH OPTIONS
+.TP
+.BR \-d ", " \-\-daemon
+Run in daemon mode. When supplied, error messages are sent to Syslog
+instead of standard output (stdout).
+.TP
+.BI \-S " directory" "\fR, \fB\-\-statedir " directory
+Set the VTY socket
+.I directory
+(the default value is "/var/run/frr").
+.TP
+.BR \-e ", " \-\-no\-echo
+Do not ping the daemons to test whether they respond. This option is
+necessary if one or more daemons do not support the echo command.
+.TP
+.BI \-l " level" "\fR, \fB\-\-loglevel " level
+Set the logging
+.I level
+(the default value is "6"). The value should range from 0 (LOG_EMERG) to 7
+(LOG_DEBUG), but higher numbers can be supplied if extra debugging messages
+are required.
+.TP
+.BI \-m " number" "\fR, \fB\-\-min\-restart\-interval " number
+Set the minimum
+.I number
+of seconds to wait between invocations of the daemon restart commands (the
+default value is "60").
+.TP
+.BI \-M " number" "\fR, \fB\-\-max\-restart\-interval " number
+Set the maximum
+.I number
+of seconds to wait between invocations of the daemon restart commands (the
+default value is "600").
+.TP
+.BI \-i " number" "\fR, \fB\-\-interval " number
+Set the status polling interval in seconds (the default value is "5").
+.TP
+.BI \-t " number" "\fR, \fB\-\-timeout " number
+Set the unresponsiveness timeout in seconds (the default value is "10").
+.TP
+.BI \-T " number" "\fR, \fB\-\-restart\-timeout " number
+Set the restart (kill) timeout in seconds (the default value is "20"). If
+any background jobs are still running after this period has elapsed, they
+will be killed.
+.TP
+.BI \-r " command" "\fR, \fB\-\-restart " command
+Supply a Bourne shell
+.I command
+to restart a single daemon. The command string should contain the '%s'
+placeholder to be substituted with the daemon name.
+.IP
+Note that
+.B \-r
+and
+.B \-R
+options are not compatible.
+.TP
+.BI \-s " command" "\fR, \fB\-\-start\-command " command
+Supply a Bourne shell
+.I command
+to start a single daemon. The command string should contain the '%s'
+placeholder to be substituted with the daemon name.
+.TP
+.BI \-k " command" "\fR, \fB\-\-kill\-command " command
+Supply a Bourne shell
+.I command
+to stop a single daemon. The command string should contain the '%s'
+placeholder to be substituted with the daemon name.
+.TP
+.BR \-R ", " \-\-restart\-all
+When one or more daemons are shut down, try to restart them using the
+Bourne shell command supplied on the command line.
+.IP
+Note that
+.B \-r
+and
+.B \-R
+options are not compatible.
+.TP
+.BR \-z ", " \-\-unresponsive\-restart
+When a daemon is in an unresponsive state, treat it as being shut down for
+restart purposes.
+.TP
+.BR \-a ", " \-\-all\-restart
+When zebra hangs or crashes, restart all daemons taking the following
+steps: (1) stop all other daemons, (2) restart zebra, and (3) start other
+daemons again.
+.IP
+Note that this option also requires
+.BR \-r ,
+.BR \-s ,
+and
+.B \-k
+options to be specified.
+.TP
+.BR \-A ", " \-\-always\-all\-restart
+When any daemon (i.e., not just zebra) hangs or crashes, restart all
+daemons taking the following steps: (1) stop all other daemons, (2) restart
+zebra, and (3) start other daemons again.
+.IP
+Note that this option also requires
+.BR \-r ,
+.BR \-s ,
+and
+.B \-k
+options to be specified.
+.TP
+.BI \-p " filename" "\fR, \fB\-\-pid\-file " filename
+Set the process identifier
+.I filename
+(the default value is "/var/run/frr/watchfrr.pid").
+.TP
+.BI \-b " string" "\fR, \fB\-\-blank\-string " string
+When the supplied
+.I string
+is found in any of the command line option arguments (i.e.,
+.BR \-r ,
+.BR \-s ,
+.BR \-k ,
+or
+.BR \-R ),
+replace it with a space.
+.IP
+This is an ugly hack to circumvent problems with passing command line
+arguments that contain embedded spaces.
+.TP
+.BR \-v ", " \-\-version
+Display the version information and exit.
+.TP
+.BR \-h ", " \-\-help
+Display the usage information and exit.
+.SH SEE ALSO
+.BR zebra (8),
+.BR bgpd (8),
+.BR isisd (8),
+.BR ospfd (8),
+.BR ospf6d (8),
+.BR ripd (8),
+.BR ripngd (8)
+.PP
+See the project homepage at <@PACKAGE_URL@>.
+.SH AUTHORS
+Copyright 2004 Andrew J. Schorr
+++ /dev/null
-.\" This file was originally generated by help2man 1.36.
-.TH WATCHQUAGGA 8 "July 2010"
-.SH NAME
-watchquagga \- a program to monitor the status of quagga daemons
-.SH SYNOPSIS
-.B watchquagga
-.RI [ option ...]
-.IR daemon ...
-.br
-.B watchquagga
-.BR \-h " | " \-v
-.SH DESCRIPTION
-.B watchquagga
-is a watchdog program that monitors the status of supplied quagga
-.IR daemon s
-and tries to restart them in case they become unresponsive or shut down.
-.PP
-To determine whether a daemon is running, it tries to connect to the
-daemon's VTY UNIX stream socket, and send echo commands to ensure the
-daemon responds. When the daemon crashes, EOF is received from the socket,
-so that watchquagga can react immediately.
-.PP
-This program can run in one of the following 5 modes:
-.TP
-.B Mode 0: monitor
-In this mode, the program serves as a monitor and reports status changes.
-.IP
-Example usage: watchquagga \-d zebra ospfd bgpd
-.TP
-.B Mode 1: global restart
-In this mode, whenever a daemon hangs or crashes, the given command is used
-to restart all watched daemons.
-.IP
-Example usage: watchquagga \-dz \e
-.br
--R '/sbin/service zebra restart; /sbin/service ospfd restart' \e
-.br
-zebra ospfd
-.TP
-.B Mode 2: individual daemon restart
-In this mode, whenever a single daemon hangs or crashes, the given command
-is used to restart this daemon only.
-.IP
-Example usage: watchquagga \-dz \-r '/sbin/service %s restart' \e
-.br
-zebra ospfd bgpd
-.TP
-.B Mode 3: phased zebra restart
-In this mode, whenever a single daemon hangs or crashes, the given command
-is used to restart this daemon only. The only exception is the zebra
-daemon; in this case, the following steps are taken: (1) all other daemons
-are stopped, (2) zebra is restarted, and (3) other daemons are started
-again.
-.IP
-Example usage: watchquagga \-adz \-r '/sbin/service %s restart' \e
-.br
-\-s '/sbin/service %s start' \e
-.br
-\-k '/sbin/service %s stop' zebra ospfd bgpd
-.TP
-.B Mode 4: phased global restart for any failure
-In this mode, whenever a single daemon hangs or crashes, the following
-steps are taken: (1) all other daemons are stopped, (2) zebra is restarted,
-and (3) other daemons are started again.
-.IP
-Example usage: watchquagga \-Adz \-r '/sbin/service %s restart' \e
-.br
-\-s '/sbin/service %s start' \e
-.br
-\-k '/sbin/service %s stop' zebra ospfd bgpd
-.PP
-Important: It is believed that mode 2 (individual daemon restart) is not
-safe, and mode 3 (phased zebra restart) may not be safe with certain
-routing daemons.
-.PP
-In order to avoid restarting the daemons in quick succession, you can
-supply the
-.B \-m
-and
-.B \-M
-options to set the minimum and maximum delay between the restart commands.
-The minimum restart delay is recalculated each time a restart is attempted.
-If the time since the last restart attempt exceeds twice the value of
-.BR \-M ,
-the restart delay is set to the value of
-.BR \-m ,
-otherwise the interval is doubled (but capped at the value of
-.BR \-M ).
-.SH OPTIONS
-.TP
-.BR \-d ", " \-\-daemon
-Run in daemon mode. When supplied, error messages are sent to Syslog
-instead of standard output (stdout).
-.TP
-.BI \-S " directory" "\fR, \fB\-\-statedir " directory
-Set the VTY socket
-.I directory
-(the default value is "/var/run/quagga").
-.TP
-.BR \-e ", " \-\-no\-echo
-Do not ping the daemons to test whether they respond. This option is
-necessary if one or more daemons do not support the echo command.
-.TP
-.BI \-l " level" "\fR, \fB\-\-loglevel " level
-Set the logging
-.I level
-(the default value is "6"). The value should range from 0 (LOG_EMERG) to 7
-(LOG_DEBUG), but higher number can be supplied if extra debugging messages
-are required.
-.TP
-.BI \-m " number" "\fR, \fB\-\-min\-restart\-interval " number
-Set the minimum
-.I number
-of seconds to wait between invocations of the daemon restart commands (the
-default value is "60").
-.TP
-.BI \-M " number" "\fR, \fB\-\-max\-restart\-interval " number
-Set the maximum
-.I number
-of seconds to wait between invocations of the daemon restart commands (the
-default value is "600").
-.TP
-.BI \-i " number" "\fR, \fB\-\-interval " number
-Set the status polling interval in seconds (the default value is "5").
-.TP
-.BI \-t " number" "\fR, \fB\-\-timeout " number
-Set the unresponsiveness timeout in seconds (the default value is "10").
-.TP
-.BI \-T " number" "\fR, \fB\-\-restart\-timeout " number
-Set the restart (kill) timeout in seconds (the default value is "20"). If
-any background jobs are still running after this period has elapsed, they
-will be killed.
-.TP
-.BI \-r " command" "\fR, \fB\-\-restart " command
-Supply a Bourne shell
-.I command
-to restart a single daemon. The command string should contain the '%s'
-placeholder to be substituted with the daemon name.
-.IP
-Note that
-.B \-r
-and
-.B \-R
-options are not compatible.
-.TP
-.BI \-s " command" "\fR, \fB\-\-start\-command " command
-Supply a Bourne shell
-.I command
-to start a single daemon. The command string should contain the '%s'
-placeholder to be substituted with the daemon name.
-.TP
-.BI \-k " command" "\fR, \fB\-\-kill\-command " command
-Supply a Bourne shell
-.I command
-to stop a single daemon. The command string should contain the '%s'
-placeholder to be substituted with the daemon name.
-.TP
-.BR \-R ", " \-\-restart\-all
-When one or more daemons are shut down, try to restart them using the
-Bourne shell command supplied on the command line.
-.IP
-Note that
-.B \-r
-and
-.B \-R
-options are not compatible.
-.TP
-.BR \-z ", " \-\-unresponsive\-restart
-When a daemon is in an unresponsive state, treat it as being shut down for
-the restart purposes.
-.TP
-.BR \-a ", " \-\-all\-restart
-When zebra hangs or crashes, restart all daemons taking the following
-steps: (1) stop all other daemons, (2) restart zebra, and (3) start other
-daemons again.
-.IP
-Note that this option also requires
-.BR \-r ,
-.BR \-s ,
-and
-.B \-k
-options to be specified.
-.TP
-.BR \-A ", " \-\-always\-all\-restart
-When any daemon (i.e., not just zebra) hangs or crashes, restart all
-daemons taking the following steps: (1) stop all other daemons, (2) restart
-zebra, and (3) start other daemons again.
-.IP
-Note that this option also requires
-.BR \-r ,
-.BR \-s ,
-and
-.B \-k
-options to be specified.
-.TP
-.BI \-p " filename" "\fR, \fB\-\-pid\-file " filename
-Set the process identifier
-.I filename
-(the default value is "/var/run/quagga/watchquagga.pid").
-.TP
-.BI \-b " string" "\fR, \fB\-\-blank\-string " string
-When the supplied
-.I string
-is found in any of the command line option arguments (i.e.,
-.BR \-r ,
-.BR \-s ,
-.BR \-k ,
-or
-.BR \-R ),
-replace it with a space.
-.IP
-This is an ugly hack to circumvent problems with passing the command line
-arguments containing embedded spaces.
-.TP
-.BR \-v ", " \-\-version
-Display the version information and exit.
-.TP
-.BR \-h ", " \-\-help
-Display the usage information and exit.
-.SH SEE ALSO
-.BR zebra (8),
-.BR bgpd (8),
-.BR isisd (8),
-.BR ospfd (8),
-.BR ospf6d (8),
-.BR ripd (8),
-.BR ripngd (8)
-.PP
-See the project homepage at <@PACKAGE_URL@>.
-.SH AUTHORS
-Copyright 2004 Andrew J. Schorr
*
* terminal = 0 -- vtysh / no logging, no config control
* terminal = 1 -- normal daemon
- * terminal = -1 -- watchquagga / no logging, but minimal config control */
+ * terminal = -1 -- watchfrr / no logging, but minimal config control */
void
cmd_init (int terminal)
{
"ISIS",
"PIM",
"RFP",
- "WATCHQUAGGA",
+ "WATCHFRR",
NULL,
};
ZLOG_ISIS,
ZLOG_PIM,
ZLOG_RFP,
- ZLOG_WATCHQUAGGA,
+ ZLOG_WATCHFRR,
} zlog_proto_t;
/* If maxlvl is set to ZLOG_DISABLED, then no messages will be sent
if (ret == CMD_SUSPEND)
break;
- /* warning: watchquagga hardcodes this result write */
+ /* warning: watchfrr hardcodes this result write */
header[3] = ret;
buffer_put(vty->obuf, header, 4);
ospf6d.init ospfd.init ldpd.init \
quagga.logrotate quagga.pam quagga.spec \
quagga.sysconfig ripd.init ripngd.init \
- watchquagga.init pimd.init zebra.init \
+ watchfrr.init pimd.init zebra.init \
README.rpm_build.md
%{!?quagga_user: %global quagga_user quagga }
%{!?vty_group: %global vty_group quaggavt }
%{!?with_fpm: %global with_fpm 0 }
- %{!?with_watchquagga: %global with_watchquagga 1 }
+ %{!?with_watchfrr: %global with_watchfrr 1 }
6. Build the RPM
chkconfig bgpd on
... etc
-2. If you want to run `watchquagga`, then configure `/etc/sysconfig/quagga`
- and uncomment the line with the daemons for `watchquagga` to monitor,
- then enable watchquagga
+2. If you want to run `watchfrr`, then configure `/etc/sysconfig/quagga`
+ and uncomment the line with the daemons for `watchfrr` to monitor,
+ then enable watchfrr
- chkconfig watchquagga on
+ chkconfig watchfrr on
3. Check your firewall / IPtables to make sure the routing protocols are
allowed.
systemctl enable bgpd
... etc
- Note: There is no watchquagga on systemd based systems. Systemd contains
+ Note: There is no watchfrr on systemd based systems. Systemd contains
the functionality of monitoring and restarting daemons.
2. Check your firewall / IPtables to make sure the routing protocols are
%{!?quagga_user: %global quagga_user quagga }
%{!?vty_group: %global vty_group quaggavty }
%{!?with_fpm: %global with_fpm 0 }
-%{!?with_watchquagga: %global with_watchquagga 1 }
+%{!?with_watchfrr: %global with_watchfrr 1 }
%{!?with_bgp_vnc: %global with_bgp_vnc 0 }
# path defines
# Check for init.d (upstart) as used in CentOS 6 or systemd (ie CentOS 7)
%{expand: %%global initsystem %(if [[ `/sbin/init --version 2> /dev/null` =~ upstart ]]; then echo upstart; elif [[ `systemctl` =~ -\.mount ]]; then echo systemd; fi)}
#
-# If init system is systemd, then always disable watchquagga
+# If init system is systemd, then always disable watchfrr
#
%if "%{initsystem}" == "systemd"
- # Note: For systems with systemd, watchquagga will NOT be built. Systemd
+ # Note: For systems with systemd, watchfrr will NOT be built. Systemd
# takes over the role of restarting crashed processes. Value will
# be overwritten with 0 below for systemd independent on the setting here
- %global with_watchquagga 1
+ %global with_watchfrr 1
%endif
# if FPM is enabled, then enable tcp_zebra as well
%define daemon_ldpd ""
%endif
-%if %{with_watchquagga}
-%define daemon_watchquagga watchquagga
+%if %{with_watchfrr}
+%define daemon_watchfrr watchfrr
%else
-%define daemon_watchquagga ""
+%define daemon_watchfrr ""
%endif
-%define all_daemons %{daemon_list} %{daemon_ldpd} %{daemon_watchquagga}
+%define all_daemons %{daemon_list} %{daemon_ldpd} %{daemon_watchfrr}
# allow build dir to be kept
%{!?keep_build: %global keep_build 0 }
%else
--disable-fpm \
%endif
-%if %{with_watchquagga}
- --enable-watchquagga \
+%if %{with_watchfrr}
+ --enable-watchfrr \
%else
- --disable-watchquagga \
+ --disable-watchfrr \
%endif
%if %{with_bgp_vnc}
--enable-bgp-vnc \
%endif
fi
done
-%if %{with_watchquagga}
- # No config for watchquagga - this is part of /etc/sysconfig/quagga
- rm -f %{_sysconfdir}/watchquagga.*
+%if %{with_watchfrr}
+ # No config for watchfrr - this is part of /etc/sysconfig/quagga
+ rm -f %{_sysconfdir}/watchfrr.*
%endif
if [ ! -e %{_sysconfdir}/vtysh.conf ]; then
# Rename restart flags for daemons handled specially.
running_zebra="$restart_zebra"
restart_zebra=no
- %if %{with_watchquagga}
- running_watchquagga="$restart_watchquagga"
- restart_watchquagga=no
+ %if %{with_watchfrr}
+ running_watchfrr="$restart_watchfrr"
+ restart_watchfrr=no
%endif
%if "%{initsystem}" == "systemd"
##
## Systemd Version
##
- # No watchquagga for systemd version
+ # No watchfrr for systemd version
#
# Stop all daemons other than zebra.
for daemon in %all_daemons ; do
##
## init.d Version
##
- %if %{with_watchquagga}
- # Stop watchquagga first.
- [ "$running_watchquagga" = yes ] && \
- /etc/rc.d/init.d/watchquagga stop >/dev/null 2>&1
+ %if %{with_watchfrr}
+ # Stop watchfrr first.
+ [ "$running_watchfrr" = yes ] && \
+ /etc/rc.d/init.d/watchfrr stop >/dev/null 2>&1
%endif
- # Stop all daemons other than zebra and watchquagga.
+ # Stop all daemons other than zebra and watchfrr.
for daemon in %all_daemons ; do
eval restart=\$restart_${daemon}
[ "$restart" = yes ] && \
# Restart zebra.
[ "$running_zebra" = yes ] && \
/etc/rc.d/init.d/zebra restart >/dev/null 2>&1
- # Start all daemons other than zebra and watchquagga.
+ # Start all daemons other than zebra and watchfrr.
for daemon in %all_daemons ; do
eval restart=\$restart_${daemon}
[ "$restart" = yes ] && \
/etc/rc.d/init.d/${daemon} start >/dev/null 2>&1
done
- %if %{with_watchquagga}
- # Start watchquagga last.
- # Avoid postun scriptlet error if watchquagga is not running.
- [ "$running_watchquagga" = yes ] && \
- /etc/rc.d/init.d/watchquagga start >/dev/null 2>&1 || :
- %endif
+ %if %{with_watchfrr}
+ # Start watchfrr last.
+ # Avoid postun scriptlet error if watchfrr is not running.
+ [ "$running_watchfrr" = yes ] && \
+ /etc/rc.d/init.d/watchfrr start >/dev/null 2>&1 || :
+ %endif
%endif
fi
%{_sbindir}/quagga-reload.py
%{_sbindir}/quagga-reload.pyc
%{_sbindir}/quagga-reload.pyo
-%if %{with_watchquagga}
- %{_sbindir}/watchquagga
+%if %{with_watchfrr}
+ %{_sbindir}/watchfrr
%endif
%{_sbindir}/ripngd
%{_sbindir}/ospf6d
%config %{_unitdir}/quagga.service
%else
%config /etc/rc.d/init.d/zebra
- %if %{with_watchquagga}
- %config /etc/rc.d/init.d/watchquagga
+ %if %{with_watchfrr}
+ %config /etc/rc.d/init.d/watchfrr
%endif
%config /etc/rc.d/init.d/ripd
%config /etc/rc.d/init.d/ospfd
- Add conditional logic to only build tex footnotes with supported texi2html
- Added pimd to files section and fix double listing of /var/lib*/quagga
- Numerous fixes to unify upstart/systemd startup into same spec file
-- Only allow use of watchquagga for non-systemd systems. no need with systemd
+- Only allow use of watchfrr for non-systemd systems. no need with systemd
* Fri Sep 4 2015 Paul Jakma <paul@jakma.org>
- buildreq updates
- daemonv6_list should contain only IPv6 daemons
* Wed Dec 22 2004 Andrew J. Schorr <ajschorr@alumni.princeton.edu>
-- watchquagga added
+- watchfrr added
- on upgrade, all daemons should be condrestart'ed
- on removal, all daemons should be stopped
PIMD_OPTS="-A 127.0.0.1"
LDPD_OPTS="-A 127.0.0.1"
-# Watchquagga configuration for LSB initscripts
+# Watchfrr configuration for LSB initscripts
#
# (Not needed with systemd: the service files are configured to automatically
# restart any daemon on failure. If zebra fails, all running daemons will be
# chkconfig: 2345 17 83
### BEGIN INIT INFO
-# Provides: watchquagga
-# Short-Description: Quagga watchdog
-# Description: Quagga watchdog for use with Zebra
+# Provides: watchfrr
+# Short-Description: Frr watchdog
+# Description: Frr watchdog for use with Zebra
### END INIT INFO
# source function library
# Get network config
. /etc/sysconfig/network
-# quagga command line options
+# frr command line options
. /etc/sysconfig/quagga
RETVAL=0
-PROG="watchquagga"
-cmd=watchquagga
-LOCK_FILE=/var/lock/subsys/watchquagga
+PROG="watchfrr"
+cmd=watchfrr
+LOCK_FILE=/var/lock/subsys/watchfrr
case "$1" in
start)
f none @sbindir@/ripngd=$DESTDIR/@sbindir@/ripngd 0755 root bin
f none @sbindir@/ospfd=$DESTDIR/@sbindir@/ospfd 0755 root bin
f none @sbindir@/ospf6d=$DESTDIR/@sbindir@/ospf6d 0755 root bin
-f none @sbindir@/watchquagga=$DESTDIR/@sbindir@/watchquagga 0755 root bin
+f none @sbindir@/watchfrr=$DESTDIR/@sbindir@/watchfrr 0755 root bin
d none @sysconfdir@=$DESTDIR/@sysconfdir@ 0711 @enable_user@ @enable_group@
f none @sysconfdir@/zebra.conf.sample=$DESTDIR/@sysconfdir@/zebra.conf.sample 0644 root bin
f none @sysconfdir@/bgpd.conf.sample=$DESTDIR/@sysconfdir@/bgpd.conf.sample 0644 root bin
# Local Daemon selection may be done by using /etc/quagga/daemons.
# See /usr/share/doc/quagga/README.Debian.gz for further information.
-# Keep zebra first and do not list watchquagga!
+# Keep zebra first and do not list watchfrr!
DAEMONS="zebra bgpd ripd ripngd ospfd ospf6d isisd babeld pimd"
MAX_INSTANCES=5
RELOAD_SCRIPT=/usr/lib/quagga/quagga-reload.py
# vtysh_enable has no config file nor binary so skip check.
# (Not sure why vtysh_enable is in this list but does not hurt)
- if [ $1 != "watchquagga" -a $1 != "vtysh_enable" ]; then
+ if [ $1 != "watchfrr" -a $1 != "vtysh_enable" ]; then
# check for daemon binary
if [ ! -x "$D_PATH/$1" ]; then return 1; fi
start()
{
ulimit -n $MAX_FDS
- if [ "$1" = "watchquagga" ]; then
+ if [ "$1" = "watchfrr" ]; then
- # We may need to restart watchquagga if new daemons are added and/or
+ # We may need to restart watchfrr if new daemons are added and/or
# removed
if started "$1" ; then
- stop watchquagga
+ stop watchfrr
else
- # Echo only once. watchquagga is printed in the stop above
+ # Echo only once. watchfrr is printed in the stop above
echo -n " $1"
fi
- if [ -e /var/run/quagga/watchquagga.started ] ; then
- rm /var/run/quagga/watchquagga.started
+ if [ -e /var/run/quagga/watchfrr.started ] ; then
+ rm /var/run/quagga/watchfrr.started
fi
${SSD} \
--start \
--pidfile=`pidfile $1` \
--exec "$D_PATH/$1" \
-- \
- "${watchquagga_options[@]}"
+ "${watchfrr_options[@]}"
for i in `seq 1 10`;
do
- if [ -e /var/run/quagga/watchquagga.started ] ; then
+ if [ -e /var/run/quagga/watchfrr.started ] ; then
break
else
sleep 1
# Converts values from /etc/quagga/daemons to all-numeric values.
convert_daemon_prios()
{
- for name in $DAEMONS zebra vtysh_enable watchquagga_enable; do
+ for name in $DAEMONS zebra vtysh_enable watchfrr_enable; do
# First, assign the value set by the user to $value
eval value=\${${name}:0:3}
done
}
-# Starts watchquagga for all wanted daemons.
-start_watchquagga()
+# Starts watchfrr for all wanted daemons.
+start_watchfrr()
{
local daemon_name
local daemon_prio
local daemon_inst
# Start the monitor daemon only if desired.
- if [ 0 -eq "$watchquagga_enable" ]; then
+ if [ 0 -eq "$watchfrr_enable" ]; then
return
fi
# Check variable type
- if ! declare -p watchquagga_options | grep -q '^declare \-a'; then
+ if ! declare -p watchfrr_options | grep -q '^declare \-a'; then
echo
- echo "ERROR: The variable watchquagga_options from /etc/quagga/debian.cnf must be a BASH array!"
+ echo "ERROR: The variable watchfrr_options from /etc/quagga/debian.cnf must be a BASH array!"
echo "ERROR: Please convert config file and restart!"
exit 1
fi
eval "inst_disable=\${${daemon_name}_${inst}}"
if [ -z ${inst_disable} ] || [ ${inst_disable} != 0 ]; then
if check_daemon $daemon_name $inst; then
- watchquagga_options+=("${daemon_name}-${inst}")
+ watchfrr_options+=("${daemon_name}-${inst}")
fi
fi
done
else
if check_daemon $daemon_name; then
- watchquagga_options+=($daemon_name)
+ watchfrr_options+=($daemon_name)
fi
fi
found_one=1
# Start if at least one daemon is activated.
if [ $found_one -eq 1 ]; then
echo -n "Starting Quagga monitor daemon:"
- start watchquagga
+ start watchfrr
echo "."
fi
}
-# Stopps watchquagga.
-stop_watchquagga()
+# Stops watchfrr.
+stop_watchfrr()
{
echo -n "Stopping Quagga monitor daemon:"
- stop watchquagga
+ stop watchfrr
echo "."
}
# Start all daemons
cd $C_PATH/
- if [ "$2" != "watchquagga" ]; then
+ if [ "$2" != "watchfrr" ]; then
start_prio 10 $dmn
fi
- start_watchquagga
+ start_watchfrr
vtysh_b
;;
stop|0)
# Stop all daemons at level '0' or 'stop'
- stop_watchquagga
- if [ "$dmn" != "watchquagga" ]; then
+ stop_watchfrr
+ if [ "$dmn" != "watchfrr" ]; then
[ -n "${dmn}" ] && eval "${dmn/-/_}=0"
stop_prio 0 $dmn
fi
ip route flush proto zebra
else
[ -n "$dmn" ] && eval "${dmn/-/_}=0"
- start_watchquagga
+ start_watchfrr
fi
;;
$(top_srcdir)/zebra/zebra_fpm.c \
$(top_srcdir)/zebra/zebra_ptm.c \
$(top_srcdir)/zebra/zebra_mpls_vty.c \
- $(top_srcdir)/watchquagga/watchquagga_vty.c \
+ $(top_srcdir)/watchfrr/watchfrr_vty.c \
$(BGP_VNC_RFAPI_SRC) $(BGP_VNC_RFP_SRC)
vtysh_cmd.c: $(vtysh_cmd_FILES) extract.pl
{ .fd = -1, .name = "bgpd", .flag = VTYSH_BGPD, .path = BGP_VTYSH_PATH, .next = NULL},
{ .fd = -1, .name = "isisd", .flag = VTYSH_ISISD, .path = ISIS_VTYSH_PATH, .next = NULL},
{ .fd = -1, .name = "pimd", .flag = VTYSH_PIMD, .path = PIM_VTYSH_PATH, .next = NULL},
- { .fd = -1, .name = "watchquagga", .flag = VTYSH_WATCHQUAGGA, .path = WATCHQUAGGA_VTYSH_PATH, .next = NULL},
+ { .fd = -1, .name = "watchfrr", .flag = VTYSH_WATCHFRR, .path = WATCHFRR_VTYSH_PATH, .next = NULL},
};
enum vtysh_write_integrated vtysh_write_integrated = WRITE_INTEGRATED_UNSPECIFIED;
{
ret = CMD_WARNING;
for (i = 0; i < array_size(vtysh_client); i++)
- if (vtysh_client[i].flag == VTYSH_WATCHQUAGGA)
+ if (vtysh_client[i].flag == VTYSH_WATCHFRR)
break;
if (i < array_size(vtysh_client) && vtysh_client[i].fd != -1)
ret = vtysh_client_execute (&vtysh_client[i], "write integrated", stdout);
if (ret != CMD_SUCCESS)
{
printf("\nWarning: attempting direct configuration write without "
- "watchquagga.\nFile permissions and ownership may be "
+ "watchfrr.\nFile permissions and ownership may be "
"incorrect, or write may fail.\n\n");
ret = vtysh_write_config_integrated();
}
#define VTYSH_ISISD 0x40
#define VTYSH_PIMD 0x100
#define VTYSH_LDPD 0x200
-#define VTYSH_WATCHQUAGGA 0x400
+#define VTYSH_WATCHFRR 0x400
/* commands in REALLYALL are crucial to correct vtysh operation */
#define VTYSH_REALLYALL ~0U
-/* watchquagga is not in ALL since library CLI functions should not be
+/* watchfrr is not in ALL since library CLI functions should not be
* run on it (logging & co. should stay in a fixed/frozen config, and
* things like prefix lists are not even initialised) */
#define VTYSH_ALL VTYSH_ZEBRA|VTYSH_RIPD|VTYSH_RIPNGD|VTYSH_OSPFD|VTYSH_OSPF6D|VTYSH_LDPD|VTYSH_BGPD|VTYSH_ISISD|VTYSH_PIMD
--- /dev/null
+Makefile
+Makefile.in
+*.o
+watchfrr
+tags
+TAGS
+.deps
+.nfs*
+*.lo
+*.la
+*.libs
+.arch-inventory
+.arch-ids
+*~
+*.loT
+
--- /dev/null
+## Process this file with Automake to create Makefile.in
+
+## Header search path: the source root plus the source and build lib dirs.
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib -I$(top_builddir)/lib
+## STATEDIR is handed to the C sources as a string literal.
+DEFS = @DEFS@ -DSTATEDIR=\"$(localstatedir)/\"
+
+AM_CFLAGS = $(WERROR)
+
+## watchfrr is installed as an sbin program.
+sbin_PROGRAMS = watchfrr
+
+noinst_HEADERS = watchfrr.h
+
+watchfrr_SOURCES = watchfrr.c watchfrr_vty.c
+## Linked against libzebra plus whatever configure substitutes for @LIBCAP@.
+watchfrr_LDADD = ../lib/libzebra.la @LIBCAP@
--- /dev/null
+/*
+ Monitor status of frr daemons and restart if necessary.
+
+ Copyright (C) 2004 Andrew J. Schorr
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <zebra.h>
+#include <thread.h>
+#include <log.h>
+#include <network.h>
+#include <sigevent.h>
+#include <lib/version.h>
+#include "command.h"
+#include "memory_vty.h"
+
+#include <getopt.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <memory.h>
+#include <systemd.h>
+
+#include "watchfrr.h"
+
+/* Smaller of two values; only defined when no MIN macro exists already.
+ Either argument may be evaluated twice. */
+#ifndef MIN
+#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
+#endif
+
+/* Macros to help randomize timers.
+ JITTER(X) is a pseudo-random offset in the range [-(X)/2, (X)-(X)/2];
+ FUZZY(X) is X perturbed by JITTER of one twentieth of X. */
+#define JITTER(X) ((random() % ((X)+1))-((X)/2))
+#define FUZZY(X) ((X)+JITTER((X)/20))
+
+/* Built-in defaults; the help text printed by usage() reports the period,
+ timeouts and restart intervals in seconds. */
+#define DEFAULT_PERIOD 5
+#define DEFAULT_TIMEOUT 10
+#define DEFAULT_RESTART_TIMEOUT 20
+#define DEFAULT_LOGLEVEL LOG_INFO
+#define DEFAULT_MIN_RESTART 60
+#define DEFAULT_MAX_RESTART 600
+/* Pid file: prefer the configure-provided path, else fall back to STATEDIR. */
+#ifdef PATH_WATCHFRR_PID
+#define DEFAULT_PIDFILE PATH_WATCHFRR_PID
+#else
+#define DEFAULT_PIDFILE STATEDIR "/watchfrr.pid"
+#endif
+/* Directory holding the daemons' "<name>.vty" sockets (see try_connect). */
+#ifdef DAEMON_VTY_DIR
+#define VTYDIR DAEMON_VTY_DIR
+#else
+#define VTYDIR STATEDIR
+#endif
+
+/* Token sent with the "echo" command and expected back in the reply. */
+#define PING_TOKEN "PING"
+
+/* Needs to be global, referenced somewhere inside libzebra. */
+struct thread_master *master;
+
+/* Operating mode; the values index mode_str[] and match the mode numbers
+ listed in the usage() help text. */
+typedef enum
+{
+ MODE_MONITOR = 0,
+ MODE_GLOBAL_RESTART,
+ MODE_SEPARATE_RESTART,
+ MODE_PHASED_ZEBRA_RESTART,
+ MODE_PHASED_ALL_RESTART
+} watch_mode_t;
+
+/* Human-readable names for watch_mode_t, indexed by the enum value. */
+static const char *mode_str[] =
+{
+ "monitor",
+ "global restart",
+ "individual daemon restart",
+ "phased zebra restart",
+ "phased global restart for any failure",
+};
+
+/* Steps of a phased restart, in the order phase_check() advances through
+ them; PHASE_NONE means no phased restart is in progress. */
+typedef enum
+{
+ PHASE_NONE = 0,
+ PHASE_STOPS_PENDING,
+ PHASE_WAITING_DOWN,
+ PHASE_ZEBRA_RESTART_PENDING,
+ PHASE_WAITING_ZEBRA_UP
+} restart_phase_t;
+
+/* Human-readable names for restart_phase_t, indexed by the enum value.
+ NOTE(review): the table has one more entry ("Start jobs running") than
+ restart_phase_t has enumerators -- confirm whether it is still needed. */
+static const char *phase_str[] =
+{
+ "None",
+ "Stop jobs running",
+ "Waiting for other daemons to come down",
+ "Zebra restart job running",
+ "Waiting for zebra to come up",
+ "Start jobs running",
+};
+
+/* How long a single phase may last before phase_hanging() aborts the
+ phased restart: three times the restart (kill) timeout. */
+#define PHASE_TIMEOUT (3*gs.restart_timeout)
+
+/* Bookkeeping for one background start/stop/restart job. */
+struct restart_info
+{
+ const char *name; /* substituted into the command; also used in logs */
+ const char *what; /* job type string passed to run_job() */
+ pid_t pid; /* pid of the running job, 0 when none is running */
+ struct timeval time; /* when the job was last started or reaped */
+ long interval; /* minimum seconds between non-forced jobs */
+ struct thread *t_kill; /* timer that fires restart_kill() */
+ int kills; /* number of signals restart_kill() has sent */
+};
+
+/* Process-wide configuration and state, initialised with the built-in
+ defaults below. */
+static struct global_state
+{
+ watch_mode_t mode;
+ restart_phase_t phase;
+ struct thread *t_phase_hanging; /* timer that fires phase_hanging() */
+ const char *vtydir; /* directory searched for "<name>.vty" */
+ long period; /* passed to thread_add_timer_msec(), hence 1000*seconds */
+ long timeout;
+ long restart_timeout;
+ long min_restart_interval;
+ long max_restart_interval;
+ int do_ping; /* non-zero: schedule echo pings once a daemon is up */
+ struct daemon *daemons; /* head of the singly linked daemon list */
+ const char *restart_command;
+ const char *start_command;
+ const char *stop_command;
+ struct restart_info restart; /* job state not tied to a single daemon */
+ int unresponsive_restart; /* non-zero: restart unresponsive daemons */
+ int loglevel;
+ struct daemon *special; /* points to zebra when doing phased restart */
+ int numdaemons;
+ int numpids; /* number of background jobs not yet reaped */
+ int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
+} gs = {
+ .mode = MODE_MONITOR,
+ .phase = PHASE_NONE,
+ .vtydir = VTYDIR,
+ .period = 1000*DEFAULT_PERIOD,
+ .timeout = DEFAULT_TIMEOUT,
+ .restart_timeout = DEFAULT_RESTART_TIMEOUT,
+ .loglevel = DEFAULT_LOGLEVEL,
+ .min_restart_interval = DEFAULT_MIN_RESTART,
+ .max_restart_interval = DEFAULT_MAX_RESTART,
+ .do_ping = 1,
+};
+
+/* Connection state of a monitored daemon; the values index state_str[]. */
+typedef enum
+{
+ DAEMON_INIT,
+ DAEMON_DOWN,
+ DAEMON_CONNECTING,
+ DAEMON_UP,
+ DAEMON_UNRESPONSIVE
+} daemon_state_t;
+
+/* True when the daemon counts as running: connected, whether or not it is
+ currently answering pings. */
+#define IS_UP(DMN) \
+ (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
+
+/* Human-readable names for daemon_state_t, indexed by the enum value. */
+static const char *state_str[] =
+{
+ "Init",
+ "Down",
+ "Connecting",
+ "Up",
+ "Unresponsive",
+};
+
+/* One monitored daemon; instances are chained through `next`. */
+struct daemon {
+ const char *name; /* used to build the "<vtydir>/<name>.vty" path */
+ daemon_state_t state;
+ int fd; /* vty socket, -1 once closed by daemon_down() */
+ struct timeval echo_sent; /* tv_sec == 0 means no echo is outstanding */
+ u_int connect_tries; /* bumped per try_connect(), reset by daemon_up() */
+ struct thread *t_wakeup; /* pending timer, if any */
+ struct thread *t_read; /* pending read handler, if any */
+ struct thread *t_write; /* pending connect-completion handler, if any */
+ struct daemon *next;
+ struct restart_info restart; /* per-daemon background job state */
+};
+
+/* Long command-line options handed to getopt_long() in main(); each maps to
+ the single-character option given in the last column. */
+static const struct option longopts[] =
+{
+ { "daemon", no_argument, NULL, 'd'},
+ { "statedir", required_argument, NULL, 'S'},
+ { "no-echo", no_argument, NULL, 'e'},
+ { "loglevel", required_argument, NULL, 'l'},
+ { "interval", required_argument, NULL, 'i'},
+ { "timeout", required_argument, NULL, 't'},
+ { "restart-timeout", required_argument, NULL, 'T'},
+ { "restart", required_argument, NULL, 'r'},
+ { "start-command", required_argument, NULL, 's'},
+ { "kill-command", required_argument, NULL, 'k'},
+ { "restart-all", required_argument, NULL, 'R'},
+ { "all-restart", no_argument, NULL, 'a'},
+ { "always-all-restart", no_argument, NULL, 'A'},
+ { "unresponsive-restart", no_argument, NULL, 'z'},
+ { "min-restart-interval", required_argument, NULL, 'm'},
+ { "max-restart-interval", required_argument, NULL, 'M'},
+ { "pid-file", required_argument, NULL, 'p'},
+ { "blank-string", required_argument, NULL, 'b'},
+ { "help", no_argument, NULL, 'h'},
+ { "version", no_argument, NULL, 'v'},
+ { NULL, 0, NULL, 0 }
+};
+
+static int try_connect(struct daemon *dmn);
+static int wakeup_send_echo(struct thread *t_wakeup);
+static void try_restart(struct daemon *dmn);
+static void phase_check(void);
+
+/* Print usage information and return `status` unchanged so callers can
+ write `return usage(progname, 1);`.
+ A non-zero status prints only a one-line hint to stderr; a zero status
+ prints the full description and option list to stdout. */
+static int
+usage(const char *progname, int status)
+{
+ if (status != 0)
+ fprintf(stderr, "Try `%s --help' for more information.\n", progname);
+ else
+ {
+ printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
+Watchdog program to monitor status of frr daemons and try to restart\n\
+them if they are down or unresponsive. It determines whether a daemon is\n\
+up based on whether it can connect to the daemon's vty unix stream socket.\n\
+It then repeatedly sends echo commands over that socket to determine whether\n\
+the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
+on the socket connection and know immediately that the daemon is down.\n\n\
+The daemons to be monitored should be listed on the command line.\n\n\
+This program can run in one of 5 modes:\n\n\
+0. Mode: %s.\n\
+ Just monitor and report on status changes. Example:\n\
+ %s -d zebra ospfd bgpd\n\n\
+1. Mode: %s.\n\
+ Whenever any daemon hangs or crashes, use the given command to restart\n\
+ them all. Example:\n\
+ %s -dz \\\n\
+ -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
+ zebra ospfd\n\n\
+2. Mode: %s.\n\
+ When any single daemon hangs or crashes, restart only the daemon that's\n\
+ in trouble using the supplied restart command. Example:\n\
+ %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
+3. Mode: %s.\n\
+ The same as the previous mode, except that there is special treatment when\n\
+ the zebra daemon is in trouble. In that case, a phased restart approach\n\
+ is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
+ daemons. Example:\n\
+ %s -adz -r '/sbin/service %%s restart' \\\n\
+ -s '/sbin/service %%s start' \\\n\
+ -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
+4. Mode: %s.\n\
+ This is the same as the previous mode, except that the phased restart\n\
+ procedure is used whenever any of the daemons hangs or crashes. Example:\n\
+ %s -Adz -r '/sbin/service %%s restart' \\\n\
+ -s '/sbin/service %%s start' \\\n\
+ -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
+As of this writing, it is believed that mode 2 [%s]\n\
+is not safe, and mode 3 [%s] may not be safe with some of the\n\
+routing daemons.\n\n\
+In order to avoid attempting to restart the daemons in a fast loop,\n\
+the -m and -M options allow you to control the minimum delay between\n\
+restart commands. The minimum restart delay is recalculated each time\n\
+a restart is attempted: if the time since the last restart attempt exceeds\n\
+twice the -M value, then the restart delay is set to the -m value.\n\
+Otherwise, the interval is doubled (but capped at the -M value).\n\n",
+ progname,mode_str[0],progname,mode_str[1],progname,mode_str[2],
+ progname,mode_str[3],progname,mode_str[4],progname,mode_str[2],
+ mode_str[3]);
+
+ printf("Options:\n\
+-d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
+ to syslog instead of stdout.\n\
+-S, --statedir Set the vty socket directory (default is %s)\n\
+-e, --no-echo Do not ping the daemons to test responsiveness (this\n\
+ option is necessary if the daemons do not support the\n\
+ echo command)\n\
+-l, --loglevel Set the logging level (default is %d).\n\
+ The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
+ but it can be set higher than %d if extra-verbose debugging\n\
+ messages are desired.\n\
+-m, --min-restart-interval\n\
+ Set the minimum seconds to wait between invocations of daemon\n\
+ restart commands (default is %d).\n\
+-M, --max-restart-interval\n\
+ Set the maximum seconds to wait between invocations of daemon\n\
+ restart commands (default is %d).\n\
+-i, --interval Set the status polling interval in seconds (default is %d)\n\
+-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
+-T, --restart-timeout\n\
+ Set the restart (kill) timeout in seconds (default is %d).\n\
+ If any background jobs are still running after this much\n\
+ time has elapsed, they will be killed.\n\
+-r, --restart Supply a Bourne shell command to use to restart a single\n\
+ daemon. The command string should include '%%s' where the\n\
+ name of the daemon should be substituted.\n\
+ Note that -r and -R are incompatible.\n\
+-s, --start-command\n\
+ Supply a Bourne shell command to use to start a single\n\
+ daemon. The command string should include '%%s' where the\n\
+ name of the daemon should be substituted.\n\
+-k, --kill-command\n\
+ Supply a Bourne shell command to use to stop a single\n\
+ daemon. The command string should include '%%s' where the\n\
+ name of the daemon should be substituted.\n\
+-R, --restart-all\n\
+ When one or more daemons is down, try to restart everything\n\
+ using the Bourne shell command supplied as the argument.\n\
+ Note that -r and -R are incompatible.\n\
+-z, --unresponsive-restart\n\
+ When a daemon is unresponsive, treat it as being down for\n\
+ restart purposes.\n\
+-a, --all-restart\n\
+ When zebra hangs or crashes, restart all daemons using\n\
+ this phased approach: 1. stop all other daemons; 2. restart\n\
+ zebra; 3. start other daemons. Requires -r, -s, and -k.\n\
+-A, --always-all-restart\n\
+ When any daemon (not just zebra) hangs or crashes, use the\n\
+ same phased restart mechanism described above for -a.\n\
+ Requires -r, -s, and -k.\n\
+-p, --pid-file Set process identifier file name\n\
+ (default is %s).\n\
+-b, --blank-string\n\
+ When the supplied argument string is found in any of the\n\
+ various shell command arguments (-r, -s, -k, or -R), replace\n\
+ it with a space. This is an ugly hack to circumvent problems\n\
+ passing command-line arguments with embedded spaces.\n\
+-v, --version Print program version\n\
+-h, --help Display this help and exit\n",
+ VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG,
+ DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART,
+ DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT,
+ DEFAULT_PIDFILE);
+ }
+
+ return status;
+}
+
+/* Fork and run `shell_cmd` through "/bin/sh -c" without waiting for it.
+ Returns the child's pid in the parent, or -1 when fork() fails. The
+ child is reaped later by the SIGCHLD handler. */
+static pid_t
+run_background(char *shell_cmd)
+{
+  pid_t child = fork();
+
+  if (child == -1)
+    {
+      zlog_err("fork failed, cannot run command [%s]: %s",
+               shell_cmd,safe_strerror(errno));
+      return -1;
+    }
+
+  if (child == 0)
+    {
+      /* Child process: hand over to the shell, never return. */
+      char shell[] = "sh";
+      char dashc[] = "-c";
+      char * const argv[4] = { shell, dashc, shell_cmd, NULL};
+
+      /* Own process group, so the whole job can be signalled at once. */
+      if (setpgid(0,0) < 0)
+        zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno));
+      execv("/bin/sh", argv);
+      /* Only reached when execv() itself failed. */
+      zlog_err("execv(/bin/sh -c '%s') failed: %s",
+               shell_cmd,safe_strerror(errno));
+      _exit(127);
+    }
+
+  /* Parent process: we will reap the child later. */
+  zlog_err("Forked background command [pid %d]: %s",(int)child,shell_cmd);
+  return child;
+}
+
+/* Store in *result the time elapsed between *start_time and now, with
+ tv_usec normalised to be non-negative, and return `result`. */
+static struct timeval *
+time_elapsed(struct timeval *result, const struct timeval *start_time)
+{
+  gettimeofday(result,NULL);
+  result->tv_sec -= start_time->tv_sec;
+  result->tv_usec -= start_time->tv_usec;
+  /* Borrow whole seconds until the microsecond part is non-negative. */
+  for (; result->tv_usec < 0; result->tv_sec--)
+    result->tv_usec += 1000000L;
+  return result;
+}
+
+/* Timer callback: the background job attached to this restart_info is
+ still running. Signal its process group -- SIGTERM the first time,
+ SIGKILL on every later expiry -- and re-arm the timer. */
+static int
+restart_kill(struct thread *t_kill)
+{
+  struct restart_info *restart = THREAD_ARG(t_kill);
+  struct timeval delay;
+  int signo = restart->kills ? SIGKILL : SIGTERM;
+
+  time_elapsed(&delay,&restart->time);
+  zlog_warn("Warning: %s %s child process %d still running after "
+            "%ld seconds, sending signal %d",
+            restart->what,restart->name,(int)restart->pid, (long)delay.tv_sec,
+            signo);
+  /* A negative pid addresses the whole process group of the job. */
+  kill(-restart->pid,signo);
+  restart->kills++;
+  restart->t_kill = thread_add_timer(master,restart_kill,restart,
+                                     gs.restart_timeout);
+  return 0;
+}
+
+/* Map the pid of a reaped child back to the restart_info that launched it,
+ or return NULL when the pid belongs to no known job.
+
+ The global job slot (gs.restart) is checked in every mode, not just in
+ MODE_GLOBAL_RESTART: try_restart() also launches jobs on gs.restart in
+ the phased modes, and those children must be recognised so that their
+ pid, kill timer and the gs.numpids count are cleaned up when reaped. */
+static struct restart_info *
+find_child(pid_t child)
+{
+  struct daemon *dmn;
+
+  if (gs.restart.pid == child)
+    return &gs.restart;
+
+  /* Per-daemon jobs are never used in global restart mode. */
+  if (gs.mode != MODE_GLOBAL_RESTART)
+    {
+      for (dmn = gs.daemons; dmn; dmn = dmn->next)
+        {
+          if (dmn->restart.pid == child)
+            return &dmn->restart;
+        }
+    }
+  return NULL;
+}
+
+/* SIGCHLD handler: reap one child with a non-blocking waitpid(), release
+ the bookkeeping of the job it belonged to, log how it ended, and give the
+ phased-restart state machine a chance to advance. */
+static void
+sigchild(void)
+{
+  int status;
+  const char *name = "(unknown)";
+  const char *what = "background";
+  struct restart_info *restart;
+  pid_t child = waitpid(-1,&status,WNOHANG);
+
+  if (child == -1)
+    {
+      zlog_err("waitpid failed: %s",safe_strerror(errno));
+      return;
+    }
+  if (child == 0)
+    {
+      zlog_warn("SIGCHLD received, but waitpid did not reap a child");
+      return;
+    }
+
+  /* The integrated-config writer is tracked separately from restart jobs. */
+  if (child == integrated_write_pid)
+    {
+      integrated_write_sigchld(status);
+      return;
+    }
+
+  restart = find_child(child);
+  if (restart == NULL)
+    {
+      zlog_err("waitpid returned status for an unknown child process %d",
+               (int)child);
+    }
+  else
+    {
+      name = restart->name;
+      what = restart->what;
+      restart->pid = 0;
+      gs.numpids--;
+      thread_cancel(restart->t_kill);
+      restart->t_kill = NULL;
+      /* Update restart time to reflect the time the command completed. */
+      gettimeofday(&restart->time,NULL);
+    }
+
+  if (WIFSTOPPED(status))
+    zlog_warn("warning: %s %s process %d is stopped",
+              what,name,(int)child);
+  else if (WIFSIGNALED(status))
+    zlog_warn("%s %s process %d terminated due to signal %d",
+              what,name,(int)child,WTERMSIG(status));
+  else if (WIFEXITED(status) && (WEXITSTATUS(status) != 0))
+    zlog_warn("%s %s process %d exited with non-zero status %d",
+              what,name,(int)child,WEXITSTATUS(status));
+  else if (WIFEXITED(status))
+    zlog_debug("%s %s process %d exited normally",what,name,(int)child);
+  else
+    zlog_err("cannot interpret %s %s process %d wait status 0x%x",
+             what,name,(int)child,status);
+  phase_check();
+}
+
+/* Launch a background job for `restart`.
+ `command` is used as a printf-style format into which restart->name is
+ substituted; `cmdtype` is only used for logging and stored as
+ restart->what. Unless `force` is set, the job is postponed while fewer
+ than restart->interval seconds have passed since restart->time. When
+ `update_interval` is set, the retry interval is recalculated afterwards.
+ Returns -1 when a previous job is still running or the job is postponed;
+ otherwise returns restart->pid, which is 0 when the fork failed. */
+static int
+run_job(struct restart_info *restart, const char *cmdtype, const char *command,
+ int force, int update_interval)
+{
+ struct timeval delay;
+
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("attempting to %s %s",cmdtype,restart->name);
+
+ /* Only one job per restart_info at a time. */
+ if (restart->pid)
+ {
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("cannot %s %s, previous pid %d still running",
+ cmdtype,restart->name,(int)restart->pid);
+ return -1;
+ }
+
+ /* Note: time_elapsed test must come before the force test, since we need
+ to make sure that delay is initialized for use below in updating the
+ restart interval. */
+ if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) &&
+ !force)
+ {
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("postponing %s %s: "
+ "elapsed time %ld < retry interval %ld",
+ cmdtype,restart->name,(long)delay.tv_sec,restart->interval);
+ return -1;
+ }
+
+ gettimeofday(&restart->time,NULL);
+ restart->kills = 0;
+ {
+ /* Sized for the command plus the daemon name and a terminating NUL. */
+ char cmd[strlen(command)+strlen(restart->name)+1];
+ snprintf(cmd,sizeof(cmd),command,restart->name);
+ if ((restart->pid = run_background(cmd)) > 0)
+ {
+ /* Arm the timer that signals the job if it runs too long. */
+ restart->t_kill = thread_add_timer(master,restart_kill,restart,
+ gs.restart_timeout);
+ restart->what = cmdtype;
+ gs.numpids++;
+ }
+ else
+ restart->pid = 0;
+ }
+
+ /* Calculate the new restart interval. */
+ if (update_interval)
+ {
+ /* Long quiet period: reset to the minimum; otherwise back off by
+ doubling, capped at the maximum. */
+ if (delay.tv_sec > 2*gs.max_restart_interval)
+ restart->interval = gs.min_restart_interval;
+ else if ((restart->interval *= 2) > gs.max_restart_interval)
+ restart->interval = gs.max_restart_interval;
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("restart %s interval is now %ld",
+ restart->name,restart->interval);
+ }
+ return restart->pid;
+}
+
+/* Register handle_read() for the daemon's socket and remember the thread. */
+#define SET_READ_HANDLER(DMN) \
+ (DMN)->t_read = thread_add_read(master,handle_read,(DMN),(DMN)->fd)
+
+/* The three macros below arm the daemon's t_wakeup timer with a randomised
+ period (FUZZY(gs.period), in milliseconds); they differ only in the
+ callback that fires. They overwrite t_wakeup without cancelling any
+ timer already stored there. */
+#define SET_WAKEUP_DOWN(DMN) \
+ (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN), \
+ FUZZY(gs.period))
+
+#define SET_WAKEUP_UNRESPONSIVE(DMN) \
+ (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \
+ FUZZY(gs.period))
+
+#define SET_WAKEUP_ECHO(DMN) \
+ (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \
+ FUZZY(gs.period))
+
+/* Timer callback for a daemon that is down: retry the connection, re-arm
+ the timer when that fails outright, and from the second attempt onwards
+ ask for a restart unless the daemon came up. */
+static int
+wakeup_down(struct thread *t_wakeup)
+{
+  struct daemon *dmn = THREAD_ARG(t_wakeup);
+  int rc;
+
+  dmn->t_wakeup = NULL;
+  rc = try_connect(dmn);
+  if (rc < 0)
+    {
+      SET_WAKEUP_DOWN(dmn);
+    }
+  if ((dmn->connect_tries <= 1) || (dmn->state == DAEMON_UP))
+    return 0;
+  try_restart(dmn);
+  return 0;
+}
+
+/* Timer callback for the very first connection attempt to a daemon; on
+ failure the daemon is marked down and the retry timer is armed. */
+static int
+wakeup_init(struct thread *t_wakeup)
+{
+  struct daemon *dmn = THREAD_ARG(t_wakeup);
+
+  dmn->t_wakeup = NULL;
+  if (try_connect(dmn) >= 0)
+    return 0;
+
+  SET_WAKEUP_DOWN(dmn);
+  zlog_err("%s state -> down : initial connection attempt failed",
+           dmn->name);
+  dmn->state = DAEMON_DOWN;
+  return 0;
+}
+
+/* Mark a daemon as down for the reason `why`: log the transition, close
+ its socket, cancel its pending threads, immediately try to reconnect and
+ let the phased-restart state machine re-evaluate. */
+static void
+daemon_down(struct daemon *dmn, const char *why)
+{
+  const int was_up = IS_UP(dmn);
+
+  if (was_up || (dmn->state == DAEMON_INIT))
+    {
+      zlog_err("%s state -> down : %s",dmn->name,why);
+    }
+  else if (gs.loglevel > LOG_DEBUG)
+    {
+      zlog_debug("%s still down : %s",dmn->name,why);
+    }
+
+  /* Only a daemon that was counted as running adds to the down count. */
+  if (was_up)
+    gs.numdown++;
+  dmn->state = DAEMON_DOWN;
+
+  if (dmn->fd >= 0)
+    {
+      close(dmn->fd);
+      dmn->fd = -1;
+    }
+  THREAD_OFF(dmn->t_read);
+  THREAD_OFF(dmn->t_write);
+  THREAD_OFF(dmn->t_wakeup);
+
+  if (try_connect(dmn) < 0)
+    {
+      SET_WAKEUP_DOWN(dmn);
+    }
+  phase_check();
+}
+
+/* Read callback for a daemon's vty socket.
+ The only data expected is the reply to an outstanding echo: anything
+ else (read error, EOF, unsolicited data, malformed reply) takes the
+ daemon down. A valid reply clears echo_sent, may move the daemon from
+ unresponsive back to up, and re-arms the read handler and echo timer. */
+static int
+handle_read(struct thread *t_read)
+{
+ struct daemon *dmn = THREAD_ARG(t_read);
+ /* Expected reply: the token, a newline, and zero padding up to
+ sizeof(resp) bytes. */
+ static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n";
+ char buf[sizeof(resp)+100];
+ ssize_t rc;
+ struct timeval delay;
+
+ dmn->t_read = NULL;
+ if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0)
+ {
+ char why[100];
+
+ if (ERRNO_IO_RETRY(errno))
+ {
+ /* Pretend it never happened. */
+ SET_READ_HANDLER(dmn);
+ return 0;
+ }
+ snprintf(why,sizeof(why),"unexpected read error: %s",
+ safe_strerror(errno));
+ daemon_down(dmn,why);
+ return 0;
+ }
+ if (rc == 0)
+ {
+ daemon_down(dmn,"read returned EOF");
+ return 0;
+ }
+ /* Data arrived although no echo is outstanding. */
+ if (!dmn->echo_sent.tv_sec)
+ {
+ char why[sizeof(buf)+100];
+ snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s",
+ (int)rc,(int)rc,buf);
+ daemon_down(dmn,why);
+ return 0;
+ }
+
+ /* We are expecting an echo response: is there any chance that the
+ response would not be returned entirely in the first read? That
+ seems inconceivable... */
+ if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp)))
+ {
+ char why[100+sizeof(buf)];
+ snprintf(why,sizeof(why),"read returned bad echo response of %d bytes "
+ "(expecting %u): %.*s",
+ (int)rc,(u_int)sizeof(resp),(int)rc,buf);
+ daemon_down(dmn,why);
+ return 0;
+ }
+
+ /* Valid reply: measure the round trip and mark the echo as answered. */
+ time_elapsed(&delay,&dmn->echo_sent);
+ dmn->echo_sent.tv_sec = 0;
+ if (dmn->state == DAEMON_UNRESPONSIVE)
+ {
+ if (delay.tv_sec < gs.timeout)
+ {
+ dmn->state = DAEMON_UP;
+ zlog_warn("%s state -> up : echo response received after %ld.%06ld "
+ "seconds", dmn->name,
+ (long)delay.tv_sec, (long)delay.tv_usec);
+ }
+ else
+ zlog_warn("%s: slow echo response finally received after %ld.%06ld "
+ "seconds", dmn->name,
+ (long)delay.tv_sec, (long)delay.tv_usec);
+ }
+ else if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("%s: echo response received after %ld.%06ld seconds",
+ dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
+
+ /* Keep listening and replace any pending timer with the next echo. */
+ SET_READ_HANDLER(dmn);
+ if (dmn->t_wakeup)
+ thread_cancel(dmn->t_wakeup);
+ SET_WAKEUP_ECHO(dmn);
+
+ return 0;
+}
+
+/*
+ * Wait until all daemons are up (gs.numdown == 0) before telling systemd
+ * that we are ready. The notification is sent at most once per process.
+ */
+static void
+daemon_send_ready (void)
+{
+  static int sent = 0;
+
+  if (sent || gs.numdown != 0)
+    return;
+
+#if defined (HAVE_CUMULUS)
+  {
+    /* Create (or truncate) the marker file. fopen() may fail, and calling
+       fclose() on a NULL stream is undefined, so check before closing. */
+    FILE *fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
+
+    if (fp)
+      fclose(fp);
+    else
+      zlog_warn("cannot create %s: %s",
+                DAEMON_VTY_DIR "/watchfrr.started",safe_strerror(errno));
+  }
+#endif
+  zlog_notice ("Watchfrr: Notifying Systemd we are up and running");
+  systemd_send_started(master, 0);
+  sent = 1;
+}
+
+/* Record that a connection to the daemon is established: update state and
+ counters, log the reason, possibly notify systemd, start pinging when
+ enabled, and let the phased-restart state machine re-evaluate. */
+static void
+daemon_up(struct daemon *dmn, const char *why)
+{
+  dmn->connect_tries = 0;
+  dmn->state = DAEMON_UP;
+  gs.numdown--;
+
+  zlog_notice("%s state -> up : %s",dmn->name,why);
+  daemon_send_ready();
+
+  if (gs.do_ping)
+    {
+      SET_WAKEUP_ECHO(dmn);
+    }
+  phase_check();
+}
+
+/* Write-ready callback for a non-blocking connect that was in progress:
+ read SO_ERROR to learn whether the connection succeeded and move the
+ daemon up or down accordingly. */
+static int
+check_connect(struct thread *t_write)
+{
+  struct daemon *dmn = THREAD_ARG(t_write);
+  int so_error;
+  socklen_t optlen = sizeof(so_error);
+  char reason[100];
+
+  dmn->t_write = NULL;
+  if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&so_error,&optlen) < 0)
+    {
+      zlog_warn("%s: check_connect: getsockopt failed: %s",
+                dmn->name,safe_strerror(errno));
+      daemon_down(dmn,"getsockopt failed checking connection success");
+    }
+  else if ((optlen == sizeof(so_error)) && so_error)
+    {
+      snprintf(reason,sizeof(reason),
+               "getsockopt reports that connection attempt failed: %s",
+               safe_strerror(so_error));
+      daemon_down(dmn,reason);
+    }
+  else
+    {
+      daemon_up(dmn,"delayed connect succeeded");
+    }
+  return 0;
+}
+
+/* Timer callback: a non-blocking connect has not completed within
+ gs.timeout seconds, so give up on it and take the daemon down. */
+static int
+wakeup_connect_hanging(struct thread *t_wakeup)
+{
+  char reason[100];
+  struct daemon *dmn = THREAD_ARG(t_wakeup);
+
+  dmn->t_wakeup = NULL;
+  snprintf(reason,sizeof(reason),
+           "connection attempt timed out after %ld seconds",
+           gs.timeout);
+  daemon_down(dmn,reason);
+  return 0;
+}
+
+/* Making connection to protocol daemon.
+ Attempts a non-blocking connect to "<vtydir>/<name>.vty".
+ Returns 1 when the connection succeeded immediately (daemon marked up),
+ 0 when the connect is still in progress (completion is handled by
+ check_connect, with wakeup_connect_hanging as the timeout), and -1 on
+ failure. Every call increments dmn->connect_tries. */
+static int
+try_connect(struct daemon *dmn)
+{
+ int sock;
+ struct sockaddr_un addr;
+ socklen_t len;
+
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("%s: attempting to connect",dmn->name);
+ dmn->connect_tries++;
+
+ memset (&addr, 0, sizeof (struct sockaddr_un));
+ addr.sun_family = AF_UNIX;
+ snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
+ gs.vtydir,dmn->name);
+#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
+ len = addr.sun_len = SUN_LEN(&addr);
+#else
+ len = sizeof (addr.sun_family) + strlen (addr.sun_path);
+#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
+
+ /* Quick check to see if we might succeed before we go to the trouble
+ of creating a socket. */
+ if (access(addr.sun_path, W_OK) < 0)
+ {
+ /* A missing socket just means the daemon is not running: stay quiet. */
+ if (errno != ENOENT)
+ zlog_err("%s: access to socket %s denied: %s",
+ dmn->name,addr.sun_path,safe_strerror(errno));
+ return -1;
+ }
+
+ if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
+ {
+ zlog_err("%s(%s): cannot make socket: %s",
+ __func__,addr.sun_path, safe_strerror(errno));
+ return -1;
+ }
+
+ if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0)
+ {
+ zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed",
+ __func__, addr.sun_path, sock);
+ close(sock);
+ return -1;
+ }
+
+ if (connect (sock, (struct sockaddr *) &addr, len) < 0)
+ {
+ if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK))
+ {
+ if (gs.loglevel > LOG_DEBUG)
+ zlog_debug("%s(%s): connect failed: %s",
+ __func__,addr.sun_path, safe_strerror(errno));
+ close (sock);
+ return -1;
+ }
+ /* Connect is pending: wait for writability or for the timeout. */
+ if (gs.loglevel > LOG_DEBUG)
+ zlog_debug("%s: connection in progress",dmn->name);
+ dmn->state = DAEMON_CONNECTING;
+ dmn->fd = sock;
+ dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd);
+ dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn,
+ gs.timeout);
+ SET_READ_HANDLER(dmn);
+ return 0;
+ }
+
+ /* Connected right away. */
+ dmn->fd = sock;
+ SET_READ_HANDLER(dmn);
+ daemon_up(dmn,"connect succeeded");
+ return 1;
+}
+
+/* Timer callback: the current restart phase has lasted PHASE_TIMEOUT
+ seconds without progress, so the phased restart is abandoned. */
+static int
+phase_hanging(struct thread *t_hanging)
+{
+  zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
+           phase_str[gs.phase],PHASE_TIMEOUT);
+  gs.t_phase_hanging = NULL;
+  gs.phase = PHASE_NONE;
+  return 0;
+}
+
+/* Enter `new_phase` and restart the watchdog timer that aborts the phased
+ restart when a phase takes longer than PHASE_TIMEOUT. */
+static void
+set_phase(restart_phase_t new_phase)
+{
+  gs.phase = new_phase;
+  if (gs.t_phase_hanging != NULL)
+    {
+      thread_cancel(gs.t_phase_hanging);
+    }
+  gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL,
+                                        PHASE_TIMEOUT);
+}
+
+/* Advance the phased-restart state machine as far as current conditions
+ allow. Each case breaks out when its exit condition is not yet met;
+ otherwise it moves to the next phase and deliberately falls through so
+ that several phases can complete within a single call. */
+static void
+phase_check(void)
+{
+ switch (gs.phase)
+ {
+ case PHASE_NONE:
+ break;
+ case PHASE_STOPS_PENDING:
+ /* Wait until every background job has been reaped. */
+ if (gs.numpids)
+ break;
+ zlog_info("Phased restart: all routing daemon stop jobs have completed.");
+ set_phase(PHASE_WAITING_DOWN);
+ /*FALLTHRU*/
+ case PHASE_WAITING_DOWN:
+ /* Every daemon except possibly the special one must be down. */
+ if (gs.numdown+IS_UP(gs.special) < gs.numdaemons)
+ break;
+ zlog_info("Phased restart: all routing daemons now down.");
+ run_job(&gs.special->restart,"restart",gs.restart_command,1,1);
+ set_phase(PHASE_ZEBRA_RESTART_PENDING);
+ /*FALLTHRU*/
+ case PHASE_ZEBRA_RESTART_PENDING:
+ /* Wait for the special daemon's restart job to be reaped. */
+ if (gs.special->restart.pid)
+ break;
+ zlog_info("Phased restart: %s restart job completed.",gs.special->name);
+ set_phase(PHASE_WAITING_ZEBRA_UP);
+ /*FALLTHRU*/
+ case PHASE_WAITING_ZEBRA_UP:
+ if (!IS_UP(gs.special))
+ break;
+ zlog_info("Phased restart: %s is now up.",gs.special->name);
+ {
+ /* Start every other daemon; forced, without touching its interval. */
+ struct daemon *dmn;
+ for (dmn = gs.daemons; dmn; dmn = dmn->next)
+ {
+ if (dmn != gs.special)
+ run_job(&dmn->restart,"start",gs.start_command,1,0);
+ }
+ }
+ gs.phase = PHASE_NONE;
+ THREAD_OFF(gs.t_phase_hanging);
+ zlog_notice("Phased global restart has completed.");
+ break;
+ }
+}
+
+/* Decide, according to the operating mode, whether and how to launch a
+ restart job for a daemon that is down or unresponsive. Never restarts
+ anything in monitor mode. */
+static void
+try_restart(struct daemon *dmn)
+{
+ switch (gs.mode)
+ {
+ case MODE_MONITOR:
+ return;
+ case MODE_GLOBAL_RESTART:
+ run_job(&gs.restart,"restart",gs.restart_command,0,1);
+ break;
+ case MODE_SEPARATE_RESTART:
+ run_job(&dmn->restart,"restart",gs.restart_command,0,1);
+ break;
+ case MODE_PHASED_ZEBRA_RESTART:
+ if (dmn != gs.special)
+ {
+ /* An ordinary daemon is only restarted on its own while the special
+ daemon is up and no phased restart is running. */
+ if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE))
+ run_job(&dmn->restart,"restart",gs.restart_command,0,1);
+ else
+ zlog_debug("%s: postponing restart attempt because master %s daemon "
+ "not up [%s], or phased restart in progress",
+ dmn->name,gs.special->name,state_str[gs.special->state]);
+ break;
+ }
+ /*FALLTHRU*/
+ case MODE_PHASED_ALL_RESTART:
+ if ((gs.phase != PHASE_NONE) || gs.numpids)
+ {
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("postponing phased global restart: restart already in "
+ "progress [%s], or outstanding child processes [%d]",
+ phase_str[gs.phase],gs.numpids);
+ break;
+ }
+ /* Is it too soon for a restart? */
+ {
+ struct timeval delay;
+ if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec <
+ gs.special->restart.interval)
+ {
+ if (gs.loglevel > LOG_DEBUG+1)
+ zlog_debug("postponing phased global restart: "
+ "elapsed time %ld < retry interval %ld",
+ (long)delay.tv_sec,gs.special->restart.interval);
+ break;
+ }
+ }
+ /* NOTE(review): this runs a single restart job on gs.restart and does
+ not call set_phase(); within this part of the file nothing else moves
+ gs.phase away from PHASE_NONE -- confirm this is intended. */
+ run_job(&gs.restart,"restart",gs.restart_command,0,1);
+ break;
+ default:
+ zlog_err("error: unknown restart mode %d",gs.mode);
+ break;
+ }
+}
+
+/* Periodic timer callback while a daemon is unresponsive: keep the timer
+ running and keep asking for a restart. Firing in any other state is
+ reported as an error. */
+static int
+wakeup_unresponsive(struct thread *t_wakeup)
+{
+  struct daemon *dmn = THREAD_ARG(t_wakeup);
+
+  dmn->t_wakeup = NULL;
+  if (dmn->state == DAEMON_UNRESPONSIVE)
+    {
+      SET_WAKEUP_UNRESPONSIVE(dmn);
+      try_restart(dmn);
+    }
+  else
+    {
+      zlog_err("%s: no longer unresponsive (now %s), "
+               "wakeup should have been cancelled!",
+               dmn->name,state_str[dmn->state]);
+    }
+  return 0;
+}
+
+/* Timer callback: no echo reply arrived within gs.timeout seconds. The
+ daemon becomes unresponsive and, when unresponsive restarts are enabled,
+ the restart machinery is kicked off. */
+static int
+wakeup_no_answer(struct thread *t_wakeup)
+{
+  struct daemon *dmn = THREAD_ARG(t_wakeup);
+
+  dmn->t_wakeup = NULL;
+  dmn->state = DAEMON_UNRESPONSIVE;
+  zlog_err("%s state -> unresponsive : no response yet to ping "
+           "sent %ld seconds ago",dmn->name,gs.timeout);
+  if (!gs.unresponsive_restart)
+    return 0;
+
+  SET_WAKEUP_UNRESPONSIVE(dmn);
+  try_restart(dmn);
+  return 0;
+}
+
+/* Timer callback: send the echo command (including its terminating NUL)
+ to the daemon. On success remember when it was sent and arm the
+ no-answer timeout; a failed or short write takes the daemon down. */
+static int
+wakeup_send_echo(struct thread *t_wakeup)
+{
+  static const char echocmd[] = "echo " PING_TOKEN;
+  struct daemon *dmn = THREAD_ARG(t_wakeup);
+  ssize_t rc;
+
+  dmn->t_wakeup = NULL;
+  rc = write(dmn->fd,echocmd,sizeof(echocmd));
+  if ((rc >= 0) && ((size_t)rc == sizeof(echocmd)))
+    {
+      gettimeofday(&dmn->echo_sent,NULL);
+      dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout);
+      return 0;
+    }
+
+  {
+    char why[100+sizeof(echocmd)];
+    snprintf(why,sizeof(why),"write '%s' returned %d instead of %u",
+             echocmd,(int)rc,(u_int)sizeof(echocmd));
+    daemon_down(dmn,why);
+  }
+  return 0;
+}
+
+/* Termination handler (installed for both SIGINT and SIGTERM in main):
+ log, tell systemd we are stopping, and exit successfully. */
+static void
+sigint(void)
+{
+  zlog_notice("Terminating on signal");
+
+  systemd_send_stopping ();
+
+  exit(0);
+}
+
+/* Return 1 when `cmd` contains exactly one '%' and that '%' is directly
+ followed by 's' (i.e. a single "%s" placeholder); otherwise return 0. */
+static int
+valid_command(const char *cmd)
+{
+  const char *pct = strchr(cmd,'%');
+
+  if (pct == NULL)
+    return 0;
+  if (pct[1] != 's')
+    return 0;
+  /* No further conversion may follow the placeholder. */
+  return strchr(pct+1,'%') == NULL;
+}
+
+/* This is an ugly hack to circumvent problems with passing command-line
+ arguments that contain spaces. The fix is to use a configuration file.
+
+ Returns a newly allocated copy of `cmd` in which every occurrence of
+ `blankstr` has been replaced by a single space; exits the process when
+ the copy cannot be allocated.
+
+ An empty `blankstr` or one that is exactly " " leaves the copy unchanged:
+ an empty needle would match forever while the string keeps growing past
+ its buffer, and replacing a space with a space never makes progress. */
+static char *
+translate_blanks(const char *cmd, const char *blankstr)
+{
+  char *res;
+  char *p;
+  size_t bslen = strlen(blankstr);
+
+  if (!(res = strdup(cmd)))
+    {
+      perror("strdup");
+      exit(1);
+    }
+  if ((bslen == 0) || (strcmp(blankstr," ") == 0))
+    return res;
+  while ((p = strstr(res,blankstr)) != NULL)
+    {
+      *p = ' ';
+      /* Close the gap left by the rest of the marker, NUL included. */
+      if (bslen != 1)
+        memmove(p+1,p+bslen,strlen(p+bslen)+1);
+    }
+  return res;
+}
+
+/* Privilege settings for watchfrr; only the vty group is filled in, and
+ only when VTY_GROUP is defined at build time. */
+struct zebra_privs_t watchfrr_privs =
+{
+#ifdef VTY_GROUP
+ .vty_group = VTY_GROUP,
+#endif
+};
+
+/*
+ * Program entry point.
+ *
+ * Steps, in order:
+ *  1. Parse the command-line options into the global state `gs`, rejecting
+ *     malformed values and ambiguous mode selections via usage().
+ *  2. Check that the set of start/kill/restart commands matches the
+ *     selected operating mode.
+ *  3. If -b was given, replace the blank-string in each command with spaces.
+ *  4. Initialise privileges, the thread master, the command/vty layer and
+ *     the signal handlers.
+ *  5. Build the list of monitored daemons from the remaining arguments,
+ *     scheduling an initial wakeup for each after a random 100-999 ms.
+ *  6. Open the log, optionally daemonize, write the pid file and announce
+ *     the daemons being watched.
+ *  7. Run the event loop.
+ *
+ * Returns the value of usage() (its status argument) on option errors,
+ * 1 when a daemon record cannot be allocated, and 0 for -v.
+ */
+int
+main(int argc, char **argv)
+{
+  const char *progname;
+  int opt;
+  int daemon_mode = 0;
+  const char *pidfile = DEFAULT_PIDFILE;
+  const char *special = "zebra";
+  const char *blankstr = NULL;
+  static struct quagga_signal_t my_signals[] =
+  {
+    {
+      .signal = SIGINT,
+      .handler = sigint,
+    },
+    {
+      .signal = SIGTERM,
+      .handler = sigint,
+    },
+    {
+      .signal = SIGCHLD,
+      .handler = sigchild,
+    },
+  };
+
+  /* Use the basename of argv[0] in messages. */
+  if ((progname = strrchr (argv[0], '/')) != NULL)
+    progname++;
+  else
+    progname = argv[0];
+
+  gs.restart.name = "all";
+  while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
+                            longopts, 0)) != EOF)
+    {
+      switch (opt)
+        {
+        case 0:
+          break;
+        case 'a':
+          if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
+            {
+              fputs("Ambiguous operating mode selected.\n",stderr);
+              return usage(progname,1);
+            }
+          gs.mode = MODE_PHASED_ZEBRA_RESTART;
+          break;
+        case 'A':
+          if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
+            {
+              fputs("Ambiguous operating mode selected.\n",stderr);
+              return usage(progname,1);
+            }
+          gs.mode = MODE_PHASED_ALL_RESTART;
+          break;
+        case 'b':
+          blankstr = optarg;
+          break;
+        case 'd':
+          daemon_mode = 1;
+          break;
+        case 'e':
+          gs.do_ping = 0;
+          break;
+        case 'k':
+          if (!valid_command(optarg))
+            {
+              fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n",
+                      optarg);
+              return usage(progname,1);
+            }
+          gs.stop_command = optarg;
+          break;
+        case 'l':
+          {
+            /* "%1s" catches trailing junk: a clean number converts exactly
+               one item.  The same idiom is used for every numeric option. */
+            char garbage[3];
+            if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) ||
+                (gs.loglevel < LOG_EMERG))
+              {
+                fprintf(stderr,"Invalid loglevel argument: %s\n",optarg);
+                return usage(progname,1);
+              }
+          }
+          break;
+        case 'm':
+          {
+            char garbage[3];
+            if ((sscanf(optarg,"%ld%1s",
+                        &gs.min_restart_interval,garbage) != 1) ||
+                (gs.min_restart_interval < 0))
+              {
+                fprintf(stderr,"Invalid min_restart_interval argument: %s\n",
+                        optarg);
+                return usage(progname,1);
+              }
+          }
+          break;
+        case 'M':
+          {
+            char garbage[3];
+            if ((sscanf(optarg,"%ld%1s",
+                        &gs.max_restart_interval,garbage) != 1) ||
+                (gs.max_restart_interval < 0))
+              {
+                fprintf(stderr,"Invalid max_restart_interval argument: %s\n",
+                        optarg);
+                return usage(progname,1);
+              }
+          }
+          break;
+        case 'i':
+          {
+            char garbage[3];
+            int period;
+            /* Validate the value just parsed, not the previously stored
+               gs.period, so that zero and negative intervals are refused. */
+            if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) ||
+                (period < 1))
+              {
+                fprintf(stderr,"Invalid interval argument: %s\n",optarg);
+                return usage(progname,1);
+              }
+            /* The option is given in seconds; gs.period holds milliseconds. */
+            gs.period = 1000*period;
+          }
+          break;
+        case 'p':
+          pidfile = optarg;
+          break;
+        case 'r':
+          if ((gs.mode == MODE_GLOBAL_RESTART) ||
+              (gs.mode == MODE_SEPARATE_RESTART))
+            {
+              fputs("Ambiguous operating mode selected.\n",stderr);
+              return usage(progname,1);
+            }
+          if (!valid_command(optarg))
+            {
+              fprintf(stderr,
+                      "Invalid restart command, must contain '%%s': %s\n",
+                      optarg);
+              return usage(progname,1);
+            }
+          gs.restart_command = optarg;
+          /* Keep a phased mode already selected by -a or -A. */
+          if (gs.mode == MODE_MONITOR)
+            gs.mode = MODE_SEPARATE_RESTART;
+          break;
+        case 'R':
+          if (gs.mode != MODE_MONITOR)
+            {
+              fputs("Ambiguous operating mode selected.\n",stderr);
+              return usage(progname,1);
+            }
+          if (strchr(optarg,'%'))
+            {
+              fprintf(stderr,
+                      "Invalid restart-all arg, must not contain '%%s': %s\n",
+                      optarg);
+              return usage(progname,1);
+            }
+          gs.restart_command = optarg;
+          gs.mode = MODE_GLOBAL_RESTART;
+          break;
+        case 's':
+          if (!valid_command(optarg))
+            {
+              fprintf(stderr,"Invalid start command, must contain '%%s': %s\n",
+                      optarg);
+              return usage(progname,1);
+            }
+          gs.start_command = optarg;
+          break;
+        case 'S':
+          gs.vtydir = optarg;
+          break;
+        case 't':
+          {
+            char garbage[3];
+            if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) ||
+                (gs.timeout < 1))
+              {
+                fprintf(stderr,"Invalid timeout argument: %s\n",optarg);
+                return usage(progname,1);
+              }
+          }
+          break;
+        case 'T':
+          {
+            char garbage[3];
+            if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) ||
+                (gs.restart_timeout < 1))
+              {
+                fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg);
+                return usage(progname,1);
+              }
+          }
+          break;
+        case 'z':
+          gs.unresponsive_restart = 1;
+          break;
+        case 'v':
+          printf ("%s version %s\n", progname, FRR_VERSION);
+          puts("Copyright 2004 Andrew J. Schorr");
+          return 0;
+        case 'h':
+          return usage(progname,0);
+        default:
+          fputs("Invalid option.\n",stderr);
+          return usage(progname,1);
+        }
+    }
+
+  if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR))
+    {
+      fputs("Option -z requires a -r or -R restart option.\n",stderr);
+      return usage(progname,1);
+    }
+  /* Each mode needs a specific subset of the three command templates. */
+  switch (gs.mode)
+    {
+    case MODE_MONITOR:
+      if (gs.restart_command || gs.start_command || gs.stop_command)
+        {
+          fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n",
+                  mode_str[gs.mode]);
+          return usage(progname,1);
+        }
+      break;
+    case MODE_GLOBAL_RESTART:
+    case MODE_SEPARATE_RESTART:
+      if (!gs.restart_command || gs.start_command || gs.stop_command)
+        {
+          fprintf(stderr,"No start/kill commands needed in [%s] mode.\n",
+                  mode_str[gs.mode]);
+          return usage(progname,1);
+        }
+      break;
+    case MODE_PHASED_ZEBRA_RESTART:
+    case MODE_PHASED_ALL_RESTART:
+      if (!gs.restart_command || !gs.start_command || !gs.stop_command)
+        {
+          fprintf(stderr,
+                  "Need start, kill, and restart commands in [%s] mode.\n",
+                  mode_str[gs.mode]);
+          return usage(progname,1);
+        }
+      break;
+    }
+
+  if (blankstr)
+    {
+      if (gs.restart_command)
+        gs.restart_command = translate_blanks(gs.restart_command,blankstr);
+      if (gs.start_command)
+        gs.start_command = translate_blanks(gs.start_command,blankstr);
+      if (gs.stop_command)
+        gs.stop_command = translate_blanks(gs.stop_command,blankstr);
+    }
+
+  gs.restart.interval = gs.min_restart_interval;
+
+  zprivs_init (&watchfrr_privs);
+
+  master = thread_master_create();
+  cmd_init(-1);
+  memory_init();
+  vty_init(master);
+  watchfrr_vty_init();
+  vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH);
+
+  signal_init (master, array_size(my_signals), my_signals);
+  srandom(time(NULL));
+
+  /* Every remaining argument names a daemon to monitor; append one record
+     per name to the gs.daemons list, preserving command-line order. */
+  {
+    int i;
+    struct daemon *tail = NULL;
+
+    for (i = optind; i < argc; i++)
+      {
+        struct daemon *dmn;
+
+        if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn))))
+          {
+            fprintf(stderr,"calloc(1,%u) failed: %s\n",
+                    (u_int)sizeof(*dmn), safe_strerror(errno));
+            return 1;
+          }
+        dmn->name = dmn->restart.name = argv[i];
+        dmn->state = DAEMON_INIT;
+        gs.numdaemons++;
+        gs.numdown++;
+        dmn->fd = -1;
+        /* First wakeup fires after a random 100-999 ms. */
+        dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn,
+                                              100+(random() % 900));
+        dmn->restart.interval = gs.min_restart_interval;
+        if (tail)
+          tail->next = dmn;
+        else
+          gs.daemons = dmn;
+        tail = dmn;
+
+        /* In the phased modes, remember which record is the master daemon. */
+        if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
+             (gs.mode == MODE_PHASED_ALL_RESTART)) &&
+            !strcmp(dmn->name,special))
+          gs.special = dmn;
+      }
+  }
+  if (!gs.daemons)
+    {
+      fputs("Must specify one or more daemons to monitor.\n",stderr);
+      return usage(progname,1);
+    }
+  if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
+       (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special)
+    {
+      fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n",
+              mode_str[gs.mode],special);
+      return usage(progname,1);
+    }
+
+  zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0,
+                          LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON);
+  zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
+  /* Log to syslog when daemonized, to stdout otherwise; the level handed to
+     zlog is capped at LOG_DEBUG even if gs.loglevel is higher. */
+  if (daemon_mode)
+    {
+      zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG));
+      if (daemon (0, 0) < 0)
+        {
+          fprintf(stderr, "Watchfrr daemon failed: %s", strerror(errno));
+          exit (1);
+        }
+    }
+  else
+    zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG));
+
+  /* Make sure we're not already running. */
+  pid_output (pidfile);
+
+  /* Announce which daemons are being monitored. */
+  {
+    struct daemon *dmn;
+    size_t len = 0;
+
+    /* One byte per name for the separating space. */
+    for (dmn = gs.daemons; dmn; dmn = dmn->next)
+      len += strlen(dmn->name)+1;
+
+    {
+      char buf[len+1];
+      char *p = buf;
+
+      for (dmn = gs.daemons; dmn; dmn = dmn->next)
+        {
+          if (p != buf)
+            *p++ = ' ';
+          strcpy(p,dmn->name);
+          p += strlen(p);
+        }
+      zlog_notice("%s %s watching [%s], mode [%s]",
+                  progname, FRR_VERSION, buf, mode_str[gs.mode]);
+    }
+  }
+
+  /* Event loop: fetch and run scheduled threads until none can be fetched. */
+  {
+    struct thread thread;
+
+    while (thread_fetch (master, &thread))
+      thread_call (&thread);
+  }
+
+  systemd_send_stopping ();
+  /* Not reached. */
+  return 0;
+}
--- /dev/null
+/*
+ Common definitions for watchfrr API socket.
+
+ Copyright (C) 2016 David Lamparter for NetDEF, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef FRR_WATCHFRR_H
+#define FRR_WATCHFRR_H
+
+/* Marks no integrated config write as running and installs the
+ * "write integrated" command in ENABLE_NODE. */
+extern void watchfrr_vty_init(void);
+
+/* PID of the vtysh child forked by "write integrated", or -1 when no
+ * configuration write is in progress. */
+extern pid_t integrated_write_pid;
+/* Completion hook for that child: takes its wait status, sends the result
+ * to the suspended vty session and resets integrated_write_pid to -1. */
+extern void integrated_write_sigchld(int status);
+
+#endif /* FRR_WATCHFRR_H */
--- /dev/null
+/*
+ watchfrr CLI functions.
+
+ Copyright (C) 2016 David Lamparter for NetDEF, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <zebra.h>
+#include <sys/wait.h>
+
+#include "memory.h"
+#include "log.h"
+#include "vty.h"
+#include "command.h"
+
+#include "watchfrr.h"
+
+pid_t integrated_write_pid;
+static int integrated_result_fd;
+
+/* "write integrated": fork a vtysh child ("vtysh -w") whose stdout/stderr
+ * are redirected to the requesting vty session.  The parent returns
+ * CMD_SUSPEND immediately; the command result is delivered later by
+ * integrated_write_sigchld() once the child has been reaped.  Returns
+ * CMD_WARNING if a write is already running or fork() fails.
+ */
+DEFUN (config_write_integrated,
+       config_write_integrated_cmd,
+       "write integrated",
+       "Write running configuration to memory, network, or terminal\n"
+       "Write integrated all-daemon Frr.conf file\n")
+{
+	pid_t child;
+	sigset_t oldmask, sigmask;
+
+	if (integrated_write_pid != -1) {
+		vty_out(vty, "%% configuration write already in progress.%s",
+			VTY_NEWLINE);
+		return CMD_WARNING;
+	}
+
+	/* flush now so buffered output is not duplicated into the child */
+	fflush(stdout);
+	fflush(stderr);
+
+	/* need to temporarily block SIGCHLD because it could arrive between
+	 * fork() call and setting the integrated_write_pid variable.  This
+	 * would mean the completion call gets lost and this hangs forever.
+	 */
+	sigemptyset(&oldmask);
+	sigemptyset(&sigmask);
+	sigaddset(&sigmask, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &sigmask, &oldmask);
+
+	child = fork();
+	if (child == -1) {
+		vty_out(vty, "%% configuration write fork() failed: %s.%s",
+			safe_strerror(errno), VTY_NEWLINE);
+		sigprocmask(SIG_SETMASK, &oldmask, NULL);
+		return CMD_WARNING;
+	}
+	if (child != 0) {
+		/* note: the VTY won't write a command return value to vtysh; the
+		 * session temporarily enters an intentional "hang" state.  This is
+		 * to make sure latency in vtysh doing the config write (several
+		 * seconds is not rare to see) does not interfere with watchfrr's
+		 * supervisor job.
+		 *
+		 * The fd is duplicated here so we don't need to hold a vty pointer
+		 * (which could become invalid in the meantime).
+		 */
+		integrated_write_pid = child;
+		integrated_result_fd = dup(vty->wfd);
+		sigprocmask(SIG_SETMASK, &oldmask, NULL);
+		return CMD_SUSPEND;
+	}
+
+	/* Child process from here on.
+	 *
+	 * The signal mask survives exec, so undo the SIGCHLD block taken
+	 * above; otherwise vtysh would start with SIGCHLD blocked. */
+	sigprocmask(SIG_SETMASK, &oldmask, NULL);
+
+	/* redirect stdout/stderr to vty session.  Note vty->wfd is marked
+	 * CLOEXEC, but dup2 will clear that flag. */
+	dup2(vty->wfd, 1);
+	dup2(vty->wfd, 2);
+
+	/* don't allow the user to pass parameters, we're root here!
+	 * should probably harden vtysh at some point too... */
+	execl(VTYSH_BIN_PATH, "vtysh", "-w", NULL);
+
+	/* only reached when execl() failed */
+	/* unbuffered write; we just messed with stdout... */
+	char msg[512];
+	snprintf(msg, sizeof(msg), "error executing %s: %s\n",
+		 VTYSH_BIN_PATH, safe_strerror(errno));
+	write(1, msg, strlen(msg));
+	/* _exit, not exit: a forked child must not run the parent's atexit
+	 * handlers or flush its inherited stdio buffers. */
+	_exit(1);
+}
+
+/* Completion handler for the "write integrated" child.
+ *
+ * status is the child's wait status.  A 4-byte reply is written to the
+ * duplicated vty fd: three zero bytes followed by the result code, which is
+ * the child's exit status on a normal exit and CMD_WARNING otherwise.  Any
+ * result other than CMD_SUCCESS is preceded by a human-readable failure
+ * line.  The fd is then closed and integrated_write_pid reset to -1.
+ */
+void integrated_write_sigchld(int status)
+{
+	static const char failure_text[] = "% Configuration write failed.\n";
+	uint8_t reply[4];
+
+	/* assume failure until the child proves otherwise */
+	reply[0] = 0;
+	reply[1] = 0;
+	reply[2] = 0;
+	reply[3] = CMD_WARNING;
+
+	if (WIFEXITED(status)) {
+		zlog_info("configuration write completed with exit code %d",
+			  WEXITSTATUS(status));
+		reply[3] = WEXITSTATUS(status);
+	} else if (WIFSIGNALED(status))
+		zlog_warn("configuration write terminated by signal %d",
+			  WTERMSIG(status));
+	else
+		zlog_warn("configuration write terminated");
+
+	/* failure might be silent in vtysh without this */
+	if (reply[3] != CMD_SUCCESS)
+		write(integrated_result_fd, failure_text,
+		      strlen(failure_text));
+
+	/* write errors are ignored on purpose: if the connection is broken
+	 * the return value is simply lost. */
+	write(integrated_result_fd, reply, sizeof(reply));
+	close(integrated_result_fd);
+
+	integrated_write_pid = -1;
+}
+
+/* Install the "write integrated" command in ENABLE_NODE and mark no
+ * configuration write as being in progress (integrated_write_pid == -1). */
+void watchfrr_vty_init(void)
+{
+	install_element(ENABLE_NODE, &config_write_integrated_cmd);
+	integrated_write_pid = -1;
+}
+++ /dev/null
-Makefile
-Makefile.in
-*.o
-watchquagga
-tags
-TAGS
-.deps
-.nfs*
-*.lo
-*.la
-*.libs
-.arch-inventory
-.arch-ids
-*~
-*.loT
-
+++ /dev/null
-## Process this file with Automake to create Makefile.in
-
-AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib -I$(top_builddir)/lib
-DEFS = @DEFS@ -DSTATEDIR=\"$(localstatedir)/\"
-
-AM_CFLAGS = $(WERROR)
-
-sbin_PROGRAMS = watchquagga
-
-noinst_HEADERS = watchquagga.h
-
-watchquagga_SOURCES = watchquagga.c watchquagga_vty.c
-watchquagga_LDADD = ../lib/libzebra.la @LIBCAP@
+++ /dev/null
-/*
- Monitor status of quagga daemons and restart if necessary.
-
- Copyright (C) 2004 Andrew J. Schorr
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <zebra.h>
-#include <thread.h>
-#include <log.h>
-#include <network.h>
-#include <sigevent.h>
-#include <lib/version.h>
-#include "command.h"
-#include "memory_vty.h"
-
-#include <getopt.h>
-#include <sys/un.h>
-#include <sys/wait.h>
-#include <memory.h>
-#include <systemd.h>
-
-#include "watchquagga.h"
-
-#ifndef MIN
-#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
-#endif
-
-/* Macros to help randomize timers. */
-#define JITTER(X) ((random() % ((X)+1))-((X)/2))
-#define FUZZY(X) ((X)+JITTER((X)/20))
-
-#define DEFAULT_PERIOD 5
-#define DEFAULT_TIMEOUT 10
-#define DEFAULT_RESTART_TIMEOUT 20
-#define DEFAULT_LOGLEVEL LOG_INFO
-#define DEFAULT_MIN_RESTART 60
-#define DEFAULT_MAX_RESTART 600
-#ifdef PATH_WATCHQUAGGA_PID
-#define DEFAULT_PIDFILE PATH_WATCHQUAGGA_PID
-#else
-#define DEFAULT_PIDFILE STATEDIR "/watchquagga.pid"
-#endif
-#ifdef DAEMON_VTY_DIR
-#define VTYDIR DAEMON_VTY_DIR
-#else
-#define VTYDIR STATEDIR
-#endif
-
-#define PING_TOKEN "PING"
-
-/* Needs to be global, referenced somewhere inside libzebra. */
-struct thread_master *master;
-
-typedef enum
-{
- MODE_MONITOR = 0,
- MODE_GLOBAL_RESTART,
- MODE_SEPARATE_RESTART,
- MODE_PHASED_ZEBRA_RESTART,
- MODE_PHASED_ALL_RESTART
-} watch_mode_t;
-
-static const char *mode_str[] =
-{
- "monitor",
- "global restart",
- "individual daemon restart",
- "phased zebra restart",
- "phased global restart for any failure",
-};
-
-typedef enum
-{
- PHASE_NONE = 0,
- PHASE_STOPS_PENDING,
- PHASE_WAITING_DOWN,
- PHASE_ZEBRA_RESTART_PENDING,
- PHASE_WAITING_ZEBRA_UP
-} restart_phase_t;
-
-static const char *phase_str[] =
-{
- "None",
- "Stop jobs running",
- "Waiting for other daemons to come down",
- "Zebra restart job running",
- "Waiting for zebra to come up",
- "Start jobs running",
-};
-
-#define PHASE_TIMEOUT (3*gs.restart_timeout)
-
-struct restart_info
-{
- const char *name;
- const char *what;
- pid_t pid;
- struct timeval time;
- long interval;
- struct thread *t_kill;
- int kills;
-};
-
-static struct global_state
-{
- watch_mode_t mode;
- restart_phase_t phase;
- struct thread *t_phase_hanging;
- const char *vtydir;
- long period;
- long timeout;
- long restart_timeout;
- long min_restart_interval;
- long max_restart_interval;
- int do_ping;
- struct daemon *daemons;
- const char *restart_command;
- const char *start_command;
- const char *stop_command;
- struct restart_info restart;
- int unresponsive_restart;
- int loglevel;
- struct daemon *special; /* points to zebra when doing phased restart */
- int numdaemons;
- int numpids;
- int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
-} gs = {
- .mode = MODE_MONITOR,
- .phase = PHASE_NONE,
- .vtydir = VTYDIR,
- .period = 1000*DEFAULT_PERIOD,
- .timeout = DEFAULT_TIMEOUT,
- .restart_timeout = DEFAULT_RESTART_TIMEOUT,
- .loglevel = DEFAULT_LOGLEVEL,
- .min_restart_interval = DEFAULT_MIN_RESTART,
- .max_restart_interval = DEFAULT_MAX_RESTART,
- .do_ping = 1,
-};
-
-typedef enum
-{
- DAEMON_INIT,
- DAEMON_DOWN,
- DAEMON_CONNECTING,
- DAEMON_UP,
- DAEMON_UNRESPONSIVE
-} daemon_state_t;
-
-#define IS_UP(DMN) \
- (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
-
-static const char *state_str[] =
-{
- "Init",
- "Down",
- "Connecting",
- "Up",
- "Unresponsive",
-};
-
-struct daemon {
- const char *name;
- daemon_state_t state;
- int fd;
- struct timeval echo_sent;
- u_int connect_tries;
- struct thread *t_wakeup;
- struct thread *t_read;
- struct thread *t_write;
- struct daemon *next;
- struct restart_info restart;
-};
-
-static const struct option longopts[] =
-{
- { "daemon", no_argument, NULL, 'd'},
- { "statedir", required_argument, NULL, 'S'},
- { "no-echo", no_argument, NULL, 'e'},
- { "loglevel", required_argument, NULL, 'l'},
- { "interval", required_argument, NULL, 'i'},
- { "timeout", required_argument, NULL, 't'},
- { "restart-timeout", required_argument, NULL, 'T'},
- { "restart", required_argument, NULL, 'r'},
- { "start-command", required_argument, NULL, 's'},
- { "kill-command", required_argument, NULL, 'k'},
- { "restart-all", required_argument, NULL, 'R'},
- { "all-restart", no_argument, NULL, 'a'},
- { "always-all-restart", no_argument, NULL, 'A'},
- { "unresponsive-restart", no_argument, NULL, 'z'},
- { "min-restart-interval", required_argument, NULL, 'm'},
- { "max-restart-interval", required_argument, NULL, 'M'},
- { "pid-file", required_argument, NULL, 'p'},
- { "blank-string", required_argument, NULL, 'b'},
- { "help", no_argument, NULL, 'h'},
- { "version", no_argument, NULL, 'v'},
- { NULL, 0, NULL, 0 }
-};
-
-static int try_connect(struct daemon *dmn);
-static int wakeup_send_echo(struct thread *t_wakeup);
-static void try_restart(struct daemon *dmn);
-static void phase_check(void);
-
-static int
-usage(const char *progname, int status)
-{
- if (status != 0)
- fprintf(stderr, "Try `%s --help' for more information.\n", progname);
- else
- {
- printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
-Watchdog program to monitor status of quagga daemons and try to restart\n\
-them if they are down or unresponsive. It determines whether a daemon is\n\
-up based on whether it can connect to the daemon's vty unix stream socket.\n\
-It then repeatedly sends echo commands over that socket to determine whether\n\
-the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
-on the socket connection and know immediately that the daemon is down.\n\n\
-The daemons to be monitored should be listed on the command line.\n\n\
-This program can run in one of 5 modes:\n\n\
-0. Mode: %s.\n\
- Just monitor and report on status changes. Example:\n\
- %s -d zebra ospfd bgpd\n\n\
-1. Mode: %s.\n\
- Whenever any daemon hangs or crashes, use the given command to restart\n\
- them all. Example:\n\
- %s -dz \\\n\
- -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
- zebra ospfd\n\n\
-2. Mode: %s.\n\
- When any single daemon hangs or crashes, restart only the daemon that's\n\
- in trouble using the supplied restart command. Example:\n\
- %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
-3. Mode: %s.\n\
- The same as the previous mode, except that there is special treatment when\n\
- the zebra daemon is in trouble. In that case, a phased restart approach\n\
- is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
- daemons. Example:\n\
- %s -adz -r '/sbin/service %%s restart' \\\n\
- -s '/sbin/service %%s start' \\\n\
- -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
-4. Mode: %s.\n\
- This is the same as the previous mode, except that the phased restart\n\
- procedure is used whenever any of the daemons hangs or crashes. Example:\n\
- %s -Adz -r '/sbin/service %%s restart' \\\n\
- -s '/sbin/service %%s start' \\\n\
- -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
-As of this writing, it is believed that mode 2 [%s]\n\
-is not safe, and mode 3 [%s] may not be safe with some of the\n\
-routing daemons.\n\n\
-In order to avoid attempting to restart the daemons in a fast loop,\n\
-the -m and -M options allow you to control the minimum delay between\n\
-restart commands. The minimum restart delay is recalculated each time\n\
-a restart is attempted: if the time since the last restart attempt exceeds\n\
-twice the -M value, then the restart delay is set to the -m value.\n\
-Otherwise, the interval is doubled (but capped at the -M value).\n\n",
- progname,mode_str[0],progname,mode_str[1],progname,mode_str[2],
- progname,mode_str[3],progname,mode_str[4],progname,mode_str[2],
- mode_str[3]);
-
- printf("Options:\n\
--d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
- to syslog instead of stdout.\n\
--S, --statedir Set the vty socket directory (default is %s)\n\
--e, --no-echo Do not ping the daemons to test responsiveness (this\n\
- option is necessary if the daemons do not support the\n\
- echo command)\n\
--l, --loglevel Set the logging level (default is %d).\n\
- The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
- but it can be set higher than %d if extra-verbose debugging\n\
- messages are desired.\n\
--m, --min-restart-interval\n\
- Set the minimum seconds to wait between invocations of daemon\n\
- restart commands (default is %d).\n\
--M, --max-restart-interval\n\
- Set the maximum seconds to wait between invocations of daemon\n\
- restart commands (default is %d).\n\
--i, --interval Set the status polling interval in seconds (default is %d)\n\
--t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
--T, --restart-timeout\n\
- Set the restart (kill) timeout in seconds (default is %d).\n\
- If any background jobs are still running after this much\n\
- time has elapsed, they will be killed.\n\
--r, --restart Supply a Bourne shell command to use to restart a single\n\
- daemon. The command string should include '%%s' where the\n\
- name of the daemon should be substituted.\n\
- Note that -r and -R are incompatible.\n\
--s, --start-command\n\
- Supply a Bourne shell to command to use to start a single\n\
- daemon. The command string should include '%%s' where the\n\
- name of the daemon should be substituted.\n\
--k, --kill-command\n\
- Supply a Bourne shell to command to use to stop a single\n\
- daemon. The command string should include '%%s' where the\n\
- name of the daemon should be substituted.\n\
--R, --restart-all\n\
- When one or more daemons is down, try to restart everything\n\
- using the Bourne shell command supplied as the argument.\n\
- Note that -r and -R are incompatible.\n\
--z, --unresponsive-restart\n\
- When a daemon is unresponsive, treat it as being down for\n\
- restart purposes.\n\
--a, --all-restart\n\
- When zebra hangs or crashes, restart all daemons using\n\
- this phased approach: 1. stop all other daemons; 2. restart\n\
- zebra; 3. start other daemons. Requires -r, -s, and -k.\n\
--A, --always-all-restart\n\
- When any daemon (not just zebra) hangs or crashes, use the\n\
- same phased restart mechanism described above for -a.\n\
- Requires -r, -s, and -k.\n\
--p, --pid-file Set process identifier file name\n\
- (default is %s).\n\
--b, --blank-string\n\
- When the supplied argument string is found in any of the\n\
- various shell command arguments (-r, -s, -k, or -R), replace\n\
- it with a space. This is an ugly hack to circumvent problems\n\
- passing command-line arguments with embedded spaces.\n\
--v, --version Print program version\n\
--h, --help Display this help and exit\n",
- VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG,
- DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART,
- DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT,
- DEFAULT_PIDFILE);
- }
-
- return status;
-}
-
-static pid_t
-run_background(char *shell_cmd)
-{
- pid_t child;
-
- switch (child = fork())
- {
- case -1:
- zlog_err("fork failed, cannot run command [%s]: %s",
- shell_cmd,safe_strerror(errno));
- return -1;
- case 0:
- /* Child process. */
- /* Use separate process group so child processes can be killed easily. */
- if (setpgid(0,0) < 0)
- zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno));
- {
- char shell[] = "sh";
- char dashc[] = "-c";
- char * const argv[4] = { shell, dashc, shell_cmd, NULL};
- execv("/bin/sh", argv);
- zlog_err("execv(/bin/sh -c '%s') failed: %s",
- shell_cmd,safe_strerror(errno));
- _exit(127);
- }
- default:
- /* Parent process: we will reap the child later. */
- zlog_err("Forked background command [pid %d]: %s",(int)child,shell_cmd);
- return child;
- }
-}
-
-static struct timeval *
-time_elapsed(struct timeval *result, const struct timeval *start_time)
-{
- gettimeofday(result,NULL);
- result->tv_sec -= start_time->tv_sec;
- result->tv_usec -= start_time->tv_usec;
- while (result->tv_usec < 0)
- {
- result->tv_usec += 1000000L;
- result->tv_sec--;
- }
- return result;
-}
-
-static int
-restart_kill(struct thread *t_kill)
-{
- struct restart_info *restart = THREAD_ARG(t_kill);
- struct timeval delay;
-
- time_elapsed(&delay,&restart->time);
- zlog_warn("Warning: %s %s child process %d still running after "
- "%ld seconds, sending signal %d",
- restart->what,restart->name,(int)restart->pid, (long)delay.tv_sec,
- (restart->kills ? SIGKILL : SIGTERM));
- kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM));
- restart->kills++;
- restart->t_kill = thread_add_timer(master,restart_kill,restart,
- gs.restart_timeout);
- return 0;
-}
-
-static struct restart_info *
-find_child(pid_t child)
-{
- if (gs.mode == MODE_GLOBAL_RESTART)
- {
- if (gs.restart.pid == child)
- return &gs.restart;
- }
- else
- {
- struct daemon *dmn;
- for (dmn = gs.daemons; dmn; dmn = dmn->next)
- {
- if (dmn->restart.pid == child)
- return &dmn->restart;
- }
- }
- return NULL;
-}
-
-static void
-sigchild(void)
-{
- pid_t child;
- int status;
- const char *name;
- const char *what;
- struct restart_info *restart;
-
- switch (child = waitpid(-1,&status,WNOHANG))
- {
- case -1:
- zlog_err("waitpid failed: %s",safe_strerror(errno));
- return;
- case 0:
- zlog_warn("SIGCHLD received, but waitpid did not reap a child");
- return;
- }
-
- if (child == integrated_write_pid)
- {
- integrated_write_sigchld(status);
- return;
- }
-
- if ((restart = find_child(child)) != NULL)
- {
- name = restart->name;
- what = restart->what;
- restart->pid = 0;
- gs.numpids--;
- thread_cancel(restart->t_kill);
- restart->t_kill = NULL;
- /* Update restart time to reflect the time the command completed. */
- gettimeofday(&restart->time,NULL);
- }
- else
- {
- zlog_err("waitpid returned status for an unknown child process %d",
- (int)child);
- name = "(unknown)";
- what = "background";
- }
- if (WIFSTOPPED(status))
- zlog_warn("warning: %s %s process %d is stopped",
- what,name,(int)child);
- else if (WIFSIGNALED(status))
- zlog_warn("%s %s process %d terminated due to signal %d",
- what,name,(int)child,WTERMSIG(status));
- else if (WIFEXITED(status))
- {
- if (WEXITSTATUS(status) != 0)
- zlog_warn("%s %s process %d exited with non-zero status %d",
- what,name,(int)child,WEXITSTATUS(status));
- else
- zlog_debug("%s %s process %d exited normally",what,name,(int)child);
- }
- else
- zlog_err("cannot interpret %s %s process %d wait status 0x%x",
- what,name,(int)child,status);
- phase_check();
-}
-
-static int
-run_job(struct restart_info *restart, const char *cmdtype, const char *command,
- int force, int update_interval)
-{
- struct timeval delay;
-
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("attempting to %s %s",cmdtype,restart->name);
-
- if (restart->pid)
- {
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("cannot %s %s, previous pid %d still running",
- cmdtype,restart->name,(int)restart->pid);
- return -1;
- }
-
- /* Note: time_elapsed test must come before the force test, since we need
- to make sure that delay is initialized for use below in updating the
- restart interval. */
- if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) &&
- !force)
- {
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("postponing %s %s: "
- "elapsed time %ld < retry interval %ld",
- cmdtype,restart->name,(long)delay.tv_sec,restart->interval);
- return -1;
- }
-
- gettimeofday(&restart->time,NULL);
- restart->kills = 0;
- {
- char cmd[strlen(command)+strlen(restart->name)+1];
- snprintf(cmd,sizeof(cmd),command,restart->name);
- if ((restart->pid = run_background(cmd)) > 0)
- {
- restart->t_kill = thread_add_timer(master,restart_kill,restart,
- gs.restart_timeout);
- restart->what = cmdtype;
- gs.numpids++;
- }
- else
- restart->pid = 0;
- }
-
- /* Calculate the new restart interval. */
- if (update_interval)
- {
- if (delay.tv_sec > 2*gs.max_restart_interval)
- restart->interval = gs.min_restart_interval;
- else if ((restart->interval *= 2) > gs.max_restart_interval)
- restart->interval = gs.max_restart_interval;
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("restart %s interval is now %ld",
- restart->name,restart->interval);
- }
- return restart->pid;
-}
-
-#define SET_READ_HANDLER(DMN) \
- (DMN)->t_read = thread_add_read(master,handle_read,(DMN),(DMN)->fd)
-
-#define SET_WAKEUP_DOWN(DMN) \
- (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN), \
- FUZZY(gs.period))
-
-#define SET_WAKEUP_UNRESPONSIVE(DMN) \
- (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \
- FUZZY(gs.period))
-
-#define SET_WAKEUP_ECHO(DMN) \
- (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \
- FUZZY(gs.period))
-
-static int
-wakeup_down(struct thread *t_wakeup)
-{
- struct daemon *dmn = THREAD_ARG(t_wakeup);
-
- dmn->t_wakeup = NULL;
- if (try_connect(dmn) < 0)
- SET_WAKEUP_DOWN(dmn);
- if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
- try_restart(dmn);
- return 0;
-}
-
-static int
-wakeup_init(struct thread *t_wakeup)
-{
- struct daemon *dmn = THREAD_ARG(t_wakeup);
-
- dmn->t_wakeup = NULL;
- if (try_connect(dmn) < 0)
- {
- SET_WAKEUP_DOWN(dmn);
- zlog_err("%s state -> down : initial connection attempt failed",
- dmn->name);
- dmn->state = DAEMON_DOWN;
- }
- return 0;
-}
-
-static void
-daemon_down(struct daemon *dmn, const char *why)
-{
- if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
- zlog_err("%s state -> down : %s",dmn->name,why);
- else if (gs.loglevel > LOG_DEBUG)
- zlog_debug("%s still down : %s",dmn->name,why);
- if (IS_UP(dmn))
- gs.numdown++;
- dmn->state = DAEMON_DOWN;
- if (dmn->fd >= 0)
- {
- close(dmn->fd);
- dmn->fd = -1;
- }
- THREAD_OFF(dmn->t_read);
- THREAD_OFF(dmn->t_write);
- THREAD_OFF(dmn->t_wakeup);
- if (try_connect(dmn) < 0)
- SET_WAKEUP_DOWN(dmn);
- phase_check();
-}
-
-static int
-handle_read(struct thread *t_read)
-{
- struct daemon *dmn = THREAD_ARG(t_read);
- static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n";
- char buf[sizeof(resp)+100];
- ssize_t rc;
- struct timeval delay;
-
- dmn->t_read = NULL;
- if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0)
- {
- char why[100];
-
- if (ERRNO_IO_RETRY(errno))
- {
- /* Pretend it never happened. */
- SET_READ_HANDLER(dmn);
- return 0;
- }
- snprintf(why,sizeof(why),"unexpected read error: %s",
- safe_strerror(errno));
- daemon_down(dmn,why);
- return 0;
- }
- if (rc == 0)
- {
- daemon_down(dmn,"read returned EOF");
- return 0;
- }
- if (!dmn->echo_sent.tv_sec)
- {
- char why[sizeof(buf)+100];
- snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s",
- (int)rc,(int)rc,buf);
- daemon_down(dmn,why);
- return 0;
- }
-
- /* We are expecting an echo response: is there any chance that the
- response would not be returned entirely in the first read? That
- seems inconceivable... */
- if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp)))
- {
- char why[100+sizeof(buf)];
- snprintf(why,sizeof(why),"read returned bad echo response of %d bytes "
- "(expecting %u): %.*s",
- (int)rc,(u_int)sizeof(resp),(int)rc,buf);
- daemon_down(dmn,why);
- return 0;
- }
-
- time_elapsed(&delay,&dmn->echo_sent);
- dmn->echo_sent.tv_sec = 0;
- if (dmn->state == DAEMON_UNRESPONSIVE)
- {
- if (delay.tv_sec < gs.timeout)
- {
- dmn->state = DAEMON_UP;
- zlog_warn("%s state -> up : echo response received after %ld.%06ld "
- "seconds", dmn->name,
- (long)delay.tv_sec, (long)delay.tv_usec);
- }
- else
- zlog_warn("%s: slow echo response finally received after %ld.%06ld "
- "seconds", dmn->name,
- (long)delay.tv_sec, (long)delay.tv_usec);
- }
- else if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("%s: echo response received after %ld.%06ld seconds",
- dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
-
- SET_READ_HANDLER(dmn);
- if (dmn->t_wakeup)
- thread_cancel(dmn->t_wakeup);
- SET_WAKEUP_ECHO(dmn);
-
- return 0;
-}
-
-/*
- * Wait till we notice that all daemons are ready before
- * we send we are ready to systemd
- */
-static void
-daemon_send_ready (void)
-{
- static int sent = 0;
- if (!sent && gs.numdown == 0)
- {
-#if defined (HAVE_CUMULUS)
- FILE *fp;
-
- fp = fopen(DAEMON_VTY_DIR "/watchquagga.started", "w");
- fclose(fp);
-#endif
- zlog_notice ("Watchquagga: Notifying Systemd we are up and running");
- systemd_send_started(master, 0);
- sent = 1;
- }
-}
-
-static void
-daemon_up(struct daemon *dmn, const char *why)
-{
- dmn->state = DAEMON_UP;
- gs.numdown--;
- dmn->connect_tries = 0;
- zlog_notice("%s state -> up : %s",dmn->name,why);
- daemon_send_ready();
- if (gs.do_ping)
- SET_WAKEUP_ECHO(dmn);
- phase_check();
-}
-
-static int
-check_connect(struct thread *t_write)
-{
- struct daemon *dmn = THREAD_ARG(t_write);
- int sockerr;
- socklen_t reslen = sizeof(sockerr);
-
- dmn->t_write = NULL;
- if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0)
- {
- zlog_warn("%s: check_connect: getsockopt failed: %s",
- dmn->name,safe_strerror(errno));
- daemon_down(dmn,"getsockopt failed checking connection success");
- return 0;
- }
- if ((reslen == sizeof(sockerr)) && sockerr)
- {
- char why[100];
- snprintf(why,sizeof(why),
- "getsockopt reports that connection attempt failed: %s",
- safe_strerror(sockerr));
- daemon_down(dmn,why);
- return 0;
- }
-
- daemon_up(dmn,"delayed connect succeeded");
- return 0;
-}
-
-static int
-wakeup_connect_hanging(struct thread *t_wakeup)
-{
- struct daemon *dmn = THREAD_ARG(t_wakeup);
- char why[100];
-
- dmn->t_wakeup = NULL;
- snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds",
- gs.timeout);
- daemon_down(dmn,why);
- return 0;
-}
-
-/* Making connection to protocol daemon. */
-static int
-try_connect(struct daemon *dmn)
-{
- int sock;
- struct sockaddr_un addr;
- socklen_t len;
-
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("%s: attempting to connect",dmn->name);
- dmn->connect_tries++;
-
- memset (&addr, 0, sizeof (struct sockaddr_un));
- addr.sun_family = AF_UNIX;
- snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
- gs.vtydir,dmn->name);
-#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
- len = addr.sun_len = SUN_LEN(&addr);
-#else
- len = sizeof (addr.sun_family) + strlen (addr.sun_path);
-#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
-
- /* Quick check to see if we might succeed before we go to the trouble
- of creating a socket. */
- if (access(addr.sun_path, W_OK) < 0)
- {
- if (errno != ENOENT)
- zlog_err("%s: access to socket %s denied: %s",
- dmn->name,addr.sun_path,safe_strerror(errno));
- return -1;
- }
-
- if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
- {
- zlog_err("%s(%s): cannot make socket: %s",
- __func__,addr.sun_path, safe_strerror(errno));
- return -1;
- }
-
- if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0)
- {
- zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed",
- __func__, addr.sun_path, sock);
- close(sock);
- return -1;
- }
-
- if (connect (sock, (struct sockaddr *) &addr, len) < 0)
- {
- if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK))
- {
- if (gs.loglevel > LOG_DEBUG)
- zlog_debug("%s(%s): connect failed: %s",
- __func__,addr.sun_path, safe_strerror(errno));
- close (sock);
- return -1;
- }
- if (gs.loglevel > LOG_DEBUG)
- zlog_debug("%s: connection in progress",dmn->name);
- dmn->state = DAEMON_CONNECTING;
- dmn->fd = sock;
- dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd);
- dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn,
- gs.timeout);
- SET_READ_HANDLER(dmn);
- return 0;
- }
-
- dmn->fd = sock;
- SET_READ_HANDLER(dmn);
- daemon_up(dmn,"connect succeeded");
- return 1;
-}
-
-static int
-phase_hanging(struct thread *t_hanging)
-{
- gs.t_phase_hanging = NULL;
- zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
- phase_str[gs.phase],PHASE_TIMEOUT);
- gs.phase = PHASE_NONE;
- return 0;
-}
-
-static void
-set_phase(restart_phase_t new_phase)
-{
- gs.phase = new_phase;
- if (gs.t_phase_hanging)
- thread_cancel(gs.t_phase_hanging);
- gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL,
- PHASE_TIMEOUT);
-}
-
-static void
-phase_check(void)
-{
- switch (gs.phase)
- {
- case PHASE_NONE:
- break;
- case PHASE_STOPS_PENDING:
- if (gs.numpids)
- break;
- zlog_info("Phased restart: all routing daemon stop jobs have completed.");
- set_phase(PHASE_WAITING_DOWN);
- /*FALLTHRU*/
- case PHASE_WAITING_DOWN:
- if (gs.numdown+IS_UP(gs.special) < gs.numdaemons)
- break;
- zlog_info("Phased restart: all routing daemons now down.");
- run_job(&gs.special->restart,"restart",gs.restart_command,1,1);
- set_phase(PHASE_ZEBRA_RESTART_PENDING);
- /*FALLTHRU*/
- case PHASE_ZEBRA_RESTART_PENDING:
- if (gs.special->restart.pid)
- break;
- zlog_info("Phased restart: %s restart job completed.",gs.special->name);
- set_phase(PHASE_WAITING_ZEBRA_UP);
- /*FALLTHRU*/
- case PHASE_WAITING_ZEBRA_UP:
- if (!IS_UP(gs.special))
- break;
- zlog_info("Phased restart: %s is now up.",gs.special->name);
- {
- struct daemon *dmn;
- for (dmn = gs.daemons; dmn; dmn = dmn->next)
- {
- if (dmn != gs.special)
- run_job(&dmn->restart,"start",gs.start_command,1,0);
- }
- }
- gs.phase = PHASE_NONE;
- THREAD_OFF(gs.t_phase_hanging);
- zlog_notice("Phased global restart has completed.");
- break;
- }
-}
-
-static void
-try_restart(struct daemon *dmn)
-{
- switch (gs.mode)
- {
- case MODE_MONITOR:
- return;
- case MODE_GLOBAL_RESTART:
- run_job(&gs.restart,"restart",gs.restart_command,0,1);
- break;
- case MODE_SEPARATE_RESTART:
- run_job(&dmn->restart,"restart",gs.restart_command,0,1);
- break;
- case MODE_PHASED_ZEBRA_RESTART:
- if (dmn != gs.special)
- {
- if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE))
- run_job(&dmn->restart,"restart",gs.restart_command,0,1);
- else
- zlog_debug("%s: postponing restart attempt because master %s daemon "
- "not up [%s], or phased restart in progress",
- dmn->name,gs.special->name,state_str[gs.special->state]);
- break;
- }
- /*FALLTHRU*/
- case MODE_PHASED_ALL_RESTART:
- if ((gs.phase != PHASE_NONE) || gs.numpids)
- {
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("postponing phased global restart: restart already in "
- "progress [%s], or outstanding child processes [%d]",
- phase_str[gs.phase],gs.numpids);
- break;
- }
- /* Is it too soon for a restart? */
- {
- struct timeval delay;
- if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec <
- gs.special->restart.interval)
- {
- if (gs.loglevel > LOG_DEBUG+1)
- zlog_debug("postponing phased global restart: "
- "elapsed time %ld < retry interval %ld",
- (long)delay.tv_sec,gs.special->restart.interval);
- break;
- }
- }
- run_job(&gs.restart,"restart",gs.restart_command,0,1);
- break;
- default:
- zlog_err("error: unknown restart mode %d",gs.mode);
- break;
- }
-}
-
-static int
-wakeup_unresponsive(struct thread *t_wakeup)
-{
- struct daemon *dmn = THREAD_ARG(t_wakeup);
-
- dmn->t_wakeup = NULL;
- if (dmn->state != DAEMON_UNRESPONSIVE)
- zlog_err("%s: no longer unresponsive (now %s), "
- "wakeup should have been cancelled!",
- dmn->name,state_str[dmn->state]);
- else
- {
- SET_WAKEUP_UNRESPONSIVE(dmn);
- try_restart(dmn);
- }
- return 0;
-}
-
-static int
-wakeup_no_answer(struct thread *t_wakeup)
-{
- struct daemon *dmn = THREAD_ARG(t_wakeup);
-
- dmn->t_wakeup = NULL;
- dmn->state = DAEMON_UNRESPONSIVE;
- zlog_err("%s state -> unresponsive : no response yet to ping "
- "sent %ld seconds ago",dmn->name,gs.timeout);
- if (gs.unresponsive_restart)
- {
- SET_WAKEUP_UNRESPONSIVE(dmn);
- try_restart(dmn);
- }
- return 0;
-}
-
-static int
-wakeup_send_echo(struct thread *t_wakeup)
-{
- static const char echocmd[] = "echo " PING_TOKEN;
- ssize_t rc;
- struct daemon *dmn = THREAD_ARG(t_wakeup);
-
- dmn->t_wakeup = NULL;
- if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) ||
- ((size_t)rc != sizeof(echocmd)))
- {
- char why[100+sizeof(echocmd)];
- snprintf(why,sizeof(why),"write '%s' returned %d instead of %u",
- echocmd,(int)rc,(u_int)sizeof(echocmd));
- daemon_down(dmn,why);
- }
- else
- {
- gettimeofday(&dmn->echo_sent,NULL);
- dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout);
- }
- return 0;
-}
-
-static void
-sigint(void)
-{
- zlog_notice("Terminating on signal");
- systemd_send_stopping ();
- exit(0);
-}
-
-static int
-valid_command(const char *cmd)
-{
- char *p;
-
- return ((p = strchr(cmd,'%')) != NULL) && (*(p+1) == 's') && !strchr(p+1,'%');
-}
-
-/* This is an ugly hack to circumvent problems with passing command-line
- arguments that contain spaces. The fix is to use a configuration file. */
-static char *
-translate_blanks(const char *cmd, const char *blankstr)
-{
- char *res;
- char *p;
- size_t bslen = strlen(blankstr);
-
- if (!(res = strdup(cmd)))
- {
- perror("strdup");
- exit(1);
- }
- while ((p = strstr(res,blankstr)) != NULL)
- {
- *p = ' ';
- if (bslen != 1)
- memmove(p+1,p+bslen,strlen(p+bslen)+1);
- }
- return res;
-}
-
-struct zebra_privs_t watchquagga_privs =
-{
-#ifdef VTY_GROUP
- .vty_group = VTY_GROUP,
-#endif
-};
-
-int
-main(int argc, char **argv)
-{
- const char *progname;
- int opt;
- int daemon_mode = 0;
- const char *pidfile = DEFAULT_PIDFILE;
- const char *special = "zebra";
- const char *blankstr = NULL;
- static struct quagga_signal_t my_signals[] =
- {
- {
- .signal = SIGINT,
- .handler = sigint,
- },
- {
- .signal = SIGTERM,
- .handler = sigint,
- },
- {
- .signal = SIGCHLD,
- .handler = sigchild,
- },
- };
-
- if ((progname = strrchr (argv[0], '/')) != NULL)
- progname++;
- else
- progname = argv[0];
-
- gs.restart.name = "all";
- while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
- longopts, 0)) != EOF)
- {
- switch (opt)
- {
- case 0:
- break;
- case 'a':
- if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
- {
- fputs("Ambiguous operating mode selected.\n",stderr);
- return usage(progname,1);
- }
- gs.mode = MODE_PHASED_ZEBRA_RESTART;
- break;
- case 'A':
- if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
- {
- fputs("Ambiguous operating mode selected.\n",stderr);
- return usage(progname,1);
- }
- gs.mode = MODE_PHASED_ALL_RESTART;
- break;
- case 'b':
- blankstr = optarg;
- break;
- case 'd':
- daemon_mode = 1;
- break;
- case 'e':
- gs.do_ping = 0;
- break;
- case 'k':
- if (!valid_command(optarg))
- {
- fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n",
- optarg);
- return usage(progname,1);
- }
- gs.stop_command = optarg;
- break;
- case 'l':
- {
- char garbage[3];
- if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) ||
- (gs.loglevel < LOG_EMERG))
- {
- fprintf(stderr,"Invalid loglevel argument: %s\n",optarg);
- return usage(progname,1);
- }
- }
- break;
- case 'm':
- {
- char garbage[3];
- if ((sscanf(optarg,"%ld%1s",
- &gs.min_restart_interval,garbage) != 1) ||
- (gs.min_restart_interval < 0))
- {
- fprintf(stderr,"Invalid min_restart_interval argument: %s\n",
- optarg);
- return usage(progname,1);
- }
- }
- break;
- case 'M':
- {
- char garbage[3];
- if ((sscanf(optarg,"%ld%1s",
- &gs.max_restart_interval,garbage) != 1) ||
- (gs.max_restart_interval < 0))
- {
- fprintf(stderr,"Invalid max_restart_interval argument: %s\n",
- optarg);
- return usage(progname,1);
- }
- }
- break;
- case 'i':
- {
- char garbage[3];
- int period;
- if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) ||
- (gs.period < 1))
- {
- fprintf(stderr,"Invalid interval argument: %s\n",optarg);
- return usage(progname,1);
- }
- gs.period = 1000*period;
- }
- break;
- case 'p':
- pidfile = optarg;
- break;
- case 'r':
- if ((gs.mode == MODE_GLOBAL_RESTART) ||
- (gs.mode == MODE_SEPARATE_RESTART))
- {
- fputs("Ambiguous operating mode selected.\n",stderr);
- return usage(progname,1);
- }
- if (!valid_command(optarg))
- {
- fprintf(stderr,
- "Invalid restart command, must contain '%%s': %s\n",
- optarg);
- return usage(progname,1);
- }
- gs.restart_command = optarg;
- if (gs.mode == MODE_MONITOR)
- gs.mode = MODE_SEPARATE_RESTART;
- break;
- case 'R':
- if (gs.mode != MODE_MONITOR)
- {
- fputs("Ambiguous operating mode selected.\n",stderr);
- return usage(progname,1);
- }
- if (strchr(optarg,'%'))
- {
- fprintf(stderr,
- "Invalid restart-all arg, must not contain '%%s': %s\n",
- optarg);
- return usage(progname,1);
- }
- gs.restart_command = optarg;
- gs.mode = MODE_GLOBAL_RESTART;
- break;
- case 's':
- if (!valid_command(optarg))
- {
- fprintf(stderr,"Invalid start command, must contain '%%s': %s\n",
- optarg);
- return usage(progname,1);
- }
- gs.start_command = optarg;
- break;
- case 'S':
- gs.vtydir = optarg;
- break;
- case 't':
- {
- char garbage[3];
- if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) ||
- (gs.timeout < 1))
- {
- fprintf(stderr,"Invalid timeout argument: %s\n",optarg);
- return usage(progname,1);
- }
- }
- break;
- case 'T':
- {
- char garbage[3];
- if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) ||
- (gs.restart_timeout < 1))
- {
- fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg);
- return usage(progname,1);
- }
- }
- break;
- case 'z':
- gs.unresponsive_restart = 1;
- break;
- case 'v':
- printf ("%s version %s\n", progname, FRR_VERSION);
- puts("Copyright 2004 Andrew J. Schorr");
- return 0;
- case 'h':
- return usage(progname,0);
- default:
- fputs("Invalid option.\n",stderr);
- return usage(progname,1);
- }
- }
-
- if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR))
- {
- fputs("Option -z requires a -r or -R restart option.\n",stderr);
- return usage(progname,1);
- }
- switch (gs.mode)
- {
- case MODE_MONITOR:
- if (gs.restart_command || gs.start_command || gs.stop_command)
- {
- fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n",
- mode_str[gs.mode]);
- return usage(progname,1);
- }
- break;
- case MODE_GLOBAL_RESTART:
- case MODE_SEPARATE_RESTART:
- if (!gs.restart_command || gs.start_command || gs.stop_command)
- {
- fprintf(stderr,"No start/kill commands needed in [%s] mode.\n",
- mode_str[gs.mode]);
- return usage(progname,1);
- }
- break;
- case MODE_PHASED_ZEBRA_RESTART:
- case MODE_PHASED_ALL_RESTART:
- if (!gs.restart_command || !gs.start_command || !gs.stop_command)
- {
- fprintf(stderr,
- "Need start, kill, and restart commands in [%s] mode.\n",
- mode_str[gs.mode]);
- return usage(progname,1);
- }
- break;
- }
-
- if (blankstr)
- {
- if (gs.restart_command)
- gs.restart_command = translate_blanks(gs.restart_command,blankstr);
- if (gs.start_command)
- gs.start_command = translate_blanks(gs.start_command,blankstr);
- if (gs.stop_command)
- gs.stop_command = translate_blanks(gs.stop_command,blankstr);
- }
-
- gs.restart.interval = gs.min_restart_interval;
-
- zprivs_init (&watchquagga_privs);
-
- master = thread_master_create();
- cmd_init(-1);
- memory_init();
- vty_init(master);
- watchquagga_vty_init();
- vty_serv_sock(NULL, 0, WATCHQUAGGA_VTYSH_PATH);
-
- signal_init (master, array_size(my_signals), my_signals);
- srandom(time(NULL));
-
- {
- int i;
- struct daemon *tail = NULL;
-
- for (i = optind; i < argc; i++)
- {
- struct daemon *dmn;
-
- if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn))))
- {
- fprintf(stderr,"calloc(1,%u) failed: %s\n",
- (u_int)sizeof(*dmn), safe_strerror(errno));
- return 1;
- }
- dmn->name = dmn->restart.name = argv[i];
- dmn->state = DAEMON_INIT;
- gs.numdaemons++;
- gs.numdown++;
- dmn->fd = -1;
- dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn,
- 100+(random() % 900));
- dmn->restart.interval = gs.min_restart_interval;
- if (tail)
- tail->next = dmn;
- else
- gs.daemons = dmn;
- tail = dmn;
-
- if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
- (gs.mode == MODE_PHASED_ALL_RESTART)) &&
- !strcmp(dmn->name,special))
- gs.special = dmn;
- }
- }
- if (!gs.daemons)
- {
- fputs("Must specify one or more daemons to monitor.\n",stderr);
- return usage(progname,1);
- }
- if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
- (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special)
- {
- fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n",
- mode_str[gs.mode],special);
- return usage(progname,1);
- }
-
- zlog_default = openzlog(progname, ZLOG_WATCHQUAGGA, 0,
- LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON);
- zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
- if (daemon_mode)
- {
- zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG));
- if (daemon (0, 0) < 0)
- {
- fprintf(stderr, "Watchquagga daemon failed: %s", strerror(errno));
- exit (1);
- }
- }
- else
- zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG));
-
- /* Make sure we're not already running. */
- pid_output (pidfile);
-
- /* Announce which daemons are being monitored. */
- {
- struct daemon *dmn;
- size_t len = 0;
-
- for (dmn = gs.daemons; dmn; dmn = dmn->next)
- len += strlen(dmn->name)+1;
-
- {
- char buf[len+1];
- char *p = buf;
-
- for (dmn = gs.daemons; dmn; dmn = dmn->next)
- {
- if (p != buf)
- *p++ = ' ';
- strcpy(p,dmn->name);
- p += strlen(p);
- }
- zlog_notice("%s %s watching [%s], mode [%s]",
- progname, FRR_VERSION, buf, mode_str[gs.mode]);
- }
- }
-
- {
- struct thread thread;
-
- while (thread_fetch (master, &thread))
- thread_call (&thread);
- }
-
- systemd_send_stopping ();
- /* Not reached. */
- return 0;
-}
+++ /dev/null
-/*
- Common definitions for watchquagga API socket.
-
- Copyright (C) 2016 David Lamparter for NetDEF, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef QUAGGA_WATCHQUAGGA_H
-#define QUAGGA_WATCHQUAGGA_H
-
-extern void watchquagga_vty_init(void);
-
-extern pid_t integrated_write_pid;
-extern void integrated_write_sigchld(int status);
-
-#endif /* QUAGGA_WATCHQUAGGA_H */
+++ /dev/null
-/*
- watchquagga CLI functions.
-
- Copyright (C) 2016 David Lamparter for NetDEF, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <zebra.h>
-#include <sys/wait.h>
-
-#include "memory.h"
-#include "log.h"
-#include "vty.h"
-#include "command.h"
-
-#include "watchquagga.h"
-
-pid_t integrated_write_pid;
-static int integrated_result_fd;
-
-DEFUN (config_write_integrated,
- config_write_integrated_cmd,
- "write integrated",
- "Write running configuration to memory, network, or terminal\n"
- "Write integrated all-daemon Quagga.conf file\n")
-{
- pid_t child;
- sigset_t oldmask, sigmask;
-
- if (integrated_write_pid != -1) {
- vty_out(vty, "%% configuration write already in progress.%s",
- VTY_NEWLINE);
- return CMD_WARNING;
- }
-
- fflush(stdout);
- fflush(stderr);
-
- /* need to temporarily block SIGCHLD because it could arrive between
- * fork() call and setting the integrated_write_pid variable. This
- * would mean the completion call gets lost and this hangs forever.
- */
- sigemptyset(&oldmask);
- sigemptyset(&sigmask);
- sigaddset(&sigmask, SIGCHLD);
- sigprocmask(SIG_BLOCK, &sigmask, &oldmask);
-
- child = fork();
- if (child == -1) {
- vty_out(vty, "%% configuration write fork() failed: %s.%s",
- safe_strerror(errno), VTY_NEWLINE);
- sigprocmask(SIG_SETMASK, &oldmask, NULL);
- return CMD_WARNING;
- }
- if (child != 0) {
- /* note: the VTY won't write a command return value to vtysh; the
- * session temporarily enters an intentional "hang" state. This is
- * to make sure latency in vtysh doing the config write (several
- * seconds is not rare to see) does not interfere with watchquagga's
- * supervisor job.
- *
- * The fd is duplicated here so we don't need to hold a vty pointer
- * (which could become invalid in the meantime).
- */
- integrated_write_pid = child;
- integrated_result_fd = dup(vty->wfd);
- sigprocmask(SIG_SETMASK, &oldmask, NULL);
- return CMD_SUSPEND;
- }
-
- /* redirect stdout/stderr to vty session. Note vty->wfd is marked
- * CLOEXEC, but dup2 will clear that flag. */
- dup2(vty->wfd, 1);
- dup2(vty->wfd, 2);
-
- /* don't allow the user to pass parameters, we're root here!
- * should probably harden vtysh at some point too... */
- execl(VTYSH_BIN_PATH, "vtysh", "-w", NULL);
-
- /* unbuffered write; we just messed with stdout... */
- char msg[512];
- snprintf(msg, sizeof(msg), "error executing %s: %s\n",
- VTYSH_BIN_PATH, safe_strerror(errno));
- write(1, msg, strlen(msg));
- exit(1);
-}
-
-void integrated_write_sigchld(int status)
-{
- uint8_t reply[4] = { 0, 0, 0, CMD_WARNING };
-
- if (WIFEXITED(status)) {
- zlog_info("configuration write completed with exit code %d",
- WEXITSTATUS(status));
- reply[3] = WEXITSTATUS(status);
- } else if (WIFSIGNALED(status)) {
- zlog_warn("configuration write terminated by signal %d",
- WTERMSIG(status));
- } else {
- zlog_warn("configuration write terminated");
- }
-
- if (reply[3] != CMD_SUCCESS) {
- /* failure might be silent in vtysh without this */
- static const char msg[] = "% Configuration write failed.\n";
- write(integrated_result_fd, msg, strlen(msg));
- }
-
- /* don't care about failures here, if the connection is broken the
- * return value will just be lost. */
- write(integrated_result_fd, reply, sizeof(reply));
- close(integrated_result_fd);
-
- integrated_write_pid = -1;
-}
-
-void watchquagga_vty_init(void)
-{
- integrated_write_pid = -1;
- install_element(ENABLE_NODE, &config_write_integrated_cmd);
-}