diff options
34 files changed, 1216 insertions, 184 deletions
diff --git a/doc/developer/building-frr-for-ubuntu2204.rst b/doc/developer/building-frr-for-ubuntu2204.rst new file mode 100644 index 0000000000..6b941b3679 --- /dev/null +++ b/doc/developer/building-frr-for-ubuntu2204.rst @@ -0,0 +1,171 @@ +Ubuntu 22.04 LTS +================ + +This document describes installation from source. If you want to build a +``deb``, see :ref:`packaging-debian`. + +Installing Dependencies +----------------------- + +.. code-block:: console + + sudo apt update + sudo apt-get install \ + git autoconf automake libtool make libreadline-dev texinfo \ + pkg-config libpam0g-dev libjson-c-dev bison flex \ + libc-ares-dev python3-dev python3-sphinx \ + install-info build-essential libsnmp-dev perl \ + libcap-dev python2 libelf-dev libunwind-dev \ + libyang2 libyang2-dev + +.. include:: building-libunwind-note.rst + +Note that Ubuntu >= 20 no longer installs python 2.x, so it must be +installed explicitly. Ensure that your system has a symlink named +``/usr/bin/python`` pointing at ``/usr/bin/python3``. + +.. code-block:: shell + + sudo ln -s /usr/bin/python3 /usr/bin/python + python --version + +In addition, ``pip`` for python2 must be installed if you wish to run +the FRR topotests. That version of ``pip`` is not available from the +ubuntu apt repositories; in order to install it: + +.. code-block:: shell + + curl https://bootstrap.pypa.io/pip/2.7/get-pip.py --output get-pip.py + sudo python2 ./get-pip.py + + # And verify the installation + pip2 --version + + +Protobuf +^^^^^^^^ +This is optional + +.. code-block:: console + + sudo apt-get install protobuf-c-compiler libprotobuf-c-dev + +ZeroMQ +^^^^^^ +This is optional + +.. code-block:: console + + sudo apt-get install libzmq5 libzmq3-dev + +Building & Installing FRR +------------------------- + +Add FRR user and groups +^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: console + + sudo groupadd -r -g 92 frr + sudo groupadd -r -g 85 frrvty + sudo adduser --system --ingroup frr --home /var/run/frr/ \ + --gecos "FRR suite" --shell /sbin/nologin frr + sudo usermod -a -G frrvty frr + +Compile +^^^^^^^ + +.. include:: include-compile.rst + +Install FRR configuration files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: console + + sudo install -m 775 -o frr -g frr -d /var/log/frr + sudo install -m 775 -o frr -g frrvty -d /etc/frr + sudo install -m 640 -o frr -g frrvty tools/etc/frr/vtysh.conf /etc/frr/vtysh.conf + sudo install -m 640 -o frr -g frr tools/etc/frr/frr.conf /etc/frr/frr.conf + sudo install -m 640 -o frr -g frr tools/etc/frr/daemons.conf /etc/frr/daemons.conf + sudo install -m 640 -o frr -g frr tools/etc/frr/daemons /etc/frr/daemons + +Tweak sysctls +^^^^^^^^^^^^^ + +Some sysctls need to be changed in order to enable IPv4/IPv6 forwarding and +MPLS (if supported by your platform). If your platform does not support MPLS, +skip the MPLS related configuration in this section. + +Edit :file:`/etc/sysctl.conf` and uncomment the following values (ignore the +other settings): + +:: + + # Uncomment the next line to enable packet forwarding for IPv4 + net.ipv4.ip_forward=1 + + # Uncomment the next line to enable packet forwarding for IPv6 + # Enabling this option disables Stateless Address Autoconfiguration + # based on Router Advertisements for this host + net.ipv6.conf.all.forwarding=1 + +Reboot or use ``sysctl -p`` to apply the same config to the running system. + +Add MPLS kernel modules +""""""""""""""""""""""" + +Ubuntu 20.04 ships with kernel 5.4; MPLS modules are present by default. To +enable, add the following lines to :file:`/etc/modules-load.d/modules.conf`: + +:: + + # Load MPLS Kernel Modules + mpls_router + mpls_iptunnel + + +And load the kernel modules on the running system: + +.. code-block:: console + + sudo modprobe mpls-router mpls-iptunnel + +If the above command returns an error, you may need to install the appropriate +or latest linux-modules-extra-<kernel-version>-generic package. For example +``apt-get install linux-modules-extra-`uname -r`-generic`` + +Enable MPLS Forwarding +"""""""""""""""""""""" + +Edit :file:`/etc/sysctl.conf` and the following lines. Make sure to add a line +equal to :file:`net.mpls.conf.eth0.input` for each interface used with MPLS. + +:: + + # Enable MPLS Label processing on all interfaces + net.mpls.conf.eth0.input=1 + net.mpls.conf.eth1.input=1 + net.mpls.conf.eth2.input=1 + net.mpls.platform_labels=100000 + +Install service files +^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: console + + sudo install -m 644 tools/frr.service /etc/systemd/system/frr.service + sudo systemctl enable frr + +Enable daemons +^^^^^^^^^^^^^^ + +Open :file:`/etc/frr/daemons` with your text editor of choice. Look for the +section with ``watchfrr_enable=...`` and ``zebra=...`` etc. Enable the daemons +as required by changing the value to ``yes``. + +Start FRR +^^^^^^^^^ + +.. code-block:: shell + + systemctl start frr diff --git a/doc/developer/topotests.rst b/doc/developer/topotests.rst index 13936e18ed..7b1fde7b53 100644 --- a/doc/developer/topotests.rst +++ b/doc/developer/topotests.rst @@ -22,10 +22,8 @@ Installing Topotest Requirements .. code:: shell - apt-get install gdb - apt-get install iproute2 - apt-get install net-tools - apt-get install python3-pip + apt-get install gdb iproute2 net-tools python3-pip \ + iputils-ping valgrind python3 -m pip install wheel python3 -m pip install 'pytest>=6.2.4' python3 -m pip install 'pytest-xdist>=2.3.0' @@ -119,6 +117,7 @@ If you prefer to manually build FRR, then use the following suggested config: --sysconfdir=/etc/frr \ --enable-vtysh \ --enable-pimd \ + --enable-pim6d \ --enable-sharpd \ --enable-multipath=64 \ --enable-user=frr \ @@ -311,32 +310,6 @@ Test will set exit code which can be used with ``git bisect``. For the simulated topology, see the description in the python file. -StdErr log from daemos after exit -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To enable the reporting of any messages seen on StdErr after the daemons exit, -the following env variable can be set:: - - export TOPOTESTS_CHECK_STDERR=Yes - -(The value doesn't matter at this time. The check is whether the env -variable exists or not.) There is no pass/fail on this reporting; the -Output will be reported to the console. - -Collect Memory Leak Information -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -FRR processes can report unfreed memory allocations upon exit. To -enable the reporting of memory leaks, define an environment variable -``TOPOTESTS_CHECK_MEMLEAK`` with the file prefix, i.e.:: - - export TOPOTESTS_CHECK_MEMLEAK="/home/mydir/memleak_" - -This will enable the check and output to console and the writing of -the information to files with the given prefix (followed by testname), -ie :file:`/home/mydir/memcheck_test_bgp_multiview_topo1.txt` in case -of a memory leak. - Running Topotests with AddressSanitizer ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -562,6 +535,48 @@ Here's an example of launching ``zebra`` and ``bgpd`` inside ``gdb`` on router --gdb-breakpoints=nb_config_diff \ all-protocol-startup +Reporting Memleaks with FRR Memory Statistics +""""""""""""""""""""""""""""""""""""""""""""" + +FRR reports all allocated FRR memory objects on exit to standard error. +Topotest can be run to report such output as errors in order to check for +memleaks in FRR memory allocations. Specifying the CLI argument +``--memleaks`` will enable reporting FRR-based memory allocations at exit as errors. + +.. code:: shell + + sudo -E pytest --memleaks all-protocol-startup + + +StdErr log from daemos after exit +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When running with ``--memleaks``, to enable the reporting of other, +non-memory related, messages seen on StdErr after the daemons exit, +the following env variable can be set:: + + export TOPOTESTS_CHECK_STDERR=Yes + +(The value doesn't matter at this time. The check is whether the env +variable exists or not.) There is no pass/fail on this reporting; the +Output will be reported to the console. + +Collect Memory Leak Information +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When running with ``--memleaks``, FRR processes report unfreed memory +allocations upon exit. To enable also reporting of memory leaks to a specific +location, define an environment variable ``TOPOTESTS_CHECK_MEMLEAK`` with the +file prefix, i.e.: + + export TOPOTESTS_CHECK_MEMLEAK="/home/mydir/memleak_" + +For tests that support the TOPOTESTS_CHECK_MEMLEAK environment variable, this +will enable output to the information to files with the given prefix (followed +by testname), e.g.,: +file:`/home/mydir/memcheck_test_bgp_multiview_topo1.txt` in case +of a memory leak. + Detecting Memleaks with Valgrind """""""""""""""""""""""""""""""" diff --git a/doc/user/ospf6d.rst b/doc/user/ospf6d.rst index 8dacb9c9dc..2f4c956ffd 100644 --- a/doc/user/ospf6d.rst +++ b/doc/user/ospf6d.rst @@ -287,6 +287,19 @@ OSPF6 interface Sets interface's Router Dead Interval. Default value is 40. +.. clicmd:: ipv6 ospf6 graceful-restart hello-delay HELLODELAYINTERVAL + + Set the length of time during which Grace-LSAs are sent at 1-second intervals + while coming back up after an unplanned outage. During this time, no hello + packets are sent. + + A higher hello delay will increase the chance that all neighbors are notified + about the ongoing graceful restart before receiving a hello packet (which is + crucial for the graceful restart to succeed). The hello delay shouldn't be set + too high, however, otherwise the adjacencies might time out. As a best practice, + it's recommended to set the hello delay and hello interval with the same values. + The default value is 10 seconds. + .. clicmd:: ipv6 ospf6 retransmit-interval RETRANSMITINTERVAL Sets interface's Rxmt Interval. Default value is 5. @@ -343,6 +356,10 @@ Graceful Restart To perform a graceful shutdown, the "graceful-restart prepare ipv6 ospf" EXEC-level command needs to be issued before restarting the ospf6d daemon. + When Graceful Restart is enabled and the ospf6d daemon crashes or is killed + abruptely (e.g. SIGKILL), it will attempt an unplanned Graceful Restart once + it restarts. + .. clicmd:: graceful-restart helper enable [A.B.C.D] diff --git a/doc/user/ospfd.rst b/doc/user/ospfd.rst index 5171832604..effad0fd00 100644 --- a/doc/user/ospfd.rst +++ b/doc/user/ospfd.rst @@ -635,6 +635,19 @@ Interfaces :clicmd:`ip ospf dead-interval minimal hello-multiplier (2-20)` is also specified for the interface. +.. clicmd:: ip ospf graceful-restart hello-delay (1-1800) + + Set the length of time during which Grace-LSAs are sent at 1-second intervals + while coming back up after an unplanned outage. During this time, no hello + packets are sent. + + A higher hello delay will increase the chance that all neighbors are notified + about the ongoing graceful restart before receiving a hello packet (which is + crucial for the graceful restart to succeed). The hello delay shouldn't be set + too high, however, otherwise the adjacencies might time out. As a best practice, + it's recommended to set the hello delay and hello interval with the same values. + The default value is 10 seconds. + .. clicmd:: ip ospf network (broadcast|non-broadcast|point-to-multipoint|point-to-point [dmvpn]) When configuring a point-to-point network on an interface and the interface @@ -770,6 +783,10 @@ Graceful Restart To perform a graceful shutdown, the "graceful-restart prepare ip ospf" EXEC-level command needs to be issued before restarting the ospfd daemon. + When Graceful Restart is enabled and the ospfd daemon crashes or is killed + abruptely (e.g. SIGKILL), it will attempt an unplanned Graceful Restart once + it restarts. + .. clicmd:: graceful-restart helper enable [A.B.C.D] diff --git a/lib/libfrr.c b/lib/libfrr.c index 07e2eafec5..e890057269 100644 --- a/lib/libfrr.c +++ b/lib/libfrr.c @@ -130,6 +130,7 @@ static const struct optspec os_always = { " --limit-fds Limit number of fds supported\n", lo_always}; +static bool logging_to_stdout = false; /* set when --log stdout specified */ static const struct option lo_cfg[] = { {"config_file", required_argument, NULL, 'f'}, @@ -738,6 +739,11 @@ struct event_loop *frr_init(void) while ((log_arg = log_args_pop(di->early_logging))) { command_setup_early_logging(log_arg->target, di->early_loglevel); + /* this is a bit of a hack, + but need to notice when + the target is stdout */ + if (strcmp(log_arg->target, "stdout") == 0) + logging_to_stdout = true; XFREE(MTYPE_TMP, log_arg); } @@ -1088,9 +1094,15 @@ static void frr_terminal_close(int isexit) "%s: failed to open /dev/null: %s", __func__, safe_strerror(errno)); } else { - dup2(nullfd, 0); - dup2(nullfd, 1); - dup2(nullfd, 2); + int fd; + /* + * only redirect stdin, stdout, stderr to null when a tty also + * don't redirect when stdout is set with --log stdout + */ + for (fd = 2; fd >= 0; fd--) + if (isatty(fd) && + (fd != STDOUT_FILENO || !logging_to_stdout)) + dup2(nullfd, fd); close(nullfd); } } @@ -1168,9 +1180,16 @@ void frr_run(struct event_loop *master) "%s: failed to open /dev/null: %s", __func__, safe_strerror(errno)); } else { - dup2(nullfd, 0); - dup2(nullfd, 1); - dup2(nullfd, 2); + int fd; + /* + * only redirect stdin, stdout, stderr to null when a + * tty also don't redirect when stdout is set with --log + * stdout + */ + for (fd = 2; fd >= 0; fd--) + if (isatty(fd) && + (fd != STDOUT_FILENO || !logging_to_stdout)) + dup2(nullfd, fd); close(nullfd); } @@ -1194,7 +1213,7 @@ void frr_fini(void) { FILE *fp; char filename[128]; - int have_leftovers; + int have_leftovers = 0; hook_call(frr_fini); @@ -1220,16 +1239,15 @@ void frr_fini(void) /* frrmod_init -> nothing needed / hooks */ rcu_shutdown(); - if (!debug_memstats_at_exit) - return; - - have_leftovers = log_memstats(stderr, di->name); + /* also log memstats to stderr when stderr goes to a file*/ + if (debug_memstats_at_exit || !isatty(STDERR_FILENO)) + have_leftovers = log_memstats(stderr, di->name); /* in case we decide at runtime that we want exit-memstats for - * a daemon, but it has no stderr because it's daemonized + * a daemon * (only do this if we actually have something to print though) */ - if (!have_leftovers) + if (!debug_memstats_at_exit || !have_leftovers) return; snprintf(filename, sizeof(filename), "/tmp/frr-memstats-%s-%llu-%llu", diff --git a/lib/libospf.h b/lib/libospf.h index 3262534de5..676b563fff 100644 --- a/lib/libospf.h +++ b/lib/libospf.h @@ -55,6 +55,7 @@ extern "C" { #define OSPF_ROUTER_DEAD_INTERVAL_DEFAULT 40 #define OSPF_ROUTER_DEAD_INTERVAL_MINIMAL 1 #define OSPF_HELLO_INTERVAL_DEFAULT 10 +#define OSPF_HELLO_DELAY_DEFAULT 10 #define OSPF_ROUTER_PRIORITY_DEFAULT 1 #define OSPF_RETRANSMIT_INTERVAL_DEFAULT 5 #define OSPF_TRANSMIT_DELAY_DEFAULT 1 diff --git a/ospf6d/ospf6_gr.c b/ospf6d/ospf6_gr.c index 976eb529d7..3d5d4d259f 100644 --- a/ospf6d/ospf6_gr.c +++ b/ospf6d/ospf6_gr.c @@ -14,6 +14,7 @@ #include "log.h" #include "hook.h" #include "printfrr.h" +#include "lib_errors.h" #include "ospf6d/ospf6_lsa.h" #include "ospf6d/ospf6_lsdb.h" @@ -25,21 +26,25 @@ #include "ospf6d/ospf6_zebra.h" #include "ospf6d/ospf6_message.h" #include "ospf6d/ospf6_neighbor.h" +#include "ospf6d/ospf6_network.h" #include "ospf6d/ospf6_flood.h" #include "ospf6d/ospf6_intra.h" #include "ospf6d/ospf6_spf.h" #include "ospf6d/ospf6_gr.h" #include "ospf6d/ospf6_gr_clippy.c" -static void ospf6_gr_nvm_delete(struct ospf6 *ospf6); +static void ospf6_gr_grace_period_expired(struct event *thread); /* Originate and install Grace-LSA for a given interface. */ -static int ospf6_gr_lsa_originate(struct ospf6_interface *oi) +static int ospf6_gr_lsa_originate(struct ospf6_interface *oi, + enum ospf6_gr_restart_reason reason) { - struct ospf6_gr_info *gr_info = &oi->area->ospf6->gr_info; + struct ospf6 *ospf6 = oi->area->ospf6; + struct ospf6_gr_info *gr_info = &ospf6->gr_info; struct ospf6_lsa_header *lsa_header; struct ospf6_grace_lsa *grace_lsa; struct ospf6_lsa *lsa; + uint16_t lsa_length; char buffer[OSPF6_MAX_LSASIZE]; if (IS_OSPF6_DEBUG_ORIGINATE(LINK)) @@ -61,29 +66,59 @@ static int ospf6_gr_lsa_originate(struct ospf6_interface *oi) /* Put restart reason. */ grace_lsa->tlv_reason.header.type = htons(RESTART_REASON_TYPE); grace_lsa->tlv_reason.header.length = htons(RESTART_REASON_LENGTH); - if (gr_info->restart_support) - grace_lsa->tlv_reason.reason = OSPF6_GR_SW_RESTART; - else - grace_lsa->tlv_reason.reason = OSPF6_GR_UNKNOWN_RESTART; + grace_lsa->tlv_reason.reason = reason; /* Fill LSA Header */ + lsa_length = sizeof(*lsa_header) + sizeof(*grace_lsa); lsa_header->age = 0; lsa_header->type = htons(OSPF6_LSTYPE_GRACE_LSA); lsa_header->id = htonl(oi->interface->ifindex); - lsa_header->adv_router = oi->area->ospf6->router_id; + lsa_header->adv_router = ospf6->router_id; lsa_header->seqnum = ospf6_new_ls_seqnum(lsa_header->type, lsa_header->id, lsa_header->adv_router, oi->lsdb); - lsa_header->length = htons(sizeof(*lsa_header) + sizeof(*grace_lsa)); + lsa_header->length = htons(lsa_length); /* LSA checksum */ ospf6_lsa_checksum(lsa_header); - /* create LSA */ - lsa = ospf6_lsa_create(lsa_header); - - /* Originate */ - ospf6_lsa_originate_interface(lsa, oi); + if (reason == OSPF6_GR_UNKNOWN_RESTART) { + struct ospf6_header *oh; + uint32_t *uv32; + int n; + uint16_t length = OSPF6_HEADER_SIZE + 4 + lsa_length; + struct iovec iovector[2] = {}; + + /* Reserve space for OSPFv3 header. */ + memmove(&buffer[OSPF6_HEADER_SIZE + 4], buffer, lsa_length); + + /* Fill in the OSPFv3 header. */ + oh = (struct ospf6_header *)buffer; + oh->version = OSPFV3_VERSION; + oh->type = OSPF6_MESSAGE_TYPE_LSUPDATE; + oh->router_id = oi->area->ospf6->router_id; + oh->area_id = oi->area->area_id; + oh->instance_id = oi->instance_id; + oh->reserved = 0; + oh->length = htons(length); + + /* Fill LSA header. */ + uv32 = (uint32_t *)&buffer[sizeof(*oh)]; + *uv32 = htonl(1); + + /* Send packet. */ + iovector[0].iov_base = lsa_header; + iovector[0].iov_len = length; + n = ospf6_sendmsg(oi->linklocal_addr, &allspfrouters6, + oi->interface->ifindex, iovector, ospf6->fd); + if (n != length) + flog_err(EC_LIB_DEVELOPMENT, + "%s: could not send entire message", __func__); + } else { + /* Create and install LSA. */ + lsa = ospf6_lsa_create(lsa_header); + ospf6_lsa_originate_interface(lsa, oi); + } return 0; } @@ -134,11 +169,10 @@ static void ospf6_gr_restart_exit(struct ospf6 *ospf6, const char *reason) ospf6->gr_info.restart_in_progress = false; ospf6->gr_info.finishing_restart = true; + XFREE(MTYPE_TMP, ospf6->gr_info.exit_reason); + ospf6->gr_info.exit_reason = XSTRDUP(MTYPE_TMP, reason); EVENT_OFF(ospf6->gr_info.t_grace_period); - /* Record in non-volatile memory that the restart is complete. */ - ospf6_gr_nvm_delete(ospf6); - for (ALL_LIST_ELEMENTS_RO(ospf6->area_list, onode, area)) { struct ospf6_interface *oi; @@ -156,6 +190,14 @@ static void ospf6_gr_restart_exit(struct ospf6 *ospf6, const char *reason) OSPF6_INTRA_PREFIX_LSA_SCHEDULE_STUB(area); for (ALL_LIST_ELEMENTS_RO(area->if_list, anode, oi)) { + /* Disable hello delay. */ + if (oi->gr.hello_delay.t_grace_send) { + oi->gr.hello_delay.elapsed_seconds = 0; + EVENT_OFF(oi->gr.hello_delay.t_grace_send); + event_add_event(master, ospf6_hello_send, oi, 0, + &oi->thread_send_hello); + } + /* Reoriginate Link-LSA. */ if (oi->type != OSPF_IFTYPE_VIRTUALLINK) OSPF6_LINK_LSA_EXECUTE(oi); @@ -195,6 +237,26 @@ static void ospf6_gr_restart_exit(struct ospf6 *ospf6, const char *reason) ospf6_gr_flush_grace_lsas(ospf6); } +/* Enter the Graceful Restart mode. */ +void ospf6_gr_restart_enter(struct ospf6 *ospf6, + enum ospf6_gr_restart_reason reason, int timestamp) +{ + unsigned long remaining_time; + + ospf6->gr_info.restart_in_progress = true; + ospf6->gr_info.reason = reason; + + /* Schedule grace period timeout. */ + remaining_time = timestamp - time(NULL); + if (IS_DEBUG_OSPF6_GR) + zlog_debug( + "GR: remaining time until grace period expires: %lu(s)", + remaining_time); + + event_add_timer(master, ospf6_gr_grace_period_expired, ospf6, + remaining_time, &ospf6->gr_info.t_grace_period); +} + #define RTR_LSA_MISSING 0 #define RTR_LSA_ADJ_FOUND 1 #define RTR_LSA_ADJ_NOT_FOUND 2 @@ -466,11 +528,26 @@ static void ospf6_gr_grace_period_expired(struct event *thread) ospf6_gr_restart_exit(ospf6, "grace period has expired"); } +/* Send extra Grace-LSA out the interface (unplanned outages only). */ +void ospf6_gr_iface_send_grace_lsa(struct event *thread) +{ + struct ospf6_interface *oi = EVENT_ARG(thread); + + ospf6_gr_lsa_originate(oi, oi->area->ospf6->gr_info.reason); + + if (++oi->gr.hello_delay.elapsed_seconds < oi->gr.hello_delay.interval) + event_add_timer(master, ospf6_gr_iface_send_grace_lsa, oi, 1, + &oi->gr.hello_delay.t_grace_send); + else + event_add_event(master, ospf6_hello_send, oi, 0, + &oi->thread_send_hello); +} + /* * Record in non-volatile memory that the given OSPF instance is attempting to * perform a graceful restart. */ -static void ospf6_gr_nvm_update(struct ospf6 *ospf6) +static void ospf6_gr_nvm_update(struct ospf6 *ospf6, bool prepare) { const char *inst_name; json_object *json; @@ -496,16 +573,18 @@ static void ospf6_gr_nvm_update(struct ospf6 *ospf6) json_instance); } + json_object_int_add(json_instance, "gracePeriod", + ospf6->gr_info.grace_period); + /* * Record not only the grace period, but also a UNIX timestamp * corresponding to the end of that period. That way, once ospf6d is * restarted, it will be possible to take into account the time that * passed while ospf6d wasn't running. */ - json_object_int_add(json_instance, "gracePeriod", - ospf6->gr_info.grace_period); - json_object_int_add(json_instance, "timestamp", - time(NULL) + ospf6->gr_info.grace_period); + if (prepare) + json_object_int_add(json_instance, "timestamp", + time(NULL) + ospf6->gr_info.grace_period); json_object_to_file_ext((char *)OSPF6D_GR_STATE, json, JSON_C_TO_STRING_PRETTY); @@ -516,7 +595,7 @@ static void ospf6_gr_nvm_update(struct ospf6 *ospf6) * Delete GR status information about the given OSPF instance from non-volatile * memory. */ -static void ospf6_gr_nvm_delete(struct ospf6 *ospf6) +void ospf6_gr_nvm_delete(struct ospf6 *ospf6) { const char *inst_name; json_object *json; @@ -552,6 +631,7 @@ void ospf6_gr_nvm_read(struct ospf6 *ospf6) json_object *json_instances; json_object *json_instance; json_object *json_timestamp; + json_object *json_grace_period; time_t timestamp = 0; inst_name = ospf6->name ? ospf6->name : VRF_DEFAULT_NAME; @@ -573,29 +653,33 @@ void ospf6_gr_nvm_read(struct ospf6 *ospf6) json_instance); } + json_object_object_get_ex(json_instance, "gracePeriod", + &json_grace_period); json_object_object_get_ex(json_instance, "timestamp", &json_timestamp); if (json_timestamp) { time_t now; - unsigned long remaining_time; - /* Check if the grace period has already expired. */ + /* Planned GR: check if the grace period has already expired. */ now = time(NULL); timestamp = json_object_get_int(json_timestamp); if (now > timestamp) { ospf6_gr_restart_exit( ospf6, "grace period has expired already"); - } else { - /* Schedule grace period timeout. */ - ospf6->gr_info.restart_in_progress = true; - remaining_time = timestamp - time(NULL); - if (IS_DEBUG_OSPF6_GR) - zlog_debug( - "GR: remaining time until grace period expires: %lu(s)", - remaining_time); - event_add_timer(master, ospf6_gr_grace_period_expired, - ospf6, remaining_time, - &ospf6->gr_info.t_grace_period); - } + } else + ospf6_gr_restart_enter(ospf6, OSPF6_GR_SW_RESTART, + timestamp); + } else if (json_grace_period) { + uint32_t grace_period; + + /* + * Unplanned GR: the Grace-LSAs will be sent later as soon as + * the interfaces are operational. + */ + grace_period = json_object_get_int(json_grace_period); + ospf6->gr_info.grace_period = grace_period; + ospf6_gr_restart_enter(ospf6, OSPF6_GR_UNKNOWN_RESTART, + time(NULL) + + ospf6->gr_info.grace_period); } json_object_object_del(json_instances, inst_name); @@ -605,6 +689,24 @@ void ospf6_gr_nvm_read(struct ospf6 *ospf6) json_object_free(json); } +void ospf6_gr_unplanned_start_interface(struct ospf6_interface *oi) +{ + /* + * Can't check OSPF interface state as the OSPF instance might not be + * enabled yet. + */ + if (!if_is_operative(oi->interface) || if_is_loopback(oi->interface)) + return; + + /* Send Grace-LSA. */ + ospf6_gr_lsa_originate(oi, oi->area->ospf6->gr_info.reason); + + /* Start GR hello-delay interval. */ + oi->gr.hello_delay.elapsed_seconds = 0; + event_add_timer(master, ospf6_gr_iface_send_grace_lsa, oi, 1, + &oi->gr.hello_delay.t_grace_send); +} + /* Prepare to start a Graceful Restart. */ static void ospf6_gr_prepare(void) { @@ -625,25 +727,17 @@ static void ospf6_gr_prepare(void) ospf6->gr_info.grace_period, ospf6_vrf_id_to_name(ospf6->vrf_id)); - /* Freeze OSPF routes in the RIB. */ - if (ospf6_zebra_gr_enable(ospf6, ospf6->gr_info.grace_period)) { - zlog_warn( - "%s: failed to activate graceful restart: not connected to zebra", - __func__); - continue; - } - /* Send a Grace-LSA to all neighbors. */ for (ALL_LIST_ELEMENTS_RO(ospf6->area_list, anode, area)) { for (ALL_LIST_ELEMENTS_RO(area->if_list, inode, oi)) { if (oi->state < OSPF6_INTERFACE_POINTTOPOINT) continue; - ospf6_gr_lsa_originate(oi); + ospf6_gr_lsa_originate(oi, OSPF6_GR_SW_RESTART); } } /* Record end of the grace period in non-volatile memory. */ - ospf6_gr_nvm_update(ospf6); + ospf6_gr_nvm_update(ospf6, true); /* * Mark that a Graceful Restart preparation is in progress, to @@ -714,6 +808,12 @@ DEFPY(ospf6_graceful_restart, ospf6_graceful_restart_cmd, ospf6->gr_info.restart_support = true; ospf6->gr_info.grace_period = grace_period; + /* Freeze OSPF routes in the RIB. */ + (void)ospf6_zebra_gr_enable(ospf6, ospf6->gr_info.grace_period); + + /* Record that GR is enabled in non-volatile memory. */ + ospf6_gr_nvm_update(ospf6, false); + return CMD_SUCCESS; } @@ -736,6 +836,8 @@ DEFPY(ospf6_no_graceful_restart, ospf6_no_graceful_restart_cmd, ospf6->gr_info.restart_support = false; ospf6->gr_info.grace_period = OSPF6_DFLT_GRACE_INTERVAL; + ospf6_gr_nvm_delete(ospf6); + ospf6_zebra_gr_disable(ospf6); return CMD_SUCCESS; } diff --git a/ospf6d/ospf6_gr.h b/ospf6d/ospf6_gr.h index 2c7e8b341c..e6566a6098 100644 --- a/ospf6d/ospf6_gr.h +++ b/ospf6d/ospf6_gr.h @@ -155,9 +155,15 @@ extern int config_write_ospf6_gr(struct vty *vty, struct ospf6 *ospf6); extern int config_write_ospf6_gr_helper(struct vty *vty, struct ospf6 *ospf6); extern int config_write_ospf6_debug_gr_helper(struct vty *vty); +extern void ospf6_gr_iface_send_grace_lsa(struct event *thread); +extern void ospf6_gr_restart_enter(struct ospf6 *ospf6, + enum ospf6_gr_restart_reason reason, + int timestamp); extern void ospf6_gr_check_lsdb_consistency(struct ospf6 *ospf, struct ospf6_area *area); extern void ospf6_gr_nvm_read(struct ospf6 *ospf); +extern void ospf6_gr_nvm_delete(struct ospf6 *ospf6); +extern void ospf6_gr_unplanned_start_interface(struct ospf6_interface *oi); extern void ospf6_gr_init(void); #endif /* OSPF6_GR_H */ diff --git a/ospf6d/ospf6_interface.c b/ospf6d/ospf6_interface.c index e7148d66ba..ea059c4be6 100644 --- a/ospf6d/ospf6_interface.c +++ b/ospf6d/ospf6_interface.c @@ -35,6 +35,7 @@ #include "ospf6_proto.h" #include "lib/keychain.h" #include "ospf6_auth_trailer.h" +#include "ospf6d/ospf6_interface_clippy.c" DEFINE_MTYPE_STATIC(OSPF6D, OSPF6_IF, "OSPF6 interface"); DEFINE_MTYPE(OSPF6D, OSPF6_AUTH_KEYCHAIN, "OSPF6 auth keychain"); @@ -202,6 +203,7 @@ struct ospf6_interface *ospf6_interface_create(struct interface *ifp) oi->priority = OSPF6_INTERFACE_PRIORITY; oi->hello_interval = OSPF_HELLO_INTERVAL_DEFAULT; + oi->gr.hello_delay.interval = OSPF_HELLO_DELAY_DEFAULT; oi->dead_interval = OSPF_ROUTER_DEAD_INTERVAL_DEFAULT; oi->rxmt_interval = OSPF_RETRANSMIT_INTERVAL_DEFAULT; oi->type = ospf6_default_iftype(ifp); @@ -324,6 +326,9 @@ void ospf6_interface_disable(struct ospf6_interface *oi) EVENT_OFF(oi->thread_intra_prefix_lsa); EVENT_OFF(oi->thread_as_extern_lsa); EVENT_OFF(oi->thread_wait_timer); + + oi->gr.hello_delay.elapsed_seconds = 0; + EVENT_OFF(oi->gr.hello_delay.t_grace_send); } static struct in6_addr * @@ -772,6 +777,17 @@ void interface_up(struct event *thread) return; } + /* + * RFC 3623 - Section 5 ("Unplanned Outages"): + * "The grace-LSAs are encapsulated in Link State Update Packets + * and sent out to all interfaces, even though the restarted + * router has no adjacencies and no knowledge of previous + * adjacencies". + */ + if (oi->area->ospf6->gr_info.restart_in_progress && + oi->area->ospf6->gr_info.reason == OSPF6_GR_UNKNOWN_RESTART) + ospf6_gr_unplanned_start_interface(oi); + #ifdef __FreeBSD__ /* * There's a delay in FreeBSD between issuing a command to leave a @@ -1180,6 +1196,9 @@ static int ospf6_interface_show(struct vty *vty, struct interface *ifp, json_arr, json_object_new_string(lsa->name)); json_object_object_add(json_obj, "pendingLsaLsAck", json_arr); + if (oi->gr.hello_delay.interval != 0) + json_object_int_add(json_obj, "grHelloDelaySecs", + oi->gr.hello_delay.interval); } else { timerclear(&res); if (event_is_scheduled(oi->thread_send_lsupdate)) @@ -1205,6 +1224,10 @@ static int ospf6_interface_show(struct vty *vty, struct interface *ifp, : "off")); for (ALL_LSDB(oi->lsack_list, lsa, lsanext)) vty_out(vty, " %s\n", lsa->name); + + if (oi->gr.hello_delay.interval != 0) + vty_out(vty, " Graceful Restart hello delay: %us\n", + oi->gr.hello_delay.interval); } /* BFD specific. */ @@ -2157,6 +2180,50 @@ ALIAS (ipv6_ospf6_deadinterval, "Interval time after which a neighbor is declared down\n" SECONDS_STR) +DEFPY(ipv6_ospf6_gr_hdelay, ipv6_ospf6_gr_hdelay_cmd, + "ipv6 ospf6 graceful-restart hello-delay (1-1800)", + IP6_STR + OSPF6_STR + "Graceful Restart parameters\n" + "Delay the sending of the first hello packets.\n" + "Delay in seconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct ospf6_interface *oi; + + oi = ifp->info; + if (oi == NULL) + oi = ospf6_interface_create(ifp); + + /* Note: new or updated value won't affect ongoing graceful restart. */ + oi->gr.hello_delay.interval = hello_delay; + + return CMD_SUCCESS; +} + +DEFPY(no_ipv6_ospf6_gr_hdelay, no_ipv6_ospf6_gr_hdelay_cmd, + "no ipv6 ospf6 graceful-restart hello-delay [(1-1800)]", + NO_STR + IP6_STR + OSPF6_STR + "Graceful Restart parameters\n" + "Delay the sending of the first hello packets.\n" + "Delay in seconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct ospf6_interface *oi; + + oi = ifp->info; + if (oi == NULL) + oi = ospf6_interface_create(ifp); + + oi->gr.hello_delay.interval = OSPF_HELLO_DELAY_DEFAULT; + oi->gr.hello_delay.elapsed_seconds = 0; + EVENT_OFF(oi->gr.hello_delay.t_grace_send); + + return CMD_SUCCESS; +} + /* interface variable set command */ DEFUN (ipv6_ospf6_transmitdelay, ipv6_ospf6_transmitdelay_cmd, @@ -2624,6 +2691,11 @@ static int config_write_ospf6_interface(struct vty *vty, struct vrf *vrf) else if (oi->type_cfg && oi->type == OSPF_IFTYPE_BROADCAST) vty_out(vty, " ipv6 ospf6 network broadcast\n"); + if (oi->gr.hello_delay.interval != OSPF_HELLO_DELAY_DEFAULT) + vty_out(vty, + " ipv6 ospf6 graceful-restart hello-delay %u\n", + oi->gr.hello_delay.interval); + ospf6_bfd_write_config(vty, oi); ospf6_auth_write_config(vty, &oi->at_data); @@ -2722,12 +2794,14 @@ void ospf6_interface_init(void) install_element(INTERFACE_NODE, &ipv6_ospf6_deadinterval_cmd); install_element(INTERFACE_NODE, &ipv6_ospf6_hellointerval_cmd); + install_element(INTERFACE_NODE, &ipv6_ospf6_gr_hdelay_cmd); install_element(INTERFACE_NODE, &ipv6_ospf6_priority_cmd); install_element(INTERFACE_NODE, &ipv6_ospf6_retransmitinterval_cmd); install_element(INTERFACE_NODE, &ipv6_ospf6_transmitdelay_cmd); install_element(INTERFACE_NODE, &ipv6_ospf6_instance_cmd); install_element(INTERFACE_NODE, &no_ipv6_ospf6_deadinterval_cmd); install_element(INTERFACE_NODE, &no_ipv6_ospf6_hellointerval_cmd); + install_element(INTERFACE_NODE, &no_ipv6_ospf6_gr_hdelay_cmd); install_element(INTERFACE_NODE, &no_ipv6_ospf6_priority_cmd); install_element(INTERFACE_NODE, &no_ipv6_ospf6_retransmitinterval_cmd); install_element(INTERFACE_NODE, &no_ipv6_ospf6_transmitdelay_cmd); diff --git a/ospf6d/ospf6_interface.h b/ospf6d/ospf6_interface.h index ae0744b25d..5942df0ab5 100644 --- a/ospf6d/ospf6_interface.h +++ b/ospf6d/ospf6_interface.h @@ -74,6 +74,15 @@ struct ospf6_interface { uint16_t dead_interval; uint32_t rxmt_interval; + /* Graceful-Restart data. */ + struct { + struct { + uint16_t interval; + uint16_t elapsed_seconds; + struct event *t_grace_send; + } hello_delay; + } gr; + uint32_t state_change; /* Cost */ diff --git a/ospf6d/ospf6_message.c b/ospf6d/ospf6_message.c index 14b02dac79..032988a91f 100644 --- a/ospf6d/ospf6_message.c +++ b/ospf6d/ospf6_message.c @@ -2244,6 +2244,10 @@ void ospf6_hello_send(struct event *thread) oi = (struct ospf6_interface *)EVENT_ARG(thread); + /* Check if the GR hello-delay is active. */ + if (oi->gr.hello_delay.t_grace_send) + return; + if (oi->state <= OSPF6_INTERFACE_DOWN) { if (IS_OSPF6_DEBUG_MESSAGE(OSPF6_MESSAGE_TYPE_HELLO, SEND_HDR)) zlog_debug("Unable to send Hello on down interface %s", diff --git a/ospf6d/ospf6_spf.c b/ospf6d/ospf6_spf.c index 0990b14307..e39ae504a2 100644 --- a/ospf6d/ospf6_spf.c +++ b/ospf6d/ospf6_spf.c @@ -1262,6 +1262,7 @@ static void ospf6_ase_calculate_timer(struct event *t) * no longer valid. */ ospf6_zebra_gr_disable(ospf6); + ospf6_zebra_gr_enable(ospf6, ospf6->gr_info.grace_period); ospf6->gr_info.finishing_restart = false; } } diff --git a/ospf6d/ospf6_top.c b/ospf6d/ospf6_top.c index c2aa3abeed..01c962194c 100644 --- a/ospf6d/ospf6_top.c +++ b/ospf6d/ospf6_top.c @@ -455,6 +455,13 @@ struct ospf6 *ospf6_instance_create(const char *name) if (ospf6->router_id == 0) ospf6_router_id_update(ospf6, true); ospf6_add(ospf6); + + /* + * Read from non-volatile memory whether this instance is performing a + * graceful restart or not. + */ + ospf6_gr_nvm_read(ospf6); + if (ospf6->vrf_id != VRF_UNKNOWN) { vrf = vrf_lookup_by_id(ospf6->vrf_id); FOR_ALL_INTERFACES (vrf, ifp) { @@ -465,12 +472,6 @@ struct ospf6 *ospf6_instance_create(const char *name) if (ospf6->fd < 0) return ospf6; - /* - * Read from non-volatile memory whether this instance is performing a - * graceful restart or not. - */ - ospf6_gr_nvm_read(ospf6); - event_add_read(master, ospf6_receive, ospf6, ospf6->fd, &ospf6->t_ospf6_receive); @@ -490,6 +491,7 @@ void ospf6_delete(struct ospf6 *o) ospf6_gr_helper_deinit(o); if (!o->gr_info.prepare_in_progress) ospf6_flush_self_originated_lsas_now(o); + XFREE(MTYPE_TMP, o->gr_info.exit_reason); ospf6_disable(o); ospf6_del(o); @@ -693,6 +695,9 @@ DEFUN(no_router_ospf6, no_router_ospf6_cmd, "no router ospf6 [vrf NAME]", if (ospf6 == NULL) vty_out(vty, "OSPFv3 is not configured\n"); else { + if (ospf6->gr_info.restart_support) + ospf6_gr_nvm_delete(ospf6); + ospf6_delete(ospf6); ospf6 = NULL; } diff --git a/ospf6d/ospf6_top.h b/ospf6d/ospf6_top.h index 8fdd291122..a38dad8fce 100644 --- a/ospf6d/ospf6_top.h +++ b/ospf6d/ospf6_top.h @@ -51,6 +51,8 @@ struct ospf6_gr_info { bool prepare_in_progress; bool finishing_restart; uint32_t grace_period; + int reason; + char *exit_reason; struct event *t_grace_period; }; diff --git a/ospf6d/ospf6_zebra.c b/ospf6d/ospf6_zebra.c index 6b3d4955da..0f631c4d01 100644 --- a/ospf6d/ospf6_zebra.c +++ b/ospf6d/ospf6_zebra.c @@ -239,12 +239,18 @@ static int ospf6_zebra_gr_update(struct ospf6 *ospf6, int command, int ospf6_zebra_gr_enable(struct ospf6 *ospf6, uint32_t stale_time) { + if (IS_DEBUG_OSPF6_GR) + zlog_debug("Zebra enable GR [stale time %u]", stale_time); + return ospf6_zebra_gr_update(ospf6, ZEBRA_CLIENT_GR_CAPABILITIES, stale_time); } int ospf6_zebra_gr_disable(struct ospf6 *ospf6) { + if (IS_DEBUG_OSPF6_GR) + zlog_debug("Zebra disable GR"); + return ospf6_zebra_gr_update(ospf6, ZEBRA_CLIENT_GR_DISABLE, 0); } @@ -735,10 +741,20 @@ uint8_t ospf6_distance_apply(struct prefix_ipv6 *p, struct ospf6_route * or, static void ospf6_zebra_connected(struct zclient *zclient) { + struct ospf6 *ospf6; + struct listnode *node; + /* Send the client registration */ bfd_client_sendmsg(zclient, ZEBRA_BFD_CLIENT_REGISTER, VRF_DEFAULT); zclient_send_reg_requests(zclient, VRF_DEFAULT); + + /* Activate graceful restart if configured. */ + for (ALL_LIST_ELEMENTS_RO(om6->ospf6, node, ospf6)) { + if (!ospf6->gr_info.restart_support) + continue; + (void)ospf6_zebra_gr_enable(ospf6, ospf6->gr_info.grace_period); + } } static zclient_handler *const ospf6_handlers[] = { diff --git a/ospf6d/subdir.am b/ospf6d/subdir.am index 3dff03956c..c34db3012d 100644 --- a/ospf6d/subdir.am +++ b/ospf6d/subdir.am @@ -78,6 +78,7 @@ clippy_scan += \ ospf6d/ospf6_top.c \ ospf6d/ospf6_area.c \ ospf6d/ospf6_asbr.c \ + ospf6d/ospf6_interface.c \ ospf6d/ospf6_lsa.c \ ospf6d/ospf6_gr_helper.c \ ospf6d/ospf6_gr.c \ diff --git a/ospfd/ospf_ase.c b/ospfd/ospf_ase.c index cc2110d433..610b5fc08e 100644 --- a/ospfd/ospf_ase.c +++ b/ospfd/ospf_ase.c @@ -615,6 +615,7 @@ static void ospf_ase_calculate_timer(struct event *t) */ if (ospf->gr_info.finishing_restart) { ospf_zebra_gr_disable(ospf); + ospf_zebra_gr_enable(ospf, ospf->gr_info.grace_period); ospf->gr_info.finishing_restart = false; } } diff --git a/ospfd/ospf_gr.c b/ospfd/ospf_gr.c index f60f0e863e..6999b3f623 100644 --- a/ospfd/ospf_gr.c +++ b/ospfd/ospf_gr.c @@ -33,7 +33,7 @@ #include "ospfd/ospf_dump.h" #include "ospfd/ospf_gr_clippy.c" -static void ospf_gr_nvm_delete(struct ospf *ospf); +static void ospf_gr_grace_period_expired(struct event *thread); /* Lookup self-originated Grace-LSA in the LSDB. */ static struct ospf_lsa *ospf_gr_lsa_lookup(struct ospf *ospf, @@ -53,7 +53,9 @@ static struct ospf_lsa *ospf_gr_lsa_lookup(struct ospf *ospf, /* Fill in fields of the Grace-LSA that is being originated. */ static void ospf_gr_lsa_body_set(struct ospf_gr_info *gr_info, - struct ospf_interface *oi, struct stream *s) + struct ospf_interface *oi, + enum ospf_gr_restart_reason reason, + struct stream *s) { struct grace_tlv_graceperiod tlv_period = {}; struct grace_tlv_restart_reason tlv_reason = {}; @@ -68,10 +70,7 @@ static void ospf_gr_lsa_body_set(struct ospf_gr_info *gr_info, /* Put restart reason. */ tlv_reason.header.type = htons(RESTART_REASON_TYPE); tlv_reason.header.length = htons(RESTART_REASON_LENGTH); - if (gr_info->restart_support) - tlv_reason.reason = OSPF_GR_SW_RESTART; - else - tlv_reason.reason = OSPF_GR_UNKNOWN_RESTART; + tlv_reason.reason = reason; stream_put(s, &tlv_reason, sizeof(tlv_reason)); /* Put IP address. */ @@ -85,7 +84,8 @@ static void ospf_gr_lsa_body_set(struct ospf_gr_info *gr_info, } /* Generate Grace-LSA for a given interface. */ -static struct ospf_lsa *ospf_gr_lsa_new(struct ospf_interface *oi) +static struct ospf_lsa *ospf_gr_lsa_new(struct ospf_interface *oi, + enum ospf_gr_restart_reason reason) { struct stream *s; struct lsa_header *lsah; @@ -112,7 +112,7 @@ static struct ospf_lsa *ospf_gr_lsa_new(struct ospf_interface *oi) lsa_header_set(s, options, lsa_type, lsa_id, oi->ospf->router_id); /* Set opaque-LSA body fields. */ - ospf_gr_lsa_body_set(&oi->ospf->gr_info, oi, s); + ospf_gr_lsa_body_set(&oi->ospf->gr_info, oi, reason, s); /* Set length. */ length = stream_get_endp(s); @@ -135,15 +135,20 @@ static struct ospf_lsa *ospf_gr_lsa_new(struct ospf_interface *oi) } /* Originate and install Grace-LSA for a given interface. */ -static void ospf_gr_lsa_originate(struct ospf_interface *oi, bool maxage) +static void ospf_gr_lsa_originate(struct ospf_interface *oi, + enum ospf_gr_restart_reason reason, + bool maxage) { struct ospf_lsa *lsa, *old; - if (ospf_interface_neighbor_count(oi) == 0) + /* Skip originating a Grace-LSA when not necessary. */ + if (!if_is_operative(oi->ifp) || if_is_loopback(oi->ifp) || + (reason != OSPF_GR_UNKNOWN_RESTART && + ospf_interface_neighbor_count(oi) == 0)) return; /* Create new Grace-LSA. */ - lsa = ospf_gr_lsa_new(oi); + lsa = ospf_gr_lsa_new(oi, reason); if (!lsa) { zlog_warn("%s: ospf_gr_lsa_new() failed", __func__); return; @@ -157,18 +162,36 @@ static void ospf_gr_lsa_originate(struct ospf_interface *oi, bool maxage) if (old) lsa->data->ls_seqnum = lsa_seqnum_increment(old); - /* Install this LSA into LSDB. */ - if (ospf_lsa_install(oi->ospf, oi, lsa) == NULL) { - zlog_warn("%s: ospf_lsa_install() failed", __func__); - ospf_lsa_unlock(&lsa); - return; + if (!maxage && reason == OSPF_GR_UNKNOWN_RESTART) { + struct list *update; + struct in_addr addr; + + /* + * When performing an unplanned restart, send a handcrafted + * Grace-LSA since the interface isn't fully initialized yet. + */ + ospf_lsa_checksum(lsa->data); + ospf_lsa_lock(lsa); + update = list_new(); + listnode_add(update, lsa); + addr.s_addr = htonl(OSPF_ALLSPFROUTERS); + ospf_ls_upd_queue_send(oi, update, addr, true); + list_delete(&update); + ospf_lsa_discard(lsa); + } else { + /* Install this LSA into LSDB. */ + if (ospf_lsa_install(oi->ospf, oi, lsa) == NULL) { + zlog_warn("%s: ospf_lsa_install() failed", __func__); + ospf_lsa_unlock(&lsa); + return; + } + + /* Flood the LSA through out the interface */ + ospf_flood_through_interface(oi, NULL, lsa); } /* Update new LSA origination count. */ oi->ospf->lsa_originate_count++; - - /* Flood the LSA through out the interface */ - ospf_flood_through_interface(oi, NULL, lsa); } /* Flush all self-originated Grace-LSAs. */ @@ -181,13 +204,14 @@ static void ospf_gr_flush_grace_lsas(struct ospf *ospf) struct ospf_interface *oi; struct listnode *inode; - if (IS_DEBUG_OSPF_GR) - zlog_debug( - "GR: flushing self-originated Grace-LSAs [area %pI4]", - &area->area_id); + for (ALL_LIST_ELEMENTS_RO(area->oiflist, inode, oi)) { + if (IS_DEBUG_OSPF_GR) + zlog_debug( + "GR: flushing self-originated Grace-LSA [area %pI4] [interface %s]", + &area->area_id, oi->ifp->name); - for (ALL_LIST_ELEMENTS_RO(area->oiflist, inode, oi)) - ospf_gr_lsa_originate(oi, true); + ospf_gr_lsa_originate(oi, ospf->gr_info.reason, true); + } } } @@ -203,9 +227,6 @@ static void ospf_gr_restart_exit(struct ospf *ospf, const char *reason) ospf->gr_info.restart_in_progress = false; EVENT_OFF(ospf->gr_info.t_grace_period); - /* Record in non-volatile memory that the restart is complete. */ - ospf_gr_nvm_delete(ospf); - for (ALL_LIST_ELEMENTS_RO(ospf->areas, onode, area)) { struct ospf_interface *oi; @@ -216,13 +237,22 @@ static void ospf_gr_restart_exit(struct ospf *ospf, const char *reason) */ ospf_router_lsa_update_area(area); - /* - * 2) The router should reoriginate network-LSAs on all segments - * where it is the Designated Router. - */ - for (ALL_LIST_ELEMENTS_RO(area->oiflist, anode, oi)) + for (ALL_LIST_ELEMENTS_RO(area->oiflist, anode, oi)) { + /* Disable hello delay. */ + if (oi->gr.hello_delay.t_grace_send) { + oi->gr.hello_delay.elapsed_seconds = 0; + EVENT_OFF(oi->gr.hello_delay.t_grace_send); + OSPF_ISM_TIMER_MSEC_ON(oi->t_hello, + ospf_hello_timer, 1); + } + + /* + * 2) The router should reoriginate network-LSAs on all + * segments where it is the Designated Router. + */ if (oi->state == ISM_DR) ospf_network_lsa_update(oi); + } } /* @@ -242,12 +272,34 @@ static void ospf_gr_restart_exit(struct ospf *ospf, const char *reason) * should be removed. */ ospf->gr_info.finishing_restart = true; + XFREE(MTYPE_TMP, ospf->gr_info.exit_reason); + ospf->gr_info.exit_reason = XSTRDUP(MTYPE_TMP, reason); ospf_spf_calculate_schedule(ospf, SPF_FLAG_GR_FINISH); /* 6) Any grace-LSAs that the router originated should be flushed. */ ospf_gr_flush_grace_lsas(ospf); } +/* Enter the Graceful Restart mode. */ +void ospf_gr_restart_enter(struct ospf *ospf, + enum ospf_gr_restart_reason reason, int timestamp) +{ + unsigned long remaining_time; + + ospf->gr_info.restart_in_progress = true; + ospf->gr_info.reason = reason; + + /* Schedule grace period timeout. */ + remaining_time = timestamp - time(NULL); + if (IS_DEBUG_OSPF_GR) + zlog_debug( + "GR: remaining time until grace period expires: %lu(s)", + remaining_time); + + event_add_timer(master, ospf_gr_grace_period_expired, ospf, + remaining_time, &ospf->gr_info.t_grace_period); +} + /* Check if a Router-LSA contains a given link. */ static bool ospf_router_lsa_contains_adj(struct ospf_lsa *lsa, struct in_addr *id) @@ -518,11 +570,26 @@ static char *ospf_gr_nvm_filepath(struct ospf *ospf) return filepath; } +/* Send extra Grace-LSA out the interface (unplanned outages only). */ +void ospf_gr_iface_send_grace_lsa(struct event *thread) +{ + struct ospf_interface *oi = EVENT_ARG(thread); + struct ospf_if_params *params = IF_DEF_PARAMS(oi->ifp); + + ospf_gr_lsa_originate(oi, oi->ospf->gr_info.reason, false); + + if (++oi->gr.hello_delay.elapsed_seconds < params->v_gr_hello_delay) + event_add_timer(master, ospf_gr_iface_send_grace_lsa, oi, 1, + &oi->gr.hello_delay.t_grace_send); + else + OSPF_ISM_TIMER_MSEC_ON(oi->t_hello, ospf_hello_timer, 1); +} + /* * Record in non-volatile memory that the given OSPF instance is attempting to * perform a graceful restart. */ -static void ospf_gr_nvm_update(struct ospf *ospf) +static void ospf_gr_nvm_update(struct ospf *ospf, bool prepare) { char *filepath; const char *inst_name; @@ -550,16 +617,18 @@ static void ospf_gr_nvm_update(struct ospf *ospf) json_instance); } + json_object_int_add(json_instance, "gracePeriod", + ospf->gr_info.grace_period); + /* * Record not only the grace period, but also a UNIX timestamp * corresponding to the end of that period. That way, once ospfd is * restarted, it will be possible to take into account the time that * passed while ospfd wasn't running. */ - json_object_int_add(json_instance, "gracePeriod", - ospf->gr_info.grace_period); - json_object_int_add(json_instance, "timestamp", - time(NULL) + ospf->gr_info.grace_period); + if (prepare) + json_object_int_add(json_instance, "timestamp", + time(NULL) + ospf->gr_info.grace_period); json_object_to_file_ext(filepath, json, JSON_C_TO_STRING_PRETTY); json_object_free(json); @@ -569,7 +638,7 @@ static void ospf_gr_nvm_update(struct ospf *ospf) * Delete GR status information about the given OSPF instance from non-volatile * memory. */ -static void ospf_gr_nvm_delete(struct ospf *ospf) +void ospf_gr_nvm_delete(struct ospf *ospf) { char *filepath; const char *inst_name; @@ -607,6 +676,7 @@ void ospf_gr_nvm_read(struct ospf *ospf) json_object *json_instances; json_object *json_instance; json_object *json_timestamp; + json_object *json_grace_period; time_t timestamp = 0; filepath = ospf_gr_nvm_filepath(ospf); @@ -629,29 +699,33 @@ void ospf_gr_nvm_read(struct ospf *ospf) json_instance); } + json_object_object_get_ex(json_instance, "gracePeriod", + &json_grace_period); json_object_object_get_ex(json_instance, "timestamp", &json_timestamp); + if (json_timestamp) { time_t now; - unsigned long remaining_time; - /* Check if the grace period has already expired. */ + /* Planned GR: check if the grace period has already expired. */ now = time(NULL); timestamp = json_object_get_int(json_timestamp); if (now > timestamp) { ospf_gr_restart_exit( ospf, "grace period has expired already"); - } else { - /* Schedule grace period timeout. */ - ospf->gr_info.restart_in_progress = true; - remaining_time = timestamp - time(NULL); - if (IS_DEBUG_OSPF_GR) - zlog_debug( - "GR: remaining time until grace period expires: %lu(s)", - remaining_time); - event_add_timer(master, ospf_gr_grace_period_expired, - ospf, remaining_time, - &ospf->gr_info.t_grace_period); - } + } else + ospf_gr_restart_enter(ospf, OSPF_GR_SW_RESTART, + timestamp); + } else if (json_grace_period) { + uint32_t grace_period; + + /* + * Unplanned GR: the Grace-LSAs will be sent later as soon as + * the interfaces are operational. + */ + grace_period = json_object_get_int(json_grace_period); + ospf->gr_info.grace_period = grace_period; + ospf_gr_restart_enter(ospf, OSPF_GR_UNKNOWN_RESTART, + time(NULL) + ospf->gr_info.grace_period); } json_object_object_del(json_instances, inst_name); @@ -660,6 +734,17 @@ void ospf_gr_nvm_read(struct ospf *ospf) json_object_free(json); } +void ospf_gr_unplanned_start_interface(struct ospf_interface *oi) +{ + /* Send Grace-LSA. */ + ospf_gr_lsa_originate(oi, oi->ospf->gr_info.reason, false); + + /* Start GR hello-delay interval. */ + oi->gr.hello_delay.elapsed_seconds = 0; + event_add_timer(master, ospf_gr_iface_send_grace_lsa, oi, 1, + &oi->gr.hello_delay.t_grace_send); +} + /* Prepare to start a Graceful Restart. */ static void ospf_gr_prepare(void) { @@ -687,20 +772,12 @@ static void ospf_gr_prepare(void) continue; } - /* Freeze OSPF routes in the RIB. */ - if (ospf_zebra_gr_enable(ospf, ospf->gr_info.grace_period)) { - zlog_warn( - "%s: failed to activate graceful restart: not connected to zebra", - __func__); - continue; - } - /* Send a Grace-LSA to all neighbors. */ for (ALL_LIST_ELEMENTS_RO(ospf->oiflist, inode, oi)) - ospf_gr_lsa_originate(oi, false); + ospf_gr_lsa_originate(oi, OSPF_GR_SW_RESTART, false); /* Record end of the grace period in non-volatile memory. */ - ospf_gr_nvm_update(ospf); + ospf_gr_nvm_update(ospf, true); /* * Mark that a Graceful Restart preparation is in progress, to @@ -749,6 +826,12 @@ DEFPY(graceful_restart, graceful_restart_cmd, ospf->gr_info.restart_support = true; ospf->gr_info.grace_period = grace_period; + /* Freeze OSPF routes in the RIB. */ + (void)ospf_zebra_gr_enable(ospf, ospf->gr_info.grace_period); + + /* Record that GR is enabled in non-volatile memory. */ + ospf_gr_nvm_update(ospf, false); + return CMD_SUCCESS; } @@ -771,6 +854,8 @@ DEFPY(no_graceful_restart, no_graceful_restart_cmd, ospf->gr_info.restart_support = false; ospf->gr_info.grace_period = OSPF_DFLT_GRACE_INTERVAL; + ospf_gr_nvm_delete(ospf); + ospf_zebra_gr_disable(ospf); return CMD_SUCCESS; } diff --git a/ospfd/ospf_gr.h b/ospfd/ospf_gr.h index 9760bb1728..750d77381d 100644 --- a/ospfd/ospf_gr.h +++ b/ospfd/ospf_gr.h @@ -166,11 +166,16 @@ extern void ospf_gr_helper_supported_gracetime_set(struct ospf *ospf, uint32_t interval); extern void ospf_gr_helper_set_supported_planned_only_restart(struct ospf *ospf, bool planned_only); - +extern void ospf_gr_iface_send_grace_lsa(struct event *thread); +extern void ospf_gr_restart_enter(struct ospf *ospf, + enum ospf_gr_restart_reason reason, + int timestamp); extern void ospf_gr_check_lsdb_consistency(struct ospf *ospf, struct ospf_area *area); extern void ospf_gr_check_adjs(struct ospf *ospf); extern void ospf_gr_nvm_read(struct ospf *ospf); +extern void ospf_gr_nvm_delete(struct ospf *ospf); +extern void ospf_gr_unplanned_start_interface(struct ospf_interface *oi); extern void ospf_gr_init(void); #endif /* _ZEBRA_OSPF_GR_H */ diff --git a/ospfd/ospf_interface.c b/ospfd/ospf_interface.c index f18aa399a4..8da982aed8 100644 --- a/ospfd/ospf_interface.c +++ b/ospfd/ospf_interface.c @@ -530,6 +530,7 @@ static struct ospf_if_params *ospf_new_if_params(void) UNSET_IF_PARAM(oip, passive_interface); UNSET_IF_PARAM(oip, v_hello); UNSET_IF_PARAM(oip, fast_hello); + UNSET_IF_PARAM(oip, v_gr_hello_delay); UNSET_IF_PARAM(oip, v_wait); UNSET_IF_PARAM(oip, priority); UNSET_IF_PARAM(oip, type); @@ -679,6 +680,9 @@ int ospf_if_new_hook(struct interface *ifp) SET_IF_PARAM(IF_DEF_PARAMS(ifp), fast_hello); IF_DEF_PARAMS(ifp)->fast_hello = OSPF_FAST_HELLO_DEFAULT; + SET_IF_PARAM(IF_DEF_PARAMS(ifp), v_gr_hello_delay); + IF_DEF_PARAMS(ifp)->v_gr_hello_delay = OSPF_HELLO_DELAY_DEFAULT; + SET_IF_PARAM(IF_DEF_PARAMS(ifp), v_wait); IF_DEF_PARAMS(ifp)->v_wait = OSPF_ROUTER_DEAD_INTERVAL_DEFAULT; diff --git a/ospfd/ospf_interface.h b/ospfd/ospf_interface.h index 649df437a4..24768b9ab4 100644 --- a/ospfd/ospf_interface.h +++ b/ospfd/ospf_interface.h @@ -72,6 +72,9 @@ struct ospf_if_params { DECLARE_IF_PARAM(uint32_t, v_wait); /* Router Dead Interval */ bool is_v_wait_set; /* Check for Dead Interval set */ + /* GR Hello Delay Interval */ + DECLARE_IF_PARAM(uint16_t, v_gr_hello_delay); + /* MTU mismatch check (see RFC2328, chap 10.6) */ DECLARE_IF_PARAM(uint8_t, mtu_ignore); @@ -214,6 +217,14 @@ struct ospf_interface { /* List of configured NBMA neighbor. */ struct list *nbr_nbma; + /* Graceful-Restart data. */ + struct { + struct { + uint16_t elapsed_seconds; + struct event *t_grace_send; + } hello_delay; + } gr; + /* self-originated LSAs. */ struct ospf_lsa *network_lsa_self; /* network-LSA. */ struct list *opaque_lsa_self; /* Type-9 Opaque-LSAs */ diff --git a/ospfd/ospf_ism.c b/ospfd/ospf_ism.c index 9f795ea918..2516fa75db 100644 --- a/ospfd/ospf_ism.c +++ b/ospfd/ospf_ism.c @@ -244,6 +244,10 @@ void ospf_hello_timer(struct event *thread) oi = EVENT_ARG(thread); oi->t_hello = NULL; + /* Check if the GR hello-delay is active. */ + if (oi->gr.hello_delay.t_grace_send) + return; + if (IS_DEBUG_OSPF(ism, ISM_TIMERS)) zlog_debug("ISM[%s]: Timer (Hello timer expire)", IF_NAME(oi)); @@ -282,6 +286,7 @@ static void ism_timer_set(struct ospf_interface *oi) EVENT_OFF(oi->t_hello); EVENT_OFF(oi->t_wait); EVENT_OFF(oi->t_ls_ack); + EVENT_OFF(oi->gr.hello_delay.t_grace_send); break; case ISM_Loopback: /* In this state, the interface may be looped back and will be @@ -289,6 +294,7 @@ static void ism_timer_set(struct ospf_interface *oi) EVENT_OFF(oi->t_hello); EVENT_OFF(oi->t_wait); EVENT_OFF(oi->t_ls_ack); + EVENT_OFF(oi->gr.hello_delay.t_grace_send); break; case ISM_Waiting: /* The router is trying to determine the identity of DRouter and diff --git a/ospfd/ospf_packet.c b/ospfd/ospf_packet.c index fe94c34e4e..d010b8b6e6 100644 --- a/ospfd/ospf_packet.c +++ b/ospfd/ospf_packet.c @@ -4040,9 +4040,8 @@ static struct ospf_packet *ospf_ls_upd_packet_new(struct list *update, return ospf_packet_new(size - sizeof(struct ip)); } -static void ospf_ls_upd_queue_send(struct ospf_interface *oi, - struct list *update, struct in_addr addr, - int send_lsupd_now) +void ospf_ls_upd_queue_send(struct ospf_interface *oi, struct list *update, + struct in_addr addr, int send_lsupd_now) { struct ospf_packet *op; uint16_t length = OSPF_HEADER_SIZE; diff --git a/ospfd/ospf_packet.h b/ospfd/ospf_packet.h index 4003e2add6..234738979e 100644 --- a/ospfd/ospf_packet.h +++ b/ospfd/ospf_packet.h @@ -132,6 +132,9 @@ extern void ospf_ls_req_send(struct ospf_neighbor *); extern void ospf_ls_upd_send_lsa(struct ospf_neighbor *, struct ospf_lsa *, int); extern void ospf_ls_upd_send(struct ospf_neighbor *, struct list *, int, int); +extern void ospf_ls_upd_queue_send(struct ospf_interface *oi, + struct list *update, struct in_addr addr, + int send_lsupd_now); extern void ospf_ls_ack_send(struct ospf_neighbor *, struct ospf_lsa *); extern void ospf_ls_ack_send_delayed(struct ospf_interface *); extern void ospf_ls_retransmit(struct ospf_interface *, struct ospf_lsa *); diff --git a/ospfd/ospf_vty.c b/ospfd/ospf_vty.c index 1b4d26ef3d..f92be3fca5 100644 --- a/ospfd/ospf_vty.c +++ b/ospfd/ospf_vty.c @@ -232,9 +232,12 @@ DEFUN (no_router_ospf, return CMD_NOT_MY_INSTANCE; ospf = ospf_lookup(instance, vrf_name); - if (ospf) + if (ospf) { + if (ospf->gr_info.restart_support) + ospf_gr_nvm_delete(ospf); + ospf_finish(ospf); - else + } else ret = CMD_WARNING_CONFIG_FAILED; return ret; @@ -3617,6 +3620,7 @@ static void show_ip_ospf_interface_sub(struct vty *vty, struct ospf *ospf, struct ospf_neighbor *nbr; struct route_node *rn; uint32_t bandwidth = ifp->bandwidth ? ifp->bandwidth : ifp->speed; + struct ospf_if_params *params; /* Is interface up? */ if (use_json) { @@ -3936,6 +3940,20 @@ static void show_ip_ospf_interface_sub(struct vty *vty, struct ospf *ospf, ospf_nbr_count(oi, 0), ospf_nbr_count(oi, NSM_Full)); + + params = IF_DEF_PARAMS(ifp); + if (params && + OSPF_IF_PARAM_CONFIGURED(params, v_gr_hello_delay)) { + if (use_json) { + json_object_int_add(json_interface_sub, + "grHelloDelaySecs", + params->v_gr_hello_delay); + } else + vty_out(vty, + " Graceful Restart hello delay: %us\n", + params->v_gr_hello_delay); + } + ospf_interface_bfd_show(vty, ifp, json_interface_sub); /* OSPF Authentication information */ @@ -8639,6 +8657,59 @@ DEFUN_HIDDEN (no_ospf_retransmit_interval, return no_ip_ospf_retransmit_interval(self, vty, argc, argv); } +DEFPY (ip_ospf_gr_hdelay, + ip_ospf_gr_hdelay_cmd, + "ip ospf graceful-restart hello-delay (1-1800)", + IP_STR + "OSPF interface commands\n" + "Graceful Restart parameters\n" + "Delay the sending of the first hello packets.\n" + "Delay in seconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct ospf_if_params *params; + + params = IF_DEF_PARAMS(ifp); + + /* Note: new or updated value won't affect ongoing graceful restart. */ + SET_IF_PARAM(params, v_gr_hello_delay); + params->v_gr_hello_delay = hello_delay; + + return CMD_SUCCESS; +} + +DEFPY (no_ip_ospf_gr_hdelay, + no_ip_ospf_gr_hdelay_cmd, + "no ip ospf graceful-restart hello-delay [(1-1800)]", + NO_STR + IP_STR + "OSPF interface commands\n" + "Graceful Restart parameters\n" + "Delay the sending of the first hello packets.\n" + "Delay in seconds\n") +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct ospf_if_params *params; + struct route_node *rn; + + params = IF_DEF_PARAMS(ifp); + UNSET_IF_PARAM(params, v_gr_hello_delay); + params->v_gr_hello_delay = OSPF_HELLO_DELAY_DEFAULT; + + for (rn = route_top(IF_OIFS(ifp)); rn; rn = route_next(rn)) { + struct ospf_interface *oi; + + oi = rn->info; + if (!oi) + continue; + + oi->gr.hello_delay.elapsed_seconds = 0; + EVENT_OFF(oi->gr.hello_delay.t_grace_send); + } + + return CMD_SUCCESS; +} + DEFUN (ip_ospf_transmit_delay, ip_ospf_transmit_delay_addr_cmd, "ip ospf transmit-delay (1-65535) [A.B.C.D]", @@ -11831,6 +11902,15 @@ static int config_write_interface_one(struct vty *vty, struct vrf *vrf) vty_out(vty, "\n"); } + /* Hello Graceful-Restart Delay print. */ + if (OSPF_IF_PARAM_CONFIGURED(params, + v_gr_hello_delay) && + params->v_gr_hello_delay != + OSPF_HELLO_DELAY_DEFAULT) + vty_out(vty, + " ip ospf graceful-restart hello-delay %u\n", + params->v_gr_hello_delay); + /* Router Priority print. */ if (OSPF_IF_PARAM_CONFIGURED(params, priority) && params->priority diff --git a/ospfd/ospf_zebra.c b/ospfd/ospf_zebra.c index 0b770a8364..27d74cd4fc 100644 --- a/ospfd/ospf_zebra.c +++ b/ospfd/ospf_zebra.c @@ -1252,12 +1252,18 @@ static int ospf_zebra_gr_update(struct ospf *ospf, int command, int ospf_zebra_gr_enable(struct ospf *ospf, uint32_t stale_time) { + if (IS_DEBUG_OSPF_GR) + zlog_debug("Zebra enable GR [stale time %u]", stale_time); + return ospf_zebra_gr_update(ospf, ZEBRA_CLIENT_GR_CAPABILITIES, stale_time); } int ospf_zebra_gr_disable(struct ospf *ospf) { + if (IS_DEBUG_OSPF_GR) + zlog_debug("Zebra disable GR"); + return ospf_zebra_gr_update(ospf, ZEBRA_CLIENT_GR_DISABLE, 0); } @@ -2120,10 +2126,20 @@ int ospf_zebra_label_manager_connect(void) static void ospf_zebra_connected(struct zclient *zclient) { + struct ospf *ospf; + struct listnode *node; + /* Send the client registration */ bfd_client_sendmsg(zclient, ZEBRA_BFD_CLIENT_REGISTER, VRF_DEFAULT); zclient_send_reg_requests(zclient, VRF_DEFAULT); + + /* Activate graceful restart if configured. */ + for (ALL_LIST_ELEMENTS_RO(om->ospf, node, ospf)) { + if (!ospf->gr_info.restart_support) + continue; + (void)ospf_zebra_gr_enable(ospf, ospf->gr_info.grace_period); + } } /* diff --git a/ospfd/ospfd.c b/ospfd/ospfd.c index 7e83714c0a..eae1f301a2 100644 --- a/ospfd/ospfd.c +++ b/ospfd/ospfd.c @@ -720,6 +720,7 @@ static void ospf_finish_final(struct ospf *ospf) if (!ospf->gr_info.prepare_in_progress) ospf_flush_self_originated_lsas_now(ospf); + XFREE(MTYPE_TMP, ospf->gr_info.exit_reason); /* Unregister redistribution */ for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { @@ -1131,6 +1132,17 @@ struct ospf_interface *add_ospf_interface(struct connected *co, && if_is_operative(co->ifp)) ospf_if_up(oi); + /* + * RFC 3623 - Section 5 ("Unplanned Outages"): + * "The grace-LSAs are encapsulated in Link State Update Packets + * and sent out to all interfaces, even though the restarted + * router has no adjacencies and no knowledge of previous + * adjacencies". + */ + if (oi->ospf->gr_info.restart_in_progress && + oi->ospf->gr_info.reason == OSPF_GR_UNKNOWN_RESTART) + ospf_gr_unplanned_start_interface(oi); + return oi; } diff --git a/ospfd/ospfd.h b/ospfd/ospfd.h index 1f8d1a32e6..36936b16f4 100644 --- a/ospfd/ospfd.h +++ b/ospfd/ospfd.h @@ -148,6 +148,8 @@ struct ospf_gr_info { bool prepare_in_progress; bool finishing_restart; uint32_t grace_period; + int reason; + char *exit_reason; struct event *t_grace_period; }; diff --git a/tests/topotests/all_protocol_startup/r1/show_ip_ospf_interface.ref b/tests/topotests/all_protocol_startup/r1/show_ip_ospf_interface.ref index cb63da4715..7e28f04e1c 100644 --- a/tests/topotests/all_protocol_startup/r1/show_ip_ospf_interface.ref +++ b/tests/topotests/all_protocol_startup/r1/show_ip_ospf_interface.ref @@ -10,6 +10,7 @@ r1-eth0 is up Timer intervals configured, Hello 1s, Dead 5s, Wait 5s, Retransmit 5 Hello due in XX.XXXs Neighbor Count is 0, Adjacent neighbor count is 0 + Graceful Restart hello delay: 10s r1-eth3 is up ifindex X, MTU 1500 bytes, BW XX Mbit <UP,BROADCAST,RUNNING,MULTICAST> Internet Address 192.168.3.1/26, Broadcast 192.168.3.63, Area 0.0.0.0 @@ -22,3 +23,4 @@ r1-eth3 is up Timer intervals configured, Hello 1s, Dead 5s, Wait 5s, Retransmit 5 Hello due in XX.XXXs Neighbor Count is 0, Adjacent neighbor count is 0 + Graceful Restart hello delay: 10s diff --git a/tests/topotests/all_protocol_startup/test_all_protocol_startup.py b/tests/topotests/all_protocol_startup/test_all_protocol_startup.py index 92bb99c8f2..4b7c4de806 100644 --- a/tests/topotests/all_protocol_startup/test_all_protocol_startup.py +++ b/tests/topotests/all_protocol_startup/test_all_protocol_startup.py @@ -198,6 +198,12 @@ def test_error_messages_daemons(): if fatal_error != "": pytest.skip(fatal_error) + if os.environ.get("TOPOTESTS_CHECK_STDERR") is None: + print( + "SKIPPED final check on StdErr output: Disabled (TOPOTESTS_CHECK_STDERR undefined)\n" + ) + pytest.skip("Skipping test for Stderr output") + print("\n\n** Check for error messages in daemons") print("******************************************\n") diff --git a/tests/topotests/conftest.py b/tests/topotests/conftest.py index 74e308dbc6..b78a2f1052 100755 --- a/tests/topotests/conftest.py +++ b/tests/topotests/conftest.py @@ -87,6 +87,12 @@ def pytest_addoption(parser): ) parser.addoption( + "--memleaks", + action="store_true", + help="Report memstat results as errors", + ) + + parser.addoption( "--pause", action="store_true", help="Pause after each test", @@ -189,7 +195,7 @@ def pytest_addoption(parser): ) -def check_for_memleaks(): +def check_for_valgrind_memleaks(): assert topotest.g_pytest_config.option.valgrind_memleaks leaks = [] @@ -232,11 +238,47 @@ def check_for_memleaks(): pytest.fail("valgrind memleaks found for daemons: " + " ".join(daemons)) +def check_for_memleaks(): + leaks = [] + tgen = get_topogen() # pylint: disable=redefined-outer-name + latest = [] + existing = [] + if tgen is not None: + logdir = tgen.logdir + if hasattr(tgen, "memstat_existing_files"): + existing = tgen.memstat_existing_files + latest = glob.glob(os.path.join(logdir, "*/*.err")) + + daemons = [] + for vfile in latest: + if vfile in existing: + continue + with open(vfile, encoding="ascii") as vf: + vfcontent = vf.read() + num = vfcontent.count("memstats:") + if num: + existing.append(vfile) # have summary don't check again + emsg = "{} types in {}".format(num, vfile) + leaks.append(emsg) + daemon = re.match(r".*test[a-z_A-Z0-9\+]*/(.*)\.err", vfile).group(1) + daemons.append("{}({})".format(daemon, num)) + + if tgen is not None: + tgen.memstat_existing_files = existing + + if leaks: + logger.error("memleaks found:\n\t%s", "\n\t".join(leaks)) + pytest.fail("memleaks found for daemons: " + " ".join(daemons)) + + @pytest.fixture(autouse=True, scope="module") def module_check_memtest(request): yield if request.config.option.valgrind_memleaks: if get_topogen() is not None: + check_for_valgrind_memleaks() + if request.config.option.memleaks: + if get_topogen() is not None: check_for_memleaks() @@ -264,6 +306,8 @@ def pytest_runtest_call(item: pytest.Item) -> None: # Check for leaks if requested if item.config.option.valgrind_memleaks: + check_for_valgrind_memleaks() + if item.config.option.memleaks: check_for_memleaks() @@ -370,10 +414,22 @@ def pytest_configure(config): if config.option.topology_only and is_xdist: pytest.exit("Cannot use --topology-only with distributed test mode") + pytest.exit("Cannot use --topology-only with distributed test mode") + # Check environment now that we have config if not diagnose_env(rundir): pytest.exit("environment has errors, please read the logs in %s" % rundir) + # slave TOPOTESTS_CHECK_MEMLEAK to memleaks flag + if config.option.memleaks: + if "TOPOTESTS_CHECK_MEMLEAK" not in os.environ: + os.environ["TOPOTESTS_CHECK_MEMLEAK"] = "/dev/null" + else: + if "TOPOTESTS_CHECK_MEMLEAK" in os.environ: + del os.environ["TOPOTESTS_CHECK_MEMLEAK"] + if "TOPOTESTS_CHECK_STDERR" in os.environ: + del os.environ["TOPOTESTS_CHECK_STDERR"] + @pytest.fixture(autouse=True, scope="session") def setup_session_auto(): diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py index 967f09ecbd..9c594d3c8e 100644 --- a/tests/topotests/lib/topotest.py +++ b/tests/topotests/lib/topotest.py @@ -1469,21 +1469,22 @@ class Router(Node): if not running: break - if not running: - return "" - - logger.warning( - "%s: sending SIGBUS to: %s", self.name, ", ".join([x[0] for x in running]) - ) - for name, pid in running: - pidfile = "/var/run/{}/{}.pid".format(self.routertype, name) - logger.info("%s: killing %s", self.name, name) - self.cmd("kill -SIGBUS %d" % pid) - self.cmd("rm -- " + pidfile) - - sleep( - 0.5, "%s: waiting for daemons to exit/core after initial SIGBUS" % self.name - ) + if running: + logger.warning( + "%s: sending SIGBUS to: %s", + self.name, + ", ".join([x[0] for x in running]), + ) + for name, pid in running: + pidfile = "/var/run/{}/{}.pid".format(self.routertype, name) + logger.info("%s: killing %s", self.name, name) + self.cmd("kill -SIGBUS %d" % pid) + self.cmd("rm -- " + pidfile) + + sleep( + 0.5, + "%s: waiting for daemons to exit/core after initial SIGBUS" % self.name, + ) errors = self.checkRouterCores(reportOnce=True) if self.checkRouterVersion("<", minErrorVersion): @@ -1683,7 +1684,11 @@ class Router(Node): return self.getLog("out", daemon) def getLog(self, log, daemon): - return self.cmd("cat {}/{}/{}.{}".format(self.logdir, self.name, daemon, log)) + filename = "{}/{}/{}.{}".format(self.logdir, self.name, daemon, log) + log = "" + with open(filename) as file: + log = file.read() + return log def startRouterDaemons(self, daemons=None, tgen=None): "Starts FRR daemons for this router." @@ -1841,9 +1846,13 @@ class Router(Node): logger.info( "%s: %s %s launched in gdb window", self, self.routertype, daemon ) - elif daemon in perfds and (self.name in perfds[daemon] or "all" in perfds[daemon]): + elif daemon in perfds and ( + self.name in perfds[daemon] or "all" in perfds[daemon] + ): cmdopt += rediropt - cmd = " ".join(["perf record {} --".format(perf_options), binary, cmdopt]) + cmd = " ".join( + ["perf record {} --".format(perf_options), binary, cmdopt] + ) p = self.popen(cmd) self.perf_daemons[daemon] = p if p.poll() and p.returncode: diff --git a/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py b/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py index aa43b977b0..45e1bc8db3 100755 --- a/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py +++ b/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py @@ -430,6 +430,144 @@ def test_gr_rt7(): check_routers(restarting="rt7") +# +# Test rt1 performing an unplanned graceful restart +# +def test_unplanned_gr_rt1(): + logger.info("Test: verify rt1 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt1", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt1", ["ospf6d"]) + + expect_grace_lsa(restarting="1.1.1.1", helper="rt2") + ensure_gr_is_in_zebra("rt1") + check_routers(restarting="rt1") + + +# +# Test rt2 performing an unplanned graceful restart +# +def test_unplanned_gr_rt2(): + logger.info("Test: verify rt2 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt2", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt2", ["ospf6d"]) + + expect_grace_lsa(restarting="2.2.2.2", helper="rt1") + expect_grace_lsa(restarting="2.2.2.2", helper="rt3") + ensure_gr_is_in_zebra("rt2") + check_routers(restarting="rt2") + + +# +# Test rt3 performing an unplanned graceful restart +# +def test_unplanned_gr_rt3(): + logger.info("Test: verify rt3 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt3", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt3", ["ospf6d"]) + + expect_grace_lsa(restarting="3.3.3.3", helper="rt2") + expect_grace_lsa(restarting="3.3.3.3", helper="rt4") + expect_grace_lsa(restarting="3.3.3.3", helper="rt6") + ensure_gr_is_in_zebra("rt3") + check_routers(restarting="rt3") + + +# +# Test rt4 performing an unplanned graceful restart +# +def test_unplanned_gr_rt4(): + logger.info("Test: verify rt4 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt4", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt4", ["ospf6d"]) + + expect_grace_lsa(restarting="4.4.4.4", helper="rt3") + expect_grace_lsa(restarting="4.4.4.4", helper="rt5") + ensure_gr_is_in_zebra("rt4") + check_routers(restarting="rt4") + + +# +# Test rt5 performing an unplanned graceful restart +# +def test_unplanned_gr_rt5(): + logger.info("Test: verify rt5 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt5", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt5", ["ospf6d"]) + + expect_grace_lsa(restarting="5.5.5.5", helper="rt4") + ensure_gr_is_in_zebra("rt5") + check_routers(restarting="rt5") + + +# +# Test rt6 performing an unplanned graceful restart +# +def test_unplanned_gr_rt6(): + logger.info("Test: verify rt6 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt6", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt6", ["ospf6d"]) + + expect_grace_lsa(restarting="6.6.6.6", helper="rt3") + expect_grace_lsa(restarting="6.6.6.6", helper="rt7") + ensure_gr_is_in_zebra("rt6") + check_routers(restarting="rt6") + + +# +# Test rt7 performing an unplanned graceful restart +# +def test_unplanned_gr_rt7(): + logger.info("Test: verify rt7 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt7", ["ospf6d"], save_config=False) + start_router_daemons(tgen, "rt7", ["ospf6d"]) + + expect_grace_lsa(restarting="6.6.6.6", helper="rt6") + ensure_gr_is_in_zebra("rt7") + check_routers(restarting="rt7") + + # Memory leak test template def test_memory_leak(): "Run the memory leak test and report results." diff --git a/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py b/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py index ca2a3b287c..73185d501d 100755 --- a/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py +++ b/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py @@ -434,6 +434,144 @@ def test_gr_rt7(): check_routers(restarting="rt7") +# +# Test rt1 performing an unplanned graceful restart +# +def test_unplanned_gr_rt1(): + logger.info("Test: verify rt1 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt1", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt1", ["ospfd"]) + + expect_grace_lsa(restarting="1.1.1.1", area="0.0.0.1", helper="rt2") + ensure_gr_is_in_zebra("rt1") + check_routers(restarting="rt1") + + +# +# Test rt2 performing an unplanned graceful restart +# +def test_unplanned_gr_rt2(): + logger.info("Test: verify rt2 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt2", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt2", ["ospfd"]) + + expect_grace_lsa(restarting="2.2.2.2", area="0.0.0.1", helper="rt1") + expect_grace_lsa(restarting="2.2.2.2", area="0.0.0.0", helper="rt3") + ensure_gr_is_in_zebra("rt2") + check_routers(restarting="rt2") + + +# +# Test rt3 performing an unplanned graceful restart +# +def test_unplanned_gr_rt3(): + logger.info("Test: verify rt3 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt3", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt3", ["ospfd"]) + + expect_grace_lsa(restarting="3.3.3.3", area="0.0.0.0", helper="rt2") + expect_grace_lsa(restarting="3.3.3.3", area="0.0.0.0", helper="rt4") + expect_grace_lsa(restarting="3.3.3.3", area="0.0.0.0", helper="rt6") + ensure_gr_is_in_zebra("rt3") + check_routers(restarting="rt3") + + +# +# Test rt4 performing an unplanned graceful restart +# +def test_unplanned_gr_rt4(): + logger.info("Test: verify rt4 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt4", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt4", ["ospfd"]) + + expect_grace_lsa(restarting="4.4.4.4", area="0.0.0.0", helper="rt3") + expect_grace_lsa(restarting="4.4.4.4", area="0.0.0.2", helper="rt5") + ensure_gr_is_in_zebra("rt4") + check_routers(restarting="rt4") + + +# +# Test rt5 performing an unplanned graceful restart +# +def test_unplanned_gr_rt5(): + logger.info("Test: verify rt5 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt5", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt5", ["ospfd"]) + + expect_grace_lsa(restarting="5.5.5.5", area="0.0.0.2", helper="rt4") + ensure_gr_is_in_zebra("rt5") + check_routers(restarting="rt5") + + +# +# Test rt6 performing an unplanned graceful restart +# +def test_unplanned_gr_rt6(): + logger.info("Test: verify rt6 performing an unplanned graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt6", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt6", ["ospfd"]) + + expect_grace_lsa(restarting="6.6.6.6", area="0.0.0.0", helper="rt3") + expect_grace_lsa(restarting="6.6.6.6", area="0.0.0.3", helper="rt7") + ensure_gr_is_in_zebra("rt6") + check_routers(restarting="rt6") + + +# +# Test rt7 performing an unplanned graceful restart +# +def test_unplanned_gr_rt7(): + logger.info("Test: verify rt7 performing a graceful restart") + tgen = get_topogen() + + # Skip if previous fatal error condition is raised + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + kill_router_daemons(tgen, "rt7", ["ospfd"], save_config=False) + start_router_daemons(tgen, "rt7", ["ospfd"]) + + expect_grace_lsa(restarting="7.7.7.7", area="0.0.0.3", helper="rt6") + ensure_gr_is_in_zebra("rt7") + check_routers(restarting="rt7") + + # Memory leak test template def test_memory_leak(): "Run the memory leak test and report results." |
