]> git.puffer.fish Git - mirror/frr.git/commitdiff
bgpd: split soft reconfigure table task into several jobs to not block vtysh 8691/head
authorLouis Scalbert <louis.scalbert@6wind.com>
Wed, 4 Nov 2020 14:35:47 +0000 (15:35 +0100)
committerLouis Scalbert <louis.scalbert@6wind.com>
Mon, 7 Jun 2021 08:33:31 +0000 (10:33 +0200)
BGP configuration changes that imply recomputing the BGP route table
(e.g. modifying route-maps, setting bgp graceful-shutdown) might be a
long time process depending on the size of the BGP table and the
route-map numbers and complexity. For example, setups with full
Internet routes take something like one minute to reprocess all the
prefixes when graceful-shutdown is configured. During this time, a
"show bgp commands" request on vtysh results in blocking the shell until
the soft reconfigure table task is over.

This patch splits bgp_soft_reconfig_table task into thread jobs of 25K
prefixes.

Some tests on a full Internet route setup show that after reconfiguring
route-maps or graceful-shutdown, vtysh is not stucked anymore. We are
now able to request commands like "show bgp summary" after 1 or 2
seconds instead of 30 to 60s.

Signed-off-by: Louis Scalbert <louis.scalbert@6wind.com>
bgpd/bgp_route.c
bgpd/bgp_route.h
bgpd/bgp_table.h
bgpd/bgpd.c

index c61aedb560d11ab1b75ae62c20cf316ee4b99bc7..26f7583e797dd3a7f4db3c5f839ac96255565ec3 100644 (file)
@@ -125,6 +125,7 @@ static const struct message bgp_pmsi_tnltype_str[] = {
 };
 
 #define VRFID_NONE_STR "-"
+#define SOFT_RECONFIG_TASK_MAX_PREFIX 25000
 
 DEFINE_HOOK(bgp_process,
            (struct bgp * bgp, afi_t afi, safi_t safi, struct bgp_dest *bn,
@@ -3141,6 +3142,14 @@ void bgp_process(struct bgp *bgp, struct bgp_dest *dest, afi_t afi, safi_t safi)
                return;
        }
 
+       if (CHECK_FLAG(dest->flags, BGP_NODE_SOFT_RECONFIG)) {
+               if (BGP_DEBUG(update, UPDATE_OUT))
+                       zlog_debug(
+                               "Soft reconfigure table in progress for route %p",
+                               dest);
+               return;
+       }
+
        if (wq == NULL)
                return;
 
@@ -4591,6 +4600,60 @@ void bgp_announce_route_all(struct peer *peer)
                bgp_announce_route(peer, afi, safi);
 }
 
+/* Flag or unflag bgp_dest to determine whether it should be treated by
+ * bgp_soft_reconfig_table_task.
+ * Flag if flag is true. Unflag if flag is false.
+ */
+static void bgp_soft_reconfig_table_flag(struct bgp_table *table, bool flag)
+{
+       struct bgp_dest *dest;
+       struct bgp_adj_in *ain;
+
+       if (!table)
+               return;
+
+       for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) {
+               for (ain = dest->adj_in; ain; ain = ain->next) {
+                       if (ain->peer != NULL)
+                               break;
+               }
+               if (flag && ain != NULL && ain->peer != NULL)
+                       SET_FLAG(dest->flags, BGP_NODE_SOFT_RECONFIG);
+               else
+                       UNSET_FLAG(dest->flags, BGP_NODE_SOFT_RECONFIG);
+       }
+}
+
+static int bgp_soft_reconfig_table_update(struct peer *peer,
+                                         struct bgp_dest *dest,
+                                         struct bgp_adj_in *ain, afi_t afi,
+                                         safi_t safi, struct prefix_rd *prd)
+{
+       struct bgp_path_info *pi;
+       uint32_t num_labels = 0;
+       mpls_label_t *label_pnt = NULL;
+       struct bgp_route_evpn evpn;
+
+       for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
+               if (pi->peer == peer)
+                       break;
+
+       if (pi && pi->extra)
+               num_labels = pi->extra->num_labels;
+       if (num_labels)
+               label_pnt = &pi->extra->label[0];
+       if (pi)
+               memcpy(&evpn, bgp_attr_get_evpn_overlay(pi->attr),
+                      sizeof(evpn));
+       else
+               memset(&evpn, 0, sizeof(evpn));
+
+       return bgp_update(peer, bgp_dest_get_prefix(dest), ain->addpath_rx_id,
+                         ain->attr, afi, safi, ZEBRA_ROUTE_BGP,
+                         BGP_ROUTE_NORMAL, prd, label_pnt, num_labels, 1,
+                         &evpn);
+}
+
 static void bgp_soft_reconfig_table(struct peer *peer, afi_t afi, safi_t safi,
                                    struct bgp_table *table,
                                    struct prefix_rd *prd)
@@ -4607,32 +4670,8 @@ static void bgp_soft_reconfig_table(struct peer *peer, afi_t afi, safi_t safi,
                        if (ain->peer != peer)
                                continue;
 
-                       struct bgp_path_info *pi;
-                       uint32_t num_labels = 0;
-                       mpls_label_t *label_pnt = NULL;
-                       struct bgp_route_evpn evpn;
-
-                       for (pi = bgp_dest_get_bgp_path_info(dest); pi;
-                            pi = pi->next)
-                               if (pi->peer == peer)
-                                       break;
-
-                       if (pi && pi->extra)
-                               num_labels = pi->extra->num_labels;
-                       if (num_labels)
-                               label_pnt = &pi->extra->label[0];
-                       if (pi)
-                               memcpy(&evpn,
-                                      bgp_attr_get_evpn_overlay(pi->attr),
-                                      sizeof(evpn));
-                       else
-                               memset(&evpn, 0, sizeof(evpn));
-
-                       ret = bgp_update(peer, bgp_dest_get_prefix(dest),
-                                        ain->addpath_rx_id, ain->attr, afi,
-                                        safi, ZEBRA_ROUTE_BGP,
-                                        BGP_ROUTE_NORMAL, prd, label_pnt,
-                                        num_labels, 1, &evpn);
+                       ret = bgp_soft_reconfig_table_update(peer, dest, ain,
+                                                            afi, safi, prd);
 
                        if (ret < 0) {
                                bgp_dest_unlock_node(dest);
@@ -4641,18 +4680,201 @@ static void bgp_soft_reconfig_table(struct peer *peer, afi_t afi, safi_t safi,
                }
 }
 
+/* Do soft reconfig table per bgp table.
+ * Walk on SOFT_RECONFIG_TASK_MAX_PREFIX bgp_dest,
+ * when BGP_NODE_SOFT_RECONFIG is set,
+ * reconfig bgp_dest for list of table->soft_reconfig_peers peers.
+ * Schedule a new thread to continue the job.
+ * Without splitting the full job into several part,
+ * vtysh waits for the job to finish before responding to a BGP command
+ */
+static int bgp_soft_reconfig_table_task(struct thread *thread)
+{
+       uint32_t iter, max_iter;
+       int ret;
+       struct bgp_dest *dest;
+       struct bgp_adj_in *ain;
+       struct peer *peer;
+       struct bgp_table *table;
+       struct prefix_rd *prd;
+       struct listnode *node, *nnode;
+
+       table = THREAD_ARG(thread);
+       prd = NULL;
+
+       max_iter = SOFT_RECONFIG_TASK_MAX_PREFIX;
+       if (table->soft_reconfig_init) {
+               /* first call of the function with a new srta structure.
+                * Don't do any treatment this time on nodes
+                * in order vtysh to respond quickly
+                */
+               max_iter = 0;
+       }
+
+       for (iter = 0, dest = bgp_table_top(table); (dest && iter < max_iter);
+            dest = bgp_route_next(dest)) {
+               if (!CHECK_FLAG(dest->flags, BGP_NODE_SOFT_RECONFIG))
+                       continue;
+
+               UNSET_FLAG(dest->flags, BGP_NODE_SOFT_RECONFIG);
+
+               for (ain = dest->adj_in; ain; ain = ain->next) {
+                       for (ALL_LIST_ELEMENTS(table->soft_reconfig_peers, node,
+                                              nnode, peer)) {
+                               if (ain->peer != peer)
+                                       continue;
+
+                               ret = bgp_soft_reconfig_table_update(
+                                       peer, dest, ain, table->afi,
+                                       table->safi, prd);
+                               iter++;
+
+                               if (ret < 0) {
+                                       bgp_dest_unlock_node(dest);
+                                       listnode_delete(
+                                               table->soft_reconfig_peers,
+                                               peer);
+                                       bgp_announce_route(peer, table->afi,
+                                                          table->safi);
+                                       if (list_isempty(
+                                                   table->soft_reconfig_peers)) {
+                                               list_delete(
+                                                       &table->soft_reconfig_peers);
+                                               bgp_soft_reconfig_table_flag(
+                                                       table, false);
+                                               return 0;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /* we're either starting the initial iteration,
+        * or we're going to continue an ongoing iteration
+        */
+       if (dest || table->soft_reconfig_init) {
+               table->soft_reconfig_init = false;
+               thread_add_event(bm->master, bgp_soft_reconfig_table_task,
+                                table, 0, &table->soft_reconfig_thread);
+               return 0;
+       }
+       /* we're done, clean up the background iteration context info and
+       schedule route annoucement
+       */
+       for (ALL_LIST_ELEMENTS(table->soft_reconfig_peers, node, nnode, peer)) {
+               listnode_delete(table->soft_reconfig_peers, peer);
+               bgp_announce_route(peer, table->afi, table->safi);
+       }
+
+       list_delete(&table->soft_reconfig_peers);
+
+       return 0;
+}
+
+
+/* Cancel soft_reconfig_table task matching bgp instance, bgp_table
+ * and peer.
+ * - bgp cannot be NULL
+ * - if table and peer are NULL, cancel all threads within the bgp instance
+ * - if table is NULL and peer is not,
+ * remove peer in all threads within the bgp instance
+ * - if peer is NULL, cancel all threads matching table within the bgp instance
+ */
+void bgp_soft_reconfig_table_task_cancel(const struct bgp *bgp,
+                                        const struct bgp_table *table,
+                                        const struct peer *peer)
+{
+       struct peer *npeer;
+       struct listnode *node, *nnode;
+       int afi, safi;
+       struct bgp_table *ntable;
+
+       if (!bgp)
+               return;
+
+       FOREACH_AFI_SAFI (afi, safi) {
+               ntable = bgp->rib[afi][safi];
+               if (!ntable)
+                       continue;
+               if (table && table != ntable)
+                       continue;
+
+               for (ALL_LIST_ELEMENTS(ntable->soft_reconfig_peers, node, nnode,
+                                      npeer)) {
+                       if (peer && peer != npeer)
+                               continue;
+                       listnode_delete(ntable->soft_reconfig_peers, npeer);
+               }
+
+               if (!ntable->soft_reconfig_peers
+                   || !list_isempty(ntable->soft_reconfig_peers))
+                       continue;
+
+               list_delete(&ntable->soft_reconfig_peers);
+               bgp_soft_reconfig_table_flag(ntable, false);
+               BGP_TIMER_OFF(ntable->soft_reconfig_thread);
+       }
+}
+
 void bgp_soft_reconfig_in(struct peer *peer, afi_t afi, safi_t safi)
 {
        struct bgp_dest *dest;
        struct bgp_table *table;
+       struct listnode *node, *nnode;
+       struct peer *npeer;
+       struct peer_af *paf;
 
        if (peer->status != Established)
                return;
 
        if ((safi != SAFI_MPLS_VPN) && (safi != SAFI_ENCAP)
-           && (safi != SAFI_EVPN))
-               bgp_soft_reconfig_table(peer, afi, safi, NULL, NULL);
-       else
+           && (safi != SAFI_EVPN)) {
+               table = peer->bgp->rib[afi][safi];
+               if (!table)
+                       return;
+
+               table->soft_reconfig_init = true;
+
+               if (!table->soft_reconfig_peers)
+                       table->soft_reconfig_peers = list_new();
+               npeer = NULL;
+               /* add peer to the table soft_reconfig_peers if not already
+                * there
+                */
+               for (ALL_LIST_ELEMENTS(table->soft_reconfig_peers, node, nnode,
+                                      npeer)) {
+                       if (peer == npeer)
+                               break;
+               }
+               if (peer != npeer)
+                       listnode_add(table->soft_reconfig_peers, peer);
+
+               /* (re)flag all bgp_dest in table. Existing soft_reconfig_in job
+                * on table would start back at the beginning.
+                */
+               bgp_soft_reconfig_table_flag(table, true);
+
+               if (!table->soft_reconfig_thread)
+                       thread_add_event(bm->master,
+                                        bgp_soft_reconfig_table_task, table, 0,
+                                        &table->soft_reconfig_thread);
+               /* Cancel bgp_announce_route_timer_expired threads.
+                * bgp_announce_route_timer_expired threads have been scheduled
+                * to announce routes as soon as the soft_reconfigure process
+                * finishes.
+                * In this case, soft_reconfigure is also scheduled by using
+                * a thread but is planned after the
+                * bgp_announce_route_timer_expired threads. It means that,
+                * without cancelling the threads, the route announcement task
+                * would run before the soft reconfiguration one. That would
+                * useless and would block vtysh during several seconds. Route
+                * announcements are rescheduled as soon as the soft_reconfigure
+                * process finishes.
+                */
+               paf = peer_af_find(peer, afi, safi);
+               if (paf)
+                       bgp_stop_announce_route_timer(paf);
+       } else
                for (dest = bgp_table_top(peer->bgp->rib[afi][safi]); dest;
                     dest = bgp_route_next(dest)) {
                        table = bgp_dest_get_bgp_table_info(dest);
index 6d6008ff554d99582e92ce9e18d80fb3db3df1aa..d2704eac039c6c58bd516b5a0ee63e7be68d20c2 100644 (file)
@@ -603,6 +603,9 @@ extern void bgp_announce_route(struct peer *, afi_t, safi_t);
 extern void bgp_stop_announce_route_timer(struct peer_af *paf);
 extern void bgp_announce_route_all(struct peer *);
 extern void bgp_default_originate(struct peer *, afi_t, safi_t, int);
+extern void bgp_soft_reconfig_table_task_cancel(const struct bgp *bgp,
+                                               const struct bgp_table *table,
+                                               const struct peer *peer);
 extern void bgp_soft_reconfig_in(struct peer *, afi_t, safi_t);
 extern void bgp_clear_route(struct peer *, afi_t, safi_t);
 extern void bgp_clear_route_all(struct peer *);
index 68b460149c2c4f95d0097d0958702089a7ed83fe..fb906aa1fd0990d14e51324d21d56794852f2adc 100644 (file)
@@ -42,6 +42,13 @@ struct bgp_table {
 
        int lock;
 
+       /* soft_reconfig_table in progress */
+       bool soft_reconfig_init;
+       struct thread *soft_reconfig_thread;
+
+       /* list of peers on which soft_reconfig_table has to run */
+       struct list *soft_reconfig_peers;
+
        struct route_table *route_table;
        uint64_t version;
 };
@@ -96,7 +103,7 @@ struct bgp_node {
 
        mpls_label_t local_label;
 
-       uint8_t flags;
+       uint16_t flags;
 #define BGP_NODE_PROCESS_SCHEDULED     (1 << 0)
 #define BGP_NODE_USER_CLEAR             (1 << 1)
 #define BGP_NODE_LABEL_CHANGED          (1 << 2)
@@ -105,6 +112,7 @@ struct bgp_node {
 #define BGP_NODE_FIB_INSTALL_PENDING    (1 << 5)
 #define BGP_NODE_FIB_INSTALLED          (1 << 6)
 #define BGP_NODE_LABEL_REQUESTED        (1 << 7)
+#define BGP_NODE_SOFT_RECONFIG (1 << 8)
 
        struct bgp_addpath_node_data tx_addpath;
 
index 6f2f2c9f34534bc9952fb3d2f336dec119962ec1..cccea1103603df3fbe7c8e3f3f19ada5485c7f6c 100644 (file)
@@ -892,6 +892,8 @@ int peer_af_delete(struct peer *peer, afi_t afi, safi_t safi)
                return -1;
 
        bgp = peer->bgp;
+       bgp_soft_reconfig_table_task_cancel(bgp, bgp->rib[afi][safi], peer);
+
        bgp_stop_announce_route_timer(af);
 
        if (PAF_SUBGRP(af)) {
@@ -2390,6 +2392,8 @@ int peer_delete(struct peer *peer)
        bgp = peer->bgp;
        accept_peer = CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER);
 
+       bgp_soft_reconfig_table_task_cancel(bgp, NULL, peer);
+
        bgp_keepalives_off(peer);
        bgp_reads_off(peer);
        bgp_writes_off(peer);
@@ -3555,6 +3559,8 @@ int bgp_delete(struct bgp *bgp)
 
        assert(bgp);
 
+       bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL);
+
        /* make sure we withdraw any exported routes */
        vpn_leak_prechange(BGP_VPN_POLICY_DIR_TOVPN, AFI_IP, bgp_get_default(),
                           bgp);