]> git.puffer.fish Git - matthieu/frr.git/commitdiff
bgpd: backpressure - Optimize EVPN L2VNI remote routes processing
authorRajasekar Raja <rajasekarr@nvidia.com>
Wed, 28 Aug 2024 18:38:53 +0000 (11:38 -0700)
committerRajasekar Raja <rajasekarr@nvidia.com>
Mon, 9 Dec 2024 16:46:16 +0000 (08:46 -0800)
Anytime BGP gets a L2 VNI ADD from zebra,
 - Walking the entire global routing table per L2VNI is very expensive.
 - The next read (say of another VNI ADD) from the socket does
   not proceed unless this walk is complete.

So for triggers where a bulk of L2VNI's are flapped, this results in
huge output buffer FIFO growth spiking up the memory in zebra since bgp
is slow/busy processing the first message.

To avoid this, idea is to hookup the VPN off the bgp_master struct and
maintain a VPN FIFO list which is processed later on, where we walk a
chunk of VPNs and do the remote route install.

Note: So far in the L3 backpressure cases(#15524), we have considered
the fact that zebra is slow, and the buffer grows in the BGP.

However this is the reverse i.e. BGP is very busy processing the first
ZAPI message from zebra due to which the buffer grows huge in zebra
and memory spikes up.

Ticket :#3864372

Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
bgpd/bgp_evpn.c
bgpd/bgp_evpn.h
bgpd/bgp_evpn_private.h
bgpd/bgp_main.c
bgpd/bgp_zebra.c
bgpd/bgp_zebra.h
bgpd/bgpd.c
bgpd/bgpd.h

index f173bd01f20ff8628c4af4b64f2f8f04168e8af9..f4bae58724a2e54234d16f2b7db54ba47f183169 100644 (file)
@@ -3979,30 +3979,80 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install)
        return 0;
 }
 
+#define BGP_PROC_L2VNI_LIMIT 10
+static int install_evpn_remote_route_per_l2vni(struct bgp *bgp, struct bgp_path_info *pi,
+                                              const struct prefix_evpn *evp)
+{
+       int ret = 0;
+       uint8_t vni_iter = 0;
+       struct bgpevpn *t_vpn = NULL;
+       struct bgpevpn *t_vpn_next = NULL;
+
+       for (t_vpn = zebra_l2_vni_first(&bm->zebra_l2_vni_head);
+            t_vpn && vni_iter < BGP_PROC_L2VNI_LIMIT; t_vpn = t_vpn_next) {
+               t_vpn_next = zebra_l2_vni_next(&bm->zebra_l2_vni_head, t_vpn);
+               vni_iter++;
+               /*
+                * Skip install/uninstall if the route entry is not needed to
+                * be imported into the VNI i.e. RTs dont match
+                */
+               if (!is_route_matching_for_vni(bgp, t_vpn, pi))
+                       continue;
+
+               ret = install_evpn_route_entry(bgp, t_vpn, evp, pi);
+
+               if (ret) {
+                       flog_err(EC_BGP_EVPN_FAIL,
+                                "%u: Failed to install EVPN %s route in VNI %u during BP",
+                                bgp->vrf_id, bgp_evpn_route_type_str[evp->prefix.route_type].str,
+                                t_vpn->vni);
+                       zebra_l2_vni_del(&bm->zebra_l2_vni_head, t_vpn);
+
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 /*
  * Install or uninstall routes of specified type that are appropriate for this
  * particular VNI.
  */
-static int install_uninstall_routes_for_vni(struct bgp *bgp,
-                                           struct bgpevpn *vpn, bool install)
+int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install)
 {
        afi_t afi;
        safi_t safi;
        struct bgp_dest *rd_dest, *dest;
        struct bgp_table *table;
        struct bgp_path_info *pi;
-       int ret;
+       int ret = 0;
+       uint8_t count = 0;
+       bool walk_fifo = false;
 
        afi = AFI_L2VPN;
        safi = SAFI_EVPN;
 
-       /* Walk entire global routing table and evaluate routes which could be
+       if (!bgp) {
+               walk_fifo = true;
+               bgp = bgp_get_evpn();
+               if (!bgp) {
+                       zlog_warn("%s: No BGP EVPN instance found...", __func__);
+
+                       return -1;
+               }
+       }
+
+       if (BGP_DEBUG(zebra, ZEBRA))
+               zlog_debug("%s: Total %u L2VNI VPNs pending to be processed for remote route installation",
+                          __func__, (uint32_t)zebra_l2_vni_count(&bm->zebra_l2_vni_head));
+       /*
+        * Walk entire global routing table and evaluate routes which could be
         * imported into this VPN. Note that we cannot just look at the routes
-        * for
-        * the VNI's RD - remote routes applicable for this VNI could have any
-        * RD.
+        * for the VNI's RD - remote routes applicable for this VNI could have
+        * any RD.
+        * Note: EVPN routes are a 2-level table.
         */
-       /* EVPN routes are a 2-level table. */
        for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest;
             rd_dest = bgp_route_next(rd_dest)) {
                table = bgp_dest_get_bgp_table_info(rd_dest);
@@ -4015,54 +4065,80 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp,
                                (const struct prefix_evpn *)bgp_dest_get_prefix(
                                        dest);
 
-                       if (evp->prefix.route_type != BGP_EVPN_IMET_ROUTE &&
-                           evp->prefix.route_type != BGP_EVPN_AD_ROUTE &&
-                           evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
+                       /* Proceed only for AD, MAC_IP and IMET routes */
+                       switch (evp->prefix.route_type) {
+                       case BGP_EVPN_AD_ROUTE:
+                       case BGP_EVPN_MAC_IP_ROUTE:
+                       case BGP_EVPN_IMET_ROUTE:
+                               break;
+                       case BGP_EVPN_ES_ROUTE:
+                       case BGP_EVPN_IP_PREFIX_ROUTE:
                                continue;
+                       }
 
                        for (pi = bgp_dest_get_bgp_path_info(dest); pi;
                             pi = pi->next) {
-                               /* Consider "valid" remote routes applicable for
-                                * this VNI. */
-                               if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID)
-                                     && pi->type == ZEBRA_ROUTE_BGP
-                                     && pi->sub_type == BGP_ROUTE_NORMAL))
-                                       continue;
-
-                               if (!is_route_matching_for_vni(bgp, vpn, pi))
+                               /*
+                                * Skip install/uninstall if
+                                * - Not a valid remote routes
+                                * - Install & evpn route matchesi macvrf SOO
+                                */
+                               if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) &&
+                                     pi->type == ZEBRA_ROUTE_BGP &&
+                                     pi->sub_type == BGP_ROUTE_NORMAL) ||
+                                   (install && bgp_evpn_route_matches_macvrf_soo(pi, evp)))
                                        continue;
 
-                               if (install) {
-                                       if (bgp_evpn_route_matches_macvrf_soo(
-                                                   pi, evp))
+                               if (walk_fifo) {
+                                       ret = install_evpn_remote_route_per_l2vni(bgp, pi, evp);
+                                       if (ret) {
+                                               bgp_dest_unlock_node(rd_dest);
+                                               bgp_dest_unlock_node(dest);
+                                               return ret;
+                                       }
+                               } else {
+                                       /*
+                                        * Skip install/uninstall if the route
+                                        * entry is not needed to be imported
+                                        * into the VNI i.e. RTs dont match
+                                        */
+                                       if (!is_route_matching_for_vni(bgp, vpn, pi))
                                                continue;
 
-                                       ret = install_evpn_route_entry(bgp, vpn,
-                                                                      evp, pi);
-                               } else
-                                       ret = uninstall_evpn_route_entry(
-                                               bgp, vpn, evp, pi);
-
-                               if (ret) {
-                                       flog_err(EC_BGP_EVPN_FAIL,
-                                                "%u: Failed to %s EVPN %s route in VNI %u",
-                                                bgp->vrf_id,
-                                                install ? "install"
-                                                        : "uninstall",
-                                                evp->prefix.route_type ==
-                                                                BGP_EVPN_MAC_IP_ROUTE
-                                                        ? "MACIP"
-                                                        : "IMET",
-                                                vpn->vni);
-
-                                       bgp_dest_unlock_node(rd_dest);
-                                       bgp_dest_unlock_node(dest);
-                                       return ret;
+                                       if (install)
+                                               ret = install_evpn_route_entry(bgp, vpn, evp, pi);
+                                       else
+                                               ret = uninstall_evpn_route_entry(bgp, vpn, evp, pi);
+
+                                       if (ret) {
+                                               flog_err(EC_BGP_EVPN_FAIL,
+                                                        "%u: Failed to %s EVPN %s route in VNI %u",
+                                                        bgp->vrf_id,
+                                                        install ? "install" : "uninstall",
+                                                        bgp_evpn_route_type_str[evp->prefix.route_type]
+                                                                .str,
+                                                        vpn->vni);
+
+                                               bgp_dest_unlock_node(rd_dest);
+                                               bgp_dest_unlock_node(dest);
+                                               return ret;
+                                       }
                                }
                        }
                }
        }
 
+       if (walk_fifo) {
+               while (count < BGP_PROC_L2VNI_LIMIT) {
+                       vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head);
+                       if (!vpn)
+                               return 0;
+
+                       UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
+                       count++;
+               }
+       }
+
        return 0;
 }
 
@@ -7024,6 +7100,45 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id)
        return 0;
 }
 
+static void bgp_evpn_l2vni_remote_route_processing(struct bgpevpn *vpn)
+{
+       /*
+        * Anytime BGP gets a Bulk of L2 VNIs ADD/UPD from zebra,
+        *  - Walking the entire global routing table per VNI is very expensive.
+        *  - The next read (say of another VNI ADD/UPD) from the socket does
+        *    not proceed unless this walk is complete.
+        *  This results in huge output buffer FIFO growth spiking up the
+        *  memory in zebra.
+        *
+        * To avoid this, idea is to hookup the VPN off the struct bgp_master
+        * and maintain a VPN FIFO list which is processed later on, where we
+        * walk a chunk of VPNs and do the remote route install.
+        */
+       if (!CHECK_FLAG(vpn->flags, VNI_FLAG_ADD)) {
+               zebra_l2_vni_add_tail(&bm->zebra_l2_vni_head, vpn);
+               SET_FLAG(vpn->flags, VNI_FLAG_ADD);
+       }
+
+       if (BGP_DEBUG(zebra, ZEBRA))
+               zlog_debug("Scheduling L2VNI ADD to be processed later for VNI %u", vpn->vni);
+
+       /*
+        * If there are no VNI's in the bgp VPN FIFO list i.e. an update
+        * for an already processed VNI comes in, schedule the remote
+        * route install immediately.
+        *
+        * In all other cases, it is ok to schedule the remote route install
+        * after a small sleep. This is to give benefit of doubt in case more
+        * L2VNI ADD events come.
+        */
+       if (zebra_l2_vni_count(&bm->zebra_l2_vni_head))
+               event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL,
+                                    20, &bm->t_bgp_zebra_l2_vni);
+       else
+               event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, 0,
+                               &bm->t_bgp_zebra_l2_vni);
+}
+
 /*
  * When bgp instance goes down also clean up what might have been left over
  * from evpn.
@@ -7047,6 +7162,10 @@ int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni)
        if (!vpn)
                return 0;
 
+       /* Remove the VPN from the bgp VPN FIFO (if exists) */
+       UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
+       zebra_l2_vni_del(&bm->zebra_l2_vni_head, vpn);
+
        /* Remove all local EVPN routes and schedule for processing (to
         * withdraw from peers).
         */
@@ -7203,12 +7322,6 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
                }
        }
 
-       /* If we have learnt and retained remote routes (VTEPs, MACs) for this
-        * VNI,
-        * install them.
-        */
-       install_routes_for_vni(bgp, vpn);
-
        /* If we are advertising gateway mac-ip
           It needs to be conveyed again to zebra */
        bgp_zebra_advertise_gw_macip(bgp, vpn->advertise_gw_macip, vpn->vni);
@@ -7216,6 +7329,8 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
        /* advertise svi mac-ip knob to zebra */
        bgp_zebra_advertise_svi_macip(bgp, vpn->advertise_svi_macip, vpn->vni);
 
+       bgp_evpn_l2vni_remote_route_processing(vpn);
+
        return 0;
 }
 
@@ -7245,8 +7360,17 @@ void bgp_evpn_flood_control_change(struct bgp *bgp)
  */
 void bgp_evpn_cleanup_on_disable(struct bgp *bgp)
 {
-       hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *,
-                                            void *))cleanup_vni_on_disable,
+       struct bgpevpn *vpn = NULL;
+       uint32_t vni_count = zebra_l2_vni_count(&bm->zebra_l2_vni_head);
+
+       /* Cleanup VNI FIFO list from this bgp instance */
+       while (vni_count) {
+               vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head);
+               UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
+               vni_count--;
+       }
+
+       hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, void *))cleanup_vni_on_disable,
                     bgp);
 }
 
index 1a333a5a09a37052be8575a3f688e4665a0795e0..75dde616ce78cb6ab1a047de161654967231c4ae 100644 (file)
@@ -200,4 +200,5 @@ bool bgp_evpn_skip_vrf_import_of_local_es(struct bgp *bgp_vrf, const struct pref
 int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_evpn *evp,
                                      struct bgp_path_info *parent_pi);
 extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn);
+extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install);
 #endif /* _QUAGGA_BGP_EVPN_H */
index f1dee8d2bcf9db802fa222baa620980a3f9f3194..568d3d45eed3bb5d02b3434aff72bc4dfe94fc6b 100644 (file)
@@ -60,8 +60,9 @@ struct bgpevpn {
 #define VNI_FLAG_RD_CFGD           0x4  /* RD is user configured. */
 #define VNI_FLAG_IMPRT_CFGD        0x8  /* Import RT is user configured */
 #define VNI_FLAG_EXPRT_CFGD        0x10 /* Export RT is user configured */
-#define VNI_FLAG_USE_TWO_LABELS    0x20 /* Attach both L2-VNI and L3-VNI if
-                                          needed for this VPN */
+/* Attach both L2-VNI and L3-VNI if needed for this VPN */
+#define VNI_FLAG_USE_TWO_LABELS 0x20
+#define VNI_FLAG_ADD           0x40 /* L2VNI Add */
 
        struct bgp *bgp_vrf; /* back pointer to the vrf instance */
 
@@ -115,11 +116,15 @@ struct bgpevpn {
        /* List of local ESs */
        struct list *local_es_evi_list;
 
+       struct zebra_l2_vni_item zl2vni;
+
        QOBJ_FIELDS;
 };
 
 DECLARE_QOBJ_TYPE(bgpevpn);
 
+DECLARE_LIST(zebra_l2_vni, struct bgpevpn, zl2vni);
+
 /* Mapping of Import RT to VNIs.
  * The Import RTs of all VNIs are maintained in a hash table with each
  * RT linking to all VNIs that will import routes matching this RT.
index 535d2fc5f4342ea105551079578e437d7084475d..9d89fd6f96bcc60d68dca8d538e717937b00a466 100644 (file)
@@ -207,6 +207,7 @@ static __attribute__((__noreturn__)) void bgp_exit(int status)
        bgp_nhg_finish();
 
        zebra_announce_fini(&bm->zebra_announce_head);
+       zebra_l2_vni_fini(&bm->zebra_l2_vni_head);
 
        /* reverse bgp_dump_init */
        bgp_dump_finish();
index ac4a6bb03bf6260464cc5ecf6dc03353a4ccc6bd..90e2d5af7bf6c49df126f39aebbb4d1cf5d982f8 100644 (file)
@@ -3029,6 +3029,23 @@ static void bgp_zebra_connected(struct zclient *zclient)
        BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer);
 }
 
+void bgp_zebra_process_remote_routes_for_l2vni(struct event *e)
+{
+       /*
+        * If we have learnt and retained remote routes (VTEPs, MACs)
+        * for this VNI, install them.
+        */
+       install_uninstall_routes_for_vni(NULL, NULL, true);
+
+       /*
+        * If there are VNIs still pending to be processed, schedule them
+        * after a small sleep so that CPU can be used for other purposes.
+        */
+       if (zebra_l2_vni_count(&bm->zebra_l2_vni_head))
+               event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL,
+                                    20, &bm->t_bgp_zebra_l2_vni);
+}
+
 static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS)
 {
        esi_t esi;
index 8deecba747b310ee6e2f7b1eb88242fe44d6a39c..993d002998f2fd7ad41508ebc17b58bb2e2909cf 100644 (file)
@@ -135,4 +135,5 @@ extern void bgp_zebra_release_label_range(uint32_t start, uint32_t end);
 extern enum zclient_send_status
 bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info,
                          struct bgp *bgp);
+extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e);
 #endif /* _QUAGGA_BGP_ZEBRA_H */
index 7b21c29ea663480b15236b9c27b27be9a55e5abe..44640c84f2563d49df0711c4955ff5aa722aca92 100644 (file)
@@ -3966,11 +3966,14 @@ int bgp_delete(struct bgp *bgp)
        afi_t afi;
        safi_t safi;
        int i;
+       uint32_t vni_count;
+       struct bgpevpn *vpn = NULL;
        struct bgp_dest *dest = NULL;
        struct bgp_dest *dest_next = NULL;
        struct bgp_table *dest_table = NULL;
        struct graceful_restart_info *gr_info;
-       uint32_t cnt_before, cnt_after;
+       uint32_t b_ann_cnt = 0, b_l2_cnt = 0;
+       uint32_t a_ann_cnt = 0, a_l2_cnt = 0;
 
        assert(bgp);
 
@@ -3978,7 +3981,7 @@ int bgp_delete(struct bgp *bgp)
         * Iterate the pending dest list and remove all the dest pertaininig to
         * the bgp under delete.
         */
-       cnt_before = zebra_announce_count(&bm->zebra_announce_head);
+       b_ann_cnt = zebra_announce_count(&bm->zebra_announce_head);
        for (dest = zebra_announce_first(&bm->zebra_announce_head); dest;
             dest = dest_next) {
                dest_next = zebra_announce_next(&bm->zebra_announce_head, dest);
@@ -3990,10 +3993,28 @@ int bgp_delete(struct bgp *bgp)
                }
        }
 
-       cnt_after = zebra_announce_count(&bm->zebra_announce_head);
-       if (BGP_DEBUG(zebra, ZEBRA))
-               zlog_debug("Zebra Announce Fifo cleanup count before %u and after %u during BGP %s deletion",
-                          cnt_before, cnt_after, bgp->name_pretty);
+       /*
+        * Pop all VPNs yet to be processed for remote routes install if the
+        * bgp-evpn instance is getting deleted
+        */
+       if (bgp == bgp_get_evpn()) {
+               b_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head);
+               vni_count = b_l2_cnt;
+               while (vni_count) {
+                       vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head);
+                       UNSET_FLAG(vpn->flags, VNI_FLAG_ADD);
+                       vni_count--;
+               }
+       }
+
+       if (BGP_DEBUG(zebra, ZEBRA)) {
+               a_ann_cnt = zebra_announce_count(&bm->zebra_announce_head);
+               a_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head);
+               zlog_debug("FIFO Cleanup Count during BGP %s deletion :: "
+                          "Zebra Announce - before %u after %u :: "
+                          "BGP L2_VNI - before %u after %u",
+                          bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt);
+       }
 
        bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL);
 
@@ -8492,6 +8513,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
        bm = &bgp_master;
 
        zebra_announce_init(&bm->zebra_announce_head);
+       zebra_l2_vni_init(&bm->zebra_l2_vni_head);
        bm->bgp = list_new();
        bm->listen_sockets = list_new();
        bm->port = BGP_PORT_DEFAULT;
@@ -8515,6 +8537,7 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
        bm->stalepath_time = BGP_DEFAULT_STALEPATH_TIME;
        bm->select_defer_time = BGP_DEFAULT_SELECT_DEFERRAL_TIME;
        bm->rib_stale_time = BGP_DEFAULT_RIB_STALE_TIME;
+       bm->t_bgp_zebra_l2_vni = NULL;
 
        bgp_mac_init();
        /* init the rd id space.
@@ -8762,6 +8785,7 @@ void bgp_terminate(void)
        EVENT_OFF(bm->t_bgp_sync_label_manager);
        EVENT_OFF(bm->t_bgp_start_label_manager);
        EVENT_OFF(bm->t_bgp_zebra_route);
+       EVENT_OFF(bm->t_bgp_zebra_l2_vni);
 
        bgp_mac_finish();
 }
index bb56fd355a051ae8b8c308048d864b5f99eea2c2..fd2bd95e5f18acca8b1b6c2ee5263d336d3731f7 100644 (file)
@@ -19,6 +19,7 @@
 #include "asn.h"
 
 PREDECL_LIST(zebra_announce);
+PREDECL_LIST(zebra_l2_vni);
 
 /* For union sockunion.  */
 #include "queue.h"
@@ -204,6 +205,10 @@ struct bgp_master {
        /* To preserve ordering of installations into zebra across all Vrfs */
        struct zebra_announce_head zebra_announce_head;
 
+       struct event *t_bgp_zebra_l2_vni;
+       /* To preserve ordering of processing of L2 VNIs in BGP */
+       struct zebra_l2_vni_head zebra_l2_vni_head;
+
        QOBJ_FIELDS;
 };
 DECLARE_QOBJ_TYPE(bgp_master);