]> git.puffer.fish Git - mirror/frr.git/commitdiff
lib, bgpd: changes for EAD-per-ES fragmentation
authorAnuradha Karuppiah <anuradhak@nvidia.com>
Fri, 4 Jun 2021 00:28:43 +0000 (17:28 -0700)
committerDonald Sharp <sharpd@nvidia.com>
Fri, 18 Mar 2022 11:37:06 +0000 (07:37 -0400)
The EAD-per-ES route carries ECs for all the ES-EVI RTs. As the number of VNIs
increases, not all RTs fit into a standard BGP UPDATE (4K), so the route needs
to be fragmented.

Each fragment is associated with a separate RD and frag-id -
1. Local EAD-per-ES -
ES route table - {ES-frag-ID, ESI, ET=0xffffffff, VTEP-IP}
global route table - {RD=ES-frag-RD, ESI, ET=0xffffffff}
2. Remote EAD-per-ES -
VNI route table - {ESI, ET=0xffffffff, VTEP-IP}
global route table - {RD=ES-frag-RD, ESI, ET=0xffffffff}

Note: The fragment ID is not retained in the per-VNI routing table. At this
point that is acceptable, as we don't expect more than one EAD-per-ES fragment
to be imported into the per-VNI routing table. But that may need to be
re-worked at a later point.

CLI changes (sample with 4 VNIs per fragment for experimental purposes) -
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
root@torm-11:mgmt:~# vtysh -c "show bgp l2vpn evpn es 03:44:38:39:ff:ff:01:00:00:01"
ESI: 03:44:38:39:ff:ff:01:00:00:01
 Type: LR
 RD: 27.0.0.21:3
 Originator-IP: 27.0.0.21
 Local ES DF preference: 50000
 VNI Count: 10
 Remote VNI Count: 10
 VRF Count: 3
 MACIP EVI Path Count: 33
 MACIP Global Path Count: 198
 Inconsistent VNI VTEP Count: 0
 Inconsistencies: -
 Fragments: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
  27.0.0.21:3 EVIs: 4
  27.0.0.21:13 EVIs: 4
  27.0.0.21:22 EVIs: 2
 VTEPs:
  27.0.0.22 flags: EA df_alg: preference df_pref: 32767
  27.0.0.23 flags: EA df_alg: preference df_pref: 32767

root@torm-11:mgmt:~# vtysh -c "show bgp l2vpn evpn es-evi vni 1002 detail"
VNI: 1002 ESI: 03:44:38:39:ff:ff:01:00:00:01
 Type: LR
 ES fragment RD: 27.0.0.21:13 >>>>>>>>>>>>>>>>>>>>>>>>>
 Inconsistencies: -
 VTEPs: 27.0.0.22(EV),27.0.0.23(EV)

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

PS: The number of EVIs per-fragment has been set to 128 and may need further
tuning.

Ticket: #2632967

Signed-off-by: Anuradha Karuppiah <anuradhak@nvidia.com>
bgpd/bgp_evpn_mh.c
bgpd/bgp_evpn_mh.h
bgpd/bgp_evpn_private.h
bgpd/bgp_evpn_vty.c
bgpd/bgp_memory.c
bgpd/bgp_memory.h
lib/prefix.c
lib/prefix.h

index 8bb0e5705d84e902f8c21ab8b02e9128d223ba86..5cca3f210d01e2cf9d6518c824420912f677a7e1 100644 (file)
@@ -462,7 +462,9 @@ int bgp_evpn_mh_route_update(struct bgp *bgp, struct bgp_evpn_es *es,
  * ESR).
  */
 static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es,
-               struct bgpevpn *vpn, struct prefix_evpn *p)
+                                   struct bgpevpn *vpn,
+                                   struct bgp_evpn_es_frag *es_frag,
+                                   struct prefix_evpn *p)
 {
        afi_t afi = AFI_L2VPN;
        safi_t safi = SAFI_EVPN;
@@ -477,7 +479,7 @@ static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es,
                prd = &vpn->prd;
        } else {
                rt_table = es->route_table;
-               prd = &es->prd;
+               prd = &es_frag->prd;
        }
 
        /* First, locate the route node within the ESI or VNI.
@@ -680,7 +682,7 @@ static int bgp_evpn_type4_route_update(struct bgp *bgp,
                struct bgp_path_info *global_pi;
 
                dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
-                                               p, &es->prd);
+                                               p, &es->es_base_frag->prd);
                bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, dest,
                                         attr_new, 1, &global_pi,
                                         &route_changed);
@@ -699,7 +701,11 @@ static int bgp_evpn_type4_route_update(struct bgp *bgp,
 static int bgp_evpn_type4_route_delete(struct bgp *bgp,
                struct bgp_evpn_es *es, struct prefix_evpn *p)
 {
-       return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p);
+       if (!es->es_base_frag)
+               return -1;
+
+       return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */,
+                                       es->es_base_frag, p);
 }
 
 /* Process remote/received EVPN type-4 route (advertise or withdraw)  */
@@ -845,8 +851,9 @@ static int bgp_evpn_type4_remote_routes_import(struct bgp *bgp,
  */
 
 /* Extended communities associated with EAD-per-ES */
-static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es,
-               struct attr *attr)
+static void
+bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es_frag *es_frag,
+                                     struct attr *attr)
 {
        struct ecommunity ecom_encap;
        struct ecommunity ecom_esi_label;
@@ -886,7 +893,8 @@ static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es,
                        bgp_attr_set_ecommunity(
                                attr, ecommunity_merge(attr->ecommunity, ecom));
        } else {
-               for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
+               for (ALL_LIST_ELEMENTS_RO(es_frag->es_evi_frag_list, evi_node,
+                                         es_evi)) {
                        if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
                                continue;
                        for (ALL_LIST_ELEMENTS_RO(es_evi->vpn->export_rtl,
@@ -931,9 +939,10 @@ static void bgp_evpn_type1_evi_route_extcomm_build(struct bgp_evpn_es *es,
 /* Update EVPN EAD (type-1) route -
  * vpn - valid for EAD-EVI routes and NULL for EAD-ES routes
  */
-static int bgp_evpn_type1_route_update(struct bgp *bgp,
-               struct bgp_evpn_es *es, struct bgpevpn *vpn,
-               struct prefix_evpn *p)
+static int bgp_evpn_type1_route_update(struct bgp *bgp, struct bgp_evpn_es *es,
+                                      struct bgpevpn *vpn,
+                                      struct bgp_evpn_es_frag *es_frag,
+                                      struct prefix_evpn *p)
 {
        int ret = 0;
        afi_t afi = AFI_L2VPN;
@@ -979,7 +988,7 @@ static int bgp_evpn_type1_route_update(struct bgp *bgp,
                /* MPLS label is 0 for EAD-ES route */
 
                /* Set up extended community */
-               bgp_evpn_type1_es_route_extcomm_build(es, &attr);
+               bgp_evpn_type1_es_route_extcomm_build(es_frag, &attr);
 
                /* First, create (or fetch) route node within the ES. */
                /* NOTE: There is no RD here. */
@@ -995,7 +1004,7 @@ static int bgp_evpn_type1_route_update(struct bgp *bgp,
                                "%u ERROR: Failed to updated EAD-EVI route ESI: %s VTEP %pI4",
                                bgp->vrf_id, es->esi_str, &es->originator_ip);
                }
-               global_rd = &es->prd;
+               global_rd = &es_frag->prd;
        }
 
 
@@ -1039,54 +1048,86 @@ static int bgp_evpn_type1_route_update(struct bgp *bgp,
  * table and advertise these routes to peers.
  */
 
+static void bgp_evpn_ead_es_route_update(struct bgp *bgp,
+                                        struct bgp_evpn_es *es)
+{
+       struct listnode *node;
+       struct bgp_evpn_es_frag *es_frag;
+       struct prefix_evpn p;
+
+       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+                               es->originator_ip);
+       for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+               if (!listcount(es_frag->es_evi_frag_list))
+                       continue;
+
+               p.prefix.ead_addr.frag_id = es_frag->rd_id;
+               if (bgp_evpn_type1_route_update(bgp, es, NULL, es_frag, &p))
+                       flog_err(
+                               EC_BGP_EVPN_ROUTE_CREATE,
+                               "EAD-ES route creation failure for ESI %s frag %u",
+                               es->esi_str, es_frag->rd_id);
+       }
+}
+
+static void bgp_evpn_ead_evi_route_update(struct bgp *bgp,
+                                         struct bgp_evpn_es *es,
+                                         struct bgpevpn *vpn,
+                                         struct prefix_evpn *p)
+{
+       if (bgp_evpn_type1_route_update(bgp, es, vpn, NULL, p))
+               flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+                        "EAD-EVI route creation failure for ESI %s VNI %u",
+                        es->esi_str, vpn->vni);
+}
+
 void update_type1_routes_for_evi(struct bgp *bgp, struct bgpevpn *vpn)
 {
        struct prefix_evpn p;
        struct bgp_evpn_es *es;
        struct bgp_evpn_es_evi *es_evi;
-       struct bgp_evpn_es_evi *es_evi_next;
 
-       RB_FOREACH_SAFE(es_evi, bgp_es_evi_rb_head,
-                       &vpn->es_evi_rb_tree, es_evi_next) {
+
+       RB_FOREACH (es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) {
                es = es_evi->es;
 
+               if (es_evi->vpn != vpn)
+                       continue;
+
                /* Update EAD-ES */
-               if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) {
-                       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG,
-                                               &es->esi, es->originator_ip);
-                       if (bgp_evpn_type1_route_update(bgp, es, NULL, &p))
-                               flog_err(EC_BGP_EVPN_ROUTE_CREATE,
-                                       "%u: EAD-ES route update failure for ESI %s VNI %u",
-                                       bgp->vrf_id, es->esi_str,
-                                       es_evi->vpn->vni);
-               }
+               bgp_evpn_ead_es_route_update(bgp, es);
 
                /* Update EAD-EVI */
                if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
                        build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
                                                &es->esi, es->originator_ip);
-                       if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn,
-                                                       &p))
-                               flog_err(EC_BGP_EVPN_ROUTE_DELETE,
-                                       "%u: EAD-EVI route update failure for ESI %s VNI %u",
-                                       bgp->vrf_id, es->esi_str,
-                                       es_evi->vpn->vni);
+                       bgp_evpn_ead_evi_route_update(bgp, es, vpn, &p);
                }
        }
 }
 
 /* Delete local Type-1 route */
-static int bgp_evpn_type1_es_route_delete(struct bgp *bgp,
-               struct bgp_evpn_es *es, struct prefix_evpn *p)
+static void bgp_evpn_ead_es_route_delete(struct bgp *bgp,
+                                        struct bgp_evpn_es *es)
 {
-       return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p);
+       struct listnode *node;
+       struct bgp_evpn_es_frag *es_frag;
+       struct prefix_evpn p;
+
+       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+                               es->originator_ip);
+       for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+               p.prefix.ead_addr.frag_id = es_frag->rd_id;
+               bgp_evpn_mh_route_delete(bgp, es, NULL, es_frag, &p);
+       }
 }
 
-static int bgp_evpn_type1_evi_route_delete(struct bgp *bgp,
-               struct bgp_evpn_es *es, struct bgpevpn *vpn,
-               struct prefix_evpn *p)
+static int bgp_evpn_ead_evi_route_delete(struct bgp *bgp,
+                                        struct bgp_evpn_es *es,
+                                        struct bgpevpn *vpn,
+                                        struct prefix_evpn *p)
 {
-       return bgp_evpn_mh_route_delete(bgp, es, vpn, p);
+       return bgp_evpn_mh_route_delete(bgp, es, vpn, NULL, p);
 }
 
 /* Generate EAD-EVI for all VNIs */
@@ -1112,10 +1153,7 @@ static void bgp_evpn_local_type1_evi_route_add(struct bgp *bgp,
        for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
                if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
                        continue;
-               if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn, &p))
-                       flog_err(EC_BGP_EVPN_ROUTE_CREATE,
-                                       "%u: Type4 route creation failure for ESI %s",
-                                       bgp->vrf_id, es->esi_str);
+               bgp_evpn_ead_evi_route_update(bgp, es, es_evi->vpn, &p);
        }
 }
 
@@ -1140,7 +1178,7 @@ static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp,
        for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
                if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
                        continue;
-               if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, &p))
+               if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, NULL, &p))
                        flog_err(EC_BGP_EVPN_ROUTE_CREATE,
                                        "%u: Type4 route creation failure for ESI %s",
                                        bgp->vrf_id, es->esi_str);
@@ -1209,7 +1247,6 @@ void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp,
 {
        struct listnode *node, *nnode, *node_to_del;
        struct ecommunity *ecom;
-       struct prefix_evpn p;
        struct bgp_evpn_es *es;
 
        if (del) {
@@ -1255,9 +1292,6 @@ void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp,
                    !bgp_evpn_local_es_is_active(es))
                        continue;
 
-               build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
-                                       es->originator_ip);
-
                if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
                        zlog_debug(
                                "local ES %s del/re-add EAD route on export RT change",
@@ -1267,10 +1301,10 @@ void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp,
                 * withdraw EAD-ES. XXX - this should technically not be
                 * needed; can be removed after testing
                 */
-               bgp_evpn_type1_es_route_delete(bgp, es, &p);
+               bgp_evpn_ead_es_route_delete(bgp, es);
 
                /* generate EAD-ES */
-               bgp_evpn_type1_route_update(bgp, es, NULL, &p);
+               bgp_evpn_ead_es_route_update(bgp, es);
        }
 }
 
@@ -1687,6 +1721,167 @@ bgp_evpn_es_path_update_on_es_vrf_chg(struct bgp_evpn_es_vrf *es_vrf,
        }
 }
 
+static void bgp_evpn_es_frag_free(struct bgp_evpn_es_frag *es_frag)
+{
+       struct bgp_evpn_es *es = es_frag->es;
+
+       if (es->es_base_frag == es_frag)
+               es->es_base_frag = NULL;
+
+       if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+               zlog_debug("es %s frag %u free", es->esi_str, es_frag->rd_id);
+       list_delete_node(es->es_frag_list, &es_frag->es_listnode);
+
+       /* EVIs that are advertised using the info in this fragment */
+       list_delete(&es_frag->es_evi_frag_list);
+
+       bf_release_index(bm->rd_idspace, es_frag->rd_id);
+
+
+       XFREE(MTYPE_BGP_EVPN_ES_FRAG, es_frag);
+}
+
+static void bgp_evpn_es_frag_free_unused(struct bgp_evpn_es_frag *es_frag)
+{
+       if ((es_frag->es->es_base_frag == es_frag) ||
+           listcount(es_frag->es_evi_frag_list))
+               return;
+
+       bgp_evpn_es_frag_free(es_frag);
+}
+
+static void bgp_evpn_es_frag_free_all(struct bgp_evpn_es *es)
+{
+       struct listnode *node;
+       struct listnode *nnode;
+       struct bgp_evpn_es_frag *es_frag;
+
+       for (ALL_LIST_ELEMENTS(es->es_frag_list, node, nnode, es_frag))
+               bgp_evpn_es_frag_free(es_frag);
+}
+
+static struct bgp_evpn_es_frag *bgp_evpn_es_frag_new(struct bgp_evpn_es *es)
+{
+       struct bgp_evpn_es_frag *es_frag;
+       char buf[BGP_EVPN_PREFIX_RD_LEN];
+       struct bgp *bgp;
+
+       es_frag = XCALLOC(MTYPE_BGP_EVPN_ES_FRAG, sizeof(*es_frag));
+       bf_assign_index(bm->rd_idspace, es_frag->rd_id);
+       es_frag->prd.family = AF_UNSPEC;
+       es_frag->prd.prefixlen = 64;
+       bgp = bgp_get_evpn();
+       snprintfrr(buf, sizeof(buf), "%pI4:%hu", &bgp->router_id,
+                  es_frag->rd_id);
+       (void)str2prefix_rd(buf, &es_frag->prd);
+
+       /* EVIs that are advertised using the info in this fragment */
+       es_frag->es_evi_frag_list = list_new();
+       listset_app_node_mem(es_frag->es_evi_frag_list);
+
+       /* Link the fragment to the parent ES */
+       es_frag->es = es;
+       listnode_init(&es_frag->es_listnode, es_frag);
+       listnode_add(es->es_frag_list, &es_frag->es_listnode);
+
+       if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+               zlog_debug("es %s frag %u new", es->esi_str, es_frag->rd_id);
+       return es_frag;
+}
+
+static struct bgp_evpn_es_frag *
+bgp_evpn_es_find_frag_with_space(struct bgp_evpn_es *es)
+{
+       struct listnode *node;
+       struct bgp_evpn_es_frag *es_frag;
+
+       for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+               if (listcount(es_frag->es_evi_frag_list)
+                   < BGP_EVPN_MAX_EVI_PER_ES_FRAG)
+                       return es_frag;
+       }
+
+       /* No frags where found with space; allocate a new one */
+       return bgp_evpn_es_frag_new(es);
+}
+
+/* Link the ES-EVI to one of the ES fragments */
+static void bgp_evpn_es_frag_evi_add(struct bgp_evpn_es_evi *es_evi)
+{
+       struct bgp_evpn_es_frag *es_frag;
+       struct bgp_evpn_es *es = es_evi->es;
+
+       if (es_evi->es_frag ||
+           !(CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)))
+               return;
+
+       es_frag = bgp_evpn_es_find_frag_with_space(es);
+
+       es_evi->es_frag = es_frag;
+       listnode_init(&es_evi->es_frag_listnode, es_evi);
+       listnode_add(es_frag->es_evi_frag_list, &es_evi->es_frag_listnode);
+
+       if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+               zlog_debug("es %s vni %d linked to frag %u", es->esi_str,
+                          es_evi->vpn->vni, es_frag->rd_id);
+}
+
+/* UnLink the ES-EVI from the ES fragment */
+static void bgp_evpn_es_frag_evi_del(struct bgp_evpn_es_evi *es_evi,
+                                    bool send_ead_del_if_empty)
+{
+       struct bgp_evpn_es_frag *es_frag = es_evi->es_frag;
+       struct prefix_evpn p;
+       struct bgp_evpn_es *es;
+       struct bgp *bgp;
+
+       if (!es_frag)
+               return;
+
+       es = es_frag->es;
+       es_evi->es_frag = NULL;
+       if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+               zlog_debug("es %s vni %d unlinked from frag %u", es->esi_str,
+                          es_evi->vpn->vni, es_frag->rd_id);
+
+       list_delete_node(es_frag->es_evi_frag_list, &es_evi->es_frag_listnode);
+
+       /*
+        * if there are no other EVIs on the fragment deleted the EAD-ES for
+        * the fragment
+        */
+       if (send_ead_del_if_empty && !listcount(es_frag->es_evi_frag_list)) {
+               bgp = bgp_get_evpn();
+
+               if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+                       zlog_debug("es %s frag %u ead-es route delete",
+                                  es->esi_str, es_frag->rd_id);
+               build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+                                       es->originator_ip);
+               p.prefix.ead_addr.frag_id = es_frag->rd_id;
+               bgp_evpn_mh_route_delete(bgp, es, NULL, es_frag, &p);
+       }
+
+       /* We don't attempt to coalesce frags that may not be full. Instead we
+        * only free up the frag when it is completely empty.
+        */
+       bgp_evpn_es_frag_free_unused(es_frag);
+}
+
+/* Link the ES-EVIs to one of the ES fragments */
+static void bgp_evpn_es_frag_evi_update_all(struct bgp_evpn_es *es, bool add)
+{
+       struct listnode *node;
+       struct bgp_evpn_es_evi *es_evi;
+
+       for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) {
+               if (add)
+                       bgp_evpn_es_frag_evi_add(es_evi);
+               else
+                       bgp_evpn_es_frag_evi_del(es_evi, false);
+       }
+}
+
 /* compare ES-IDs for the global ES RB tree */
 static int bgp_es_rb_cmp(const struct bgp_evpn_es *es1,
                const struct bgp_evpn_es *es2)
@@ -1744,6 +1939,8 @@ static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi)
        listset_app_node_mem(es->macip_evi_path_list);
        es->macip_global_path_list = list_new();
        listset_app_node_mem(es->macip_global_path_list);
+       es->es_frag_list = list_new();
+       listset_app_node_mem(es->es_frag_list);
 
        QOBJ_REG(es, bgp_evpn_es);
 
@@ -1770,6 +1967,7 @@ static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller)
        list_delete(&es->es_vtep_list);
        list_delete(&es->macip_evi_path_list);
        list_delete(&es->macip_global_path_list);
+       list_delete(&es->es_frag_list);
        bgp_table_unlock(es->route_table);
 
        /* remove the entry from various databases */
@@ -1789,7 +1987,6 @@ static inline bool bgp_evpn_is_es_local_and_non_bypass(struct bgp_evpn_es *es)
 /* init local info associated with the ES */
 static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es)
 {
-       char buf[BGP_EVPN_PREFIX_RD_LEN];
        bool old_is_local;
        bool is_local;
 
@@ -1802,12 +1999,12 @@ static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es)
        listnode_init(&es->es_listnode, es);
        listnode_add(bgp_mh_info->local_es_list, &es->es_listnode);
 
-       /* auto derive RD for this es */
-       bf_assign_index(bm->rd_idspace, es->rd_id);
-       es->prd.family = AF_UNSPEC;
-       es->prd.prefixlen = 64;
-       snprintfrr(buf, sizeof(buf), "%pI4:%hu", &bgp->router_id, es->rd_id);
-       (void)str2prefix_rd(buf, &es->prd);
+       /* setup the first ES fragment; more fragments may be allocated based
+        * on the the number of EVI entries
+        */
+       es->es_base_frag = bgp_evpn_es_frag_new(es);
+       /* distribute ES-EVIs to one or more ES fragments */
+       bgp_evpn_es_frag_evi_update_all(es, true);
 
        is_local = bgp_evpn_is_es_local_and_non_bypass(es);
        if (old_is_local != is_local)
@@ -1823,6 +2020,11 @@ static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es, bool finish)
        if (!CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL))
                return;
 
+       /* clear the es frag references and free them up */
+       bgp_evpn_es_frag_evi_update_all(es, false);
+       es->es_base_frag = NULL;
+       bgp_evpn_es_frag_free_all(es);
+
        old_is_local = bgp_evpn_is_es_local_and_non_bypass(es);
        UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL);
 
@@ -1833,8 +2035,6 @@ static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es, bool finish)
        /* remove from the ES local list */
        list_delete_node(bgp_mh_info->local_es_list, &es->es_listnode);
 
-       bf_release_index(bm->rd_idspace, es->rd_id);
-
        bgp_evpn_es_free(es, __func__);
 }
 
@@ -1995,14 +2195,7 @@ static void bgp_evpn_local_es_deactivate(struct bgp *bgp,
                bgp_evpn_local_type1_evi_route_del(bgp, es);
 
        /* withdraw EAD-ES */
-       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG,
-                       &es->esi, es->originator_ip);
-       ret = bgp_evpn_type1_es_route_delete(bgp, es, &p);
-       if (ret) {
-               flog_err(EC_BGP_EVPN_ROUTE_DELETE,
-                               "%u failed to delete type-1 route for ESI %s",
-                               bgp->vrf_id, es->esi_str);
-       }
+       bgp_evpn_ead_es_route_delete(bgp, es);
 
        bgp_evpn_mac_update_on_es_oper_chg(es);
 }
@@ -2048,9 +2241,7 @@ static void bgp_evpn_local_es_activate(struct bgp *bgp, struct bgp_evpn_es *es,
                bgp_evpn_local_type1_evi_route_add(bgp, es);
 
                /* generate EAD-ES */
-               build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
-                                       es->originator_ip);
-               (void)bgp_evpn_type1_route_update(bgp, es, NULL, &p);
+               bgp_evpn_ead_es_route_update(bgp, es);
        }
 
        bgp_evpn_mac_update_on_es_oper_chg(es);
@@ -2233,6 +2424,41 @@ int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
        return 0;
 }
 
+static void bgp_evpn_es_json_frag_fill(json_object *json_frags,
+                                      struct bgp_evpn_es *es)
+{
+       json_object *json_frag;
+       char buf1[RD_ADDRSTRLEN];
+       struct listnode *node;
+       struct bgp_evpn_es_frag *es_frag;
+
+       for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+               json_frag = json_object_new_object();
+
+               json_object_string_add(
+                       json_frag, "rd",
+                       prefix_rd2str(&es_frag->prd, buf1, sizeof(buf1)));
+               json_object_int_add(json_frag, "eviCount",
+                                   listcount(es_frag->es_evi_frag_list));
+
+               json_object_array_add(json_frags, json_frag);
+       }
+}
+
+static void bgp_evpn_es_frag_show_detail(struct vty *vty,
+                                        struct bgp_evpn_es *es)
+{
+       struct listnode *node;
+       char buf1[RD_ADDRSTRLEN];
+       struct bgp_evpn_es_frag *es_frag;
+
+       for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) {
+               vty_out(vty, "  %s EVIs: %d\n",
+                       prefix_rd2str(&es_frag->prd, buf1, sizeof(buf1)),
+                       listcount(es_frag->es_evi_frag_list));
+       }
+}
+
 static char *bgp_evpn_es_vteps_str(char *vtep_str, struct bgp_evpn_es *es,
                                   uint8_t vtep_str_size)
 {
@@ -2342,9 +2568,11 @@ static void bgp_evpn_es_show_entry(struct vty *vty,
                json_object *json_types;
 
                json_object_string_add(json, "esi", es->esi_str);
-               json_object_string_add(json, "rd",
-                               prefix_rd2str(&es->prd, buf1,
-                                       sizeof(buf1)));
+               if (es->es_base_frag)
+                       json_object_string_add(
+                               json, "rd",
+                               prefix_rd2str(&es->es_base_frag->prd, buf1,
+                                             sizeof(buf1)));
 
                if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) {
                        json_types = json_object_new_array();
@@ -2381,8 +2609,9 @@ static void bgp_evpn_es_show_entry(struct vty *vty,
 
                bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str));
 
-               if (es->flags & BGP_EVPNES_LOCAL)
-                       prefix_rd2str(&es->prd, buf1, sizeof(buf1));
+               if (es->es_base_frag)
+                       prefix_rd2str(&es->es_base_frag->prd, buf1,
+                                     sizeof(buf1));
                else
                        strlcpy(buf1, "-", sizeof(buf1));
 
@@ -2399,6 +2628,7 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                json_object *json_flags;
                json_object *json_incons;
                json_object *json_vteps;
+               json_object *json_frags;
                struct listnode *node;
                struct bgp_evpn_es_vtep *es_vtep;
 
@@ -2437,6 +2667,11 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                        }
                        json_object_object_add(json, "vteps", json_vteps);
                }
+               if (listcount(es->es_frag_list)) {
+                       json_frags = json_object_new_array();
+                       bgp_evpn_es_json_frag_fill(json_frags, es);
+                       json_object_object_add(json, "fragments", json_frags);
+               }
                if (es->inconsistencies) {
                        json_incons = json_object_new_array();
                        if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST)
@@ -2456,8 +2691,9 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                if (es->flags & BGP_EVPNES_REMOTE)
                        strlcat(type_str, "R", sizeof(type_str));
 
-               if (es->flags & BGP_EVPNES_LOCAL)
-                       prefix_rd2str(&es->prd, buf1, sizeof(buf1));
+               if (es->es_base_frag)
+                       prefix_rd2str(&es->es_base_frag->prd, buf1,
+                                     sizeof(buf1));
                else
                        strlcpy(buf1, "-", sizeof(buf1));
 
@@ -2490,6 +2726,10 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                }
                vty_out(vty, " Inconsistencies: %s\n",
                                incons_str);
+               if (listcount(es->es_frag_list)) {
+                       vty_out(vty, " Fragments:\n");
+                       bgp_evpn_es_frag_show_detail(vty, es);
+               }
                if (listcount(es->es_vtep_list)) {
                        vty_out(vty, " VTEPs:\n");
                        bgp_evpn_es_vteps_show_detail(vty, es);
@@ -3362,7 +3602,7 @@ bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi)
         */
        if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE))
                return es_evi;
-
+       bgp_evpn_es_frag_evi_del(es_evi, false);
        bgp_evpn_es_vrf_deref(es_evi);
 
        /* remove from the ES's VNI list */
@@ -3391,6 +3631,7 @@ static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi)
        SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL);
        listnode_init(&es_evi->l2vni_listnode, es_evi);
        listnode_add(vpn->local_es_evi_list, &es_evi->l2vni_listnode);
+       bgp_evpn_es_frag_evi_add(es_evi);
 }
 
 /* clear any local info associated with the ES-EVI */
@@ -3449,24 +3690,19 @@ bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi)
 
        bgp = bgp_get_evpn();
 
+       /* remove the es_evi from the es_frag before sending the update */
+       bgp_evpn_es_frag_evi_del(es_evi, true);
        if (bgp) {
                /* update EAD-ES with new list of VNIs */
-               if (bgp_evpn_local_es_is_active(es)) {
-                       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG,
-                                       &es->esi, es->originator_ip);
-                       if (bgp_evpn_type1_route_update(bgp, es, NULL, &p))
-                               flog_err(EC_BGP_EVPN_ROUTE_CREATE,
-                                       "%u: EAD-ES route update failure for ESI %s VNI %u",
-                                       bgp->vrf_id, es->esi_str,
-                                       es_evi->vpn->vni);
-               }
+               if (bgp_evpn_local_es_is_active(es))
+                       bgp_evpn_ead_es_route_update(bgp, es);
 
                /* withdraw and delete EAD-EVI */
                if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
                        build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
                                        &es->esi, es->originator_ip);
-                       if (bgp_evpn_type1_evi_route_delete(bgp,
-                                               es, es_evi->vpn, &p))
+                       if (bgp_evpn_ead_evi_route_delete(bgp, es, es_evi->vpn,
+                                                         &p))
                                flog_err(EC_BGP_EVPN_ROUTE_DELETE,
                                        "%u: EAD-EVI route deletion failure for ESI %s VNI %u",
                                        bgp->vrf_id, es->esi_str,
@@ -3566,21 +3802,12 @@ int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni)
        if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
                build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, &es->esi,
                                        es->originator_ip);
-               if (bgp_evpn_type1_route_update(bgp, es, vpn, &p))
-                       flog_err(EC_BGP_EVPN_ROUTE_CREATE,
-                                       "%u: EAD-EVI route creation failure for ESI %s VNI %u",
-                                       bgp->vrf_id, es->esi_str, vni);
+               bgp_evpn_ead_evi_route_update(bgp, es, vpn, &p);
        }
 
        /* update EAD-ES */
-       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG,
-                       &es->esi, es->originator_ip);
-       if (bgp_evpn_local_es_is_active(es)) {
-               if (bgp_evpn_type1_route_update(bgp, es, NULL, &p))
-                       flog_err(EC_BGP_EVPN_ROUTE_CREATE,
-                                       "%u: EAD-ES route creation failure for ESI %s VNI %u",
-                                       bgp->vrf_id, es->esi_str, vni);
-       }
+       if (bgp_evpn_local_es_is_active(es))
+               bgp_evpn_ead_es_route_update(bgp, es);
 
        return 0;
 }
@@ -3857,11 +4084,18 @@ static void bgp_evpn_es_evi_show_entry(struct vty *vty,
 static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty,
                struct bgp_evpn_es_evi *es_evi, json_object *json)
 {
+       char buf1[RD_ADDRSTRLEN];
+
        if (json) {
                json_object *json_flags;
 
                /* Add the "brief" info first */
                bgp_evpn_es_evi_show_entry(vty, es_evi, json);
+               if (es_evi->es_frag)
+                       json_object_string_add(
+                               json, "esFragmentRd",
+                               prefix_rd2str(&es_evi->es_frag->prd, buf1,
+                                             sizeof(buf1)));
                if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) {
                        json_flags = json_object_new_array();
                        json_array_string_add(json_flags, "es-vtep-mismatch");
@@ -3884,6 +4118,10 @@ static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty,
                vty_out(vty, "VNI: %d ESI: %s\n",
                                es_evi->vpn->vni, es_evi->es->esi_str);
                vty_out(vty, " Type: %s\n", type_str);
+               if (es_evi->es_frag)
+                       vty_out(vty, " ES fragment RD: %s\n",
+                               prefix_rd2str(&es_evi->es_frag->prd, buf1,
+                                             sizeof(buf1)));
                vty_out(vty, " Inconsistencies: %s\n",
                        (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) ?
                        "es-vtep-mismatch":"-");
index 5d328802171a6626f66f4932a9c8c1ab8125dcc1..7a3a4bde944556d9f69b4665dead175098691734 100644 (file)
 
 #define BGP_EVPN_MH_USE_ES_L3NHG_DEF true
 
+/* XXX - tune this */
+#define BGP_EVPN_MAX_EVI_PER_ES_FRAG 128
+
+/* An ES can result in multiple EAD-per-ES routes. Each EAD fragment is
+ * associated with a unique RD
+ */
+struct bgp_evpn_es_frag {
+       /* Parent ES that this fragment belongs to */
+       struct bgp_evpn_es *es;
+
+       /* Id for deriving the RD automatically for this ES fragment */
+       uint16_t rd_id;
+       /* RD for this ES fragment */
+       struct prefix_rd prd;
+
+       /* Memory used for linking bgp_evpn_es_frag to bgp_evpn_es->es_frag_list */
+       struct listnode es_listnode;
+
+       /* List of ES-EVIs (bgp_evpn_es_evi) associated with this fragment */
+       struct list *es_evi_frag_list;
+};
+
 /* Ethernet Segment entry -
  * - Local and remote ESs are maintained in a global RB tree,
  * bgp_mh_info->es_rb_tree using ESI as key
@@ -79,11 +101,9 @@ struct bgp_evpn_es {
         */
        struct listnode pend_es_listnode;
 
-       /* [EVPNES_LOCAL] Id for deriving the RD automatically for this ESI */
-       uint16_t rd_id;
-
-       /* [EVPNES_LOCAL] RD for this ES */
-       struct prefix_rd prd;
+       /* [EVPNES_LOCAL] List of ES fragments (bgp_evpn_es_frag) */
+       struct list *es_frag_list;
+       struct bgp_evpn_es_frag *es_base_frag;
 
        /* [EVPNES_LOCAL] originator ip address  */
        struct in_addr originator_ip;
@@ -203,6 +223,8 @@ struct bgp_evpn_es_vrf {
  */
 struct bgp_evpn_es_evi {
        struct bgp_evpn_es *es;
+       /* Only applicable if EVI_LOCAL */
+       struct bgp_evpn_es_frag *es_frag;
        struct bgpevpn *vpn;
 
        /* ES-EVI flags */
@@ -224,6 +246,10 @@ struct bgp_evpn_es_evi {
         */
        struct listnode es_listnode;
 
+       /* memory used for linking the es_evi to
+        * es_evi->es_frag->es_evi_frag_list
+        */
+       struct listnode es_frag_listnode;
        /* list of PEs (bgp_evpn_es_evi_vtep) attached to the ES for this VNI */
        struct list *es_evi_vtep_list;
 
index 0614dbaea470377f593b5a9f107c5a41aedb7215..fd30cc2db3ae1fa3fa60f89b61fba42ca671f7c4 100644 (file)
@@ -532,6 +532,7 @@ static inline void evpn_type1_prefix_global_copy(struct prefix_evpn *global_p,
        memcpy(global_p, vni_p, sizeof(*global_p));
        global_p->prefix.ead_addr.ip.ipa_type = 0;
        global_p->prefix.ead_addr.ip.ipaddr_v4.s_addr = INADDR_ANY;
+       global_p->prefix.ead_addr.frag_id = 0;
 }
 
 /* EAD prefix in the global table doesn't include the VTEP-IP so
index 8622e13aa7a80370b605a0e74e991a854a529c51..4ab188832c7e7124b6d0a616fd7b1732a9d95401 100644 (file)
@@ -358,7 +358,7 @@ static void bgp_evpn_show_route_header(struct vty *vty, struct bgp *bgp,
                "Status codes: s suppressed, d damped, h history, * valid, > best, i - internal\n");
        vty_out(vty, "Origin codes: i - IGP, e - EGP, ? - incomplete\n");
        vty_out(vty,
-               "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]\n");
+               "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]:[Frag-id]\n");
        vty_out(vty,
                "EVPN type-2 prefix: [2]:[EthTag]:[MAClen]:[MAC]:[IPlen]:[IP]\n");
        vty_out(vty, "EVPN type-3 prefix: [3]:[EthTag]:[IPlen]:[OrigIP]\n");
@@ -2712,7 +2712,7 @@ static void evpn_show_route_rd(struct vty *vty, struct bgp *bgp,
                        /* RD header and legend - once overall. */
                        if (rd_header && !json) {
                                vty_out(vty,
-                                       "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]\n");
+                                       "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]:[Frag-id]\n");
                                vty_out(vty,
                                        "EVPN type-2 prefix: [2]:[EthTag]:[MAClen]:[MAC]\n");
                                vty_out(vty,
index ffb1ec162b5d480f8b953e199a81aabdef20d1c2..a994b536c4116c34f69fd487f779ff96ef004d5d 100644 (file)
@@ -126,6 +126,7 @@ DEFINE_MTYPE(BGPD, BGP_EVPN_PATH_NH_INFO, "BGP EVPN PATH NH Information");
 DEFINE_MTYPE(BGPD, BGP_EVPN_NH, "BGP EVPN Nexthop");
 DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI_VTEP, "BGP EVPN ES-EVI VTEP");
 DEFINE_MTYPE(BGPD, BGP_EVPN_ES, "BGP EVPN ESI Information");
+DEFINE_MTYPE(BGPD, BGP_EVPN_ES_FRAG, "BGP EVPN ES Fragment Information");
 DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI, "BGP EVPN ES-per-EVI Information");
 DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VRF, "BGP EVPN ES-per-VRF Information");
 DEFINE_MTYPE(BGPD, BGP_EVPN_IMPORT_RT, "BGP EVPN Import RT");
index 63e7b40ef786456dd7faf9182cbdd4e2e9b6437f..76b2f9f56a9ec7d17297619f8a0191d155dce554 100644 (file)
@@ -115,6 +115,7 @@ DECLARE_MTYPE(LCOMMUNITY_VAL);
 
 DECLARE_MTYPE(BGP_EVPN_MH_INFO);
 DECLARE_MTYPE(BGP_EVPN_ES);
+DECLARE_MTYPE(BGP_EVPN_ES_FRAG);
 DECLARE_MTYPE(BGP_EVPN_ES_EVI);
 DECLARE_MTYPE(BGP_EVPN_ES_VRF);
 DECLARE_MTYPE(BGP_EVPN_ES_VTEP);
index 89c5be8f381134081db4293321697407f02d6aed..4db0c2478b1f382f533953d85ad40fe1289ef963 100644 (file)
@@ -915,12 +915,13 @@ static const char *prefixevpn_ead2str(const struct prefix_evpn *p, char *str,
        char buf1[INET6_ADDRSTRLEN];
 
        family = IS_IPADDR_V4(&p->prefix.ead_addr.ip) ? AF_INET : AF_INET6;
-       snprintf(str, size, "[%d]:[%u]:[%s]:[%d]:[%s]", p->prefix.route_type,
-                p->prefix.ead_addr.eth_tag,
+       snprintf(str, size, "[%d]:[%u]:[%s]:[%d]:[%s]:[%u]",
+                p->prefix.route_type, p->prefix.ead_addr.eth_tag,
                 esi_to_str(&p->prefix.ead_addr.esi, buf, sizeof(buf)),
                 (family == AF_INET) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN,
                 inet_ntop(family, &p->prefix.ead_addr.ip.ipaddr_v4, buf1,
-                          sizeof(buf1)));
+                          sizeof(buf1)),
+                p->prefix.ead_addr.frag_id);
        return str;
 }
 
index b3545a72b4701eab1566f5d06e16b59758742c51..816a1517e17a15c49f0ea5b3a282bbd581136820 100644 (file)
@@ -95,6 +95,7 @@ struct evpn_ead_addr {
        esi_t esi;
        uint32_t eth_tag;
        struct ipaddr ip;
+       uint16_t frag_id;
 };
 
 struct evpn_macip_addr {