From 7b0db0e43fd470e2d386c36722069c0f8730f6c8 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Thu, 3 Jun 2021 17:28:43 -0700 Subject: [PATCH] lib, bgpd: changes for EAD-per-ES fragmentation The EAD-per-ES route carries ECs for all the ES-EVI RTs. As the number of VNIs increases, the RTs no longer all fit into a standard BGP UPDATE (4K), so the route needs to be fragmented. Each fragment is associated with a separate RD and frag-id - 1. Local EAD-per-ES - ES route table - {ES-frag-ID, ESI, ET=0xffffffff, VTEP-IP} global route table - {RD-=ES-frag-RD, ESI, ET=0xffffffff} 2. Remote EAD-per-ES - VNI route table - {ESI, ET=0xffffffff, VTEP-IP} global route table - {RD-=ES-frag-RD, ESI, ET=0xffffffff} Note: The fragment ID is abandoned in the per-VNI routing table. At this point that is acceptable as we don't expect more than one EAD-per-ES fragment to be imported into the per-VNI routing table. But that may need to be re-worked at a later point. CLI changes (sample with 4 VNIs per-fragment for experimental purposes) - >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> root@torm-11:mgmt:~# vtysh -c "show bgp l2vpn evpn es 03:44:38:39:ff:ff:01:00:00:01" ESI: 03:44:38:39:ff:ff:01:00:00:01 Type: LR RD: 27.0.0.21:3 Originator-IP: 27.0.0.21 Local ES DF preference: 50000 VNI Count: 10 Remote VNI Count: 10 VRF Count: 3 MACIP EVI Path Count: 33 MACIP Global Path Count: 198 Inconsistent VNI VTEP Count: 0 Inconsistencies: - Fragments: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 27.0.0.21:3 EVIs: 4 27.0.0.21:13 EVIs: 4 27.0.0.21:22 EVIs: 2 VTEPs: 27.0.0.22 flags: EA df_alg: preference df_pref: 32767 27.0.0.23 flags: EA df_alg: preference df_pref: 32767 root@torm-11:mgmt:~# vtysh -c "show bgp l2vpn evpn es-evi vni 1002 detail" VNI: 1002 ESI: 03:44:38:39:ff:ff:01:00:00:01 Type: LR ES fragment RD: 27.0.0.21:13 >>>>>>>>>>>>>>>>>>>>>>>>> Inconsistencies: - VTEPs: 27.0.0.22(EV),27.0.0.23(EV) 
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PS: The number of EVIs per-fragment has been set to 128 and may need further tuning. Ticket: #2632967 Signed-off-by: Anuradha Karuppiah --- bgpd/bgp_evpn_mh.c | 436 +++++++++++++++++++++++++++++++--------- bgpd/bgp_evpn_mh.h | 36 +++- bgpd/bgp_evpn_private.h | 1 + bgpd/bgp_evpn_vty.c | 4 +- bgpd/bgp_memory.c | 1 + bgpd/bgp_memory.h | 1 + lib/prefix.c | 7 +- lib/prefix.h | 1 + 8 files changed, 378 insertions(+), 109 deletions(-) diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c index 8bb0e5705d..5cca3f210d 100644 --- a/bgpd/bgp_evpn_mh.c +++ b/bgpd/bgp_evpn_mh.c @@ -462,7 +462,9 @@ int bgp_evpn_mh_route_update(struct bgp *bgp, struct bgp_evpn_es *es, * ESR). */ static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es, - struct bgpevpn *vpn, struct prefix_evpn *p) + struct bgpevpn *vpn, + struct bgp_evpn_es_frag *es_frag, + struct prefix_evpn *p) { afi_t afi = AFI_L2VPN; safi_t safi = SAFI_EVPN; @@ -477,7 +479,7 @@ static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es, prd = &vpn->prd; } else { rt_table = es->route_table; - prd = &es->prd; + prd = &es_frag->prd; } /* First, locate the route node within the ESI or VNI. 
@@ -680,7 +682,7 @@ static int bgp_evpn_type4_route_update(struct bgp *bgp, struct bgp_path_info *global_pi; dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, - p, &es->prd); + p, &es->es_base_frag->prd); bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, dest, attr_new, 1, &global_pi, &route_changed); @@ -699,7 +701,11 @@ static int bgp_evpn_type4_route_update(struct bgp *bgp, static int bgp_evpn_type4_route_delete(struct bgp *bgp, struct bgp_evpn_es *es, struct prefix_evpn *p) { - return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p); + if (!es->es_base_frag) + return -1; + + return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, + es->es_base_frag, p); } /* Process remote/received EVPN type-4 route (advertise or withdraw) */ @@ -845,8 +851,9 @@ static int bgp_evpn_type4_remote_routes_import(struct bgp *bgp, */ /* Extended communities associated with EAD-per-ES */ -static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es, - struct attr *attr) +static void +bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es_frag *es_frag, + struct attr *attr) { struct ecommunity ecom_encap; struct ecommunity ecom_esi_label; @@ -886,7 +893,8 @@ static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es, bgp_attr_set_ecommunity( attr, ecommunity_merge(attr->ecommunity, ecom)); } else { - for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { + for (ALL_LIST_ELEMENTS_RO(es_frag->es_evi_frag_list, evi_node, + es_evi)) { if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) continue; for (ALL_LIST_ELEMENTS_RO(es_evi->vpn->export_rtl, @@ -931,9 +939,10 @@ static void bgp_evpn_type1_evi_route_extcomm_build(struct bgp_evpn_es *es, /* Update EVPN EAD (type-1) route - * vpn - valid for EAD-EVI routes and NULL for EAD-ES routes */ -static int bgp_evpn_type1_route_update(struct bgp *bgp, - struct bgp_evpn_es *es, struct bgpevpn *vpn, - struct prefix_evpn *p) +static int bgp_evpn_type1_route_update(struct bgp *bgp, struct 
bgp_evpn_es *es, + struct bgpevpn *vpn, + struct bgp_evpn_es_frag *es_frag, + struct prefix_evpn *p) { int ret = 0; afi_t afi = AFI_L2VPN; @@ -979,7 +988,7 @@ static int bgp_evpn_type1_route_update(struct bgp *bgp, /* MPLS label is 0 for EAD-ES route */ /* Set up extended community */ - bgp_evpn_type1_es_route_extcomm_build(es, &attr); + bgp_evpn_type1_es_route_extcomm_build(es_frag, &attr); /* First, create (or fetch) route node within the ES. */ /* NOTE: There is no RD here. */ @@ -995,7 +1004,7 @@ static int bgp_evpn_type1_route_update(struct bgp *bgp, "%u ERROR: Failed to updated EAD-EVI route ESI: %s VTEP %pI4", bgp->vrf_id, es->esi_str, &es->originator_ip); } - global_rd = &es->prd; + global_rd = &es_frag->prd; } @@ -1039,54 +1048,86 @@ static int bgp_evpn_type1_route_update(struct bgp *bgp, * table and advertise these routes to peers. */ +static void bgp_evpn_ead_es_route_update(struct bgp *bgp, + struct bgp_evpn_es *es) +{ + struct listnode *node; + struct bgp_evpn_es_frag *es_frag; + struct prefix_evpn p; + + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi, + es->originator_ip); + for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) { + if (!listcount(es_frag->es_evi_frag_list)) + continue; + + p.prefix.ead_addr.frag_id = es_frag->rd_id; + if (bgp_evpn_type1_route_update(bgp, es, NULL, es_frag, &p)) + flog_err( + EC_BGP_EVPN_ROUTE_CREATE, + "EAD-ES route creation failure for ESI %s frag %u", + es->esi_str, es_frag->rd_id); + } +} + +static void bgp_evpn_ead_evi_route_update(struct bgp *bgp, + struct bgp_evpn_es *es, + struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + if (bgp_evpn_type1_route_update(bgp, es, vpn, NULL, p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "EAD-EVI route creation failure for ESI %s VNI %u", + es->esi_str, vpn->vni); +} + void update_type1_routes_for_evi(struct bgp *bgp, struct bgpevpn *vpn) { struct prefix_evpn p; struct bgp_evpn_es *es; struct bgp_evpn_es_evi *es_evi; - struct bgp_evpn_es_evi *es_evi_next; - 
RB_FOREACH_SAFE(es_evi, bgp_es_evi_rb_head, - &vpn->es_evi_rb_tree, es_evi_next) { + + RB_FOREACH (es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) { es = es_evi->es; + if (es_evi->vpn != vpn) + continue; + /* Update EAD-ES */ - if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) { - build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, - &es->esi, es->originator_ip); - if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: EAD-ES route update failure for ESI %s VNI %u", - bgp->vrf_id, es->esi_str, - es_evi->vpn->vni); - } + bgp_evpn_ead_es_route_update(bgp, es); /* Update EAD-EVI */ if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, &es->esi, es->originator_ip); - if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn, - &p)) - flog_err(EC_BGP_EVPN_ROUTE_DELETE, - "%u: EAD-EVI route update failure for ESI %s VNI %u", - bgp->vrf_id, es->esi_str, - es_evi->vpn->vni); + bgp_evpn_ead_evi_route_update(bgp, es, vpn, &p); } } } /* Delete local Type-1 route */ -static int bgp_evpn_type1_es_route_delete(struct bgp *bgp, - struct bgp_evpn_es *es, struct prefix_evpn *p) +static void bgp_evpn_ead_es_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es) { - return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p); + struct listnode *node; + struct bgp_evpn_es_frag *es_frag; + struct prefix_evpn p; + + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi, + es->originator_ip); + for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) { + p.prefix.ead_addr.frag_id = es_frag->rd_id; + bgp_evpn_mh_route_delete(bgp, es, NULL, es_frag, &p); + } } -static int bgp_evpn_type1_evi_route_delete(struct bgp *bgp, - struct bgp_evpn_es *es, struct bgpevpn *vpn, - struct prefix_evpn *p) +static int bgp_evpn_ead_evi_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, + struct bgpevpn *vpn, + struct prefix_evpn *p) { - return bgp_evpn_mh_route_delete(bgp, es, vpn, p); + return 
bgp_evpn_mh_route_delete(bgp, es, vpn, NULL, p); } /* Generate EAD-EVI for all VNIs */ @@ -1112,10 +1153,7 @@ static void bgp_evpn_local_type1_evi_route_add(struct bgp *bgp, for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) continue; - if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn, &p)) - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: Type4 route creation failure for ESI %s", - bgp->vrf_id, es->esi_str); + bgp_evpn_ead_evi_route_update(bgp, es, es_evi->vpn, &p); } } @@ -1140,7 +1178,7 @@ static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp, for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) continue; - if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, &p)) + if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, NULL, &p)) flog_err(EC_BGP_EVPN_ROUTE_CREATE, "%u: Type4 route creation failure for ESI %s", bgp->vrf_id, es->esi_str); @@ -1209,7 +1247,6 @@ void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp, { struct listnode *node, *nnode, *node_to_del; struct ecommunity *ecom; - struct prefix_evpn p; struct bgp_evpn_es *es; if (del) { @@ -1255,9 +1292,6 @@ void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp, !bgp_evpn_local_es_is_active(es)) continue; - build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi, - es->originator_ip); - if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) zlog_debug( "local ES %s del/re-add EAD route on export RT change", @@ -1267,10 +1301,10 @@ void bgp_evpn_mh_config_ead_export_rt(struct bgp *bgp, * withdraw EAD-ES. 
XXX - this should technically not be * needed; can be removed after testing */ - bgp_evpn_type1_es_route_delete(bgp, es, &p); + bgp_evpn_ead_es_route_delete(bgp, es); /* generate EAD-ES */ - bgp_evpn_type1_route_update(bgp, es, NULL, &p); + bgp_evpn_ead_es_route_update(bgp, es); } } @@ -1687,6 +1721,167 @@ bgp_evpn_es_path_update_on_es_vrf_chg(struct bgp_evpn_es_vrf *es_vrf, } } +static void bgp_evpn_es_frag_free(struct bgp_evpn_es_frag *es_frag) +{ + struct bgp_evpn_es *es = es_frag->es; + + if (es->es_base_frag == es_frag) + es->es_base_frag = NULL; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s frag %u free", es->esi_str, es_frag->rd_id); + list_delete_node(es->es_frag_list, &es_frag->es_listnode); + + /* EVIs that are advertised using the info in this fragment */ + list_delete(&es_frag->es_evi_frag_list); + + bf_release_index(bm->rd_idspace, es_frag->rd_id); + + + XFREE(MTYPE_BGP_EVPN_ES_FRAG, es_frag); +} + +static void bgp_evpn_es_frag_free_unused(struct bgp_evpn_es_frag *es_frag) +{ + if ((es_frag->es->es_base_frag == es_frag) || + listcount(es_frag->es_evi_frag_list)) + return; + + bgp_evpn_es_frag_free(es_frag); +} + +static void bgp_evpn_es_frag_free_all(struct bgp_evpn_es *es) +{ + struct listnode *node; + struct listnode *nnode; + struct bgp_evpn_es_frag *es_frag; + + for (ALL_LIST_ELEMENTS(es->es_frag_list, node, nnode, es_frag)) + bgp_evpn_es_frag_free(es_frag); +} + +static struct bgp_evpn_es_frag *bgp_evpn_es_frag_new(struct bgp_evpn_es *es) +{ + struct bgp_evpn_es_frag *es_frag; + char buf[BGP_EVPN_PREFIX_RD_LEN]; + struct bgp *bgp; + + es_frag = XCALLOC(MTYPE_BGP_EVPN_ES_FRAG, sizeof(*es_frag)); + bf_assign_index(bm->rd_idspace, es_frag->rd_id); + es_frag->prd.family = AF_UNSPEC; + es_frag->prd.prefixlen = 64; + bgp = bgp_get_evpn(); + snprintfrr(buf, sizeof(buf), "%pI4:%hu", &bgp->router_id, + es_frag->rd_id); + (void)str2prefix_rd(buf, &es_frag->prd); + + /* EVIs that are advertised using the info in this fragment */ + 
es_frag->es_evi_frag_list = list_new(); + listset_app_node_mem(es_frag->es_evi_frag_list); + + /* Link the fragment to the parent ES */ + es_frag->es = es; + listnode_init(&es_frag->es_listnode, es_frag); + listnode_add(es->es_frag_list, &es_frag->es_listnode); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s frag %u new", es->esi_str, es_frag->rd_id); + return es_frag; +} + +static struct bgp_evpn_es_frag * +bgp_evpn_es_find_frag_with_space(struct bgp_evpn_es *es) +{ + struct listnode *node; + struct bgp_evpn_es_frag *es_frag; + + for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) { + if (listcount(es_frag->es_evi_frag_list) + < BGP_EVPN_MAX_EVI_PER_ES_FRAG) + return es_frag; + } + + /* No frags where found with space; allocate a new one */ + return bgp_evpn_es_frag_new(es); +} + +/* Link the ES-EVI to one of the ES fragments */ +static void bgp_evpn_es_frag_evi_add(struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es_frag *es_frag; + struct bgp_evpn_es *es = es_evi->es; + + if (es_evi->es_frag || + !(CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))) + return; + + es_frag = bgp_evpn_es_find_frag_with_space(es); + + es_evi->es_frag = es_frag; + listnode_init(&es_evi->es_frag_listnode, es_evi); + listnode_add(es_frag->es_evi_frag_list, &es_evi->es_frag_listnode); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vni %d linked to frag %u", es->esi_str, + es_evi->vpn->vni, es_frag->rd_id); +} + +/* UnLink the ES-EVI from the ES fragment */ +static void bgp_evpn_es_frag_evi_del(struct bgp_evpn_es_evi *es_evi, + bool send_ead_del_if_empty) +{ + struct bgp_evpn_es_frag *es_frag = es_evi->es_frag; + struct prefix_evpn p; + struct bgp_evpn_es *es; + struct bgp *bgp; + + if (!es_frag) + return; + + es = es_frag->es; + es_evi->es_frag = NULL; + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vni %d unlinked from frag %u", es->esi_str, + es_evi->vpn->vni, es_frag->rd_id); + + list_delete_node(es_frag->es_evi_frag_list, 
&es_evi->es_frag_listnode); + + /* + * if there are no other EVIs on the fragment deleted the EAD-ES for + * the fragment + */ + if (send_ead_del_if_empty && !listcount(es_frag->es_evi_frag_list)) { + bgp = bgp_get_evpn(); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s frag %u ead-es route delete", + es->esi_str, es_frag->rd_id); + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi, + es->originator_ip); + p.prefix.ead_addr.frag_id = es_frag->rd_id; + bgp_evpn_mh_route_delete(bgp, es, NULL, es_frag, &p); + } + + /* We don't attempt to coalesce frags that may not be full. Instead we + * only free up the frag when it is completely empty. + */ + bgp_evpn_es_frag_free_unused(es_frag); +} + +/* Link the ES-EVIs to one of the ES fragments */ +static void bgp_evpn_es_frag_evi_update_all(struct bgp_evpn_es *es, bool add) +{ + struct listnode *node; + struct bgp_evpn_es_evi *es_evi; + + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) { + if (add) + bgp_evpn_es_frag_evi_add(es_evi); + else + bgp_evpn_es_frag_evi_del(es_evi, false); + } +} + /* compare ES-IDs for the global ES RB tree */ static int bgp_es_rb_cmp(const struct bgp_evpn_es *es1, const struct bgp_evpn_es *es2) @@ -1744,6 +1939,8 @@ static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi) listset_app_node_mem(es->macip_evi_path_list); es->macip_global_path_list = list_new(); listset_app_node_mem(es->macip_global_path_list); + es->es_frag_list = list_new(); + listset_app_node_mem(es->es_frag_list); QOBJ_REG(es, bgp_evpn_es); @@ -1770,6 +1967,7 @@ static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller) list_delete(&es->es_vtep_list); list_delete(&es->macip_evi_path_list); list_delete(&es->macip_global_path_list); + list_delete(&es->es_frag_list); bgp_table_unlock(es->route_table); /* remove the entry from various databases */ @@ -1789,7 +1987,6 @@ static inline bool bgp_evpn_is_es_local_and_non_bypass(struct bgp_evpn_es *es) /* init local info 
associated with the ES */ static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es) { - char buf[BGP_EVPN_PREFIX_RD_LEN]; bool old_is_local; bool is_local; @@ -1802,12 +1999,12 @@ static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es) listnode_init(&es->es_listnode, es); listnode_add(bgp_mh_info->local_es_list, &es->es_listnode); - /* auto derive RD for this es */ - bf_assign_index(bm->rd_idspace, es->rd_id); - es->prd.family = AF_UNSPEC; - es->prd.prefixlen = 64; - snprintfrr(buf, sizeof(buf), "%pI4:%hu", &bgp->router_id, es->rd_id); - (void)str2prefix_rd(buf, &es->prd); + /* setup the first ES fragment; more fragments may be allocated based + * on the the number of EVI entries + */ + es->es_base_frag = bgp_evpn_es_frag_new(es); + /* distribute ES-EVIs to one or more ES fragments */ + bgp_evpn_es_frag_evi_update_all(es, true); is_local = bgp_evpn_is_es_local_and_non_bypass(es); if (old_is_local != is_local) @@ -1823,6 +2020,11 @@ static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es, bool finish) if (!CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) return; + /* clear the es frag references and free them up */ + bgp_evpn_es_frag_evi_update_all(es, false); + es->es_base_frag = NULL; + bgp_evpn_es_frag_free_all(es); + old_is_local = bgp_evpn_is_es_local_and_non_bypass(es); UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL); @@ -1833,8 +2035,6 @@ static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es, bool finish) /* remove from the ES local list */ list_delete_node(bgp_mh_info->local_es_list, &es->es_listnode); - bf_release_index(bm->rd_idspace, es->rd_id); - bgp_evpn_es_free(es, __func__); } @@ -1995,14 +2195,7 @@ static void bgp_evpn_local_es_deactivate(struct bgp *bgp, bgp_evpn_local_type1_evi_route_del(bgp, es); /* withdraw EAD-ES */ - build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, - &es->esi, es->originator_ip); - ret = bgp_evpn_type1_es_route_delete(bgp, es, &p); - if (ret) { - 
flog_err(EC_BGP_EVPN_ROUTE_DELETE, - "%u failed to delete type-1 route for ESI %s", - bgp->vrf_id, es->esi_str); - } + bgp_evpn_ead_es_route_delete(bgp, es); bgp_evpn_mac_update_on_es_oper_chg(es); } @@ -2048,9 +2241,7 @@ static void bgp_evpn_local_es_activate(struct bgp *bgp, struct bgp_evpn_es *es, bgp_evpn_local_type1_evi_route_add(bgp, es); /* generate EAD-ES */ - build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi, - es->originator_ip); - (void)bgp_evpn_type1_route_update(bgp, es, NULL, &p); + bgp_evpn_ead_es_route_update(bgp, es); } bgp_evpn_mac_update_on_es_oper_chg(es); @@ -2233,6 +2424,41 @@ int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, return 0; } +static void bgp_evpn_es_json_frag_fill(json_object *json_frags, + struct bgp_evpn_es *es) +{ + json_object *json_frag; + char buf1[RD_ADDRSTRLEN]; + struct listnode *node; + struct bgp_evpn_es_frag *es_frag; + + for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) { + json_frag = json_object_new_object(); + + json_object_string_add( + json_frag, "rd", + prefix_rd2str(&es_frag->prd, buf1, sizeof(buf1))); + json_object_int_add(json_frag, "eviCount", + listcount(es_frag->es_evi_frag_list)); + + json_object_array_add(json_frags, json_frag); + } +} + +static void bgp_evpn_es_frag_show_detail(struct vty *vty, + struct bgp_evpn_es *es) +{ + struct listnode *node; + char buf1[RD_ADDRSTRLEN]; + struct bgp_evpn_es_frag *es_frag; + + for (ALL_LIST_ELEMENTS_RO(es->es_frag_list, node, es_frag)) { + vty_out(vty, " %s EVIs: %d\n", + prefix_rd2str(&es_frag->prd, buf1, sizeof(buf1)), + listcount(es_frag->es_evi_frag_list)); + } +} + static char *bgp_evpn_es_vteps_str(char *vtep_str, struct bgp_evpn_es *es, uint8_t vtep_str_size) { @@ -2342,9 +2568,11 @@ static void bgp_evpn_es_show_entry(struct vty *vty, json_object *json_types; json_object_string_add(json, "esi", es->esi_str); - json_object_string_add(json, "rd", - prefix_rd2str(&es->prd, buf1, - sizeof(buf1))); + if (es->es_base_frag) + 
json_object_string_add( + json, "rd", + prefix_rd2str(&es->es_base_frag->prd, buf1, + sizeof(buf1))); if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) { json_types = json_object_new_array(); @@ -2381,8 +2609,9 @@ static void bgp_evpn_es_show_entry(struct vty *vty, bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str)); - if (es->flags & BGP_EVPNES_LOCAL) - prefix_rd2str(&es->prd, buf1, sizeof(buf1)); + if (es->es_base_frag) + prefix_rd2str(&es->es_base_frag->prd, buf1, + sizeof(buf1)); else strlcpy(buf1, "-", sizeof(buf1)); @@ -2399,6 +2628,7 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty, json_object *json_flags; json_object *json_incons; json_object *json_vteps; + json_object *json_frags; struct listnode *node; struct bgp_evpn_es_vtep *es_vtep; @@ -2437,6 +2667,11 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty, } json_object_object_add(json, "vteps", json_vteps); } + if (listcount(es->es_frag_list)) { + json_frags = json_object_new_array(); + bgp_evpn_es_json_frag_fill(json_frags, es); + json_object_object_add(json, "fragments", json_frags); + } if (es->inconsistencies) { json_incons = json_object_new_array(); if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST) @@ -2456,8 +2691,9 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty, if (es->flags & BGP_EVPNES_REMOTE) strlcat(type_str, "R", sizeof(type_str)); - if (es->flags & BGP_EVPNES_LOCAL) - prefix_rd2str(&es->prd, buf1, sizeof(buf1)); + if (es->es_base_frag) + prefix_rd2str(&es->es_base_frag->prd, buf1, + sizeof(buf1)); else strlcpy(buf1, "-", sizeof(buf1)); @@ -2490,6 +2726,10 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty, } vty_out(vty, " Inconsistencies: %s\n", incons_str); + if (listcount(es->es_frag_list)) { + vty_out(vty, " Fragments:\n"); + bgp_evpn_es_frag_show_detail(vty, es); + } if (listcount(es->es_vtep_list)) { vty_out(vty, " VTEPs:\n"); bgp_evpn_es_vteps_show_detail(vty, es); @@ -3362,7 +3602,7 @@ bgp_evpn_es_evi_free(struct 
bgp_evpn_es_evi *es_evi) */ if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE)) return es_evi; - + bgp_evpn_es_frag_evi_del(es_evi, false); bgp_evpn_es_vrf_deref(es_evi); /* remove from the ES's VNI list */ @@ -3391,6 +3631,7 @@ static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi) SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL); listnode_init(&es_evi->l2vni_listnode, es_evi); listnode_add(vpn->local_es_evi_list, &es_evi->l2vni_listnode); + bgp_evpn_es_frag_evi_add(es_evi); } /* clear any local info associated with the ES-EVI */ @@ -3449,24 +3690,19 @@ bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi) bgp = bgp_get_evpn(); + /* remove the es_evi from the es_frag before sending the update */ + bgp_evpn_es_frag_evi_del(es_evi, true); if (bgp) { /* update EAD-ES with new list of VNIs */ - if (bgp_evpn_local_es_is_active(es)) { - build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, - &es->esi, es->originator_ip); - if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: EAD-ES route update failure for ESI %s VNI %u", - bgp->vrf_id, es->esi_str, - es_evi->vpn->vni); - } + if (bgp_evpn_local_es_is_active(es)) + bgp_evpn_ead_es_route_update(bgp, es); /* withdraw and delete EAD-EVI */ if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, &es->esi, es->originator_ip); - if (bgp_evpn_type1_evi_route_delete(bgp, - es, es_evi->vpn, &p)) + if (bgp_evpn_ead_evi_route_delete(bgp, es, es_evi->vpn, + &p)) flog_err(EC_BGP_EVPN_ROUTE_DELETE, "%u: EAD-EVI route deletion failure for ESI %s VNI %u", bgp->vrf_id, es->esi_str, @@ -3566,21 +3802,12 @@ int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni) if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, &es->esi, es->originator_ip); - if (bgp_evpn_type1_route_update(bgp, es, vpn, &p)) - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: EAD-EVI route 
creation failure for ESI %s VNI %u", - bgp->vrf_id, es->esi_str, vni); + bgp_evpn_ead_evi_route_update(bgp, es, vpn, &p); } /* update EAD-ES */ - build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, - &es->esi, es->originator_ip); - if (bgp_evpn_local_es_is_active(es)) { - if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: EAD-ES route creation failure for ESI %s VNI %u", - bgp->vrf_id, es->esi_str, vni); - } + if (bgp_evpn_local_es_is_active(es)) + bgp_evpn_ead_es_route_update(bgp, es); return 0; } @@ -3857,11 +4084,18 @@ static void bgp_evpn_es_evi_show_entry(struct vty *vty, static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty, struct bgp_evpn_es_evi *es_evi, json_object *json) { + char buf1[RD_ADDRSTRLEN]; + if (json) { json_object *json_flags; /* Add the "brief" info first */ bgp_evpn_es_evi_show_entry(vty, es_evi, json); + if (es_evi->es_frag) + json_object_string_add( + json, "esFragmentRd", + prefix_rd2str(&es_evi->es_frag->prd, buf1, + sizeof(buf1))); if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) { json_flags = json_object_new_array(); json_array_string_add(json_flags, "es-vtep-mismatch"); @@ -3884,6 +4118,10 @@ static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty, vty_out(vty, "VNI: %d ESI: %s\n", es_evi->vpn->vni, es_evi->es->esi_str); vty_out(vty, " Type: %s\n", type_str); + if (es_evi->es_frag) + vty_out(vty, " ES fragment RD: %s\n", + prefix_rd2str(&es_evi->es_frag->prd, buf1, + sizeof(buf1))); vty_out(vty, " Inconsistencies: %s\n", (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) ? "es-vtep-mismatch":"-"); diff --git a/bgpd/bgp_evpn_mh.h b/bgpd/bgp_evpn_mh.h index 5d32880217..7a3a4bde94 100644 --- a/bgpd/bgp_evpn_mh.h +++ b/bgpd/bgp_evpn_mh.h @@ -35,6 +35,28 @@ #define BGP_EVPN_MH_USE_ES_L3NHG_DEF true +/* XXX - tune this */ +#define BGP_EVPN_MAX_EVI_PER_ES_FRAG 128 + +/* An ES can result in multiple EAD-per-ES route. 
Each EAD fragment is + * associated with a unique RD + */ +struct bgp_evpn_es_frag { + /* frag is associated with a parent ES */ + struct bgp_evpn_es *es; + + /* Id for deriving the RD automatically for this ES fragment */ + uint16_t rd_id; + /* RD for this ES fragment */ + struct prefix_rd prd; + + /* Memory used for linking the frag to bgp_evpn_es->es_frag_list */ + struct listnode es_listnode; + + /* List of ES-EVIs associated with this fragment */ + struct list *es_evi_frag_list; +}; + /* Ethernet Segment entry - * - Local and remote ESs are maintained in a global RB tree, * bgp_mh_info->es_rb_tree using ESI as key @@ -79,11 +101,9 @@ struct bgp_evpn_es { */ struct listnode pend_es_listnode; - /* [EVPNES_LOCAL] Id for deriving the RD automatically for this ESI */ - uint16_t rd_id; - - /* [EVPNES_LOCAL] RD for this ES */ - struct prefix_rd prd; + /* [EVPNES_LOCAL] List of fragments for this ES (bgp_evpn_es_frag) */ + struct list *es_frag_list; + struct bgp_evpn_es_frag *es_base_frag; /* [EVPNES_LOCAL] originator ip address */ struct in_addr originator_ip; @@ -203,6 +223,8 @@ struct bgp_evpn_es_vrf { */ struct bgp_evpn_es_evi { struct bgp_evpn_es *es; + /* Only applicable if EVI_LOCAL */ + struct bgp_evpn_es_frag *es_frag; struct bgpevpn *vpn; /* ES-EVI flags */ @@ -224,6 +246,10 @@ struct bgp_evpn_es_evi { */ struct listnode es_listnode; + /* memory used for linking the es_evi to + * es_evi->es_frag->es_evi_frag_list + */ + struct listnode es_frag_listnode; /* list of PEs (bgp_evpn_es_evi_vtep) attached to the ES for this VNI */ struct list *es_evi_vtep_list; diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index 0614dbaea4..fd30cc2db3 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -532,6 +532,7 @@ static inline void evpn_type1_prefix_global_copy(struct prefix_evpn *global_p, memcpy(global_p, vni_p, sizeof(*global_p)); global_p->prefix.ead_addr.ip.ipa_type = 0; global_p->prefix.ead_addr.ip.ipaddr_v4.s_addr = INADDR_ANY; + 
global_p->prefix.ead_addr.frag_id = 0; } /* EAD prefix in the global table doesn't include the VTEP-IP so diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c index 8622e13aa7..4ab188832c 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -358,7 +358,7 @@ static void bgp_evpn_show_route_header(struct vty *vty, struct bgp *bgp, "Status codes: s suppressed, d damped, h history, * valid, > best, i - internal\n"); vty_out(vty, "Origin codes: i - IGP, e - EGP, ? - incomplete\n"); vty_out(vty, - "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]\n"); + "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]:[Frag-id]\n"); vty_out(vty, "EVPN type-2 prefix: [2]:[EthTag]:[MAClen]:[MAC]:[IPlen]:[IP]\n"); vty_out(vty, "EVPN type-3 prefix: [3]:[EthTag]:[IPlen]:[OrigIP]\n"); @@ -2712,7 +2712,7 @@ static void evpn_show_route_rd(struct vty *vty, struct bgp *bgp, /* RD header and legend - once overall. */ if (rd_header && !json) { vty_out(vty, - "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]\n"); + "EVPN type-1 prefix: [1]:[EthTag]:[ESI]:[IPlen]:[VTEP-IP]:[Frag-id]\n"); vty_out(vty, "EVPN type-2 prefix: [2]:[EthTag]:[MAClen]:[MAC]\n"); vty_out(vty, diff --git a/bgpd/bgp_memory.c b/bgpd/bgp_memory.c index ffb1ec162b..a994b536c4 100644 --- a/bgpd/bgp_memory.c +++ b/bgpd/bgp_memory.c @@ -126,6 +126,7 @@ DEFINE_MTYPE(BGPD, BGP_EVPN_PATH_NH_INFO, "BGP EVPN PATH NH Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_NH, "BGP EVPN Nexthop"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI_VTEP, "BGP EVPN ES-EVI VTEP"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES, "BGP EVPN ESI Information"); +DEFINE_MTYPE(BGPD, BGP_EVPN_ES_FRAG, "BGP EVPN ES Fragment Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI, "BGP EVPN ES-per-EVI Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VRF, "BGP EVPN ES-per-VRF Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_IMPORT_RT, "BGP EVPN Import RT"); diff --git a/bgpd/bgp_memory.h b/bgpd/bgp_memory.h index 63e7b40ef7..76b2f9f56a 100644 --- a/bgpd/bgp_memory.h +++ 
b/bgpd/bgp_memory.h @@ -115,6 +115,7 @@ DECLARE_MTYPE(LCOMMUNITY_VAL); DECLARE_MTYPE(BGP_EVPN_MH_INFO); DECLARE_MTYPE(BGP_EVPN_ES); +DECLARE_MTYPE(BGP_EVPN_ES_FRAG); DECLARE_MTYPE(BGP_EVPN_ES_EVI); DECLARE_MTYPE(BGP_EVPN_ES_VRF); DECLARE_MTYPE(BGP_EVPN_ES_VTEP); diff --git a/lib/prefix.c b/lib/prefix.c index 89c5be8f38..4db0c2478b 100644 --- a/lib/prefix.c +++ b/lib/prefix.c @@ -915,12 +915,13 @@ static const char *prefixevpn_ead2str(const struct prefix_evpn *p, char *str, char buf1[INET6_ADDRSTRLEN]; family = IS_IPADDR_V4(&p->prefix.ead_addr.ip) ? AF_INET : AF_INET6; - snprintf(str, size, "[%d]:[%u]:[%s]:[%d]:[%s]", p->prefix.route_type, - p->prefix.ead_addr.eth_tag, + snprintf(str, size, "[%d]:[%u]:[%s]:[%d]:[%s]:[%u]", + p->prefix.route_type, p->prefix.ead_addr.eth_tag, esi_to_str(&p->prefix.ead_addr.esi, buf, sizeof(buf)), (family == AF_INET) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN, inet_ntop(family, &p->prefix.ead_addr.ip.ipaddr_v4, buf1, - sizeof(buf1))); + sizeof(buf1)), + p->prefix.ead_addr.frag_id); return str; } diff --git a/lib/prefix.h b/lib/prefix.h index b3545a72b4..816a1517e1 100644 --- a/lib/prefix.h +++ b/lib/prefix.h @@ -95,6 +95,7 @@ struct evpn_ead_addr { esi_t esi; uint32_t eth_tag; struct ipaddr ip; + uint16_t frag_id; }; struct evpn_macip_addr { -- 2.39.5