diff options
115 files changed, 11478 insertions, 2633 deletions
diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index 08e50fc4f2..948055e375 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -718,6 +718,9 @@ bool attrhash_cmp(const void *p1, const void *p2) && IPV4_ADDR_SAME(&attr1->originator_id, &attr2->originator_id) && overlay_index_same(attr1, attr2) + && !memcmp(&attr1->esi, &attr2->esi, sizeof(esi_t)) + && attr1->es_flags == attr2->es_flags + && attr1->mm_sync_seqnum == attr2->mm_sync_seqnum && attr1->nh_ifindex == attr2->nh_ifindex && attr1->nh_lla_ifindex == attr2->nh_lla_ifindex && attr1->distance == attr2->distance @@ -2186,6 +2189,7 @@ bgp_attr_ext_communities(struct bgp_attr_parser_args *args) struct attr *const attr = args->attr; const bgp_size_t length = args->length; uint8_t sticky = 0; + bool proxy = false; if (length == 0) { attr->ecommunity = NULL; @@ -2223,7 +2227,9 @@ bgp_attr_ext_communities(struct bgp_attr_parser_args *args) attr->router_flag = 1; /* Check EVPN Neighbor advertisement flags, R-bit */ - bgp_attr_evpn_na_flag(attr, &attr->router_flag); + bgp_attr_evpn_na_flag(attr, &attr->router_flag, &proxy); + if (proxy) + attr->es_flags |= ATTR_ES_PROXY_ADVERT; /* Extract the Rmac, if any */ if (bgp_attr_rmac(attr, &attr->rmac)) { diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index 94531313ae..1b2c75fbef 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -215,6 +215,30 @@ struct attr { /* NA router flag (R-bit) support in EVPN */ uint8_t router_flag; + /* ES info */ + uint8_t es_flags; + /* Path is not "locally-active" on the advertising VTEP. This is + * translated into an ARP-ND ECOM. + */ +#define ATTR_ES_PROXY_ADVERT (1 << 0) + /* Destination ES is present locally. This flag is set on local + * paths and sync paths + */ +#define ATTR_ES_IS_LOCAL (1 << 1) + /* There are one or more non-best paths from ES peers. Note that + * this flag is only set on the local MAC-IP paths in the VNI + * route table (not set in the global routing table). 
And only + * non-proxy advertisements from an ES peer can result in this + * flag being set. + */ +#define ATTR_ES_PEER_ACTIVE (1 << 2) + /* There are one or more non-best proxy paths from ES peers */ +#define ATTR_ES_PEER_PROXY (1 << 3) + /* An ES peer has router bit set - only applicable if + * ATTR_ES_PEER_ACTIVE is set + */ +#define ATTR_ES_PEER_ROUTER (1 << 4) + /* route tag */ route_tag_t tag; @@ -241,6 +265,13 @@ struct attr { /* EVPN MAC Mobility sequence number, if any. */ uint32_t mm_seqnum; + /* highest MM sequence number rxed in a MAC-IP route from an + * ES peer (this includes both proxy and non-proxy MAC-IP + * advertisements from ES peers). + * This is only applicable to local paths in the VNI routing + * table and derived from other imported/non-best paths. + */ + uint32_t mm_sync_seqnum; /* EVPN local router-mac */ struct ethaddr rmac; @@ -253,6 +284,9 @@ struct attr { /* Link bandwidth value, if any. */ uint32_t link_bw; + + /* EVPN ES */ + esi_t esi; }; /* rmap_change_flags definition */ diff --git a/bgpd/bgp_attr_evpn.c b/bgpd/bgp_attr_evpn.c index 65072088ae..aa0c59f3a7 100644 --- a/bgpd/bgp_attr_evpn.c +++ b/bgpd/bgp_attr_evpn.c @@ -54,47 +54,27 @@ void bgp_add_routermac_ecom(struct attr *attr, struct ethaddr *routermac) * format accepted: AA:BB:CC:DD:EE:FF:GG:HH:II:JJ * if id is null, check only is done */ -bool str2esi(const char *str, struct eth_segment_id *id) +bool str2esi(const char *str, esi_t *id) { - unsigned int a[ESI_LEN]; + unsigned int a[ESI_BYTES]; int i; if (!str) return false; if (sscanf(str, "%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x", a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7, a + 8, a + 9) - != ESI_LEN) { + != ESI_BYTES) { /* error in incoming str length */ return false; } /* valid mac address */ if (!id) return true; - for (i = 0; i < ESI_LEN; ++i) + for (i = 0; i < ESI_BYTES; ++i) id->val[i] = a[i] & 0xff; return true; } -char *esi2str(struct eth_segment_id *id) -{ - char *ptr; - uint8_t *val; - - if (!id) - 
return NULL; - - val = id->val; - ptr = XMALLOC(MTYPE_TMP, - (ESI_LEN * 2 + ESI_LEN - 1 + 1) * sizeof(char)); - - snprintf(ptr, (ESI_LEN * 2 + ESI_LEN - 1 + 1), - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", val[0], - val[1], val[2], val[3], val[4], val[5], val[6], val[7], val[8], - val[9]); - - return ptr; -} - char *ecom_mac2str(char *ecom_mac) { char *en; @@ -215,7 +195,8 @@ uint32_t bgp_attr_mac_mobility_seqnum(struct attr *attr, uint8_t *sticky) /* * return true if attr contains router flag extended community */ -void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag) +void bgp_attr_evpn_na_flag(struct attr *attr, + uint8_t *router_flag, bool *proxy) { struct ecommunity *ecom; int i; @@ -237,10 +218,14 @@ void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag) if (type == ECOMMUNITY_ENCODE_EVPN && sub_type == ECOMMUNITY_EVPN_SUBTYPE_ND) { val = *pnt++; - if (val & ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG) { + + if (val & ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG) *router_flag = 1; - break; - } + + if (val & ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG) + *proxy = true; + + break; } } } @@ -292,14 +277,3 @@ extern bool is_zero_gw_ip(const union gw_addr *gw_ip, const afi_t afi) return false; } - -extern bool is_zero_esi(const struct eth_segment_id *esi) -{ - int i; - - for (i = 0; i < ESI_LEN; i++) - if (esi->val[i]) - return false; - - return true; -} diff --git a/bgpd/bgp_attr_evpn.h b/bgpd/bgp_attr_evpn.h index c1bfd83765..19c028a826 100644 --- a/bgpd/bgp_attr_evpn.h +++ b/bgpd/bgp_attr_evpn.h @@ -21,38 +21,20 @@ #ifndef _QUAGGA_BGP_ATTR_EVPN_H #define _QUAGGA_BGP_ATTR_EVPN_H -/* value of first byte of ESI */ -#define ESI_TYPE_ARBITRARY 0 /* */ -#define ESI_TYPE_LACP 1 /* <> */ -#define ESI_TYPE_BRIDGE 2 /* <Root bridge Mac-6B>:<Root Br Priority-2B>:00 */ -#define ESI_TYPE_MAC 3 /* <Syst Mac Add-6B>:<Local Discriminator Value-3B> */ -#define ESI_TYPE_ROUTER 4 /* <RouterId-4B>:<Local Discriminator Value-4B> */ -#define ESI_TYPE_AS 5 /* 
<AS-4B>:<Local Discriminator Value-4B> */ - -#define MAX_ESI {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff} -#define ESI_LEN 10 - #define MAX_ET 0xffffffff struct attr; -/* EVPN ESI */ -struct eth_segment_id { - uint8_t val[ESI_LEN]; -}; - union gw_addr { struct in_addr ipv4; struct in6_addr ipv6; }; struct bgp_route_evpn { - struct eth_segment_id eth_s_id; union gw_addr gw_ip; }; -extern bool str2esi(const char *str, struct eth_segment_id *id); -extern char *esi2str(struct eth_segment_id *id); +extern bool str2esi(const char *str, esi_t *id); extern char *ecom_mac2str(char *ecom_mac); extern void bgp_add_routermac_ecom(struct attr *attr, @@ -64,9 +46,9 @@ extern uint32_t bgp_attr_mac_mobility_seqnum(struct attr *attr, uint8_t *sticky); extern uint8_t bgp_attr_default_gw(struct attr *attr); -extern void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag); +extern void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag, + bool *proxy); extern bool is_zero_gw_ip(const union gw_addr *gw_ip, afi_t afi); -extern bool is_zero_esi(const struct eth_segment_id *esi); #endif /* _QUAGGA_BGP_ATTR_EVPN_H */ diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c index 9f32d450b9..255a7f238b 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -64,6 +64,7 @@ unsigned long conf_bgp_debug_flowspec; unsigned long conf_bgp_debug_labelpool; unsigned long conf_bgp_debug_pbr; unsigned long conf_bgp_debug_graceful_restart; +unsigned long conf_bgp_debug_evpn_mh; unsigned long term_bgp_debug_as4; unsigned long term_bgp_debug_neighbor_events; @@ -82,6 +83,7 @@ unsigned long term_bgp_debug_flowspec; unsigned long term_bgp_debug_labelpool; unsigned long term_bgp_debug_pbr; unsigned long term_bgp_debug_graceful_restart; +unsigned long term_bgp_debug_evpn_mh; struct list *bgp_debug_neighbor_events_peers = NULL; struct list *bgp_debug_keepalive_peers = NULL; @@ -2006,6 +2008,57 @@ DEFUN (no_debug_bgp_pbr, return CMD_SUCCESS; } +DEFPY (debug_bgp_evpn_mh, + 
debug_bgp_evpn_mh_cmd, + "[no$no] debug bgp evpn mh <es$es|route$rt>", + NO_STR + DEBUG_STR + BGP_STR + "EVPN\n" + "Multihoming\n" + "Ethernet Segment debugging\n" + "Route debugging\n") +{ + if (es) { + if (vty->node == CONFIG_NODE) { + if (no) + DEBUG_OFF(evpn_mh, EVPN_MH_ES); + else + DEBUG_ON(evpn_mh, EVPN_MH_ES); + } else { + if (no) { + TERM_DEBUG_OFF(evpn_mh, EVPN_MH_ES); + vty_out(vty, + "BGP EVPN-MH ES debugging is off\n"); + } else { + TERM_DEBUG_ON(evpn_mh, EVPN_MH_ES); + vty_out(vty, + "BGP EVPN-MH ES debugging is on\n"); + } + } + } + if (rt) { + if (vty->node == CONFIG_NODE) { + if (no) + DEBUG_OFF(evpn_mh, EVPN_MH_RT); + else + DEBUG_ON(evpn_mh, EVPN_MH_RT); + } else { + if (no) { + TERM_DEBUG_OFF(evpn_mh, EVPN_MH_RT); + vty_out(vty, + "BGP EVPN-MH route debugging is off\n"); + } else { + TERM_DEBUG_ON(evpn_mh, EVPN_MH_RT); + vty_out(vty, + "BGP EVPN-MH route debugging is on\n"); + } + } + } + + return CMD_SUCCESS; +} + DEFUN (debug_bgp_labelpool, debug_bgp_labelpool_cmd, "debug bgp labelpool", @@ -2085,6 +2138,8 @@ DEFUN (no_debug_bgp, TERM_DEBUG_OFF(pbr, PBR); TERM_DEBUG_OFF(pbr, PBR_ERROR); TERM_DEBUG_OFF(graceful_restart, GRACEFUL_RESTART); + TERM_DEBUG_OFF(evpn_mh, EVPN_MH_ES); + TERM_DEBUG_OFF(evpn_mh, EVPN_MH_RT); vty_out(vty, "All possible debugging has been turned off\n"); @@ -2169,6 +2224,11 @@ DEFUN_NOSH (show_debugging_bgp, if (BGP_DEBUG(pbr, PBR_ERROR)) vty_out(vty, " BGP policy based routing error debugging is on\n"); + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + vty_out(vty, " BGP EVPN-MH ES debugging is on\n"); + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + vty_out(vty, " BGP EVPN-MH route debugging is on\n"); + vty_out(vty, "\n"); return CMD_SUCCESS; } @@ -2284,6 +2344,16 @@ static int bgp_config_write_debug(struct vty *vty) vty_out(vty, "debug bgp graceful-restart\n"); write++; } + + if (CONF_BGP_DEBUG(evpn_mh, EVPN_MH_ES)) { + vty_out(vty, "debug bgp evpn mh es\n"); + write++; + } + if (CONF_BGP_DEBUG(evpn_mh, EVPN_MH_RT)) { + vty_out(vty, 
"debug bgp evpn mh route\n"); + write++; + } + return write; } @@ -2410,6 +2480,8 @@ void bgp_debug_init(void) install_element(ENABLE_NODE, &no_debug_bgp_pbr_cmd); install_element(CONFIG_NODE, &no_debug_bgp_pbr_cmd); + install_element(ENABLE_NODE, &debug_bgp_evpn_mh_cmd); + install_element(CONFIG_NODE, &debug_bgp_evpn_mh_cmd); } /* Return true if this prefix is on the per_prefix_list of prefixes to debug diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h index 69f25566a9..f16cfee4f2 100644 --- a/bgpd/bgp_debug.h +++ b/bgpd/bgp_debug.h @@ -77,6 +77,7 @@ extern unsigned long conf_bgp_debug_flowspec; extern unsigned long conf_bgp_debug_labelpool; extern unsigned long conf_bgp_debug_pbr; extern unsigned long conf_bgp_debug_graceful_restart; +extern unsigned long conf_bgp_debug_evpn_mh; extern unsigned long term_bgp_debug_as4; extern unsigned long term_bgp_debug_neighbor_events; @@ -93,6 +94,7 @@ extern unsigned long term_bgp_debug_flowspec; extern unsigned long term_bgp_debug_labelpool; extern unsigned long term_bgp_debug_pbr; extern unsigned long term_bgp_debug_graceful_restart; +extern unsigned long term_bgp_debug_evpn_mh; extern struct list *bgp_debug_neighbor_events_peers; extern struct list *bgp_debug_keepalive_peers; @@ -129,6 +131,8 @@ struct bgp_debug_filter { #define BGP_DEBUG_LABELPOOL 0x01 #define BGP_DEBUG_PBR 0x01 #define BGP_DEBUG_PBR_ERROR 0x02 +#define BGP_DEBUG_EVPN_MH_ES 0x01 +#define BGP_DEBUG_EVPN_MH_RT 0x02 #define BGP_DEBUG_PACKET_SEND 0x01 #define BGP_DEBUG_PACKET_SEND_DETAIL 0x02 diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c index d6c311bfa0..f2aac3646c 100644 --- a/bgpd/bgp_ecommunity.c +++ b/bgpd/bgp_ecommunity.c @@ -810,6 +810,35 @@ char *ecommunity_ecom2str(struct ecommunity *ecom, int format, int filter) ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG)) strlcpy(encbuf, "ND:Router Flag", sizeof(encbuf)); + if (CHECK_FLAG( + flags, + ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG)) + strlcpy(encbuf, "ND:Proxy", + sizeof(encbuf)); + } else if (*pnt + == 
ECOMMUNITY_EVPN_SUBTYPE_ES_IMPORT_RT) { + struct ethaddr mac; + + pnt++; + memcpy(&mac, pnt, ETH_ALEN); + snprintf(encbuf, + sizeof(encbuf), + "ES-Import-Rt:%02x:%02x:%02x:%02x:%02x:%02x", + (uint8_t)mac.octet[0], + (uint8_t)mac.octet[1], + (uint8_t)mac.octet[2], + (uint8_t)mac.octet[3], + (uint8_t)mac.octet[4], + (uint8_t)mac.octet[5]); + } else if (*pnt + == ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL) { + uint8_t flags = *++pnt; + + snprintf(encbuf, + sizeof(encbuf), "ESI-label-Rt:%s", + (flags & + ECOMMUNITY_EVPN_SUBTYPE_ESI_SA_FLAG) ? + "SA":"AA"); } else unk_ecom = 1; } else if (type == ECOMMUNITY_ENCODE_REDIRECT_IP_NH) { @@ -865,21 +894,6 @@ char *ecommunity_ecom2str(struct ecommunity *ecom, int format, int filter) } else if (sub_type == ECOMMUNITY_TRAFFIC_MARKING) { snprintf(encbuf, sizeof(encbuf), "FS:marking %u", *(pnt + 5)); - } else if (*pnt - == ECOMMUNITY_EVPN_SUBTYPE_ES_IMPORT_RT) { - struct ethaddr mac; - - memcpy(&mac, pnt, ETH_ALEN); - - snprintf( - encbuf, sizeof(encbuf), - "ES-Import-Rt:%02x:%02x:%02x:%02x:%02x:%02x", - (uint8_t)mac.octet[0], - (uint8_t)mac.octet[1], - (uint8_t)mac.octet[2], - (uint8_t)mac.octet[3], - (uint8_t)mac.octet[4], - (uint8_t)mac.octet[5]); } else unk_ecom = 1; } else if (type == ECOMMUNITY_ENCODE_AS_NON_TRANS) { diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h index 7deae8e746..812bcc46e7 100644 --- a/bgpd/bgp_ecommunity.h +++ b/bgpd/bgp_ecommunity.h @@ -73,8 +73,12 @@ #define ECOMMUNITY_EVPN_SUBTYPE_ND 0x08 #define ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY_FLAG_STICKY 0x01 -#define ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG 0x01 -#define ECOMMUNITY_EVPN_SUBTYPE_ND_OVERRIDE_FLAG 0x02 + +#define ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG 0x01 +#define ECOMMUNITY_EVPN_SUBTYPE_ND_OVERRIDE_FLAG 0x02 +#define ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG 0x04 + +#define ECOMMUNITY_EVPN_SUBTYPE_ESI_SA_FLAG (1 << 0) /* single-active */ /* Low-order octet of the Extended Communities type field for OPAQUE types */ #define 
ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP 0x0c diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 8c5d6421f1..4a5d5c3b6e 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -40,6 +40,7 @@ #include "bgpd/bgp_label.h" #include "bgpd/bgp_evpn.h" #include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_ecommunity.h" #include "bgpd/bgp_encap_types.h" #include "bgpd/bgp_debug.h" @@ -54,61 +55,24 @@ /* * Definitions and external declarations. */ -extern struct zclient *zclient; - DEFINE_QOBJ_TYPE(bgpevpn) -DEFINE_QOBJ_TYPE(evpnes) +DEFINE_QOBJ_TYPE(bgp_evpn_es) /* * Static function declarations */ -static void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi, - struct bgp_dest *dest, - struct bgp_path_info **pi); static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn); +static void bgp_evpn_update_type2_route_entry(struct bgp *bgp, + struct bgpevpn *vpn, + struct bgp_node *rn, struct bgp_path_info *local_pi, + const char *caller); +static struct in_addr zero_vtep_ip; /* * Private functions. */ -/* compare two IPV4 VTEP IPs */ -static int evpn_vtep_ip_cmp(void *p1, void *p2) -{ - const struct in_addr *ip1 = p1; - const struct in_addr *ip2 = p2; - - return ip1->s_addr - ip2->s_addr; -} - -/* - * Make hash key for ESI. - */ -static unsigned int esi_hash_keymake(const void *p) -{ - const struct evpnes *pes = p; - const void *pnt = (void *)pes->esi.val; - - return jhash(pnt, ESI_BYTES, 0xa5a5a55a); -} - -/* - * Compare two ESIs. - */ -static bool esi_cmp(const void *p1, const void *p2) -{ - const struct evpnes *pes1 = p1; - const struct evpnes *pes2 = p2; - - if (pes1 == NULL && pes2 == NULL) - return true; - - if (pes1 == NULL || pes2 == NULL) - return false; - - return (memcmp(pes1->esi.val, pes2->esi.val, ESI_BYTES) == 0); -} - /* * Make vni hash key. 
*/ @@ -133,7 +97,7 @@ static bool vni_hash_cmp(const void *p1, const void *p2) return (vpn1->vni == vpn2->vni); } -static int vni_list_cmp(void *p1, void *p2) +int vni_list_cmp(void *p1, void *p2) { const struct bgpevpn *vpn1 = p1; const struct bgpevpn *vpn2 = p2; @@ -579,19 +543,54 @@ static void evpn_convert_nexthop_to_ipv6(struct attr *attr) attr->mp_nexthop_len = IPV6_MAX_BYTELEN; } +struct bgp_node *bgp_global_evpn_node_get( + struct bgp_table *table, afi_t afi, + safi_t safi, const struct prefix_evpn *evp, + struct prefix_rd *prd) +{ + struct prefix_evpn global_p; + + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) { + /* prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy of the prefix + */ + evpn_type1_prefix_global_copy(&global_p, evp); + evp = &global_p; + } + return bgp_afi_node_get(table, afi, safi, (struct prefix *)evp, prd); +} + +struct bgp_node *bgp_global_evpn_node_lookup( + struct bgp_table *table, afi_t afi, + safi_t safi, const struct prefix_evpn *evp, + struct prefix_rd *prd) +{ + struct prefix_evpn global_p; + + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) { + /* prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy of the prefix + */ + evpn_type1_prefix_global_copy(&global_p, evp); + evp = &global_p; + } + return bgp_afi_node_lookup(table, afi, safi, (struct prefix *)evp, prd); +} + /* * Add (update) or delete MACIP from zebra. */ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, const struct prefix_evpn *p, struct in_addr remote_vtep_ip, int add, - uint8_t flags, uint32_t seq) + uint8_t flags, uint32_t seq, esi_t *esi) { struct stream *s; int ipa_len; char buf1[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; char buf3[INET6_ADDRSTRLEN]; + static struct in_addr zero_remote_vtep_ip; /* Check socket. 
*/ if (!zclient || zclient->sock < 0) @@ -605,6 +604,9 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, __func__); return 0; } + + if (!esi) + esi = zero_esi; s = zclient->obuf; stream_reset(s); @@ -622,13 +624,20 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, stream_putl(s, ipa_len); stream_put(s, &p->prefix.macip_addr.ip.ip.addr, ipa_len); } - stream_put_in_addr(s, &remote_vtep_ip); + /* If the ESI is valid that becomes the nexthop; zero out the + * VTEP-IP for that case + */ + if (bgp_evpn_is_esi_valid(esi)) + stream_put_in_addr(s, &zero_remote_vtep_ip); + else + stream_put_in_addr(s, &remote_vtep_ip); /* TX flags - MAC sticky status and/or gateway mac */ /* Also TX the sequence number of the best route. */ if (add) { stream_putc(s, flags); stream_putl(s, seq); + stream_put(s, esi, sizeof(esi_t)); } stream_putw_at(s, 0, stream_get_endp(s)); @@ -698,40 +707,6 @@ static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, } /* - * Build extended community for EVPN ES (type-4) route - */ -static void build_evpn_type4_route_extcomm(struct evpnes *es, - struct attr *attr) -{ - struct ecommunity ecom_encap; - struct ecommunity ecom_es_rt; - struct ecommunity_val eval; - struct ecommunity_val eval_es_rt; - bgp_encap_types tnl_type; - struct ethaddr mac; - - /* Encap */ - tnl_type = BGP_ENCAP_TYPE_VXLAN; - memset(&ecom_encap, 0, sizeof(ecom_encap)); - encode_encap_extcomm(tnl_type, &eval); - ecom_encap.size = 1; - ecom_encap.val = (uint8_t *)eval.val; - attr->ecommunity = ecommunity_dup(&ecom_encap); - - /* ES import RT */ - memset(&mac, 0, sizeof(struct ethaddr)); - memset(&ecom_es_rt, 0, sizeof(ecom_es_rt)); - es_get_system_mac(&es->esi, &mac); - encode_es_rt_extcomm(&eval_es_rt, &mac); - ecom_es_rt.size = 1; - ecom_es_rt.val = (uint8_t *)eval_es_rt.val; - attr->ecommunity = - ecommunity_merge(attr->ecommunity, &ecom_es_rt); - - attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); -} - 
-/* * Build extended communities for EVPN prefix route. */ static void build_evpn_type5_route_extcomm(struct bgp *bgp_vrf, @@ -800,6 +775,7 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, struct ecommunity_val eval_default_gw; struct ecommunity_val eval_rmac; struct ecommunity_val eval_na; + bool proxy; bgp_encap_types tnl_type; struct listnode *node, *nnode; @@ -861,9 +837,10 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, ecommunity_merge(attr->ecommunity, &ecom_default_gw); } - if (attr->router_flag) { + proxy = !!(attr->es_flags & ATTR_ES_PROXY_ADVERT); + if (attr->router_flag || proxy) { memset(&ecom_na, 0, sizeof(ecom_na)); - encode_na_flag_extcomm(&eval_na, attr->router_flag); + encode_na_flag_extcomm(&eval_na, attr->router_flag, proxy); ecom_na.size = 1; ecom_na.val = (uint8_t *)eval_na.val; attr->ecommunity = ecommunity_merge(attr->ecommunity, @@ -934,19 +911,60 @@ static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn, int ret; uint8_t flags; int flood_control; + uint32_t seq; if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { flags = 0; - if (pi->attr->sticky) - SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); - if (pi->attr->default_gw) - SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); - if (is_evpn_prefix_ipaddr_v6(p) && - pi->attr->router_flag) - SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + + if (pi->sub_type == BGP_ROUTE_IMPORTED) { + if (pi->attr->sticky) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + if (pi->attr->default_gw) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + if (is_evpn_prefix_ipaddr_v6(p) && + pi->attr->router_flag) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + + seq = mac_mobility_seqnum(pi->attr); + /* if local ES notify zebra that this is a sync path */ + if (bgp_evpn_attr_is_local_es(pi->attr)) { + SET_FLAG(flags, ZEBRA_MACIP_TYPE_SYNC_PATH); + if (bgp_evpn_attr_is_proxy(pi->attr)) + SET_FLAG(flags, + ZEBRA_MACIP_TYPE_PROXY_ADVERT); + } + } else { + if 
(!bgp_evpn_attr_is_sync(pi->attr)) + return 0; + + /* if a local path is being turned around and sent + * to zebra it is because it is a sync path on + * a local ES + */ + SET_FLAG(flags, ZEBRA_MACIP_TYPE_SYNC_PATH); + /* supply the highest peer seq number to zebra + * for MM seq syncing + */ + seq = bgp_evpn_attr_get_sync_seq(pi->attr); + /* if any of the paths from the peer have the ROUTER + * flag set install the local entry as a router entry + */ + if (is_evpn_prefix_ipaddr_v6(p) && + (pi->attr->es_flags & + ATTR_ES_PEER_ROUTER)) + SET_FLAG(flags, + ZEBRA_MACIP_TYPE_ROUTER_FLAG); + + if (!(pi->attr->es_flags & ATTR_ES_PEER_ACTIVE)) + SET_FLAG(flags, + ZEBRA_MACIP_TYPE_PROXY_ADVERT); + } + ret = bgp_zebra_send_remote_macip( - bgp, vpn, p, pi->attr->nexthop, 1, flags, - mac_mobility_seqnum(pi->attr)); + bgp, vpn, p, pi->attr->nexthop, 1, flags, + seq, bgp_evpn_attr_get_esi(pi->attr)); + } else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) { + ret = bgp_evpn_remote_es_evi_add(bgp, vpn, p); } else { switch (pi->attr->pmsi_tnl_type) { case PMSI_TNLTYPE_INGR_REPL: @@ -976,7 +994,9 @@ static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn, if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) ret = bgp_zebra_send_remote_macip(bgp, vpn, p, remote_vtep_ip, - 0, 0, 0); + 0, 0, 0, NULL); + else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) + ret = bgp_evpn_remote_es_evi_del(bgp, vpn, p); else ret = bgp_zebra_send_remote_vtep(bgp, vpn, p, VXLAN_FLOOD_DISABLED, 0); @@ -991,19 +1011,36 @@ static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn, */ static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn, struct bgp_dest *dest, - struct bgp_path_info *old_local) + struct bgp_path_info *old_local, + struct bgp_path_info *new_select) { struct bgp_dest *global_dest; struct bgp_path_info *pi; afi_t afi = AFI_L2VPN; safi_t safi = SAFI_EVPN; + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) { + char prefix_buf[PREFIX_STRLEN]; + char 
esi_buf[ESI_STR_LEN]; + char esi_buf2[ESI_STR_LEN]; + struct prefix_evpn *evp = (struct prefix_evpn *)&dest->p; + + zlog_debug("local path deleted %s es %s; new-path-es %s", + prefix2str(evp, + prefix_buf, sizeof(prefix_buf)), + esi_to_str(&old_local->attr->esi, + esi_buf, sizeof(esi_buf)), + new_select ? esi_to_str(&new_select->attr->esi, + esi_buf2, sizeof(esi_buf2)) : ""); + } + /* Locate route node in the global EVPN routing table. Note that * this table is a 2-level tree (RD-level + Prefix-level) similar to * L3VPN routes. */ - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - bgp_dest_get_prefix(dest), &vpn->prd); + global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)bgp_dest_get_prefix(dest), + &vpn->prd); if (global_dest) { /* Delete route entry in the global EVPN table. */ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); @@ -1020,172 +1057,12 @@ static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn, bgp_path_info_delete(dest, old_local); } -static struct in_addr *es_vtep_new(struct in_addr vtep) -{ - struct in_addr *ip; - - ip = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(struct in_addr)); - - ip->s_addr = vtep.s_addr; - return ip; -} - -static void es_vtep_free(struct in_addr *ip) -{ - XFREE(MTYPE_BGP_EVPN_ES_VTEP, ip); -} - -/* check if VTEP is already part of the list */ -static int is_vtep_present_in_list(struct list *list, - struct in_addr vtep) -{ - struct listnode *node = NULL; - struct in_addr *tmp; - - for (ALL_LIST_ELEMENTS_RO(list, node, tmp)) { - if (tmp->s_addr == vtep.s_addr) - return 1; - } - return 0; -} - -/* - * Best path for ES route was changed, - * update the list of VTEPs for this ES - */ -static int evpn_es_install_vtep(struct bgp *bgp, struct evpnes *es, - const struct prefix_evpn *p, - struct in_addr rvtep) -{ - struct in_addr *vtep_ip; - - if (is_vtep_present_in_list(es->vtep_list, rvtep)) - return 0; - - - vtep_ip = 
es_vtep_new(rvtep); - if (vtep_ip) - listnode_add_sort(es->vtep_list, vtep_ip); - return 0; -} - -/* - * Best path for ES route was changed, - * update the list of VTEPs for this ES - */ -static int evpn_es_uninstall_vtep(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p, - struct in_addr rvtep) -{ - struct listnode *node, *nnode, *node_to_del = NULL; - struct in_addr *tmp; - - for (ALL_LIST_ELEMENTS(es->vtep_list, node, nnode, tmp)) { - if (tmp->s_addr == rvtep.s_addr) { - es_vtep_free(tmp); - node_to_del = node; - } - } - - if (node_to_del) - list_delete_node(es->vtep_list, node_to_del); - - return 0; -} - -/* - * Calculate the best path for a ES(type-4) route. - */ -static int evpn_es_route_select_install(struct bgp *bgp, struct evpnes *es, - struct bgp_dest *dest) -{ - int ret = 0; - afi_t afi = AFI_L2VPN; - safi_t safi = SAFI_EVPN; - struct bgp_path_info *old_select; /* old best */ - struct bgp_path_info *new_select; /* new best */ - struct bgp_path_info_pair old_and_new; - - /* Compute the best path. 
*/ - bgp_best_selection(bgp, dest, &bgp->maxpaths[afi][safi], &old_and_new, - afi, safi); - old_select = old_and_new.old; - new_select = old_and_new.new; - - /* - * If the best path hasn't changed - see if something needs to be - * updated - */ - if (old_select && old_select == new_select - && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_IMPORTED - && !CHECK_FLAG(dest->flags, BGP_NODE_USER_CLEAR) - && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED) - && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) { - if (bgp_zebra_has_route_changed(old_select)) { - ret = evpn_es_install_vtep( - bgp, es, - (const struct prefix_evpn *)bgp_dest_get_prefix( - dest), - old_select->attr->nexthop); - } - UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG); - UNSET_FLAG(old_select->flags, BGP_PATH_LINK_BW_CHG); - bgp_zebra_clear_route_change_flags(dest); - return ret; - } - - /* If the user did a "clear" this flag will be set */ - UNSET_FLAG(dest->flags, BGP_NODE_USER_CLEAR); - - /* - * bestpath has changed; update relevant fields and install or uninstall - * into the zebra RIB. 
- */ - if (old_select || new_select) - bgp_bump_version(dest); - - if (old_select) - bgp_path_info_unset_flag(dest, old_select, BGP_PATH_SELECTED); - if (new_select) { - bgp_path_info_set_flag(dest, new_select, BGP_PATH_SELECTED); - bgp_path_info_unset_flag(dest, new_select, - BGP_PATH_ATTR_CHANGED); - UNSET_FLAG(new_select->flags, BGP_PATH_MULTIPATH_CHG); - UNSET_FLAG(new_select->flags, BGP_PATH_LINK_BW_CHG); - } - - if (new_select && new_select->type == ZEBRA_ROUTE_BGP - && new_select->sub_type == BGP_ROUTE_IMPORTED) { - ret = evpn_es_install_vtep( - bgp, es, - (const struct prefix_evpn *)bgp_dest_get_prefix(dest), - new_select->attr->nexthop); - } else { - if (old_select && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_IMPORTED) - ret = evpn_es_uninstall_vtep( - bgp, es, - (struct prefix_evpn *)bgp_dest_get_prefix(dest), - old_select->attr->nexthop); - } - - /* Clear any route change flags. */ - bgp_zebra_clear_route_change_flags(dest); - - /* Reap old select bgp_path_info, if it has been removed */ - if (old_select && CHECK_FLAG(old_select->flags, BGP_PATH_REMOVED)) - bgp_path_info_reap(dest, old_select); - - return ret; -} - /* * Calculate the best path for an EVPN route. Install/update best path in zebra, * if appropriate. + * Note: vpn is NULL for local EAD-ES routes. */ -static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, +int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, struct bgp_dest *dest) { struct bgp_path_info *old_select, *new_select; @@ -1201,12 +1078,15 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, new_select = old_and_new.new; /* If the best path hasn't changed - see if there is still something to - * update - * to zebra RIB. + * update to zebra RIB. + * Remote routes and SYNC routes (i.e. local routes with + * SYNCED_FROM_PEER flag) need to be updated to zebra on any attr + * change. 
*/ if (old_select && old_select == new_select && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_IMPORTED + && (old_select->sub_type == BGP_ROUTE_IMPORTED || + bgp_evpn_attr_is_sync(old_select->attr)) && !CHECK_FLAG(dest->flags, BGP_NODE_USER_CLEAR) && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED) && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) { @@ -1241,8 +1121,12 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, UNSET_FLAG(new_select->flags, BGP_PATH_LINK_BW_CHG); } + /* a local entry with the SYNC flag also results in a MAC-IP update + * to zebra + */ if (new_select && new_select->type == ZEBRA_ROUTE_BGP - && new_select->sub_type == BGP_ROUTE_IMPORTED) { + && (new_select->sub_type == BGP_ROUTE_IMPORTED || + bgp_evpn_attr_is_sync(new_select->attr))) { ret = evpn_zebra_install( bgp, vpn, (struct prefix_evpn *)bgp_dest_get_prefix(dest), @@ -1255,10 +1139,13 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, * need to do an implicit delete and withdraw that route from * peers. 
*/ - if (old_select && old_select->peer == bgp->peer_self - && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_STATIC) - evpn_delete_old_local_route(bgp, vpn, dest, old_select); + if (new_select->sub_type == BGP_ROUTE_IMPORTED && + old_select && old_select->peer == bgp->peer_self + && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_STATIC + && vpn) + evpn_delete_old_local_route(bgp, vpn, dest, + old_select, new_select); } else { if (old_select && old_select->type == ZEBRA_ROUTE_BGP && old_select->sub_type == BGP_ROUTE_IMPORTED) @@ -1279,222 +1166,21 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, return ret; } -/* - * Return true if the local ri for this rn is of type gateway mac - */ -static int evpn_route_is_def_gw(struct bgp *bgp, struct bgp_dest *dest) -{ - struct bgp_path_info *tmp_pi = NULL; - struct bgp_path_info *local_pi = NULL; - - local_pi = NULL; - for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi; - tmp_pi = tmp_pi->next) { - if (tmp_pi->peer == bgp->peer_self - && tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_STATIC) - local_pi = tmp_pi; - } - - if (!local_pi) - return 0; - - return local_pi->attr->default_gw; -} - - -/* - * Return true if the local ri for this rn has sticky set - */ -static int evpn_route_is_sticky(struct bgp *bgp, struct bgp_dest *dest) +static struct bgp_path_info *bgp_evpn_route_get_local_path( + struct bgp *bgp, struct bgp_dest *dest) { struct bgp_path_info *tmp_pi; - struct bgp_path_info *local_pi; - - local_pi = NULL; - for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi; - tmp_pi = tmp_pi->next) { - if (tmp_pi->peer == bgp->peer_self - && tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_STATIC) - local_pi = tmp_pi; - } - - if (!local_pi) - return 0; - - return local_pi->attr->sticky; -} - -/* - * create or update EVPN type4 route entry. - * This could be in the ES table or the global table. 
- * TODO: handle remote ES (type4) routes as well - */ -static int update_evpn_type4_route_entry(struct bgp *bgp, struct evpnes *es, - afi_t afi, safi_t safi, - struct bgp_dest *dest, - struct attr *attr, int add, - struct bgp_path_info **ri, - int *route_changed) -{ - char buf[ESI_STR_LEN]; - char buf1[INET6_ADDRSTRLEN]; - struct bgp_path_info *tmp_pi = NULL; - struct bgp_path_info *local_pi = NULL; /* local route entry if any */ - struct bgp_path_info *remote_pi = NULL; /* remote route entry if any */ - struct attr *attr_new = NULL; - const struct prefix_evpn *evp = NULL; - - *ri = NULL; - *route_changed = 1; - evp = (const struct prefix_evpn *)bgp_dest_get_prefix(dest); + struct bgp_path_info *local_pi = NULL; - /* locate the local and remote entries if any */ for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi; - tmp_pi = tmp_pi->next) { - if (tmp_pi->peer == bgp->peer_self - && tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_STATIC) + tmp_pi = tmp_pi->next) { + if (bgp_evpn_is_path_local(bgp, tmp_pi)) { local_pi = tmp_pi; - if (tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_IMPORTED - && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID)) - remote_pi = tmp_pi; - } - - /* we don't expect to see a remote_ri at this point. - * An ES route has esi + vtep_ip as the key, - * We shouldn't see the same route from any other vtep. - */ - if (remote_pi) { - flog_err( - EC_BGP_ES_INVALID, - "%u ERROR: local es route for ESI: %s Vtep %s also learnt from remote", - bgp->vrf_id, - esi_to_str(&evp->prefix.es_addr.esi, buf, sizeof(buf)), - ipaddr2str(&es->originator_ip, buf1, sizeof(buf1))); - return -1; - } - - if (!local_pi && !add) - return 0; - - /* create or update the entry */ - if (!local_pi) { - - /* Add or update attribute to hash */ - attr_new = bgp_attr_intern(attr); - - /* Create new route with its attribute. 
*/ - tmp_pi = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0, - bgp->peer_self, attr_new, dest); - SET_FLAG(tmp_pi->flags, BGP_PATH_VALID); - - /* add the newly created path to the route-node */ - bgp_path_info_add(dest, tmp_pi); - } else { - tmp_pi = local_pi; - if (attrhash_cmp(tmp_pi->attr, attr) - && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED)) - *route_changed = 0; - else { - /* The attribute has changed. - * Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(attr); - bgp_path_info_set_flag(dest, tmp_pi, - BGP_PATH_ATTR_CHANGED); - - /* Restore route, if needed. */ - if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED)) - bgp_path_info_restore(dest, tmp_pi); - - /* Unintern existing, set to new. */ - bgp_attr_unintern(&tmp_pi->attr); - tmp_pi->attr = attr_new; - tmp_pi->uptime = bgp_clock(); + break; } } - /* Return back the route entry. */ - *ri = tmp_pi; - return 0; -} - -/* update evpn es (type-4) route */ -static int update_evpn_type4_route(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p) -{ - int ret = 0; - int route_changed = 0; - char buf[ESI_STR_LEN]; - char buf1[INET6_ADDRSTRLEN]; - afi_t afi = AFI_L2VPN; - safi_t safi = SAFI_EVPN; - struct attr attr; - struct attr *attr_new = NULL; - struct bgp_dest *dest = NULL; - struct bgp_path_info *pi = NULL; - - memset(&attr, 0, sizeof(struct attr)); - - /* Build path-attribute for this route. */ - bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); - attr.nexthop = es->originator_ip.ipaddr_v4; - attr.mp_nexthop_global_in = es->originator_ip.ipaddr_v4; - attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - - /* Set up extended community. */ - build_evpn_type4_route_extcomm(es, &attr); - - /* First, create (or fetch) route node within the ESI. */ - /* NOTE: There is no RD here. */ - dest = bgp_node_get(es->route_table, (struct prefix *)p); - - /* Create or update route entry. 
*/ - ret = update_evpn_type4_route_entry(bgp, es, afi, safi, dest, &attr, 1, - &pi, &route_changed); - if (ret != 0) { - flog_err(EC_BGP_ES_INVALID, - "%u ERROR: Failed to updated ES route ESI: %s VTEP %s", - bgp->vrf_id, - esi_to_str(&p->prefix.es_addr.esi, buf, sizeof(buf)), - ipaddr2str(&es->originator_ip, buf1, sizeof(buf1))); - } - - assert(pi); - attr_new = pi->attr; - - /* Perform route selection; - * this is just to set the flags correctly - * as local route in the ES always wins. - */ - evpn_es_route_select_install(bgp, es, dest); - bgp_dest_unlock_node(dest); - - /* If this is a new route or some attribute has changed, export the - * route to the global table. The route will be advertised to peers - * from there. Note that this table is a 2-level tree (RD-level + - * Prefix-level) similar to L3VPN routes. - */ - if (route_changed) { - struct bgp_path_info *global_pi; - - dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &es->prd); - update_evpn_type4_route_entry(bgp, es, afi, safi, dest, - attr_new, 1, &global_pi, - &route_changed); - - /* Schedule for processing and unlock node. */ - bgp_process(bgp, dest, afi, safi); - bgp_dest_unlock_node(dest); - } - - /* Unintern temporary. 
*/ - aspath_unintern(&attr.aspath); - return 0; + return local_pi; } static int update_evpn_type5_route_entry(struct bgp *bgp_evpn, @@ -1640,8 +1326,9 @@ static int update_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp, build_evpn_type5_route_extcomm(bgp_vrf, &attr); /* get the route node in global table */ - dest = bgp_afi_node_get(bgp_evpn->rib[afi][safi], afi, safi, - (struct prefix *)evp, &bgp_vrf->vrf_prd); + dest = bgp_global_evpn_node_get(bgp_evpn->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)evp, + &bgp_vrf->vrf_prd); assert(dest); /* create or update the route entry within the route node */ @@ -1660,15 +1347,137 @@ static int update_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp, return 0; } +static void bgp_evpn_get_sync_info(struct bgp *bgp, esi_t *esi, + struct bgp_node *rn, uint32_t loc_seq, uint32_t *max_sync_seq, + bool *active_on_peer, bool *peer_router, + bool *proxy_from_peer) +{ + struct bgp_path_info *tmp_pi; + struct bgp_path_info *second_best_path = NULL; + uint32_t tmp_mm_seq = 0; + esi_t *tmp_esi; + int paths_eq; + + /* find the best non-local path. a local path can only be present + * as best path + */ + for (tmp_pi = bgp_dest_get_bgp_path_info(rn); tmp_pi; + tmp_pi = tmp_pi->next) { + if (tmp_pi->sub_type != BGP_ROUTE_IMPORTED || + !CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID)) + continue; + + if (bgp_evpn_path_info_cmp(bgp, tmp_pi, + second_best_path, &paths_eq)) + second_best_path = tmp_pi; + } + + if (!second_best_path) + return; + + tmp_esi = bgp_evpn_attr_get_esi(second_best_path->attr); + /* if this has the same ES desination as the local path + * it is a sync path + */ + if (!memcmp(esi, tmp_esi, sizeof(esi_t))) { + tmp_mm_seq = mac_mobility_seqnum(second_best_path->attr); + if (tmp_mm_seq < loc_seq) + return; + + /* we have a non-proxy path from the ES peer. 
*/ + if (second_best_path->attr->es_flags & + ATTR_ES_PROXY_ADVERT) { + *proxy_from_peer = true; + } else { + *active_on_peer = true; + } + + if (second_best_path->attr->router_flag) + *peer_router = true; + + /* we use both proxy and non-proxy imports to + * determine the max sync sequence + */ + if (tmp_mm_seq > *max_sync_seq) + *max_sync_seq = tmp_mm_seq; + } +} + +/* Bubble up sync-info from all paths (non-best) to the local-path. + * This is need for MM sequence number syncing and proxy advertisement. + * Note: The local path can only exist as a best path in the + * VPN route table. It will take precedence over all sync paths. + */ +static void update_evpn_route_entry_sync_info(struct bgp *bgp, + struct bgp_node *rn, struct attr *attr, uint32_t loc_seq, + bool setup_sync) +{ + esi_t *esi; + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + + if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + return; + + esi = bgp_evpn_attr_get_esi(attr); + if (bgp_evpn_is_esi_valid(esi)) { + if (setup_sync) { + uint32_t max_sync_seq = 0; + bool active_on_peer = false; + bool peer_router = false; + bool proxy_from_peer = false; + + bgp_evpn_get_sync_info(bgp, esi, rn, loc_seq, + &max_sync_seq, &active_on_peer, + &peer_router, &proxy_from_peer); + attr->mm_sync_seqnum = max_sync_seq; + if (active_on_peer) + attr->es_flags |= ATTR_ES_PEER_ACTIVE; + else + attr->es_flags &= ~ATTR_ES_PEER_ACTIVE; + if (proxy_from_peer) + attr->es_flags |= ATTR_ES_PEER_PROXY; + else + attr->es_flags &= ~ATTR_ES_PEER_PROXY; + if (peer_router) + attr->es_flags |= ATTR_ES_PEER_ROUTER; + else + attr->es_flags &= ~ATTR_ES_PEER_ROUTER; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) { + char prefix_buf[PREFIX_STRLEN]; + char esi_buf[ESI_STR_LEN]; + + zlog_debug("setup sync info for %s es %s max_seq %d %s%s%s", + prefix2str(evp, prefix_buf, + sizeof(prefix_buf)), + esi_to_str(esi, esi_buf, + sizeof(esi_buf)), + max_sync_seq, + (attr->es_flags & ATTR_ES_PEER_ACTIVE) ? 
+ "peer-active " : "", + (attr->es_flags & ATTR_ES_PEER_PROXY) ? + "peer-proxy " : "", + (attr->es_flags & ATTR_ES_PEER_ROUTER) ? + "peer-router " : ""); + } + } + } else { + attr->mm_sync_seqnum = 0; + attr->es_flags &= ~ATTR_ES_PEER_ACTIVE; + attr->es_flags &= ~ATTR_ES_PEER_PROXY; + } +} + /* * Create or update EVPN route entry. This could be in the VNI route table * or the global route table. */ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, - afi_t afi, safi_t safi, - struct bgp_dest *dest, struct attr *attr, - int add, struct bgp_path_info **pi, - uint8_t flags, uint32_t seq) + afi_t afi, safi_t safi, struct bgp_dest *dest, + struct attr *attr, int add, + struct bgp_path_info **pi, uint8_t flags, + uint32_t seq, bool setup_sync, + bool *old_is_sync) { struct bgp_path_info *tmp_pi; struct bgp_path_info *local_pi; @@ -1684,14 +1493,7 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, memset(&label, 0, sizeof(label)); /* See if this is an update of an existing route, or a new add. */ - local_pi = NULL; - for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi; - tmp_pi = tmp_pi->next) { - if (tmp_pi->peer == bgp->peer_self - && tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_STATIC) - local_pi = tmp_pi; - } + local_pi = bgp_evpn_route_get_local_path(bgp, dest); /* If route doesn't exist already, create a new one, if told to. * Otherwise act based on whether the attributes of the route have @@ -1700,6 +1502,14 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, if (!local_pi && !add) return 0; + if (old_is_sync && local_pi) + *old_is_sync = bgp_evpn_attr_is_sync(local_pi->attr); + + /* if a local path is being added with a non-zero esi look + * for SYNC paths from ES peers and bubble up the sync-info + */ + update_evpn_route_entry_sync_info(bgp, dest, attr, seq, setup_sync); + /* For non-GW MACs, update MAC mobility seq number, if needed. 
*/ if (seq && !CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW)) add_mac_mobility_to_attr(seq, attr); @@ -1811,11 +1621,11 @@ static void evpn_zebra_reinstall_best_route(struct bgp *bgp, } if (curr_select && curr_select->type == ZEBRA_ROUTE_BGP - && curr_select->sub_type == BGP_ROUTE_IMPORTED) - evpn_zebra_install( - bgp, vpn, - (const struct prefix_evpn *)bgp_dest_get_prefix(dest), - curr_select); + && (curr_select->sub_type == BGP_ROUTE_IMPORTED || + bgp_evpn_attr_is_sync(curr_select->attr))) + evpn_zebra_install(bgp, vpn, + (const struct prefix_evpn *)bgp_dest_get_prefix(dest), + curr_select); } /* @@ -1842,7 +1652,7 @@ static void evpn_cleanup_local_non_best_route(struct bgp *bgp, zlog_debug("evicting local evpn prefix %pRN as remote won", dest); - evpn_delete_old_local_route(bgp, vpn, dest, local_pi); + evpn_delete_old_local_route(bgp, vpn, dest, local_pi, NULL); bgp_path_info_reap(dest, local_pi); /* tell zebra to re-add the best remote path */ @@ -1855,7 +1665,7 @@ static void evpn_cleanup_local_non_best_route(struct bgp *bgp, */ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, struct prefix_evpn *p, uint8_t flags, - uint32_t seq) + uint32_t seq, esi_t *esi) { struct bgp_dest *dest; struct attr attr; @@ -1865,6 +1675,7 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, afi_t afi = AFI_L2VPN; safi_t safi = SAFI_EVPN; int route_change; + bool old_is_sync = false; memset(&attr, 0, sizeof(struct attr)); @@ -1877,6 +1688,13 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, attr.default_gw = CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW) ? 1 : 0; attr.router_flag = CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG) ? 
1 : 0; + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) + attr.es_flags |= ATTR_ES_PROXY_ADVERT; + + if (esi && bgp_evpn_is_esi_valid(esi)) { + memcpy(&attr.esi, esi, sizeof(esi_t)); + attr.es_flags |= ATTR_ES_IS_LOCAL; + } /* PMSI is only needed for type-3 routes */ if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) { @@ -1884,6 +1702,21 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, attr.pmsi_tnl_type = PMSI_TNLTYPE_INGR_REPL; } + if (bgp_debug_zebra(NULL)) { + char buf[ETHER_ADDR_STRLEN]; + char buf1[PREFIX_STRLEN]; + char buf3[ESI_STR_LEN]; + + zlog_debug("VRF %s vni %u type-2 route evp %s RMAC %s nexthop %s esi %s", + vpn->bgp_vrf ? + vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ", + vpn->vni, + prefix2str(p, buf1, sizeof(buf1)), + prefix_mac2str(&attr.rmac, buf, + sizeof(buf)), + inet_ntoa(attr.mp_nexthop_global_in), + esi_to_str(esi, buf3, sizeof(buf3))); + } /* router mac is only needed for type-2 routes here. */ if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { uint8_t af_flags = 0; @@ -1892,20 +1725,6 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, SET_FLAG(af_flags, BGP_EVPN_MACIP_TYPE_SVI_IP); bgp_evpn_get_rmac_nexthop(vpn, p, &attr, af_flags); - - if (bgp_debug_zebra(NULL)) { - char buf[ETHER_ADDR_STRLEN]; - char buf1[PREFIX_STRLEN]; - - zlog_debug("VRF %s vni %u type-2 route evp %s RMAC %s nexthop %s", - vpn->bgp_vrf ? - vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ", - vpn->vni, - prefix2str(p, buf1, sizeof(buf1)), - prefix_mac2str(&attr.rmac, buf, - sizeof(buf)), - inet_ntoa(attr.mp_nexthop_global_in)); - } } vni2label(vpn->vni, &(attr.label)); @@ -1930,7 +1749,8 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, /* Create or update route entry. 
*/ route_change = update_evpn_route_entry(bgp, vpn, afi, safi, dest, &attr, - 1, &pi, flags, seq); + 1, &pi, flags, seq, + true /* setup_sync */, &old_is_sync); assert(pi); attr_new = pi->attr; @@ -1951,9 +1771,25 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, * to re-add the best remote dest. BGP doesn't retain non-best local * routes. */ - if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) { + if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { route_change = 0; - evpn_cleanup_local_non_best_route(bgp, vpn, dest, pi); + } else { + if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) { + route_change = 0; + evpn_cleanup_local_non_best_route(bgp, vpn, dest, pi); + } else { + bool new_is_sync; + + /* If the local path already existed and is still the + * best path we need to also check if it transitioned + * from being a sync path to a non-sync path. If it + * it did we need to notify zebra that the sync-path + * has been removed. + */ + new_is_sync = bgp_evpn_attr_is_sync(pi->attr); + if (!new_is_sync && old_is_sync) + evpn_zebra_uninstall(bgp, vpn, p, zero_vtep_ip); + } } bgp_path_info_unlock(pi); @@ -1967,10 +1803,12 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, if (route_change) { struct bgp_path_info *global_pi; - dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &vpn->prd); + dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)p, + &vpn->prd); update_evpn_route_entry(bgp, vpn, afi, safi, dest, attr_new, 1, - &global_pi, flags, seq); + &global_pi, flags, seq, + false /* setup_sync */, NULL /* old_is_sync */); /* Schedule for processing and unlock node. */ bgp_process(bgp, dest, afi, safi); @@ -1987,7 +1825,7 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, * Delete EVPN route entry. * The entry can be in ESI/VNI table or the global table. 
*/ -static void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi, +void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi, struct bgp_dest *dest, struct bgp_path_info **pi) { @@ -2010,56 +1848,6 @@ static void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi, bgp_path_info_delete(dest, tmp_pi); } - - -/* Delete EVPN ES (type-4) route */ -static int delete_evpn_type4_route(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p) -{ - afi_t afi = AFI_L2VPN; - safi_t safi = SAFI_EVPN; - struct bgp_path_info *pi; - struct bgp_dest *dest = NULL; /* dest in esi table */ - struct bgp_dest *global_dest = NULL; /* dest in global table */ - - /* First, locate the route node within the ESI. - * If it doesn't exist, ther is nothing to do. - * Note: there is no RD here. - */ - dest = bgp_node_lookup(es->route_table, (struct prefix *)p); - if (!dest) - return 0; - - /* Next, locate route node in the global EVPN routing table. - * Note that this table is a 2-level tree (RD-level + Prefix-level) - */ - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &es->prd); - if (global_dest) { - - /* Delete route entry in the global EVPN table. */ - delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); - - /* Schedule for processing - withdraws to peers happen from - * this table. - */ - if (pi) - bgp_process(bgp, global_dest, afi, safi); - bgp_dest_unlock_node(global_dest); - } - - /* - * Delete route entry in the ESI route table. - * This can just be removed. 
- */ - delete_evpn_route_entry(bgp, afi, safi, dest, &pi); - if (pi) - bgp_path_info_reap(dest, pi); - bgp_dest_unlock_node(dest); - return 0; -} - /* Delete EVPN type5 route */ static int delete_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp) { @@ -2074,8 +1862,8 @@ static int delete_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp) return 0; /* locate the global route entry for this type-5 prefix */ - dest = bgp_afi_node_lookup(bgp_evpn->rib[afi][safi], afi, safi, - (struct prefix *)evp, &bgp_vrf->vrf_prd); + dest = bgp_global_evpn_node_lookup(bgp_evpn->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)evp, &bgp_vrf->vrf_prd); if (!dest) return 0; @@ -2111,8 +1899,8 @@ static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, * this table is a 2-level tree (RD-level + Prefix-level) similar to * L3VPN routes. */ - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &vpn->prd); + global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)p, &vpn->prd); if (global_dest) { /* Delete route entry in the global EVPN table. 
*/ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); @@ -2137,139 +1925,177 @@ static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, return 0; } +static void bgp_evpn_update_type2_route_entry(struct bgp *bgp, + struct bgpevpn *vpn, struct bgp_node *rn, + struct bgp_path_info *local_pi, const char *caller) +{ + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct bgp_path_info *pi; + struct attr attr; + struct attr *attr_new; + uint32_t seq; + int add_l3_ecomm = 0; + struct bgp_node *global_rn; + struct bgp_path_info *global_pi; + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + int route_change; + bool old_is_sync = false; + + if (CHECK_FLAG(local_pi->flags, BGP_PATH_REMOVED)) + return; + + /* + * Build attribute per local route as the MAC mobility and + * some other values could differ for different routes. The + * attributes will be shared in the hash table. + */ + bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); + attr.nexthop = vpn->originator_ip; + attr.mp_nexthop_global_in = vpn->originator_ip; + attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + attr.sticky = (local_pi->attr->sticky) ? 1 : 0; + attr.router_flag = (local_pi->attr->router_flag) ? 1 : 0; + attr.es_flags = local_pi->attr->es_flags; + if (local_pi->attr->default_gw) { + attr.default_gw = 1; + if (is_evpn_prefix_ipaddr_v6(evp)) + attr.router_flag = 1; + } + memcpy(&attr.esi, &local_pi->attr->esi, sizeof(esi_t)); + bgp_evpn_get_rmac_nexthop(vpn, evp, &attr, + local_pi->extra->af_flags); + vni2label(vpn->vni, &(attr.label)); + /* Add L3 VNI RTs and RMAC for non IPv6 link-local if + * using L3 VNI for type-2 routes also. + */ + if ((is_evpn_prefix_ipaddr_v4(evp) || + !IN6_IS_ADDR_LINKLOCAL( + &evp->prefix.macip_addr.ip.ipaddr_v6)) && + CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) && + bgpevpn_get_l3vni(vpn)) + add_l3_ecomm = 1; + + /* Set up extended community. 
*/ + build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm); + seq = mac_mobility_seqnum(local_pi->attr); + + if (bgp_debug_zebra(NULL)) { + char buf[ETHER_ADDR_STRLEN]; + char buf1[PREFIX_STRLEN]; + char buf3[ESI_STR_LEN]; + + zlog_debug("VRF %s vni %u evp %s RMAC %s nexthop %s esi %s esf 0x%x from %s", + vpn->bgp_vrf ? + vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ", + vpn->vni, + prefix2str(evp, buf1, sizeof(buf1)), + prefix_mac2str(&attr.rmac, buf, sizeof(buf)), + inet_ntoa(attr.mp_nexthop_global_in), + esi_to_str(&attr.esi, buf3, sizeof(buf3)), + attr.es_flags, caller); + } + + /* Update the route entry. */ + route_change = update_evpn_route_entry(bgp, vpn, afi, safi, + rn, &attr, 0, &pi, 0, seq, + true /* setup_sync */, &old_is_sync); + + assert(pi); + attr_new = pi->attr; + /* lock ri to prevent freeing in evpn_route_select_install */ + bgp_path_info_lock(pi); + + /* Perform route selection. Normally, the local route in the + * VNI is expected to win and be the best route. However, + * under peculiar situations (e.g., tunnel (next hop) IP change + * that causes best selection to be based on next hop), a + * remote route could win. If the local route is the best, + * ensure it is updated in the global EVPN route table and + * advertised to peers; otherwise, ensure it is evicted and + * (re)install the remote route into zebra. + */ + evpn_route_select_install(bgp, vpn, rn); + + if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { + route_change = 0; + } else { + if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) { + route_change = 0; + evpn_cleanup_local_non_best_route(bgp, vpn, rn, pi); + } else { + bool new_is_sync; + + /* If the local path already existed and is still the + * best path we need to also check if it transitioned + * from being a sync path to a non-sync path. If it + * it did we need to notify zebra that the sync-path + * has been removed. 
+ */ + new_is_sync = bgp_evpn_attr_is_sync(pi->attr); + if (!new_is_sync && old_is_sync) + evpn_zebra_uninstall(bgp, vpn, + evp, zero_vtep_ip); + } + } + + + /* unlock pi */ + bgp_path_info_unlock(pi); + + if (route_change) { + /* Update route in global routing table. */ + global_rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], + afi, safi, evp, &vpn->prd); + assert(global_rn); + update_evpn_route_entry(bgp, vpn, afi, safi, global_rn, + attr_new, 0, &global_pi, 0, + mac_mobility_seqnum(attr_new), + false /* setup_sync */, NULL /* old_is_sync */); + + /* Schedule for processing and unlock node. */ + bgp_process(bgp, global_rn, afi, safi); + bgp_dest_unlock_node(global_rn); + } + + /* Unintern temporary. */ + aspath_unintern(&attr.aspath); +} + /* * Update all type-2 (MACIP) local routes for this VNI - these should also * be scheduled for advertise to peers. */ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) { - afi_t afi; - safi_t safi; struct bgp_dest *dest; - struct bgp_path_info *pi, *tmp_pi; - struct attr attr; - struct attr *attr_new; - uint32_t seq; - int add_l3_ecomm = 0; - - afi = AFI_L2VPN; - safi = SAFI_EVPN; + struct bgp_path_info *tmp_pi; /* Walk this VNI's route table and update local type-2 routes. For any * routes updated, update corresponding entry in the global table too. */ for (dest = bgp_table_top(vpn->route_table); dest; - dest = bgp_route_next(dest)) { + dest = bgp_route_next(dest)) { const struct prefix_evpn *evp = (const struct prefix_evpn *)bgp_dest_get_prefix(dest); - struct bgp_dest *rd_dest; - struct bgp_path_info *global_pi; if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) continue; /* Identify local route. 
*/ for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi; - tmp_pi = tmp_pi->next) { + tmp_pi = tmp_pi->next) { if (tmp_pi->peer == bgp->peer_self - && tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_STATIC) + && tmp_pi->type == ZEBRA_ROUTE_BGP + && tmp_pi->sub_type == BGP_ROUTE_STATIC) break; } if (!tmp_pi) continue; - /* - * Build attribute per local route as the MAC mobility and - * some other values could differ for different routes. The - * attributes will be shared in the hash table. - */ - bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); - attr.nexthop = vpn->originator_ip; - attr.mp_nexthop_global_in = vpn->originator_ip; - attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - bgp_evpn_get_rmac_nexthop(vpn, evp, &attr, - tmp_pi->extra->af_flags); - - if (evpn_route_is_sticky(bgp, dest)) - attr.sticky = 1; - else if (evpn_route_is_def_gw(bgp, dest)) { - attr.default_gw = 1; - if (is_evpn_prefix_ipaddr_v6(evp)) - attr.router_flag = 1; - } - - if (bgp_debug_zebra(NULL)) { - char buf[ETHER_ADDR_STRLEN]; - char buf1[PREFIX_STRLEN]; - - zlog_debug("VRF %s vni %u evp %s RMAC %s nexthop %s", - vpn->bgp_vrf ? - vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ", - vpn->vni, - prefix2str(evp, buf1, sizeof(buf1)), - prefix_mac2str(&attr.rmac, buf, sizeof(buf)), - inet_ntoa(attr.mp_nexthop_global_in)); - } - - /* Add L3 VNI RTs and RMAC for non IPv6 link-local if - * using L3 VNI for type-2 routes also. - */ - if ((is_evpn_prefix_ipaddr_v4(evp) || - !IN6_IS_ADDR_LINKLOCAL( - &evp->prefix.macip_addr.ip.ipaddr_v6)) && - CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) && - bgpevpn_get_l3vni(vpn)) - add_l3_ecomm = 1; - - /* Set up extended community. */ - build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm); - - seq = mac_mobility_seqnum(tmp_pi->attr); - - /* Update the route entry. 
*/ - update_evpn_route_entry(bgp, vpn, afi, safi, dest, &attr, 0, - &pi, 0, seq); - - /* lock ri to prevent freeing in evpn_route_select_install */ - bgp_path_info_lock(pi); - - /* Perform route selection. Normally, the local route in the - * VNI is expected to win and be the best route. However, - * under peculiar situations (e.g., tunnel (next hop) IP change - * that causes best selection to be based on next hop), a - * remote route could win. If the local route is the best, - * ensure it is updated in the global EVPN route table and - * advertised to peers; otherwise, ensure it is evicted and - * (re)install the remote route into zebra. - */ - evpn_route_select_install(bgp, vpn, dest); - if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) { - evpn_cleanup_local_non_best_route(bgp, vpn, dest, pi); - /* unlock pi */ - bgp_path_info_unlock(pi); - } else { - attr_new = pi->attr; - /* unlock pi */ - bgp_path_info_unlock(pi); - - /* Update route in global routing table. */ - rd_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, - safi, (struct prefix *)evp, - &vpn->prd); - assert(rd_dest); - update_evpn_route_entry(bgp, vpn, afi, safi, rd_dest, - attr_new, 0, &global_pi, 0, - mac_mobility_seqnum(attr_new)); - - /* Schedule for processing and unlock node. */ - bgp_process(bgp, rd_dest, afi, safi); - bgp_dest_unlock_node(rd_dest); - } - - /* Unintern temporary. */ - aspath_unintern(&attr.aspath); + bgp_evpn_update_type2_route_entry(bgp, vpn, dest, tmp_pi, + __func__); } return 0; @@ -2356,27 +2182,6 @@ static int delete_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) } /* - * Delete all routes in per ES route-table - */ -static int delete_all_es_routes(struct bgp *bgp, struct evpnes *es) -{ - struct bgp_dest *dest; - struct bgp_path_info *pi, *nextpi; - - /* Walk this ES's route table and delete all routes. 
*/ - for (dest = bgp_table_top(es->route_table); dest; - dest = bgp_route_next(dest)) { - for (pi = bgp_dest_get_bgp_path_info(dest); - (pi != NULL) && (nextpi = pi->next, 1); pi = nextpi) { - bgp_path_info_delete(dest, pi); - bgp_path_info_reap(dest, pi); - } - } - - return 0; -} - -/* * Delete all routes in the per-VNI route table. */ static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) @@ -2434,7 +2239,7 @@ int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) if (bgp_evpn_vni_flood_mode_get(bgp, vpn) == VXLAN_FLOOD_HEAD_END_REPL) { build_evpn_type3_prefix(&p, vpn->originator_ip); - ret = update_evpn_route(bgp, vpn, &p, 0, 0); + ret = update_evpn_route(bgp, vpn, &p, 0, 0, NULL); if (ret) return ret; } @@ -2442,29 +2247,6 @@ int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) return update_all_type2_routes(bgp, vpn); } -/* Delete (and withdraw) local routes for specified ES from global and ES table. - * Also remove all other routes from the per ES table. - * Invoked when ES is deleted. - */ -static int delete_routes_for_es(struct bgp *bgp, struct evpnes *es) -{ - int ret; - char buf[ESI_STR_LEN]; - struct prefix_evpn p; - - /* Delete and withdraw locally learnt ES route */ - build_evpn_type4_prefix(&p, &es->esi, es->originator_ip.ipaddr_v4); - ret = delete_evpn_type4_route(bgp, es, &p); - if (ret) { - flog_err(EC_BGP_EVPN_ROUTE_DELETE, - "%u failed to delete type-4 route for ESI %s", - bgp->vrf_id, esi_to_str(&es->esi, buf, sizeof(buf))); - } - - /* Delete all routes from per ES table */ - return delete_all_es_routes(bgp, es); -} - /* * Delete (and withdraw) local routes for specified VNI from the global * table and per-VNI table. 
After this, remove all other routes from @@ -2574,68 +2356,6 @@ bgp_create_evpn_bgp_path_info(struct bgp_path_info *parent_pi, return pi; } -/* Install EVPN route entry in ES */ -static int install_evpn_route_entry_in_es(struct bgp *bgp, struct evpnes *es, - const struct prefix_evpn *p, - struct bgp_path_info *parent_pi) -{ - int ret = 0; - struct bgp_dest *dest = NULL; - struct bgp_path_info *pi = NULL; - struct attr *attr_new = NULL; - - /* Create (or fetch) route within the VNI. - * NOTE: There is no RD here. - */ - dest = bgp_node_get(es->route_table, (struct prefix *)p); - - /* Check if route entry is already present. */ - for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) - if (pi->extra - && (struct bgp_path_info *)pi->extra->parent == parent_pi) - break; - - if (!pi) { - /* Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(parent_pi->attr); - - /* Create new route with its attribute. */ - pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0, - parent_pi->peer, attr_new, dest); - SET_FLAG(pi->flags, BGP_PATH_VALID); - bgp_path_info_extra_get(pi); - pi->extra->parent = bgp_path_info_lock(parent_pi); - bgp_dest_lock_node((struct bgp_dest *)parent_pi->net); - bgp_path_info_add(dest, pi); - } else { - if (attrhash_cmp(pi->attr, parent_pi->attr) - && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { - bgp_dest_unlock_node(dest); - return 0; - } - /* The attribute has changed. */ - /* Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(parent_pi->attr); - - /* Restore route, if needed. */ - if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) - bgp_path_info_restore(dest, pi); - - /* Mark if nexthop has changed. */ - if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop)) - SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED); - - /* Unintern existing, set to new. */ - bgp_attr_unintern(&pi->attr); - pi->attr = attr_new; - pi->uptime = bgp_clock(); - } - - /* Perform route selection and update zebra, if required. 
*/ - ret = evpn_es_route_select_install(bgp, es, dest); - return ret; -} - /* * Install route entry into the VRF routing table and invoke route selection. */ @@ -2761,8 +2481,17 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, { struct bgp_dest *dest; struct bgp_path_info *pi; + struct bgp_path_info *local_pi; struct attr *attr_new; int ret; + struct prefix_evpn ad_evp; + + /* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ + if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) + p = evpn_type1_prefix_vni_copy(&ad_evp, p, + parent_pi->attr->nexthop); /* Create (or fetch) route within the VNI. */ /* NOTE: There is no RD here. */ @@ -2805,46 +2534,16 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, /* Perform route selection and update zebra, if required. */ ret = evpn_route_select_install(bgp, vpn, dest); - bgp_dest_unlock_node(dest); - - return ret; -} - -/* Uninstall EVPN route entry from ES route table */ -static int uninstall_evpn_route_entry_in_es(struct bgp *bgp, struct evpnes *es, - const struct prefix_evpn *p, - struct bgp_path_info *parent_pi) -{ - int ret; - struct bgp_dest *dest; - struct bgp_path_info *pi; - - if (!es->route_table) - return 0; - - /* Locate route within the ESI. - * NOTE: There is no RD here. + /* if the best path is a local path with a non-zero ES + * sync info against the local path may need to be updated + * when a remote path is added/updated (including changes + * from sync-path to remote-path) */ - dest = bgp_node_lookup(es->route_table, (struct prefix *)p); - if (!dest) - return 0; - - /* Find matching route entry. 
*/ - for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) - if (pi->extra - && (struct bgp_path_info *)pi->extra->parent == parent_pi) - break; - - if (!pi) - return 0; - - /* Mark entry for deletion */ - bgp_path_info_delete(dest, pi); + local_pi = bgp_evpn_route_get_local_path(bgp, dest); + if (local_pi && bgp_evpn_attr_is_local_es(local_pi->attr)) + bgp_evpn_update_type2_route_entry(bgp, vpn, dest, local_pi, + __func__); - /* Perform route selection and update zebra, if required. */ - ret = evpn_es_route_select_install(bgp, es, dest); - - /* Unlock route node. */ bgp_dest_unlock_node(dest); return ret; @@ -2936,7 +2635,16 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, { struct bgp_dest *dest; struct bgp_path_info *pi; + struct bgp_path_info *local_pi; int ret; + struct prefix_evpn ad_evp; + + /* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ + if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) + p = evpn_type1_prefix_vni_copy(&ad_evp, p, + parent_pi->attr->nexthop); /* Locate route within the VNI. */ /* NOTE: There is no RD here. */ @@ -2959,6 +2667,15 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, /* Perform route selection and update zebra, if required. */ ret = evpn_route_select_install(bgp, vpn, dest); + /* if the best path is a local path with a non-zero ES + * sync info against the local path may need to be updated + * when a remote path is deleted + */ + local_pi = bgp_evpn_route_get_local_path(bgp, dest); + if (local_pi && bgp_evpn_attr_is_local_es(local_pi->attr)) + bgp_evpn_update_type2_route_entry(bgp, vpn, dest, local_pi, + __func__); + /* Unlock route node. */ bgp_dest_unlock_node(dest); @@ -2966,22 +2683,6 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, } /* - * Given a prefix, see if it belongs to ES. 
- */ -static int is_prefix_matching_for_es(const struct prefix_evpn *p, - struct evpnes *es) -{ - /* if not an ES route return false */ - if (p->prefix.route_type != BGP_EVPN_ES_ROUTE) - return 0; - - if (memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t)) == 0) - return 1; - - return 0; -} - -/* * Given a route entry and a VRF, see if this route entry should be * imported into the VRF i.e., RTs match. */ @@ -3115,78 +2816,6 @@ static int is_route_matching_for_vni(struct bgp *bgp, struct bgpevpn *vpn, return 0; } -static int install_uninstall_routes_for_es(struct bgp *bgp, - struct evpnes *es, - int install) -{ - int ret; - afi_t afi; - safi_t safi; - char buf[PREFIX_STRLEN]; - char buf1[ESI_STR_LEN]; - struct bgp_dest *rd_dest, *dest; - struct bgp_table *table; - struct bgp_path_info *pi; - - afi = AFI_L2VPN; - safi = SAFI_EVPN; - - /* - * Walk entire global routing table and evaluate routes which could be - * imported into this VRF. Note that we need to loop through all global - * routes to determine which route matches the import rt on vrf - */ - for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest; - rd_dest = bgp_route_next(rd_dest)) { - table = bgp_dest_get_bgp_table_info(rd_dest); - if (!table) - continue; - - for (dest = bgp_table_top(table); dest; - dest = bgp_route_next(dest)) { - const struct prefix_evpn *evp = - (const struct prefix_evpn *)bgp_dest_get_prefix( - dest); - - for (pi = bgp_dest_get_bgp_path_info(dest); pi; - pi = pi->next) { - /* - * Consider "valid" remote routes applicable for - * this ES. - */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - continue; - - if (!is_prefix_matching_for_es(evp, es)) - continue; - - if (install) - ret = install_evpn_route_entry_in_es( - bgp, es, evp, pi); - else - ret = uninstall_evpn_route_entry_in_es( - bgp, es, evp, pi); - - if (ret) { - flog_err( - EC_BGP_EVPN_FAIL, - "Failed to %s EVPN %s route in ESI %s", - install ? 
"install" - : "uninstall", - prefix2str(evp, buf, - sizeof(buf)), - esi_to_str(&es->esi, buf1, - sizeof(buf1))); - return ret; - } - } - } - } - return 0; -} - /* This API will scan evpn routes for checking attribute's rmac * macthes with bgp instance router mac. It avoid installing * route into bgp vrf table and remote rmac in bridge table. @@ -3390,15 +3019,6 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp, return 0; } -/* Install any existing remote ES routes applicable for this ES into its routing - * table. This is invoked when ES comes up. - */ -static int install_routes_for_es(struct bgp *bgp, struct evpnes *es) -{ - return install_uninstall_routes_for_es(bgp, es, 1); -} - - /* Install any existing remote routes applicable for this VRF into VRF RIB. This * is invoked upon l3vni-add or l3vni import rt change */ @@ -3425,6 +3045,11 @@ static int install_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) if (ret) return ret; + ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE, + 1); + if (ret) + return ret; + return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE, 1); } @@ -3453,33 +3078,14 @@ static int uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) if (ret) return ret; - return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE, - 0); -} + ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE, + 1); + if (ret) + return ret; -/* Install or unistall route in ES */ -static int install_uninstall_route_in_es(struct bgp *bgp, struct evpnes *es, - afi_t afi, safi_t safi, - struct prefix_evpn *evp, - struct bgp_path_info *pi, int install) -{ - int ret = 0; - char buf[ESI_STR_LEN]; - if (install) - ret = install_evpn_route_entry_in_es(bgp, es, evp, pi); - else - ret = uninstall_evpn_route_entry_in_es(bgp, es, evp, pi); - - if (ret) { - flog_err( - EC_BGP_EVPN_FAIL, - "%u: Failed to %s EVPN %s route in ESI %s", bgp->vrf_id, - install ? 
"install" : "uninstall", "ES", - esi_to_str(&evp->prefix.es_addr.esi, buf, sizeof(buf))); - return ret; - } - return 0; + return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE, + 0); } /* @@ -3576,6 +3182,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, struct attr *attr = pi->attr; struct ecommunity *ecom; int i; + struct prefix_evpn ad_evp; assert(attr); @@ -3583,6 +3190,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE || evp->prefix.route_type == BGP_EVPN_IMET_ROUTE || evp->prefix.route_type == BGP_EVPN_ES_ROUTE + || evp->prefix.route_type == BGP_EVPN_AD_ROUTE || evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE)) return 0; @@ -3590,6 +3198,12 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, if (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES))) return 0; + /* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) + evp = evpn_type1_prefix_vni_copy(&ad_evp, evp, attr->nexthop); + ecom = attr->ecommunity; if (!ecom || !ecom->size) return -1; @@ -3603,7 +3217,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, struct ecommunity_val eval_tmp; struct irt_node *irt; /* import rt for l2vni */ struct vrf_irt_node *vrf_irt; /* import rt for l3vni */ - struct evpnes *es; + struct bgp_evpn_es *es; /* Only deal with RTs */ pnt = (ecom->val + (i * ECOMMUNITY_SIZE)); @@ -3621,6 +3235,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, */ if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE || evp->prefix.route_type == BGP_EVPN_IMET_ROUTE || + evp->prefix.route_type == BGP_EVPN_AD_ROUTE || evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) { irt = lookup_import_rt(bgp, eval); @@ -3668,9 +3283,9 @@ static int 
install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, /* we will match based on the entire esi to avoid * imoort of an es route for esi2 into esi1 */ - es = bgp_evpn_lookup_es(bgp, &evp->prefix.es_addr.esi); - if (es && is_es_local(es)) - install_uninstall_route_in_es( + es = bgp_evpn_es_find(&evp->prefix.es_addr.esi); + if (es && bgp_evpn_is_es_local(es)) + bgp_evpn_es_route_install_uninstall( bgp, es, afi, safi, evp, pi, import); } } @@ -3804,10 +3419,11 @@ static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) return 0; attr = pi->attr; - global_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)&p, &vpn->prd); + global_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], + afi, safi, &p, &vpn->prd); update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr, - 1, &pi, 0, mac_mobility_seqnum(attr)); + 1, &pi, 0, mac_mobility_seqnum(attr), + false /* setup_sync */, NULL /* old_is_sync */); /* Schedule for processing and unlock node. */ bgp_process(bgp, global_dest, afi, safi); @@ -3838,12 +3454,13 @@ static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) * attribute. */ attr = pi->attr; - global_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)evp, &vpn->prd); + global_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + evp, &vpn->prd); assert(global_dest); - update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr, - 1, &global_pi, 0, - mac_mobility_seqnum(attr)); + update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr, 1, + &global_pi, 0, + mac_mobility_seqnum(attr), + false /* setup_sync */, NULL /* old_is_sync */); /* Schedule for processing and unlock node. */ bgp_process(bgp, global_dest, afi, safi); @@ -3875,8 +3492,8 @@ static int delete_withdraw_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) /* Remove type-3 route for this VNI from global table. 
*/ build_evpn_type3_prefix(&p, vpn->originator_ip); - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - (struct prefix *)&p, &vpn->prd); + global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)&p, &vpn->prd); if (global_dest) { /* Delete route entry in the global EVPN table. */ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); @@ -3943,7 +3560,7 @@ static void create_advertise_type3(struct hash_bucket *bucket, void *data) return; build_evpn_type3_prefix(&p, vpn->originator_ip); - if (update_evpn_route(bgp, vpn, &p, 0, 0)) + if (update_evpn_route(bgp, vpn, &p, 0, 0, NULL)) flog_err(EC_BGP_EVPN_ROUTE_CREATE, "Type3 route creation failure for VNI %u", vpn->vni); } @@ -4011,8 +3628,14 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi, p.prefix.route_type = BGP_EVPN_MAC_IP_ROUTE; /* Copy Ethernet Seg Identifier */ - memcpy(&evpn.eth_s_id.val, pfx, ESI_LEN); - pfx += ESI_LEN; + if (attr) { + memcpy(&attr->esi, pfx, sizeof(esi_t)); + if (bgp_evpn_is_esi_local(&attr->esi)) + attr->es_flags |= ATTR_ES_IS_LOCAL; + else + attr->es_flags &= ~ATTR_ES_IS_LOCAL; + } + pfx += sizeof(esi_t); /* Copy Ethernet Tag */ memcpy(&eth_tag, pfx, 4); @@ -4165,68 +3788,6 @@ static int process_type3_route(struct peer *peer, afi_t afi, safi_t safi, } /* - * Process received EVPN type-4 route (advertise or withdraw). 
- */ -static int process_type4_route(struct peer *peer, afi_t afi, safi_t safi, - struct attr *attr, uint8_t *pfx, int psize, - uint32_t addpath_id) -{ - int ret; - esi_t esi; - uint8_t ipaddr_len; - struct in_addr vtep_ip; - struct prefix_rd prd; - struct prefix_evpn p; - - /* Type-4 route should be either 23 or 35 bytes - * RD (8), ESI (10), ip-len (1), ip (4 or 16) - */ - if (psize != 23 && psize != 35) { - flog_err(EC_BGP_EVPN_ROUTE_INVALID, - "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d", - peer->bgp->vrf_id, peer->host, psize); - return -1; - } - - /* Make prefix_rd */ - prd.family = AF_UNSPEC; - prd.prefixlen = 64; - memcpy(&prd.val, pfx, 8); - pfx += 8; - - /* get the ESI */ - memcpy(&esi, pfx, ESI_BYTES); - pfx += ESI_BYTES; - - - /* Get the IP. */ - ipaddr_len = *pfx++; - if (ipaddr_len == IPV4_MAX_BITLEN) { - memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN); - } else { - flog_err( - EC_BGP_EVPN_ROUTE_INVALID, - "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d", - peer->bgp->vrf_id, peer->host, ipaddr_len); - return -1; - } - - build_evpn_type4_prefix(&p, &esi, vtep_ip); - /* Process the route. */ - if (attr) { - ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr, - afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, - &prd, NULL, 0, 0, NULL); - } else { - ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr, - afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, - &prd, NULL, 0, NULL); - } - return ret; -} - - -/* * Process received EVPN type-5 route (advertise or withdraw). */ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi, @@ -4271,8 +3832,9 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi, memset(&evpn, 0, sizeof(evpn)); /* Fetch ESI */ - memcpy(&evpn.eth_s_id.val, pfx, 10); - pfx += 10; + if (attr) + memcpy(&attr->esi, pfx, sizeof(esi_t)); + pfx += ESI_BYTES; /* Fetch Ethernet Tag. 
*/ memcpy(&eth_tag, pfx, 4); @@ -4322,7 +3884,7 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi, if (attr) { is_valid_update = true; - if (is_zero_mac(&attr->rmac) && is_zero_esi(&evpn.eth_s_id) && + if (is_zero_mac(&attr->rmac) && is_zero_gw_ip(&evpn.gw_ip, gw_afi)) is_valid_update = false; @@ -4368,9 +3930,9 @@ static void evpn_mpattr_encode_type5(struct stream *s, const struct prefix *p, stream_putc(s, 8 + 10 + 4 + 1 + len + 3); stream_put(s, prd->val, 8); if (attr) - stream_put(s, &(attr->evpn_overlay.eth_s_id), 10); + stream_put(s, &attr->esi, sizeof(esi_t)); else - stream_put(s, &temp, 10); + stream_put(s, 0, sizeof(esi_t)); stream_putl(s, p_evpn_p->prefix_addr.eth_tag); stream_putc(s, p_evpn_p->prefix_addr.ip_prefix_length); if (IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip)) @@ -5073,6 +4635,15 @@ char *bgp_evpn_route2str(const struct prefix_evpn *p, char *buf, int len) is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN, inet_ntoa(p->prefix.es_addr.ip.ipaddr_v4)); + } else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) { + snprintf(buf, len, "[%d]:[%u]:[%s]:[%d]:[%s]", + p->prefix.route_type, + p->prefix.ead_addr.eth_tag, + esi_to_str(&p->prefix.ead_addr.esi, + buf3, sizeof(buf3)), + is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN + : IPV6_MAX_BITLEN, + inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4)); } else { /* For EVPN route types not supported yet. 
*/ snprintf(buf, len, "(unsupported route type %d)", @@ -5112,7 +4683,7 @@ void bgp_evpn_encode_prefix(struct stream *s, const struct prefix *p, stream_putc(s, len); stream_put(s, prd->val, 8); /* RD */ if (attr) - stream_put(s, &attr->evpn_overlay.eth_s_id, ESI_LEN); + stream_put(s, &attr->esi, ESI_BYTES); else stream_put(s, 0, 10); stream_putl(s, evp->prefix.macip_addr.eth_tag); /* Ethernet Tag ID */ @@ -5147,6 +4718,16 @@ void bgp_evpn_encode_prefix(struct stream *s, const struct prefix *p, stream_put_in_addr(s, &evp->prefix.es_addr.ip.ipaddr_v4); break; + case BGP_EVPN_AD_ROUTE: + /* RD, ESI, EthTag, 1 VNI */ + len = RD_BYTES + ESI_BYTES + EVPN_ETH_TAG_BYTES + BGP_LABEL_BYTES; + stream_putc(s, len); + stream_put(s, prd->val, RD_BYTES); /* RD */ + stream_put(s, evp->prefix.ead_addr.esi.val, ESI_BYTES); /* ESI */ + stream_putl(s, evp->prefix.ead_addr.eth_tag); /* Ethernet Tag */ + stream_put(s, label, BGP_LABEL_BYTES); + break; + case BGP_EVPN_IP_PREFIX_ROUTE: /* TODO: AddPath support. */ evpn_mpattr_encode_type5(s, p, prd, label, num_labels, attr); @@ -5234,7 +4815,7 @@ int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, break; case BGP_EVPN_ES_ROUTE: - if (process_type4_route(peer, afi, safi, + if (bgp_evpn_type4_route_process(peer, afi, safi, withdraw ? NULL : attr, pnt, psize, addpath_id)) { flog_err( @@ -5245,6 +4826,18 @@ int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, } break; + case BGP_EVPN_AD_ROUTE: + if (bgp_evpn_type1_route_process(peer, afi, safi, + withdraw ? NULL : attr, pnt, + psize, addpath_id)) { + flog_err( + EC_BGP_PKT_PROCESS, + "%u:%s - Error in processing EVPN type-1 NLRI size %d", + peer->bgp->vrf_id, peer->host, psize); + return BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE; + } + break; + case BGP_EVPN_IP_PREFIX_ROUTE: if (process_type5_route(peer, afi, safi, withdraw ? 
NULL : attr, pnt, @@ -5423,7 +5016,7 @@ void bgp_evpn_derive_auto_rd_for_vrf(struct bgp *bgp) */ void bgp_evpn_derive_auto_rd(struct bgp *bgp, struct bgpevpn *vpn) { - char buf[100]; + char buf[BGP_EVPN_PREFIX_RD_LEN]; vpn->prd.family = AF_UNSPEC; vpn->prd.prefixlen = 64; @@ -5507,6 +5100,8 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, /* add to l2vni list on corresponding vrf */ bgpevpn_link_to_l3vni(vpn); + bgp_evpn_vni_es_init(vpn); + QOBJ_REG(vpn, bgpevpn); return vpn; } @@ -5519,6 +5114,7 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, */ void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) { + bgp_evpn_vni_es_cleanup(vpn); bgpevpn_unlink_from_l3vni(vpn); bgp_table_unlock(vpn->route_table); bgp_evpn_unmap_vni_from_its_rts(bgp, vpn); @@ -5531,79 +5127,6 @@ void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) } /* - * Lookup local ES. - */ -struct evpnes *bgp_evpn_lookup_es(struct bgp *bgp, esi_t *esi) -{ - struct evpnes *es; - struct evpnes tmp; - - memset(&tmp, 0, sizeof(struct evpnes)); - memcpy(&tmp.esi, esi, sizeof(esi_t)); - es = hash_lookup(bgp->esihash, &tmp); - return es; -} - -/* - * Create a new local es - invoked upon zebra notification. 
- */ -struct evpnes *bgp_evpn_es_new(struct bgp *bgp, - esi_t *esi, - struct ipaddr *originator_ip) -{ - char buf[100]; - struct evpnes *es; - - if (!bgp) - return NULL; - - es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct evpnes)); - - /* set the ESI and originator_ip */ - memcpy(&es->esi, esi, sizeof(esi_t)); - memcpy(&es->originator_ip, originator_ip, sizeof(struct ipaddr)); - - /* Initialise the VTEP list */ - es->vtep_list = list_new(); - es->vtep_list->cmp = evpn_vtep_ip_cmp; - - /* auto derive RD for this es */ - bf_assign_index(bm->rd_idspace, es->rd_id); - es->prd.family = AF_UNSPEC; - es->prd.prefixlen = 64; - snprintf(buf, sizeof(buf), "%s:%hu", inet_ntoa(bgp->router_id), - es->rd_id); - (void)str2prefix_rd(buf, &es->prd); - - /* Initialize the ES route table */ - es->route_table = bgp_table_init(bgp, AFI_L2VPN, SAFI_EVPN); - - /* Add to hash */ - if (!hash_get(bgp->esihash, es, hash_alloc_intern)) { - XFREE(MTYPE_BGP_EVPN_ES, es); - return NULL; - } - - QOBJ_REG(es, evpnes); - return es; -} - -/* - * Free a given ES - - * This just frees appropriate memory, caller should have taken other - * needed actions. - */ -void bgp_evpn_es_free(struct bgp *bgp, struct evpnes *es) -{ - list_delete(&es->vtep_list); - bgp_table_unlock(es->route_table); - bf_release_index(bm->rd_idspace, es->rd_id); - hash_release(bgp->esihash, es); - QOBJ_UNREG(es); - XFREE(MTYPE_BGP_EVPN_ES, es); -} - -/* * Import evpn route from global table to VNI/VRF/ESI. */ int bgp_evpn_import_route(struct bgp *bgp, afi_t afi, safi_t safi, @@ -5724,7 +5247,7 @@ int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni, struct ethaddr *mac, * Handle add of a local MACIP. 
*/ int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac, - struct ipaddr *ip, uint8_t flags, uint32_t seq) + struct ipaddr *ip, uint8_t flags, uint32_t seq, esi_t *esi) { struct bgpevpn *vpn; struct prefix_evpn p; @@ -5740,7 +5263,7 @@ int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac, /* Create EVPN type-2 route and schedule for processing. */ build_evpn_type2_prefix(&p, mac, ip); - if (update_evpn_route(bgp, vpn, &p, flags, seq)) { + if (update_evpn_route(bgp, vpn, &p, flags, seq, esi)) { char buf[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; @@ -6112,7 +5635,7 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, if (bgp_evpn_vni_flood_mode_get(bgp, vpn) == VXLAN_FLOOD_HEAD_END_REPL) { build_evpn_type3_prefix(&p, vpn->originator_ip); - if (update_evpn_route(bgp, vpn, &p, 0, 0)) { + if (update_evpn_route(bgp, vpn, &p, 0, 0, NULL)) { flog_err(EC_BGP_EVPN_ROUTE_CREATE, "%u: Type3 route creation failure for VNI %u", bgp->vrf_id, vni); @@ -6137,88 +5660,6 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, } /* - * bgp_evpn_local_es_del - */ -int bgp_evpn_local_es_del(struct bgp *bgp, - esi_t *esi, - struct ipaddr *originator_ip) -{ - char buf[ESI_STR_LEN]; - struct evpnes *es = NULL; - - if (!bgp->esihash) { - flog_err(EC_BGP_ES_CREATE, "%u: ESI hash not yet created", - bgp->vrf_id); - return -1; - } - - /* Lookup ESI hash - should exist. 
*/ - es = bgp_evpn_lookup_es(bgp, esi); - if (!es) { - flog_warn(EC_BGP_EVPN_ESI, - "%u: ESI hash entry for ESI %s at Local ES DEL", - bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); - return -1; - } - - /* Delete all local EVPN ES routes from ESI table - * and schedule for processing (to withdraw from peers)) - */ - delete_routes_for_es(bgp, es); - - /* free the hash entry */ - bgp_evpn_es_free(bgp, es); - - return 0; -} - -/* - * bgp_evpn_local_es_add - */ -int bgp_evpn_local_es_add(struct bgp *bgp, - esi_t *esi, - struct ipaddr *originator_ip) -{ - char buf[ESI_STR_LEN]; - struct evpnes *es = NULL; - struct prefix_evpn p; - - if (!bgp->esihash) { - flog_err(EC_BGP_ES_CREATE, "%u: ESI hash not yet created", - bgp->vrf_id); - return -1; - } - - /* create the new es */ - es = bgp_evpn_lookup_es(bgp, esi); - if (!es) { - es = bgp_evpn_es_new(bgp, esi, originator_ip); - if (!es) { - flog_err( - EC_BGP_ES_CREATE, - "%u: Failed to allocate ES entry for ESI %s - at Local ES Add", - bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); - return -1; - } - } - UNSET_FLAG(es->flags, EVPNES_REMOTE); - SET_FLAG(es->flags, EVPNES_LOCAL); - - build_evpn_type4_prefix(&p, esi, originator_ip->ipaddr_v4); - if (update_evpn_type4_route(bgp, es, &p)) { - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: Type4 route creation failure for ESI %s", - bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); - return -1; - } - - /* import all remote ES routes in th ES table */ - install_routes_for_es(bgp, es); - - return 0; -} - -/* * Handle change in setting for BUM handling. The supported values * are head-end replication and dropping all BUM packets. Any change * should be registered with zebra. 
Also, if doing head-end replication, @@ -6267,9 +5708,6 @@ void bgp_evpn_cleanup(struct bgp *bgp) hash_free(bgp->vnihash); bgp->vnihash = NULL; - if (bgp->esihash) - hash_free(bgp->esihash); - bgp->esihash = NULL; list_delete(&bgp->vrf_import_rtl); list_delete(&bgp->vrf_export_rtl); @@ -6286,9 +5724,6 @@ void bgp_evpn_init(struct bgp *bgp) { bgp->vnihash = hash_create(vni_hash_key_make, vni_hash_cmp, "BGP VNI Hash"); - bgp->esihash = - hash_create(esi_hash_keymake, esi_cmp, - "BGP EVPN Local ESI Hash"); bgp->import_rt_hash = hash_create(import_rt_hash_key_make, import_rt_hash_cmp, "BGP Import RT Hash"); diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 267c87ee54..8535f1fa31 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -176,7 +176,7 @@ extern int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni, int state); extern int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac, struct ipaddr *ip, - uint8_t flags, uint32_t seq); + uint8_t flags, uint32_t seq, esi_t *esi); extern int bgp_evpn_local_l3vni_add(vni_t vni, vrf_id_t vrf_id, struct ethaddr *rmac, struct ethaddr *vrr_rmac, @@ -188,10 +188,6 @@ extern int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, struct in_addr originator_ip, vrf_id_t tenant_vrf_id, struct in_addr mcast_grp); -extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, - struct ipaddr *originator_ip); -extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi, - struct ipaddr *originator_ip); extern void bgp_evpn_flood_control_change(struct bgp *bgp); extern void bgp_evpn_cleanup_on_disable(struct bgp *bgp); extern void bgp_evpn_cleanup(struct bgp *bgp); diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c new file mode 100644 index 0000000000..eb65c43bb9 --- /dev/null +++ b/bgpd/bgp_evpn_mh.c @@ -0,0 +1,2905 @@ +/* EVPN Multihoming procedures + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. 
+ * + * FRRouting is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRRouting is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <zebra.h> + +#include "command.h" +#include "filter.h" +#include "prefix.h" +#include "log.h" +#include "memory.h" +#include "stream.h" +#include "hash.h" +#include "jhash.h" +#include "zclient.h" + +#include "bgpd/bgp_attr_evpn.h" +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_evpn.h" +#include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" +#include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_encap_types.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_errors.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_addpath.h" +#include "bgpd/bgp_label.h" + +static void bgp_evpn_local_es_down(struct bgp *bgp, + struct bgp_evpn_es *es); +static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp, + struct bgp_evpn_es *es); +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr); +static void bgp_evpn_es_vtep_del(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr); +static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es); +static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es); +static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi); + +esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; + +/****************************************************************************** + * 
per-ES (Ethernet Segment) routing table + * + * Following routes are added to the ES's routing table - + * 1. Local and remote ESR (Type-4) + * 2. Local EAD-per-ES (Type-1). + * + * Key for these routes is {ESI, VTEP-IP} so the path selection is practically + * a no-op i.e. all paths lead to same VTEP-IP (i.e. result in the same VTEP + * being added to same ES). + * + * Note the following routes go into the VNI routing table (instead of the + * ES routing table) - + * 1. Remote EAD-per-ES + * 2. Local and remote EAD-per-EVI + */ + +/* Calculate the best path for a multi-homing (Type-1 or Type-4) route + * installed in the ES's routing table. + */ +static int bgp_evpn_es_route_select_install(struct bgp *bgp, + struct bgp_evpn_es *es, + struct bgp_node *rn) +{ + int ret = 0; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct bgp_path_info *old_select; /* old best */ + struct bgp_path_info *new_select; /* new best */ + struct bgp_path_info_pair old_and_new; + + /* Compute the best path. 
*/ + bgp_best_selection(bgp, rn, &bgp->maxpaths[afi][safi], + &old_and_new, afi, safi); + old_select = old_and_new.old; + new_select = old_and_new.new; + + /* + * If the best path hasn't changed - see if something needs to be + * updated + */ + if (old_select && old_select == new_select + && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_IMPORTED + && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR) + && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED) + && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) { + if (bgp_zebra_has_route_changed(old_select)) { + bgp_evpn_es_vtep_add(bgp, es, + old_select->attr->nexthop, + true /*esr*/); + } + UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG); + bgp_zebra_clear_route_change_flags(rn); + return ret; + } + + /* If the user did a "clear" this flag will be set */ + UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR); + + /* bestpath has changed; update relevant fields and install or uninstall + * into the zebra RIB. + */ + if (old_select || new_select) + bgp_bump_version(rn); + + if (old_select) + bgp_path_info_unset_flag(rn, old_select, BGP_PATH_SELECTED); + if (new_select) { + bgp_path_info_set_flag(rn, new_select, BGP_PATH_SELECTED); + bgp_path_info_unset_flag(rn, new_select, BGP_PATH_ATTR_CHANGED); + UNSET_FLAG(new_select->flags, BGP_PATH_MULTIPATH_CHG); + } + + if (new_select && new_select->type == ZEBRA_ROUTE_BGP + && new_select->sub_type == BGP_ROUTE_IMPORTED) { + bgp_evpn_es_vtep_add(bgp, es, + new_select->attr->nexthop, true /*esr */); + } else { + if (old_select && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_IMPORTED) + bgp_evpn_es_vtep_del( + bgp, es, old_select->attr->nexthop, + true /*esr*/); + } + + /* Clear any route change flags. 
*/ + bgp_zebra_clear_route_change_flags(rn); + + /* Reap old select bgp_path_info, if it has been removed */ + if (old_select && CHECK_FLAG(old_select->flags, BGP_PATH_REMOVED)) + bgp_path_info_reap(rn, old_select); + + return ret; +} + +/* Install Type-1/Type-4 route entry in the per-ES routing table */ +static int bgp_evpn_es_route_install(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p, + struct bgp_path_info *parent_pi) +{ + int ret = 0; + struct bgp_node *rn = NULL; + struct bgp_path_info *pi = NULL; + struct attr *attr_new = NULL; + + /* Create (or fetch) route within the VNI. + * NOTE: There is no RD here. + */ + rn = bgp_node_get(es->route_table, (struct prefix *)p); + + /* Check if route entry is already present. */ + for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) + if (pi->extra + && (struct bgp_path_info *)pi->extra->parent == + parent_pi) + break; + + if (!pi) { + /* Add (or update) attribute to hash. */ + attr_new = bgp_attr_intern(parent_pi->attr); + + /* Create new route with its attribute. */ + pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0, + parent_pi->peer, attr_new, rn); + SET_FLAG(pi->flags, BGP_PATH_VALID); + bgp_path_info_extra_get(pi); + pi->extra->parent = bgp_path_info_lock(parent_pi); + bgp_dest_lock_node((struct bgp_node *)parent_pi->net); + bgp_path_info_add(rn, pi); + } else { + if (attrhash_cmp(pi->attr, parent_pi->attr) + && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { + bgp_dest_unlock_node(rn); + return 0; + } + /* The attribute has changed. */ + /* Add (or update) attribute to hash. */ + attr_new = bgp_attr_intern(parent_pi->attr); + + /* Restore route, if needed. */ + if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) + bgp_path_info_restore(rn, pi); + + /* Mark if nexthop has changed. */ + if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop)) + SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED); + + /* Unintern existing, set to new. 
*/ + bgp_attr_unintern(&pi->attr); + pi->attr = attr_new; + pi->uptime = bgp_clock(); + } + + /* Perform route selection and update zebra, if required. */ + ret = bgp_evpn_es_route_select_install(bgp, es, rn); + + bgp_dest_unlock_node(rn); + + return ret; +} + +/* Uninstall Type-1/Type-4 route entry from the ES routing table */ +static int bgp_evpn_es_route_uninstall(struct bgp *bgp, struct bgp_evpn_es *es, + struct prefix_evpn *p, struct bgp_path_info *parent_pi) +{ + int ret; + struct bgp_node *rn; + struct bgp_path_info *pi; + + if (!es->route_table) + return 0; + + /* Locate route within the ESI. + * NOTE: There is no RD here. + */ + rn = bgp_node_lookup(es->route_table, (struct prefix *)p); + if (!rn) + return 0; + + /* Find matching route entry. */ + for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) + if (pi->extra + && (struct bgp_path_info *)pi->extra->parent == + parent_pi) + break; + + if (!pi) + return 0; + + /* Mark entry for deletion */ + bgp_path_info_delete(rn, pi); + + /* Perform route selection and update zebra, if required. */ + ret = bgp_evpn_es_route_select_install(bgp, es, rn); + + /* Unlock route node. */ + bgp_dest_unlock_node(rn); + + return ret; +} + +/* Install or unistall a Tyoe-4 route in the per-ES routing table */ +int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, struct bgp_evpn_es *es, + afi_t afi, safi_t safi, struct prefix_evpn *evp, + struct bgp_path_info *pi, int install) +{ + int ret = 0; + + if (install) + ret = bgp_evpn_es_route_install(bgp, es, evp, pi); + else + ret = bgp_evpn_es_route_uninstall(bgp, es, evp, pi); + + if (ret) { + flog_err( + EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in ESI %s", + bgp->vrf_id, + install ? "install" : "uninstall", + "ES", es->esi_str); + return ret; + } + return 0; +} + +/* Delete (and withdraw) local routes for specified ES from global and ES table. + * Also remove all remote routes from the per ES table. Invoked when ES + * is deleted. 
+ */
+static void bgp_evpn_es_route_del_all(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+	struct bgp_node *dest;
+	struct bgp_path_info *path, *next_path;
+
+	/* take the ES down and withdraw its EAD-EVI routes first */
+	bgp_evpn_local_es_down(bgp, es);
+	bgp_evpn_local_type1_evi_route_del(bgp, es);
+
+	/* Then sweep the ES routing table, deleting and reaping every path. */
+	for (dest = bgp_table_top(es->route_table); dest;
+	     dest = bgp_route_next(dest)) {
+		path = bgp_dest_get_bgp_path_info(dest);
+		while (path) {
+			/* capture the successor before this path is
+			 * deleted and reaped
+			 */
+			next_path = path->next;
+			bgp_path_info_delete(dest, path);
+			bgp_path_info_reap(dest, path);
+			path = next_path;
+		}
+	}
+}
+
+/*****************************************************************************
+ * Base APIs for creating MH routes (Type-1 or Type-4) on local ethernet
+ * segment updates.
+ */
+
+/* create or update local EVPN type1/type4 route entry.
+ *
+ * This could be in -
+ * the ES table if ESR/EAD-ES (or)
+ * the VNI table if EAD-EVI (or)
+ * the global table if ESR/EAD-ES/EAD-EVI
+ *
+ * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and
+ * ESR). 
+ *
+ * On entry *ri is cleared and *route_changed is set to 1.
+ * Returns -1 (with *ri left NULL) if a valid imported path is present
+ * on rn. Returns 0 with *ri left NULL if there is no local path and
+ * add is zero. Otherwise returns 0 with *ri set to the local path;
+ * *route_changed is reset to 0 only when an existing local path already
+ * carries equal attributes and is not marked removed.
+ * afi and safi are accepted but not referenced in this function.
+ */
+static int bgp_evpn_mh_route_update(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct bgpevpn *vpn, afi_t afi,
+		safi_t safi, struct bgp_node *rn, struct attr *attr,
+		int add, struct bgp_path_info **ri, int *route_changed)
+{
+	struct bgp_path_info *tmp_pi = NULL;
+	struct bgp_path_info *local_pi = NULL; /* local route entry if any */
+	struct bgp_path_info *remote_pi = NULL; /* remote route entry if any */
+	struct attr *attr_new = NULL;
+	struct prefix_evpn *evp;
+
+	*ri = NULL;
+	evp = (struct prefix_evpn *)&rn->p;
+	*route_changed = 1;
+
+	/* locate the local and remote entries if any:
+	 * local  - originated by peer_self as a BGP static path
+	 * remote - a valid imported BGP path
+	 */
+	for (tmp_pi = bgp_dest_get_bgp_path_info(rn); tmp_pi;
+	     tmp_pi = tmp_pi->next) {
+		if (tmp_pi->peer == bgp->peer_self
+		    && tmp_pi->type == ZEBRA_ROUTE_BGP
+		    && tmp_pi->sub_type == BGP_ROUTE_STATIC)
+			local_pi = tmp_pi;
+		if (tmp_pi->type == ZEBRA_ROUTE_BGP
+		    && tmp_pi->sub_type == BGP_ROUTE_IMPORTED
+		    && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID))
+			remote_pi = tmp_pi;
+	}
+
+	/* we don't expect to see a remote_pi at this point as
+	 * an ES route has {esi, vtep_ip} as the key in the ES-rt-table
+	 * in the VNI-rt-table.
+	 */
+	if (remote_pi) {
+		flog_err(
+			EC_BGP_ES_INVALID,
+			"%u ERROR: local es route for ESI: %s Vtep %s also learnt from remote",
+			bgp->vrf_id, es->esi_str,
+			inet_ntoa(es->originator_ip));
+		return -1;
+	}
+
+	if (!local_pi && !add)
+		return 0;
+
+	/* create or update the entry */
+	if (!local_pi) {
+
+		/* Add or update attribute to hash */
+		attr_new = bgp_attr_intern(attr);
+
+		/* Create new route with its attribute. */
+		tmp_pi = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0,
+				bgp->peer_self, attr_new, rn);
+		SET_FLAG(tmp_pi->flags, BGP_PATH_VALID);
+
+		if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+			bgp_path_info_extra_get(tmp_pi);
+			tmp_pi->extra->num_labels = 1;
+			if (vpn)
+				vni2label(vpn->vni, &tmp_pi->extra->label[0]);
+			else
+				tmp_pi->extra->label[0] = 0;
+		}
+
+		/* add the newly created path to the route-node */
+		bgp_path_info_add(rn, tmp_pi);
+	} else {
+		tmp_pi = local_pi;
+		if (attrhash_cmp(tmp_pi->attr, attr)
+		    && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
+			*route_changed = 0;
+		else {
+			/* The attribute has changed.
+			 * Add (or update) attribute to hash.
+			 */
+			attr_new = bgp_attr_intern(attr);
+			bgp_path_info_set_flag(rn, tmp_pi,
+					BGP_PATH_ATTR_CHANGED);
+
+			/* Restore route, if needed. */
+			if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
+				bgp_path_info_restore(rn, tmp_pi);
+
+			/* Unintern existing, set to new. */
+			bgp_attr_unintern(&tmp_pi->attr);
+			tmp_pi->attr = attr_new;
+			tmp_pi->uptime = bgp_clock();
+		}
+	}
+
+	if (*route_changed) {
+		if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+			zlog_debug("local ES %s vni %u route-type %s nexthop %s updated",
+					es->esi_str,
+					vpn ? vpn->vni : 0,
+					evp->prefix.route_type ==
+					BGP_EVPN_ES_ROUTE ? "esr" :
+					(vpn ? "ead-evi" : "ead-es"),
+					inet_ntoa(attr->mp_nexthop_global_in));
+	}
+
+	/* Return back the route entry. */
+	*ri = tmp_pi;
+	return 0;
+}
+
+/* Delete local EVPN ESR (type-4) and EAD (type-1) route
+ *
+ * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and
+ * ESR). 
+ */
+static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es,
+		struct bgpevpn *vpn, struct prefix_evpn *p)
+{
+	afi_t afi = AFI_L2VPN;
+	safi_t safi = SAFI_EVPN;
+	struct bgp_path_info *path;
+	struct bgp_node *local_rn; /* node in the ESI or VNI table */
+	struct bgp_node *global_rn; /* node in the global table */
+	/* EAD-EVI routes live in the VNI table and use the VNI RD; ESR and
+	 * EAD-ES routes live in the ES table and use the ES RD.
+	 */
+	struct bgp_table *rt_table = vpn ? vpn->route_table : es->route_table;
+	struct prefix_rd *prd = vpn ? &vpn->prd : &es->prd;
+
+	/* First, locate the route node within the ESI or VNI.
+	 * If it doesn't exist, there is nothing to do.
+	 * Note: there is no RD here.
+	 */
+	local_rn = bgp_node_lookup(rt_table, (struct prefix *)p);
+	if (!local_rn)
+		return 0;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+		zlog_debug("local ES %s vni %u route-type %s nexthop %s delete",
+				es->esi_str,
+				vpn ? vpn->vni : 0,
+				p->prefix.route_type == BGP_EVPN_ES_ROUTE ?
+				"esr" : (vpn ? "ead-evi" : "ead-es"),
+				inet_ntoa(es->originator_ip));
+
+	/* Next, locate route node in the global EVPN routing table.
+	 * Note that this table is a 2-level tree (RD-level + Prefix-level)
+	 */
+	global_rn = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi,
+			(const struct prefix_evpn *)p, prd);
+	if (global_rn) {
+		/* Delete route entry in the global EVPN table. */
+		delete_evpn_route_entry(bgp, afi, safi, global_rn, &path);
+
+		/* Schedule for processing - withdraws to peers happen from
+		 * this table.
+		 */
+		if (path)
+			bgp_process(bgp, global_rn, afi, safi);
+		bgp_dest_unlock_node(global_rn);
+	}
+
+	/*
+	 * Delete route entry in the ESI or VNI routing table.
+	 * This can just be removed.
+	 */
+	delete_evpn_route_entry(bgp, afi, safi, local_rn, &path);
+	if (path)
+		bgp_path_info_reap(local_rn, path);
+	bgp_dest_unlock_node(local_rn);
+	return 0;
+}
+
+/*****************************************************************************
+ * Ethernet Segment (Type-4) Routes
+ * ESRs are used for BUM handling. XXX - BUM support is planned for phase-2 i.e. 
+ * this code is just a place holder for now + */ +/* Build extended community for EVPN ES (type-4) route */ +static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es, + struct attr *attr) +{ + struct ecommunity ecom_encap; + struct ecommunity ecom_es_rt; + struct ecommunity_val eval; + struct ecommunity_val eval_es_rt; + bgp_encap_types tnl_type; + struct ethaddr mac; + + /* Encap */ + tnl_type = BGP_ENCAP_TYPE_VXLAN; + memset(&ecom_encap, 0, sizeof(ecom_encap)); + encode_encap_extcomm(tnl_type, &eval); + ecom_encap.size = 1; + ecom_encap.val = (uint8_t *)eval.val; + attr->ecommunity = ecommunity_dup(&ecom_encap); + + /* ES import RT */ + memset(&mac, 0, sizeof(struct ethaddr)); + memset(&ecom_es_rt, 0, sizeof(ecom_es_rt)); + es_get_system_mac(&es->esi, &mac); + encode_es_rt_extcomm(&eval_es_rt, &mac); + ecom_es_rt.size = 1; + ecom_es_rt.val = (uint8_t *)eval_es_rt.val; + attr->ecommunity = + ecommunity_merge(attr->ecommunity, &ecom_es_rt); + + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); +} + +/* Create or update local type-4 route */ +static int bgp_evpn_type4_route_update(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p) +{ + int ret = 0; + int route_changed = 0; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct attr attr; + struct attr *attr_new = NULL; + struct bgp_node *rn = NULL; + struct bgp_path_info *pi = NULL; + + memset(&attr, 0, sizeof(struct attr)); + + /* Build path-attribute for this route. */ + bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); + attr.nexthop = es->originator_ip; + attr.mp_nexthop_global_in = es->originator_ip; + attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + + /* Set up extended community. */ + bgp_evpn_type4_route_extcomm_build(es, &attr); + + /* First, create (or fetch) route node within the ESI. */ + /* NOTE: There is no RD here. */ + rn = bgp_node_get(es->route_table, (struct prefix *)p); + + /* Create or update route entry. 
*/ + ret = bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, + rn, &attr, 1, &pi, &route_changed); + if (ret != 0) { + flog_err(EC_BGP_ES_INVALID, + "%u ERROR: Failed to updated ES route ESI: %s VTEP %s", + bgp->vrf_id, es->esi_str, + inet_ntoa(es->originator_ip)); + } + + assert(pi); + attr_new = pi->attr; + + /* Perform route selection; + * this is just to set the flags correctly + * as local route in the ES always wins. + */ + bgp_evpn_es_route_select_install(bgp, es, rn); + bgp_dest_unlock_node(rn); + + /* If this is a new route or some attribute has changed, export the + * route to the global table. The route will be advertised to peers + * from there. Note that this table is a 2-level tree (RD-level + + * Prefix-level) similar to L3VPN routes. + */ + if (route_changed) { + struct bgp_path_info *global_pi; + + rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + p, &es->prd); + bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, + rn, attr_new, 1, &global_pi, &route_changed); + + /* Schedule for processing and unlock node. */ + bgp_process(bgp, rn, afi, safi); + bgp_dest_unlock_node(rn); + } + + /* Unintern temporary. 
*/ + aspath_unintern(&attr.aspath); + return 0; +} + +/* Delete local type-4 route */ +static int bgp_evpn_type4_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p) +{ + return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p); +} + +/* Process remote/received EVPN type-4 route (advertise or withdraw) */ +int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id) +{ + int ret; + esi_t esi; + uint8_t ipaddr_len; + struct in_addr vtep_ip; + struct prefix_rd prd; + struct prefix_evpn p; + + /* Type-4 route should be either 23 or 35 bytes + * RD (8), ESI (10), ip-len (1), ip (4 or 16) + */ + if (psize != BGP_EVPN_TYPE4_V4_PSIZE && + psize != BGP_EVPN_TYPE4_V6_PSIZE) { + flog_err(EC_BGP_EVPN_ROUTE_INVALID, + "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d", + peer->bgp->vrf_id, peer->host, psize); + return -1; + } + + /* Make prefix_rd */ + prd.family = AF_UNSPEC; + prd.prefixlen = 64; + memcpy(&prd.val, pfx, RD_BYTES); + pfx += RD_BYTES; + + /* get the ESI */ + memcpy(&esi, pfx, ESI_BYTES); + pfx += ESI_BYTES; + + + /* Get the IP. */ + ipaddr_len = *pfx++; + if (ipaddr_len == IPV4_MAX_BITLEN) { + memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN); + } else { + flog_err( + EC_BGP_EVPN_ROUTE_INVALID, + "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d", + peer->bgp->vrf_id, peer->host, ipaddr_len); + return -1; + } + + build_evpn_type4_prefix(&p, &esi, vtep_ip); + /* Process the route. 
*/ + if (attr) { + ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr, + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, 0, NULL); + } else { + ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr, + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, NULL); + } + return ret; +} + +/* Check if a prefix belongs to the local ES */ +static bool bgp_evpn_type4_prefix_match(struct prefix_evpn *p, + struct bgp_evpn_es *es) +{ + return (p->prefix.route_type == BGP_EVPN_ES_ROUTE) && + !memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t)); +} + +/* Import remote ESRs on local ethernet segment add */ +static int bgp_evpn_type4_remote_routes_import(struct bgp *bgp, + struct bgp_evpn_es *es, bool install) +{ + int ret; + afi_t afi; + safi_t safi; + char buf[PREFIX_STRLEN]; + struct bgp_node *rd_rn, *rn; + struct bgp_table *table; + struct bgp_path_info *pi; + + afi = AFI_L2VPN; + safi = SAFI_EVPN; + + /* Walk entire global routing table and evaluate routes which could be + * imported into this Ethernet Segment. + */ + for (rd_rn = bgp_table_top(bgp->rib[afi][safi]); rd_rn; + rd_rn = bgp_route_next(rd_rn)) { + table = bgp_dest_get_bgp_table_info(rd_rn); + if (!table) + continue; + + for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) { + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + + for (pi = bgp_dest_get_bgp_path_info(rn); pi; + pi = pi->next) { + /* + * Consider "valid" remote routes applicable for + * this ES. + */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) + && pi->type == ZEBRA_ROUTE_BGP + && pi->sub_type == BGP_ROUTE_NORMAL)) + continue; + + if (!bgp_evpn_type4_prefix_match(evp, es)) + continue; + + if (install) + ret = bgp_evpn_es_route_install( + bgp, es, evp, pi); + else + ret = bgp_evpn_es_route_uninstall( + bgp, es, evp, pi); + + if (ret) { + flog_err( + EC_BGP_EVPN_FAIL, + "Failed to %s EVPN %s route in ESI %s", + install ? 
"install" + : "uninstall", + prefix2str(evp, buf, + sizeof(buf)), + es->esi_str); + return ret; + } + } + } + } + return 0; +} + +/***************************************************************************** + * Ethernet Auto Discovery (EAD/Type-1) route handling + * There are two types of EAD routes - + * 1. EAD-per-ES - Key: {ESI, ET=0xffffffff} + * 2. EAD-per-EVI - Key: {ESI, ET=0} + */ + +/* Extended communities associated with EAD-per-ES */ +static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es, + struct attr *attr) +{ + struct ecommunity ecom_encap; + struct ecommunity ecom_esi_label; + struct ecommunity_val eval; + struct ecommunity_val eval_esi_label; + bgp_encap_types tnl_type; + struct listnode *evi_node, *rt_node; + struct ecommunity *ecom; + struct bgp_evpn_es_evi *es_evi; + + /* Encap */ + tnl_type = BGP_ENCAP_TYPE_VXLAN; + memset(&ecom_encap, 0, sizeof(ecom_encap)); + encode_encap_extcomm(tnl_type, &eval); + ecom_encap.size = 1; + ecom_encap.val = (uint8_t *)eval.val; + attr->ecommunity = ecommunity_dup(&ecom_encap); + + /* ESI label */ + encode_esi_label_extcomm(&eval_esi_label, + false /*single_active*/); + ecom_esi_label.size = 1; + ecom_esi_label.val = (uint8_t *)eval_esi_label.val; + attr->ecommunity = + ecommunity_merge(attr->ecommunity, &ecom_esi_label); + + /* Add export RTs for all L2-VNIs associated with this ES */ + /* XXX - suppress EAD-ES advertisment if there are no EVIs associated + * with it. 
+ */ + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, + evi_node, es_evi)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + continue; + for (ALL_LIST_ELEMENTS_RO(es_evi->vpn->export_rtl, + rt_node, ecom)) + attr->ecommunity = ecommunity_merge(attr->ecommunity, + ecom); + } + + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); +} + +/* Extended communities associated with EAD-per-EVI */ +static void bgp_evpn_type1_evi_route_extcomm_build(struct bgp_evpn_es *es, + struct bgpevpn *vpn, struct attr *attr) +{ + struct ecommunity ecom_encap; + struct ecommunity_val eval; + bgp_encap_types tnl_type; + struct listnode *rt_node; + struct ecommunity *ecom; + + /* Encap */ + tnl_type = BGP_ENCAP_TYPE_VXLAN; + memset(&ecom_encap, 0, sizeof(ecom_encap)); + encode_encap_extcomm(tnl_type, &eval); + ecom_encap.size = 1; + ecom_encap.val = (uint8_t *)eval.val; + attr->ecommunity = ecommunity_dup(&ecom_encap); + + /* Add export RTs for the L2-VNI */ + for (ALL_LIST_ELEMENTS_RO(vpn->export_rtl, rt_node, ecom)) + attr->ecommunity = ecommunity_merge(attr->ecommunity, ecom); + + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); +} + +/* Update EVPN EAD (type-1) route - + * vpn - valid for EAD-EVI routes and NULL for EAD-ES routes + */ +static int bgp_evpn_type1_route_update(struct bgp *bgp, + struct bgp_evpn_es *es, struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + int ret = 0; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct attr attr; + struct attr *attr_new = NULL; + struct bgp_node *rn = NULL; + struct bgp_path_info *pi = NULL; + int route_changed = 0; + struct prefix_rd *global_rd; + + memset(&attr, 0, sizeof(struct attr)); + + /* Build path-attribute for this route. 
*/ + bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); + attr.nexthop = es->originator_ip; + attr.mp_nexthop_global_in = es->originator_ip; + attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + + if (vpn) { + /* EAD-EVI route update */ + /* MPLS label */ + vni2label(vpn->vni, &(attr.label)); + + /* Set up extended community */ + bgp_evpn_type1_evi_route_extcomm_build(es, vpn, &attr); + + /* First, create (or fetch) route node within the VNI. */ + rn = bgp_node_get(vpn->route_table, (struct prefix *)p); + + /* Create or update route entry. */ + ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, + rn, &attr, 1, &pi, &route_changed); + if (ret != 0) { + flog_err(EC_BGP_ES_INVALID, + "%u Failed to update EAD-EVI route ESI: %s VNI %u VTEP %s", + bgp->vrf_id, es->esi_str, vpn->vni, + inet_ntoa(es->originator_ip)); + } + global_rd = &vpn->prd; + } else { + /* EAD-ES route update */ + /* MPLS label is 0 for EAD-ES route */ + + /* Set up extended community */ + bgp_evpn_type1_es_route_extcomm_build(es, &attr); + + /* First, create (or fetch) route node within the ES. */ + /* NOTE: There is no RD here. */ + /* XXX: fragment ID must be included as a part of the prefix. */ + rn = bgp_node_get(es->route_table, (struct prefix *)p); + + /* Create or update route entry. */ + ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, + rn, &attr, 1, &pi, &route_changed); + if (ret != 0) { + flog_err(EC_BGP_ES_INVALID, + "%u ERROR: Failed to updated EAD-EVI route ESI: %s VTEP %s", + bgp->vrf_id, es->esi_str, + inet_ntoa(es->originator_ip)); + } + global_rd = &es->prd; + } + + + assert(pi); + attr_new = pi->attr; + + /* Perform route selection; + * this is just to set the flags correctly as local route in + * the ES always wins. + */ + evpn_route_select_install(bgp, vpn, rn); + bgp_dest_unlock_node(rn); + + /* If this is a new route or some attribute has changed, export the + * route to the global table. The route will be advertised to peers + * from there. 
Note that this table is a 2-level tree (RD-level + + * Prefix-level) similar to L3VPN routes. + */ + if (route_changed) { + struct bgp_path_info *global_pi; + + rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + p, global_rd); + bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, + rn, attr_new, 1, &global_pi, &route_changed); + + /* Schedule for processing and unlock node. */ + bgp_process(bgp, rn, afi, safi); + bgp_dest_unlock_node(rn); + } + + /* Unintern temporary. */ + aspath_unintern(&attr.aspath); + return 0; +} + +/* Delete local Type-1 route */ +static int bgp_evpn_type1_es_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p) +{ + return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p); +} + +static int bgp_evpn_type1_evi_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + return bgp_evpn_mh_route_delete(bgp, es, vpn, p); +} + +/* Generate EAD-EVI for all VNIs */ +static void bgp_evpn_local_type1_evi_route_add(struct bgp *bgp, + struct bgp_evpn_es *es) +{ + struct listnode *evi_node; + struct prefix_evpn p; + struct bgp_evpn_es_evi *es_evi; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) + /* EAD-EVI route add for this ES is already done */ + return; + + SET_FLAG(es->flags, BGP_EVPNES_ADV_EVI); + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + continue; + if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: Type4 route creation failure for ESI %s", + bgp->vrf_id, es->esi_str); + } +} + +/* + * Withdraw EAD-EVI for all VNIs + */ +static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp, + struct bgp_evpn_es *es) +{ + struct listnode *evi_node; + struct prefix_evpn p; + struct bgp_evpn_es_evi *es_evi; + + /* Delete and withdraw locally learnt 
EAD-EVI route */ + if (!CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) + /* EAD-EVI route has not been advertised for this ES */ + return; + + UNSET_FLAG(es->flags, BGP_EVPNES_ADV_EVI); + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + continue; + if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: Type4 route creation failure for ESI %s", + bgp->vrf_id, es->esi_str); + } +} + +/* + * Process received EVPN type-1 route (advertise or withdraw). + */ +int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id) +{ + int ret; + struct prefix_rd prd; + esi_t esi; + uint32_t eth_tag; + mpls_label_t label; + struct in_addr vtep_ip; + struct prefix_evpn p; + + if (psize != BGP_EVPN_TYPE1_PSIZE) { + flog_err(EC_BGP_EVPN_ROUTE_INVALID, + "%u:%s - Rx EVPN Type-1 NLRI with invalid length %d", + peer->bgp->vrf_id, peer->host, psize); + return -1; + } + + /* Make prefix_rd */ + prd.family = AF_UNSPEC; + prd.prefixlen = 64; + memcpy(&prd.val, pfx, RD_BYTES); + pfx += RD_BYTES; + + /* get the ESI */ + memcpy(&esi, pfx, ESI_BYTES); + pfx += ESI_BYTES; + + /* Copy Ethernet Tag */ + memcpy(ð_tag, pfx, EVPN_ETH_TAG_BYTES); + eth_tag = ntohl(eth_tag); + pfx += EVPN_ETH_TAG_BYTES; + + memcpy(&label, pfx, BGP_LABEL_BYTES); + + /* EAD route prefix doesn't include the nexthop in the global + * table + */ + vtep_ip.s_addr = 0; + build_evpn_type1_prefix(&p, eth_tag, &esi, vtep_ip); + /* Process the route. 
*/ + if (attr) { + ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr, + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, 0, NULL); + } else { + ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr, + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, NULL); + } + return ret; +} + +/*****************************************************************************/ +/* Ethernet Segment Management + * 1. Ethernet Segment is a collection of links attached to the same + * server (MHD) or switch (MHN) + * 2. An Ethernet Segment can span multiple PEs and is identified by the + * 10-byte ES-ID. + * 3. Local ESs are configured in zebra and sent to BGP + * 4. Remote ESs are created by BGP when one or more ES-EVIs reference it i.e. + * created on first reference and release on last de-reference + * 5. An ES can be both local and remote. Infact most local ESs are expected + * to have an ES peer. + */ + +/* A list of remote VTEPs is maintained for each ES. This list includes - + * 1. VTEPs for which we have imported the ESR i.e. ES-peers + * 2. VTEPs that have an "active" ES-EVI VTEP i.e. 
EAD-per-ES and EAD-per-EVI + * have been imported into one or more VNIs + */ +static int bgp_evpn_es_vtep_cmp(void *p1, void *p2) +{ + const struct bgp_evpn_es_vtep *es_vtep1 = p1; + const struct bgp_evpn_es_vtep *es_vtep2 = p2; + + return es_vtep1->vtep_ip.s_addr - es_vtep2->vtep_ip.s_addr; +} + +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_new(struct bgp_evpn_es *es, + struct in_addr vtep_ip) +{ + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(*es_vtep)); + + es_vtep->es = es; + es_vtep->vtep_ip.s_addr = vtep_ip.s_addr; + listnode_init(&es_vtep->es_listnode, es_vtep); + listnode_add_sort(es->es_vtep_list, &es_vtep->es_listnode); + + return es_vtep; +} + +static void bgp_evpn_es_vtep_free(struct bgp_evpn_es_vtep *es_vtep) +{ + struct bgp_evpn_es *es = es_vtep->es; + + if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR) || + es_vtep->evi_cnt) + /* as long as there is some reference we can't free it */ + return; + + list_delete_node(es->es_vtep_list, &es_vtep->es_listnode); + XFREE(MTYPE_BGP_EVPN_ES_VTEP, es_vtep); +} + +/* check if VTEP is already part of the list */ +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_find(struct bgp_evpn_es *es, + struct in_addr vtep_ip) +{ + struct listnode *node = NULL; + struct bgp_evpn_es_vtep *es_vtep; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (es_vtep->vtep_ip.s_addr == vtep_ip.s_addr) + return es_vtep; + } + return NULL; +} + +/* Send the remote ES to zebra for NHG programming */ +static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp, + struct bgp_evpn_es_vtep *es_vtep, bool add) +{ + struct bgp_evpn_es *es = es_vtep->es; + struct stream *s; + + /* Check socket. */ + if (!zclient || zclient->sock < 0) + return 0; + + /* Don't try to register if Zebra doesn't know of this instance. 
*/ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("No zebra instance, not installing remote es %s", + es->esi_str); + return 0; + } + + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + add ? ZEBRA_REMOTE_ES_VTEP_ADD : ZEBRA_REMOTE_ES_VTEP_DEL, + bgp->vrf_id); + stream_put(s, &es->esi, sizeof(esi_t)); + stream_put_ipv4(s, es_vtep->vtep_ip.s_addr); + + stream_putw_at(s, 0, stream_get_endp(s)); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("Tx %s Remote ESI %s VTEP %s", + add ? "ADD" : "DEL", es->esi_str, + inet_ntoa(es_vtep->vtep_ip)); + + return zclient_send_message(zclient); +} + +static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp, + struct bgp_evpn_es_vtep *es_vtep) +{ + bool old_active; + bool new_active; + + old_active = !!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + /* currently we need an active EVI reference to use the VTEP as + * a nexthop. this may change... + */ + if (es_vtep->evi_cnt) + SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + else + UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + + new_active = !!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + + if (old_active == new_active) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %s %s", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), + new_active ? 
"active" : "inactive"); + + /* send remote ES to zebra */ + bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active); + + /* queue up the es for background consistency checks */ + bgp_evpn_es_cons_checks_pend_add(es_vtep->es); +} + +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr) +{ + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip); + + if (!es_vtep) + es_vtep = bgp_evpn_es_vtep_new(es, vtep_ip); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %s add %s", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), + esr ? "esr" : "ead"); + + if (esr) + SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR); + else + ++es_vtep->evi_cnt; + + bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep); + + return es_vtep; +} + +static void bgp_evpn_es_vtep_do_del(struct bgp *bgp, + struct bgp_evpn_es_vtep *es_vtep, bool esr) +{ + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %s del %s", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), + esr ? 
"esr" : "ead"); + if (esr) { + UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR); + } else { + if (es_vtep->evi_cnt) + --es_vtep->evi_cnt; + } + + bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep); + bgp_evpn_es_vtep_free(es_vtep); +} + +static void bgp_evpn_es_vtep_del(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr) +{ + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip); + if (es_vtep) + bgp_evpn_es_vtep_do_del(bgp, es_vtep, esr); +} + +/* compare ES-IDs for the global ES RB tree */ +static int bgp_es_rb_cmp(const struct bgp_evpn_es *es1, + const struct bgp_evpn_es *es2) +{ + return memcmp(&es1->esi, &es2->esi, ESI_BYTES); +} +RB_GENERATE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp); + +struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi) +{ + struct bgp_evpn_es tmp; + + memcpy(&tmp.esi, esi, sizeof(esi_t)); + return RB_FIND(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, &tmp); +} + +static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi) +{ + struct bgp_evpn_es *es; + + if (!bgp) + return NULL; + + es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct bgp_evpn_es)); + + /* set the ESI */ + memcpy(&es->esi, esi, sizeof(esi_t)); + + /* Initialise the VTEP list */ + es->es_vtep_list = list_new(); + listset_app_node_mem(es->es_vtep_list); + es->es_vtep_list->cmp = bgp_evpn_es_vtep_cmp; + + esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str)); + + /* Initialize the ES routing table */ + es->route_table = bgp_table_init(bgp, AFI_L2VPN, SAFI_EVPN); + + /* Add to rb_tree */ + if (RB_INSERT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es)) { + XFREE(MTYPE_BGP_EVPN_ES, es); + return NULL; + } + + /* Initialise the ES-EVI list */ + es->es_evi_list = list_new(); + listset_app_node_mem(es->es_evi_list); + + QOBJ_REG(es, bgp_evpn_es); + + return es; +} + +/* Free a given ES - + * This just frees appropriate memory, caller should have taken other + * needed actions. 
+ */ +static void bgp_evpn_es_free(struct bgp_evpn_es *es) +{ + if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) + return; + + /* cleanup resources maintained against the ES */ + list_delete(&es->es_evi_list); + list_delete(&es->es_vtep_list); + bgp_table_unlock(es->route_table); + + /* remove the entry from various databases */ + RB_REMOVE(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es); + bgp_evpn_es_cons_checks_pend_del(es); + + QOBJ_UNREG(es); + XFREE(MTYPE_BGP_EVPN_ES, es); +} + +/* init local info associated with the ES */ +static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es) +{ + char buf[BGP_EVPN_PREFIX_RD_LEN]; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) + return; + + SET_FLAG(es->flags, BGP_EVPNES_LOCAL); + listnode_init(&es->es_listnode, es); + listnode_add(bgp_mh_info->local_es_list, &es->es_listnode); + + /* auto derive RD for this es */ + bf_assign_index(bm->rd_idspace, es->rd_id); + es->prd.family = AF_UNSPEC; + es->prd.prefixlen = 64; + snprintf(buf, sizeof(buf), "%s:%hu", inet_ntoa(bgp->router_id), + es->rd_id); + (void)str2prefix_rd(buf, &es->prd); +} + +/* clear any local info associated with the ES */ +static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es) +{ + if (!CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) + return; + + UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL); + + /* remove from the ES local list */ + list_delete_node(bgp_mh_info->local_es_list, &es->es_listnode); + + bf_release_index(bm->rd_idspace, es->rd_id); + + bgp_evpn_es_free(es); +} + +/* eval remote info associated with the ES */ +static void bgp_evpn_es_remote_info_re_eval(struct bgp_evpn_es *es) +{ + if (es->remote_es_evi_cnt) { + SET_FLAG(es->flags, BGP_EVPNES_REMOTE); + } else { + if (CHECK_FLAG(es->flags, BGP_EVPNES_REMOTE)) { + UNSET_FLAG(es->flags, BGP_EVPNES_REMOTE); + bgp_evpn_es_free(es); + } + } +} + +/* Process ES link oper-down by withdrawing ES-EAD and ESR */ +static void bgp_evpn_local_es_down(struct bgp *bgp, + struct 
bgp_evpn_es *es) +{ + struct prefix_evpn p; + int ret; + + if (!CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) + return; + + UNSET_FLAG(es->flags, BGP_EVPNES_OPER_UP); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("local es %s down", es->esi_str); + + /* withdraw ESR */ + /* Delete and withdraw locally learnt ES route */ + build_evpn_type4_prefix(&p, &es->esi, es->originator_ip); + ret = bgp_evpn_type4_route_delete(bgp, es, &p); + if (ret) { + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u failed to delete type-4 route for ESI %s", + bgp->vrf_id, es->esi_str); + } + + /* withdraw EAD-EVI */ + if (!bgp_mh_info->ead_evi_adv_for_down_links) + bgp_evpn_local_type1_evi_route_del(bgp, es); + + /* withdraw EAD-ES */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + ret = bgp_evpn_type1_es_route_delete(bgp, es, &p); + if (ret) { + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u failed to delete type-1 route for ESI %s", + bgp->vrf_id, es->esi_str); + } +} + +/* Process ES link oper-up by generating ES-EAD and ESR */ +static void bgp_evpn_local_es_up(struct bgp *bgp, struct bgp_evpn_es *es) +{ + struct prefix_evpn p; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) + return; + + SET_FLAG(es->flags, BGP_EVPNES_OPER_UP); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("local es %s up", es->esi_str); + + /* generate ESR */ + build_evpn_type4_prefix(&p, &es->esi, es->originator_ip); + if (bgp_evpn_type4_route_update(bgp, es, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: Type4 route creation failure for ESI %s", + bgp->vrf_id, es->esi_str); + + /* generate EAD-EVI */ + bgp_evpn_local_type1_evi_route_add(bgp, es); + + /* generate EAD-ES */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + bgp_evpn_type1_route_update(bgp, es, NULL, &p); +} + +static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es) +{ + struct bgp_evpn_es_evi *es_evi; + struct listnode *evi_node, *evi_next_node; + + if 
(BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del local es %s", es->esi_str); + + /* Delete all local EVPN ES routes from ESI table + * and schedule for processing (to withdraw from peers)) + */ + bgp_evpn_es_route_del_all(bgp, es); + + /* release all local ES EVIs associated with the ES */ + for (ALL_LIST_ELEMENTS(es->es_evi_list, evi_node, + evi_next_node, es_evi)) { + bgp_evpn_local_es_evi_do_del(es_evi); + } + + /* Clear local info associated with the ES and free it up if there is + * no remote reference + */ + bgp_evpn_es_local_info_clear(es); +} + +bool bgp_evpn_is_esi_local(esi_t *esi) +{ + struct bgp_evpn_es *es = NULL; + + /* Lookup ESI hash - should exist. */ + es = bgp_evpn_es_find(esi); + return es ? !!(es->flags & BGP_EVPNES_LOCAL) : false; +} + +int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi) +{ + struct bgp_evpn_es *es = NULL; + + /* Lookup ESI hash - should exist. */ + es = bgp_evpn_es_find(esi); + if (!es) { + flog_warn(EC_BGP_EVPN_ESI, + "%u: ES %s missing at local ES DEL", + bgp->vrf_id, es->esi_str); + return -1; + } + + bgp_evpn_local_es_do_del(bgp, es); + return 0; +} + +/* Handle device to ES id association. Results in the creation of a local + * ES. 
+ */ +int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, + struct in_addr originator_ip, bool oper_up) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + bool new_es = true; + + /* create the new es */ + es = bgp_evpn_es_find(esi); + if (es) { + if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) + new_es = false; + } else { + es = bgp_evpn_es_new(bgp, esi); + if (!es) { + flog_err(EC_BGP_ES_CREATE, + "%u: Failed to allocate ES entry for ESI %s - at Local ES Add", + bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + } + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add local es %s orig-ip %s", + es->esi_str, + inet_ntoa(originator_ip)); + + es->originator_ip = originator_ip; + bgp_evpn_es_local_info_set(bgp, es); + + /* import all remote Type-4 routes in the ES table */ + if (new_es) + bgp_evpn_type4_remote_routes_import(bgp, es, + true /* install */); + + /* create and advertise EAD-EVI routes for the ES - + * XXX - till an ES-EVI reference is created there is really nothing to + * advertise + */ + if (bgp_mh_info->ead_evi_adv_for_down_links) + bgp_evpn_local_type1_evi_route_add(bgp, es); + + /* If the ES link is operationally up generate EAD-ES. EAD-EVI + * can be generated even if the link is inactive. 
+ */ + if (oper_up) + bgp_evpn_local_es_up(bgp, es); + else + bgp_evpn_local_es_down(bgp, es); + + return 0; +} + +static char *bgp_evpn_es_vteps_str(char *vtep_str, struct bgp_evpn_es *es, + uint8_t vtep_str_size) +{ + char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ]; + struct listnode *node; + struct bgp_evpn_es_vtep *es_vtep; + bool first = true; + + vtep_str[0] = '\0'; + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + vtep_flag_str[0] = '\0'; + if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) + strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str)); + if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE) + strlcat(vtep_flag_str, "A", sizeof(vtep_flag_str)); + + if (!strlen(vtep_flag_str)) + strlcat(vtep_flag_str, "-", sizeof(vtep_flag_str)); + if (first) + first = false; + else + strlcat(vtep_str, ",", vtep_str_size); + strlcat(vtep_str, inet_ntoa(es_vtep->vtep_ip), vtep_str_size); + strlcat(vtep_str, "(", vtep_str_size); + strlcat(vtep_str, vtep_flag_str, vtep_str_size); + strlcat(vtep_str, ")", vtep_str_size); + } + + return vtep_str; +} + +static inline void json_array_string_add(json_object *json, const char *str) +{ + json_object_array_add(json, json_object_new_string(str)); +} + +static void bgp_evpn_es_json_vtep_fill(json_object *json_vteps, + struct bgp_evpn_es_vtep *es_vtep) +{ + json_object *json_vtep_entry; + json_object *json_flags; + + json_vtep_entry = json_object_new_object(); + + json_object_string_add(json_vtep_entry, "vtep_ip", + inet_ntoa(es_vtep->vtep_ip)); + if (es_vtep->flags & (BGP_EVPNES_VTEP_ESR | + BGP_EVPNES_VTEP_ACTIVE)) { + json_flags = json_object_new_array(); + if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) + json_array_string_add(json_flags, "esr"); + if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE) + json_array_string_add(json_flags, "active"); + json_object_object_add(json_vtep_entry, "flags", json_flags); + } + + json_object_array_add(json_vteps, + json_vtep_entry); +} + +static void bgp_evpn_es_show_entry(struct vty *vty, + struct bgp_evpn_es 
*es, json_object *json) +{ + char buf1[RD_ADDRSTRLEN]; + struct listnode *node; + struct bgp_evpn_es_vtep *es_vtep; + + if (json) { + json_object *json_vteps; + json_object *json_types; + + json_object_string_add(json, "esi", es->esi_str); + json_object_string_add(json, "rd", + prefix_rd2str(&es->prd, buf1, + sizeof(buf1))); + + if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) { + json_types = json_object_new_array(); + if (es->flags & BGP_EVPNES_LOCAL) + json_array_string_add(json_types, "local"); + if (es->flags & BGP_EVPNES_REMOTE) + json_array_string_add(json_types, "remote"); + json_object_object_add(json, "type", json_types); + } + + if (listcount(es->es_vtep_list)) { + json_vteps = json_object_new_array(); + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, + node, es_vtep)) { + bgp_evpn_es_json_vtep_fill(json_vteps, es_vtep); + } + json_object_object_add(json, "vteps", json_vteps); + } + json_object_int_add(json, "vniCount", + listcount(es->es_evi_list)); + } else { + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + + type_str[0] = '\0'; + if (es->flags & BGP_EVPNES_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + if (es->flags & BGP_EVPNES_REMOTE) + strlcat(type_str, "R", sizeof(type_str)); + if (es->inconsistencies) + strlcat(type_str, "I", sizeof(type_str)); + + bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str)); + + if (es->flags & BGP_EVPNES_LOCAL) + prefix_rd2str(&es->prd, buf1, sizeof(buf1)); + else + strlcpy(buf1, "-", sizeof(buf1)); + + vty_out(vty, "%-30s %-5s %-21s %-8d %s\n", + es->esi_str, type_str, buf1, + listcount(es->es_evi_list), vtep_str); + } +} + +static void bgp_evpn_es_show_entry_detail(struct vty *vty, + struct bgp_evpn_es *es, json_object *json) +{ + if (json) { + json_object *json_flags; + json_object *json_incons; + + /* Add the "brief" info first */ + bgp_evpn_es_show_entry(vty, es, json); + if (es->flags & (BGP_EVPNES_OPER_UP | BGP_EVPNES_ADV_EVI)) { + json_flags = 
json_object_new_array(); + if (es->flags & BGP_EVPNES_OPER_UP) + json_array_string_add(json_flags, "up"); + if (es->flags & BGP_EVPNES_ADV_EVI) + json_array_string_add(json_flags, + "advertiseEVI"); + json_object_object_add(json, "flags", json_flags); + } + json_object_string_add(json, "originator_ip", + inet_ntoa(es->originator_ip)); + json_object_int_add(json, "remoteVniCount", + es->remote_es_evi_cnt); + json_object_int_add(json, "inconsistentVniVtepCount", + es->incons_evi_vtep_cnt); + if (es->inconsistencies) { + json_incons = json_object_new_array(); + if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST) + json_array_string_add(json_incons, + "vni-vtep-mismatch"); + json_object_object_add(json, "inconsistencies", + json_incons); + } + } else { + char incons_str[BGP_EVPNES_INCONS_STR_SZ]; + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + char buf1[RD_ADDRSTRLEN]; + + type_str[0] = '\0'; + if (es->flags & BGP_EVPNES_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + if (es->flags & BGP_EVPNES_REMOTE) + strlcat(type_str, "R", sizeof(type_str)); + + bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str)); + if (!strlen(vtep_str)) + strlcpy(buf1, "-", sizeof(buf1)); + + if (es->flags & BGP_EVPNES_LOCAL) + prefix_rd2str(&es->prd, buf1, sizeof(buf1)); + else + strlcpy(buf1, "-", sizeof(buf1)); + + vty_out(vty, "ESI: %s\n", es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " RD: %s\n", buf1); + vty_out(vty, " Originator-IP: %s\n", + inet_ntoa(es->originator_ip)); + vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list)); + vty_out(vty, " Remote VNI Count: %d\n", + es->remote_es_evi_cnt); + vty_out(vty, " Inconsistent VNI VTEP Count: %d\n", + es->incons_evi_vtep_cnt); + if (es->inconsistencies) { + incons_str[0] = '\0'; + if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST) + strlcat(incons_str, "vni-vtep-mismatch", + sizeof(incons_str)); + } else { + strlcpy(incons_str, "-", sizeof(incons_str)); + } 
+ vty_out(vty, " Inconsistencies: %s\n", + incons_str); + vty_out(vty, " VTEPs: %s\n", vtep_str); + vty_out(vty, "\n"); + } +} + +/* Display all ESs */ +void bgp_evpn_es_show(struct vty *vty, bool uj, bool detail) +{ + struct bgp_evpn_es *es; + json_object *json_array = NULL; + json_object *json = NULL; + + if (uj) { + /* create an array of ESs */ + json_array = json_object_new_array(); + } else { + if (!detail) { + vty_out(vty, + "ES Flags: L local, R remote, I inconsistent\n"); + vty_out(vty, + "VTEP Flags: E ESR/Type-4, A active nexthop\n"); + vty_out(vty, + "%-30s %-5s %-21s %-8s %s\n", + "ESI", "Flags", "RD", "#VNIs", "VTEPs"); + } + } + + RB_FOREACH(es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) { + if (uj) + /* create a separate json object for each ES */ + json = json_object_new_object(); + if (detail) + bgp_evpn_es_show_entry_detail(vty, es, json); + else + bgp_evpn_es_show_entry(vty, es, json); + /* add ES to the json array */ + if (uj) + json_object_array_add(json_array, json); + } + + /* print the array of json-ESs */ + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + +/* Display specific ES */ +void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj) +{ + struct bgp_evpn_es *es; + json_object *json = NULL; + + if (uj) + json = json_object_new_object(); + + es = bgp_evpn_es_find(esi); + if (es) { + bgp_evpn_es_show_entry_detail(vty, es, json); + } else { + if (!uj) + vty_out(vty, "ESI not found\n"); + } + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +/*****************************************************************************/ +/* Ethernet Segment to EVI association - + * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI + * (bgpevpn->es_evi_rb_tree). + * 2. 
Each local ES-EVI entry is rxed from zebra and then used by BGP to + * advertises an EAD-EVI (Type-1 EVPN) route + * 3. The remote ES-EVI is created when a bgp_evpn_es_evi_vtep references + * it. + */ + +/* A list of remote VTEPs is maintained for each ES-EVI. This list includes - + * 1. VTEPs for which we have imported the EAD-per-ES Type1 route + * 2. VTEPs for which we have imported the EAD-per-EVI Type1 route + * VTEPs for which both routes have been rxed are activated. Activation + * creates a NHG in the parent ES. + */ +static int bgp_evpn_es_evi_vtep_cmp(void *p1, void *p2) +{ + const struct bgp_evpn_es_evi_vtep *evi_vtep1 = p1; + const struct bgp_evpn_es_evi_vtep *evi_vtep2 = p2; + + return evi_vtep1->vtep_ip.s_addr - evi_vtep2->vtep_ip.s_addr; +} + +static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_new( + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + + evi_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_EVI_VTEP, sizeof(*evi_vtep)); + + evi_vtep->es_evi = es_evi; + evi_vtep->vtep_ip.s_addr = vtep_ip.s_addr; + listnode_init(&evi_vtep->es_evi_listnode, evi_vtep); + listnode_add_sort(es_evi->es_evi_vtep_list, &evi_vtep->es_evi_listnode); + + return evi_vtep; +} + +static void bgp_evpn_es_evi_vtep_free(struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + struct bgp_evpn_es_evi *es_evi = evi_vtep->es_evi; + + if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD)) + /* as long as there is some reference we can't free it */ + return; + + list_delete_node(es_evi->es_evi_vtep_list, &evi_vtep->es_evi_listnode); + XFREE(MTYPE_BGP_EVPN_ES_EVI_VTEP, evi_vtep); +} + +/* check if VTEP is already part of the list */ +static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_find( + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip) +{ + struct listnode *node = NULL; + struct bgp_evpn_es_evi_vtep *evi_vtep; + + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) { + if (evi_vtep->vtep_ip.s_addr == vtep_ip.s_addr) + return 
evi_vtep; + } + return NULL; +} + +/* A VTEP can be added as "active" attach to an ES if EAD-per-ES and + * EAD-per-EVI routes are rxed from it. + */ +static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp, + struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + bool old_active; + bool new_active; + + old_active = !!CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + /* Both EAD-per-ES and EAD-per-EVI routes must be rxed from a PE + * before it can be activated. + */ + if ((evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD) == + BGP_EVPN_EVI_VTEP_EAD) + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + else + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + new_active = !!CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + if (old_active == new_active) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s evi %u vtep %s %s", + evi_vtep->es_evi->es->esi_str, + evi_vtep->es_evi->vpn->vni, + inet_ntoa(evi_vtep->vtep_ip), + new_active ? "active" : "inactive"); + + /* add VTEP to parent es */ + if (new_active) { + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = bgp_evpn_es_vtep_add(bgp, evi_vtep->es_evi->es, + evi_vtep->vtep_ip, false /*esr*/); + evi_vtep->es_vtep = es_vtep; + } else { + if (evi_vtep->es_vtep) { + bgp_evpn_es_vtep_do_del(bgp, evi_vtep->es_vtep, + false /*esr*/); + evi_vtep->es_vtep = NULL; + } + } + /* queue up the parent es for background consistency checks */ + bgp_evpn_es_cons_checks_pend_add(evi_vtep->es_evi->es); +} + +static void bgp_evpn_es_evi_vtep_add(struct bgp *bgp, + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip, + bool ead_es) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + + evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip); + + if (!evi_vtep) + evi_vtep = bgp_evpn_es_evi_vtep_new(es_evi, vtep_ip); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add es %s evi %u vtep %s %s", + evi_vtep->es_evi->es->esi_str, + evi_vtep->es_evi->vpn->vni, + inet_ntoa(evi_vtep->vtep_ip), + ead_es ? 
"ead_es" : "ead_evi"); + + if (ead_es) + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES); + else + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + + bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); +} + +static void bgp_evpn_es_evi_vtep_del(struct bgp *bgp, + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip, + bool ead_es) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + + evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip); + if (!evi_vtep) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del es %s evi %u vtep %s %s", + evi_vtep->es_evi->es->esi_str, + evi_vtep->es_evi->vpn->vni, + inet_ntoa(evi_vtep->vtep_ip), + ead_es ? "ead_es" : "ead_evi"); + + if (ead_es) + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES); + else + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + + bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); + bgp_evpn_es_evi_vtep_free(evi_vtep); +} + +/* compare ES-IDs for the ES-EVI RB tree maintained per-VNI */ +static int bgp_es_evi_rb_cmp(const struct bgp_evpn_es_evi *es_evi1, + const struct bgp_evpn_es_evi *es_evi2) +{ + return memcmp(&es_evi1->es->esi, &es_evi2->es->esi, ESI_BYTES); +} +RB_GENERATE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp); + +/* find the ES-EVI in the per-L2-VNI RB tree */ +static struct bgp_evpn_es_evi *bgp_evpn_es_evi_find(struct bgp_evpn_es *es, + struct bgpevpn *vpn) +{ + struct bgp_evpn_es_evi es_evi; + + es_evi.es = es; + + return RB_FIND(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, &es_evi); +} + +/* allocate a new ES-EVI and insert it into the per-L2-VNI and per-ES + * tables. 
+ */ +static struct bgp_evpn_es_evi *bgp_evpn_es_evi_new(struct bgp_evpn_es *es, + struct bgpevpn *vpn) +{ + struct bgp_evpn_es_evi *es_evi; + + es_evi = XCALLOC(MTYPE_BGP_EVPN_ES_EVI, sizeof(*es_evi)); + + es_evi->es = es; + es_evi->vpn = vpn; + + /* Initialise the VTEP list */ + es_evi->es_evi_vtep_list = list_new(); + listset_app_node_mem(es_evi->es_evi_vtep_list); + es_evi->es_evi_vtep_list->cmp = bgp_evpn_es_evi_vtep_cmp; + + /* insert into the VNI-ESI rb tree */ + if (RB_INSERT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi)) { + XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi); + return NULL; + } + + /* add to the ES's VNI list */ + listnode_init(&es_evi->es_listnode, es_evi); + listnode_add(es->es_evi_list, &es_evi->es_listnode); + + return es_evi; +} + +/* remove the ES-EVI from the per-L2-VNI and per-ES tables and free + * up the memory. + */ +static void bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es *es = es_evi->es; + struct bgpevpn *vpn = es_evi->vpn; + + /* cannot free the element as long as there is a local or remote + * reference + */ + if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE)) + return; + + /* remove from the ES's VNI list */ + list_delete_node(es->es_evi_list, &es_evi->es_listnode); + + /* remove from the VNI-ESI rb tree */ + RB_REMOVE(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi); + + /* free the VTEP list */ + list_delete(&es_evi->es_evi_vtep_list); + + /* remove from the VNI-ESI rb tree */ + XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi); +} + +/* init local info associated with the ES-EVI */ +static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi) +{ + struct bgpevpn *vpn = es_evi->vpn; + + if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + return; + + SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL); + listnode_init(&es_evi->l2vni_listnode, es_evi); + listnode_add(vpn->local_es_evi_list, &es_evi->l2vni_listnode); +} + +/* clear any local info associated with the ES-EVI */ +static 
void bgp_evpn_es_evi_local_info_clear(struct bgp_evpn_es_evi *es_evi) +{ + struct bgpevpn *vpn = es_evi->vpn; + + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + return; + + UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL); + list_delete_node(vpn->local_es_evi_list, &es_evi->l2vni_listnode); + + bgp_evpn_es_evi_free(es_evi); +} + +/* eval remote info associated with the ES */ +static void bgp_evpn_es_evi_remote_info_re_eval(struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es *es = es_evi->es; + + /* if there are remote VTEPs the ES-EVI is classified as "remote" */ + if (listcount(es_evi->es_evi_vtep_list)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE)) { + SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE); + ++es->remote_es_evi_cnt; + /* set remote on the parent es */ + bgp_evpn_es_remote_info_re_eval(es); + } + } else { + if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE)) { + UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE); + if (es->remote_es_evi_cnt) + --es->remote_es_evi_cnt; + bgp_evpn_es_evi_free(es_evi); + /* check if "remote" can be cleared from the + * parent es. 
+ */ + bgp_evpn_es_remote_info_re_eval(es); + } + } +} + +static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi) +{ + struct prefix_evpn p; + struct bgp_evpn_es *es = es_evi->es; + struct bgp *bgp; + + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del local es %s evi %u", + es_evi->es->esi_str, + es_evi->vpn->vni); + + bgp = bgp_get_evpn(); + + if (bgp) { + /* update EAD-ES with new list of VNIs */ + if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) { + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: EAD-ES route update failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, + es_evi->vpn->vni); + } + + /* withdraw and delete EAD-EVI */ + if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + if (bgp_evpn_type1_evi_route_delete(bgp, + es, es_evi->vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u: EAD-EVI route deletion failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, + es_evi->vpn->vni); + } + } + + bgp_evpn_es_evi_local_info_clear(es_evi); + +} + +int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni) +{ + struct bgpevpn *vpn; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + char buf[ESI_STR_LEN]; + + es = bgp_evpn_es_find(esi); + if (!es) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to deref VNI %d from ESI %s; ES not present", + bgp->vrf_id, vni, + esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + + vpn = bgp_evpn_lookup_vni(bgp, vni); + if (!vpn) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to deref VNI %d from ESI %s; VNI not present", + bgp->vrf_id, vni, es->esi_str); + return -1; + } + + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to deref VNI %d from 
ESI %s; ES-VNI not present", + bgp->vrf_id, vni, es->esi_str); + return -1; + } + + bgp_evpn_local_es_evi_do_del(es_evi); + return 0; +} + +/* Create ES-EVI and advertise the corresponding EAD routes */ +int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni) +{ + struct bgpevpn *vpn; + struct prefix_evpn p; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + char buf[ESI_STR_LEN]; + + es = bgp_evpn_es_find(esi); + if (!es) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to associate VNI %d with ESI %s; ES not present", + bgp->vrf_id, vni, + esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + + vpn = bgp_evpn_lookup_vni(bgp, vni); + if (!vpn) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to associate VNI %d with ESI %s; VNI not present", + bgp->vrf_id, vni, es->esi_str); + return -1; + } + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add local es %s evi %u", + es->esi_str, vni); + + es_evi = bgp_evpn_es_evi_find(es, vpn); + + if (es_evi) { + if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + /* dup */ + return 0; + } else { + es_evi = bgp_evpn_es_evi_new(es, vpn); + if (!es_evi) + return -1; + } + + bgp_evpn_es_evi_local_info_set(es_evi); + + /* generate an EAD-EVI for this new VNI */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { + if (bgp_evpn_type1_route_update(bgp, es, vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: EAD-EVI route creation failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, vni); + } + + /* update EAD-ES */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) { + if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: EAD-ES route creation failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, vni); + } + + return 0; +} + +/* Add remote ES-EVI entry. 
This is actually the remote VTEP add and the + * ES-EVI is implicity created on first VTEP's reference. + */ +int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + bool ead_es; + const esi_t *esi = &p->prefix.ead_addr.esi; + + if (!vpn) + /* local EAD-ES need not be sent back to zebra */ + return 0; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add remote %s es %s evi %u vtep %s", + p->prefix.ead_addr.eth_tag ? + "ead-es" : "ead-evi", + esi_to_str(esi, buf, + sizeof(buf)), + vpn->vni, + inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4)); + + es = bgp_evpn_es_find(esi); + if (!es) { + es = bgp_evpn_es_new(bgp, esi); + if (!es) { + flog_err(EC_BGP_ES_CREATE, + "%u: Failed to allocate ES entry for ESI %s - at remote ES Add", + bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + } + + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) { + es_evi = bgp_evpn_es_evi_new(es, vpn); + if (!es_evi) { + bgp_evpn_es_free(es); + return -1; + } + } + + ead_es = !!p->prefix.ead_addr.eth_tag; + bgp_evpn_es_evi_vtep_add(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4, + ead_es); + + bgp_evpn_es_evi_remote_info_re_eval(es_evi); + return 0; +} + +/* A remote VTEP has withdrawn. The es-evi-vtep will be deleted and the + * parent es-evi freed up implicitly in last VTEP's deref. + */ +int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + bool ead_es; + + if (!vpn) + /* local EAD-ES need not be sent back to zebra */ + return 0; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del remote %s es %s evi %u vtep %s", + p->prefix.ead_addr.eth_tag ? 
+ "ead-es" : "ead-evi", + esi_to_str(&p->prefix.ead_addr.esi, buf, + sizeof(buf)), + vpn->vni, + inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4)); + + es = bgp_evpn_es_find(&p->prefix.ead_addr.esi); + if (!es) + /* XXX - error logs */ + return 0; + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) + /* XXX - error logs */ + return 0; + + ead_es = !!p->prefix.ead_addr.eth_tag; + bgp_evpn_es_evi_vtep_del(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4, + ead_es); + bgp_evpn_es_evi_remote_info_re_eval(es_evi); + return 0; +} + +/* Initialize the ES tables maintained per-L2_VNI */ +void bgp_evpn_vni_es_init(struct bgpevpn *vpn) +{ + /* Initialize the ES-EVI RB tree */ + RB_INIT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree); + + /* Initialize the local list maintained for quick walks by type */ + vpn->local_es_evi_list = list_new(); + listset_app_node_mem(vpn->local_es_evi_list); +} + +/* Cleanup the ES info maintained per-L2_VNI */ +void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn) +{ + struct bgp_evpn_es_evi *es_evi; + struct bgp_evpn_es_evi *es_evi_next; + + RB_FOREACH_SAFE(es_evi, bgp_es_evi_rb_head, + &vpn->es_evi_rb_tree, es_evi_next) { + bgp_evpn_local_es_evi_do_del(es_evi); + } + + list_delete(&vpn->local_es_evi_list); +} + +static char *bgp_evpn_es_evi_vteps_str(char *vtep_str, + struct bgp_evpn_es_evi *es_evi, + uint8_t vtep_str_size) +{ + char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ]; + struct listnode *node; + struct bgp_evpn_es_evi_vtep *evi_vtep; + bool first = true; + + vtep_str[0] = '\0'; + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) { + vtep_flag_str[0] = '\0'; + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES) + strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str)); + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI) + strlcat(vtep_flag_str, "V", sizeof(vtep_flag_str)); + + if (!strnlen(vtep_flag_str, sizeof(vtep_flag_str))) + strlcpy(vtep_flag_str, "-", sizeof(vtep_flag_str)); + if (first) + first = false; + else + strlcat(vtep_str, 
",", vtep_str_size); + strlcat(vtep_str, inet_ntoa(evi_vtep->vtep_ip), vtep_str_size); + strlcat(vtep_str, "(", vtep_str_size); + strlcat(vtep_str, vtep_flag_str, vtep_str_size); + strlcat(vtep_str, ")", vtep_str_size); + } + + return vtep_str; +} + +static void bgp_evpn_es_evi_json_vtep_fill(json_object *json_vteps, + struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + json_object *json_vtep_entry; + json_object *json_flags; + + json_vtep_entry = json_object_new_object(); + + json_object_string_add(json_vtep_entry, + "vtep_ip", + inet_ntoa(evi_vtep->vtep_ip)); + if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD_PER_ES | + BGP_EVPN_EVI_VTEP_EAD_PER_EVI)) { + json_flags = json_object_new_array(); + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES) + json_array_string_add(json_flags, "ead-per-es"); + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI) + json_array_string_add(json_flags, "ed-per-evi"); + json_object_object_add(json_vtep_entry, + "flags", json_flags); + } + + json_object_array_add(json_vteps, + json_vtep_entry); +} + +static void bgp_evpn_es_evi_show_entry(struct vty *vty, + struct bgp_evpn_es_evi *es_evi, json_object *json) +{ + struct listnode *node; + struct bgp_evpn_es_evi_vtep *evi_vtep; + + if (json) { + json_object *json_vteps; + json_object *json_types; + + json_object_string_add(json, "esi", es_evi->es->esi_str); + json_object_int_add(json, "vni", es_evi->vpn->vni); + + if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | + BGP_EVPNES_EVI_REMOTE)) { + json_types = json_object_new_array(); + if (es_evi->flags & BGP_EVPNES_EVI_LOCAL) + json_array_string_add(json_types, "local"); + if (es_evi->flags & BGP_EVPNES_EVI_REMOTE) + json_array_string_add(json_types, "remote"); + json_object_object_add(json, "type", json_types); + } + + if (listcount(es_evi->es_evi_vtep_list)) { + json_vteps = json_object_new_array(); + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, + node, evi_vtep)) { + bgp_evpn_es_evi_json_vtep_fill(json_vteps, + evi_vtep); + } + 
json_object_object_add(json, "vteps", json_vteps); + } + } else { + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + + type_str[0] = '\0'; + if (es_evi->flags & BGP_EVPNES_EVI_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + if (es_evi->flags & BGP_EVPNES_EVI_REMOTE) + strlcat(type_str, "R", sizeof(type_str)); + if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) + strlcat(type_str, "I", sizeof(type_str)); + + bgp_evpn_es_evi_vteps_str(vtep_str, es_evi, sizeof(vtep_str)); + + vty_out(vty, "%-8d %-30s %-5s %s\n", + es_evi->vpn->vni, es_evi->es->esi_str, + type_str, vtep_str); + } +} + +static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty, + struct bgp_evpn_es_evi *es_evi, json_object *json) +{ + if (json) { + json_object *json_flags; + + /* Add the "brief" info first */ + bgp_evpn_es_evi_show_entry(vty, es_evi, json); + if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) { + json_flags = json_object_new_array(); + json_array_string_add(json_flags, "es-vtep-mismatch"); + json_object_object_add(json, "flags", json_flags); + } + } else { + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + char type_str[4]; + + type_str[0] = '\0'; + if (es_evi->flags & BGP_EVPNES_EVI_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + if (es_evi->flags & BGP_EVPNES_EVI_REMOTE) + strlcat(type_str, "R", sizeof(type_str)); + + bgp_evpn_es_evi_vteps_str(vtep_str, es_evi, sizeof(vtep_str)); + if (!strlen(vtep_str)) + strlcpy(vtep_str, "-", sizeof(type_str)); + + vty_out(vty, "VNI: %d ESI: %s\n", + es_evi->vpn->vni, es_evi->es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " Inconsistencies: %s\n", + (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) ? 
+ "es-vtep-mismatch":"-"); + vty_out(vty, " VTEPs: %s\n", vtep_str); + vty_out(vty, "\n"); + } +} + +static void bgp_evpn_es_evi_show_one_vni(struct bgpevpn *vpn, struct vty *vty, + json_object *json_array, bool detail) +{ + struct bgp_evpn_es_evi *es_evi; + json_object *json = NULL; + + RB_FOREACH(es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) { + if (json_array) + /* create a separate json object for each ES */ + json = json_object_new_object(); + if (detail) + bgp_evpn_es_evi_show_entry_detail(vty, es_evi, json); + else + bgp_evpn_es_evi_show_entry(vty, es_evi, json); + /* add ES to the json array */ + if (json_array) + json_object_array_add(json_array, json); + } +} + +struct es_evi_show_ctx { + struct vty *vty; + json_object *json; + int detail; +}; + +static void bgp_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket, + void *ctxt) +{ + struct bgpevpn *vpn = (struct bgpevpn *)bucket->data; + struct es_evi_show_ctx *wctx = (struct es_evi_show_ctx *)ctxt; + + bgp_evpn_es_evi_show_one_vni(vpn, wctx->vty, wctx->json, wctx->detail); +} + +/* Display all ES EVIs */ +void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail) +{ + json_object *json_array = NULL; + struct es_evi_show_ctx wctx; + struct bgp *bgp; + + if (uj) { + /* create an array of ES-EVIs */ + json_array = json_object_new_array(); + } + + wctx.vty = vty; + wctx.json = json_array; + wctx.detail = detail; + + bgp = bgp_get_evpn(); + + if (!json_array && !detail) { + vty_out(vty, "Flags: L local, R remote, I inconsistent\n"); + vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n"); + vty_out(vty, "%-8s %-30s %-5s %s\n", + "VNI", "ESI", "Flags", "VTEPs"); + } + + if (bgp) + hash_iterate(bgp->vnihash, + (void (*)(struct hash_bucket *, + void *))bgp_evpn_es_evi_show_one_vni_hash_cb, + &wctx); + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + +/* Display specific ES EVI */ +void 
bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni, + bool uj, bool detail) +{ + struct bgpevpn *vpn = NULL; + json_object *json_array = NULL; + struct bgp *bgp; + + if (uj) { + /* create an array of ES-EVIs */ + json_array = json_object_new_array(); + } + + bgp = bgp_get_evpn(); + if (bgp) + vpn = bgp_evpn_lookup_vni(bgp, vni); + + if (vpn) { + if (!json_array && !detail) { + vty_out(vty, "Flags: L local, R remote, I inconsistent\n"); + vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n"); + vty_out(vty, "%-8s %-30s %-5s %s\n", + "VNI", "ESI", "Flags", "VTEPs"); + } + + bgp_evpn_es_evi_show_one_vni(vpn, vty, json_array, detail); + } else { + if (!uj) + vty_out(vty, "VNI not found\n"); + } + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + +/***************************************************************************** + * Ethernet Segment Consistency checks + * Consistency checking is done to detect misconfig or mis-cabling. When + * an inconsistency is detected it is simply logged (and displayed via + * show commands) at this point. A more drastic action can be executed (based + * on user config) in the future. 
+ */ +/* queue up the es for background consistency checks */ +static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es) +{ + if (!bgp_mh_info->consistency_checking) + /* consistency checking is not enabled */ + return; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND)) + /* already queued for consistency checking */ + return; + + SET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND); + listnode_init(&es->pend_es_listnode, es); + listnode_add_after(bgp_mh_info->pend_es_list, + listtail_unchecked(bgp_mh_info->pend_es_list), + &es->pend_es_listnode); +} + +/* pull the ES from the consistency check list */ +static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es) +{ + if (!CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND)) + return; + + UNSET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND); + list_delete_node(bgp_mh_info->pend_es_list, + &es->pend_es_listnode); +} + +/* Number of active VTEPs associated with the ES-per-EVI */ +static uint32_t bgp_evpn_es_evi_get_active_vtep_cnt( + struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + struct listnode *node; + uint32_t vtep_cnt = 0; + + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) { + if (CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE)) + ++vtep_cnt; + } + + return vtep_cnt; +} + +/* Number of active VTEPs associated with the ES */ +static uint32_t bgp_evpn_es_get_active_vtep_cnt(struct bgp_evpn_es *es) +{ + struct listnode *node; + uint32_t vtep_cnt = 0; + struct bgp_evpn_es_vtep *es_vtep; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE)) + ++vtep_cnt; + } + + return vtep_cnt; +} + +static struct bgp_evpn_es_vtep *bgp_evpn_es_get_next_active_vtep( + struct bgp_evpn_es *es, struct bgp_evpn_es_vtep *es_vtep) +{ + struct listnode *node; + struct bgp_evpn_es_vtep *next_es_vtep; + + if (es_vtep) + node = listnextnode_unchecked(&es_vtep->es_listnode); + else + node = 
listhead(es->es_vtep_list); + + for (; node; node = listnextnode_unchecked(node)) { + next_es_vtep = listgetdata(node); + if (CHECK_FLAG(next_es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE)) + return next_es_vtep; + } + + return NULL; +} + +static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_get_next_active_vtep( + struct bgp_evpn_es_evi *es_evi, + struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + struct listnode *node; + struct bgp_evpn_es_evi_vtep *next_evi_vtep; + + if (evi_vtep) + node = listnextnode_unchecked(&evi_vtep->es_evi_listnode); + else + node = listhead(es_evi->es_evi_vtep_list); + + for (; node; node = listnextnode_unchecked(node)) { + next_evi_vtep = listgetdata(node); + if (CHECK_FLAG(next_evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE)) + return next_evi_vtep; + } + + return NULL; +} + +static void bgp_evpn_es_evi_set_inconsistent(struct bgp_evpn_es_evi *es_evi) +{ + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST)) { + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("inconsistency detected - es %s evi %u vtep list mismatch", + es_evi->es->esi_str, + es_evi->vpn->vni); + SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST); + + /* update parent ES with the inconsistency setting */ + if (!es_evi->es->incons_evi_vtep_cnt && + BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("inconsistency detected - es %s vtep list mismatch", + es_evi->es->esi_str); + ++es_evi->es->incons_evi_vtep_cnt; + SET_FLAG(es_evi->es->inconsistencies, + BGP_EVPNES_INCONS_VTEP_LIST); + } +} + +static uint32_t bgp_evpn_es_run_consistency_checks(struct bgp_evpn_es *es) +{ + int proc_cnt = 0; + int es_active_vtep_cnt; + int evi_active_vtep_cnt; + struct bgp_evpn_es_evi *es_evi; + struct listnode *evi_node; + struct bgp_evpn_es_vtep *es_vtep; + struct bgp_evpn_es_evi_vtep *evi_vtep; + + /* reset the inconsistencies and re-evaluate */ + es->incons_evi_vtep_cnt = 0; + es->inconsistencies = 0; + + es_active_vtep_cnt = bgp_evpn_es_get_active_vtep_cnt(es); + for 
(ALL_LIST_ELEMENTS_RO(es->es_evi_list, + evi_node, es_evi)) { + ++proc_cnt; + + /* reset the inconsistencies on the EVI and re-evaluate*/ + UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST); + + evi_active_vtep_cnt = + bgp_evpn_es_evi_get_active_vtep_cnt(es_evi); + if (es_active_vtep_cnt != evi_active_vtep_cnt) { + bgp_evpn_es_evi_set_inconsistent(es_evi); + continue; + } + + if (!es_active_vtep_cnt) + continue; + + es_vtep = NULL; + evi_vtep = NULL; + while ((es_vtep = bgp_evpn_es_get_next_active_vtep( + es, es_vtep))) { + evi_vtep = bgp_evpn_es_evi_get_next_active_vtep(es_evi, + evi_vtep); + if (!evi_vtep) { + bgp_evpn_es_evi_set_inconsistent(es_evi); + break; + } + if (es_vtep->vtep_ip.s_addr != + evi_vtep->vtep_ip.s_addr) { + /* inconsistency detected; set it and move + * to the next evi + */ + bgp_evpn_es_evi_set_inconsistent(es_evi); + break; + } + } + } + + return proc_cnt; +} + +static int bgp_evpn_run_consistency_checks(struct thread *t) +{ + int proc_cnt = 0; + int es_cnt = 0; + struct listnode *node; + struct listnode *nextnode; + struct bgp_evpn_es *es; + + for (ALL_LIST_ELEMENTS(bgp_mh_info->pend_es_list, + node, nextnode, es)) { + ++es_cnt; + ++proc_cnt; + /* run consistency checks on the ES and remove it from the + * pending list + */ + proc_cnt += bgp_evpn_es_run_consistency_checks(es); + bgp_evpn_es_cons_checks_pend_del(es); + if (proc_cnt > 500) + break; + } + + /* restart the timer */ + thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, NULL, + BGP_EVPN_CONS_CHECK_INTERVAL, + &bgp_mh_info->t_cons_check); + + return 0; +} + +/*****************************************************************************/ +void bgp_evpn_mh_init(void) +{ + bm->mh_info = XCALLOC(MTYPE_BGP_EVPN_MH_INFO, sizeof(*bm->mh_info)); + + /* setup ES tables */ + RB_INIT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree); + /* local ES list */ + bgp_mh_info->local_es_list = list_new(); + listset_app_node_mem(bgp_mh_info->local_es_list); + /* list of ESs with pending 
processing */ + bgp_mh_info->pend_es_list = list_new(); + listset_app_node_mem(bgp_mh_info->pend_es_list); + + /* config knobs - XXX add cli to control it */ + bgp_mh_info->ead_evi_adv_for_down_links = true; + bgp_mh_info->consistency_checking = true; + + if (bgp_mh_info->consistency_checking) + thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, + NULL, BGP_EVPN_CONS_CHECK_INTERVAL, + &bgp_mh_info->t_cons_check); + + memset(&zero_esi_buf, 0, sizeof(esi_t)); +} + +void bgp_evpn_mh_finish(void) +{ + struct bgp_evpn_es *es; + struct bgp_evpn_es *es_next; + struct bgp *bgp; + + bgp = bgp_get_evpn(); + if (bgp) { + RB_FOREACH_SAFE(es, bgp_es_rb_head, + &bgp_mh_info->es_rb_tree, es_next) { + /* XXX - need to force free remote ESs here */ + bgp_evpn_local_es_do_del(bgp, es); + } + } + thread_cancel(bgp_mh_info->t_cons_check); + list_delete(&bgp_mh_info->local_es_list); + list_delete(&bgp_mh_info->pend_es_list); + + XFREE(MTYPE_BGP_EVPN_MH_INFO, bgp_mh_info); +} diff --git a/bgpd/bgp_evpn_mh.h b/bgpd/bgp_evpn_mh.h new file mode 100644 index 0000000000..93355d495a --- /dev/null +++ b/bgpd/bgp_evpn_mh.h @@ -0,0 +1,299 @@ +/* EVPN header for multihoming procedures + * + * Copyright (C) 2019 Cumulus Networks + * Anuradha Karuppiah + * + * This file is part of FRRouting. + * + * FRRouting is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRRouting is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#ifndef _FRR_BGP_EVPN_MH_H +#define _FRR_BGP_EVPN_MH_H + +#include "vxlan.h" +#include "bgpd.h" +#include "bgp_evpn.h" +#include "bgp_evpn_private.h" + +#define BGP_EVPN_AD_ES_ETH_TAG 0xffffffff +#define BGP_EVPN_AD_EVI_ETH_TAG 0 + +#define BGP_EVPNES_INCONS_STR_SZ 80 +#define BGP_EVPN_FLAG_STR_SZ 5 +#define BGP_EVPN_VTEPS_FLAG_STR_SZ (BGP_EVPN_FLAG_STR_SZ * ES_VTEP_MAX_CNT) + +#define BGP_EVPN_CONS_CHECK_INTERVAL 60 + + +/* Ethernet Segment entry - + * - Local and remote ESs are maintained in a global RB tree, + * bgp_mh_info->es_rb_tree using ESI as key + * - Local ESs are received from zebra (BGP_EVPNES_LOCAL) + * - Remote ESs are implicitly created (by reference) by a remote ES-EVI + * (BGP_EVPNES_REMOTE) + * - An ES can be simultaneously LOCAL and REMOTE; in fact all LOCAL ESs are + * expected to have REMOTE ES peers. + */ +struct bgp_evpn_es { + /* Ethernet Segment Identifier */ + esi_t esi; + char esi_str[ESI_STR_LEN]; + + /* es flags */ + uint32_t flags; + /* created via zebra config */ +#define BGP_EVPNES_LOCAL (1 << 0) + /* created implicitly by a remote ES-EVI reference */ +#define BGP_EVPNES_REMOTE (1 << 1) + /* local ES link is oper-up */ +#define BGP_EVPNES_OPER_UP (1 << 2) + /* enable generation of EAD-EVI routes */ +#define BGP_EVPNES_ADV_EVI (1 << 3) + /* consistency checks pending */ +#define BGP_EVPNES_CONS_CHECK_PEND (1 << 4) + + /* memory used for adding the es to bgp_mh_info->es_rb_tree */ + RB_ENTRY(bgp_evpn_es) rb_node; + + /* [EVPNES_LOCAL] memory used for linking the es to + * bgp_mh_info->local_es_list + */ + struct listnode es_listnode; + + /* memory used for linking the es to "processing" pending list + * bgp_mh_info->pend_es_list + */ + struct listnode pend_es_listnode; + + /* [EVPNES_LOCAL] Id for deriving the RD automatically for this ESI */ + uint16_t rd_id; + + /* [EVPNES_LOCAL] RD for this ES */ + struct prefix_rd prd; + + /* [EVPNES_LOCAL] originator ip address */ + struct in_addr originator_ip; + + /* [EVPNES_LOCAL] Route 
table for EVPN routes for this ESI- + * - Type-4 local and remote routes + * - Type-1 local routes + */ + struct bgp_table *route_table; + + /* list of PEs (bgp_evpn_es_vtep) attached to the ES */ + struct list *es_vtep_list; + + /* List of ES-EVIs associated with this ES */ + struct list *es_evi_list; + + /* Number of remote VNIs referencing this ES */ + uint32_t remote_es_evi_cnt; + + uint32_t inconsistencies; + /* there are one or more EVIs whose VTEP list doesn't match + * with the ES's VTEP list + */ +#define BGP_EVPNES_INCONS_VTEP_LIST (1 << 0) + + /* number of es-evi entries whose VTEP list doesn't match + * with the ES's + */ + uint32_t incons_evi_vtep_cnt; + + QOBJ_FIELDS +}; +DECLARE_QOBJ_TYPE(bgp_evpn_es) +RB_HEAD(bgp_es_rb_head, bgp_evpn_es); +RB_PROTOTYPE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp); + +/* PE attached to an ES */ +struct bgp_evpn_es_vtep { + struct bgp_evpn_es *es; /* parent ES */ + struct in_addr vtep_ip; + + uint32_t flags; + /* Rxed a Type4 route from this PE */ +#define BGP_EVPNES_VTEP_ESR (1 << 0) + /* Active (rxed EAD-ES and EAD-EVI) and can be included as + * a nexthop + */ +#define BGP_EVPNES_VTEP_ACTIVE (1 << 1) + + uint32_t evi_cnt; /* es_evis referencing this vtep as an active path */ + + /* memory used for adding the entry to es->es_vtep_list */ + struct listnode es_listnode; +}; + +/* ES per-EVI info + * - ES-EVIs are maintained per-L2-VNI (vpn->es_evi_rb_tree) + * - ES-EVIs are also linked to the parent ES (es->es_evi_list) + * - Local ES-EVIs are created by zebra (via config). They are linked to a + * per-VNI list (vpn->local_es_evi_list) for quick access + * - Remote ES-EVIs are created implicitly when a bgp_evpn_es_evi_vtep + * references it. 
+ */ +struct bgp_evpn_es_evi { + struct bgp_evpn_es *es; + struct bgpevpn *vpn; + + /* ES-EVI flags */ + uint32_t flags; +/* local ES-EVI, created by zebra */ +#define BGP_EVPNES_EVI_LOCAL (1 << 0) +/* created via a remote VTEP imported by BGP */ +#define BGP_EVPNES_EVI_REMOTE (1 << 1) +#define BGP_EVPNES_EVI_INCONS_VTEP_LIST (1 << 2) + + /* memory used for adding the es_evi to es_evi->vpn->es_evi_rb_tree */ + RB_ENTRY(bgp_evpn_es_evi) rb_node; + /* memory used for linking the es_evi to + * es_evi->vpn->local_es_evi_list + */ + struct listnode l2vni_listnode; + /* memory used for linking the es_evi to + * es_evi->es->es_evi_list + */ + struct listnode es_listnode; + + /* list of PEs (bgp_evpn_es_evi_vtep) attached to the ES for this VNI */ + struct list *es_evi_vtep_list; +}; + +/* PE attached to an ES for a VNI. This entry is created when an EAD-per-ES + * or EAD-per-EVI Type1 route is imported into the VNI. + */ +struct bgp_evpn_es_evi_vtep { + struct bgp_evpn_es_evi *es_evi; /* parent ES-EVI */ + struct in_addr vtep_ip; + + uint32_t flags; + /* Rxed an EAD-per-ES route from the PE */ +#define BGP_EVPN_EVI_VTEP_EAD_PER_ES (1 << 0) /* rxed EAD-per-ES */ + /* Rxed an EAD-per-EVI route from the PE */ +#define BGP_EVPN_EVI_VTEP_EAD_PER_EVI (1 << 1) /* rxed EAD-per-EVI */ + /* VTEP is active i.e. 
will result in the creation of an es-vtep */ +#define BGP_EVPN_EVI_VTEP_ACTIVE (1 << 2) +#define BGP_EVPN_EVI_VTEP_EAD (BGP_EVPN_EVI_VTEP_EAD_PER_ES |\ + BGP_EVPN_EVI_VTEP_EAD_PER_EVI) + + /* memory used for adding the entry to es_evi->es_evi_vtep_list */ + struct listnode es_evi_listnode; + struct bgp_evpn_es_vtep *es_vtep; +}; + +/* multihoming information stored in bgp_master */ +#define bgp_mh_info (bm->mh_info) +struct bgp_evpn_mh_info { + /* RB tree of Ethernet segments (used for EVPN-MH) */ + struct bgp_es_rb_head es_rb_tree; + /* List of local ESs */ + struct list *local_es_list; + /* List of ESs with pending/periodic processing */ + struct list *pend_es_list; + /* periodic timer for running background consistency checks */ + struct thread *t_cons_check; + + /* config knobs for optimizing or interop */ + /* Generate EAD-EVI routes even if the ES is oper-down. This can be + * enabled as an optimization to avoid a storm of updates when an ES + * link flaps. + */ + bool ead_evi_adv_for_down_links; + /* Enable ES consistency checking */ + bool consistency_checking; +}; + +/****************************************************************************/ +static inline int bgp_evpn_is_es_local(struct bgp_evpn_es *es) +{ + return CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL) ? 1 : 0; +} + +extern esi_t *zero_esi; +static inline bool bgp_evpn_is_esi_valid(esi_t *esi) +{ + return !!memcmp(esi, zero_esi, sizeof(esi_t)); +} + +static inline esi_t *bgp_evpn_attr_get_esi(struct attr *attr) +{ + return attr ? &attr->esi : zero_esi; +} + +static inline bool bgp_evpn_attr_is_sync(struct attr *attr) +{ + return attr ? !!(attr->es_flags & + (ATTR_ES_PEER_PROXY | ATTR_ES_PEER_ACTIVE)) : false; +} + +static inline uint32_t bgp_evpn_attr_get_sync_seq(struct attr *attr) +{ + return attr ? attr->mm_sync_seqnum : 0; +} + +static inline bool bgp_evpn_attr_is_active_on_peer(struct attr *attr) +{ + return attr ? 
+ !!(attr->es_flags & ATTR_ES_PEER_ACTIVE) : false; +} + +static inline bool bgp_evpn_attr_is_router_on_peer(struct attr *attr) +{ + return attr ? + !!(attr->es_flags & ATTR_ES_PEER_ROUTER) : false; +} + +static inline bool bgp_evpn_attr_is_proxy(struct attr *attr) +{ + return attr ? !!(attr->es_flags & ATTR_ES_PROXY_ADVERT) : false; +} + +static inline bool bgp_evpn_attr_is_local_es(struct attr *attr) +{ + return attr ? !!(attr->es_flags & ATTR_ES_IS_LOCAL) : false; +} + +/****************************************************************************/ +extern int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, + struct bgp_evpn_es *es, afi_t afi, safi_t safi, + struct prefix_evpn *evp, struct bgp_path_info *pi, + int install); +int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id); +int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id); +extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, + struct in_addr originator_ip, bool oper_up); +extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi); +extern int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni); +extern int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni); +extern int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p); +extern int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p); +extern void bgp_evpn_mh_init(void); +extern void bgp_evpn_mh_finish(void); +void bgp_evpn_vni_es_init(struct bgpevpn *vpn); +void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn); +void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj); +void bgp_evpn_es_show(struct vty *vty, bool uj, bool detail); +void bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni, + bool uj, bool detail); +void 
bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail); +struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi); +extern bool bgp_evpn_is_esi_local(esi_t *esi); + +#endif /* _FRR_BGP_EVPN_MH_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index c7ccf69f05..ca45b198a7 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -34,15 +34,23 @@ * in bits */ #define EVPN_ROUTE_PREFIXLEN (sizeof(struct evpn_addr) * 8) -/* EVPN route types. */ -typedef enum { - BGP_EVPN_AD_ROUTE = 1, /* Ethernet Auto-Discovery (A-D) route */ - BGP_EVPN_MAC_IP_ROUTE, /* MAC/IP Advertisement route */ - BGP_EVPN_IMET_ROUTE, /* Inclusive Multicast Ethernet Tag route */ - BGP_EVPN_ES_ROUTE, /* Ethernet Segment route */ - BGP_EVPN_IP_PREFIX_ROUTE, /* IP Prefix route */ -} bgp_evpn_route_type; +/* EVPN route RD buffer length */ +#define BGP_EVPN_PREFIX_RD_LEN 100 +/* packet sizes for EVPN routes */ +/* Type-1 route should be 25 bytes + * RD (8), ESI (10), eth-tag (4), vni (3) + */ +#define BGP_EVPN_TYPE1_PSIZE 25 +/* Type-4 route should be either 23 or 35 bytes + * RD (8), ESI (10), ip-len (1), ip (4 or 16) + */ +#define BGP_EVPN_TYPE4_V4_PSIZE 23 +#define BGP_EVPN_TYPE4_V6_PSIZE 35 + +RB_HEAD(bgp_es_evi_rb_head, bgp_evpn_es_evi); +RB_PROTOTYPE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, + bgp_es_evi_rb_cmp); /* * Hash table of EVIs. Right now, the only type of EVI supported is with * VxLAN encapsulation, hence each EVI corresponds to a L2 VNI. @@ -98,46 +106,16 @@ struct bgpevpn { * this VNI. */ struct bgp_table *route_table; - QOBJ_FIELDS -}; - -DECLARE_QOBJ_TYPE(bgpevpn) + /* RB tree of ES-EVIs */ + struct bgp_es_evi_rb_head es_evi_rb_tree; -struct evpnes { - - /* Ethernet Segment Identifier */ - esi_t esi; - - /* es flags */ - uint16_t flags; -#define EVPNES_LOCAL 0x01 -#define EVPNES_REMOTE 0x02 - - /* - * Id for deriving the RD - * automatically for this ESI - */ - uint16_t rd_id; - - /* RD for this VNI. 
*/ - struct prefix_rd prd; - - /* originator ip address */ - struct ipaddr originator_ip; - - /* list of VTEPs in the same site */ - struct list *vtep_list; - - /* - * Route table for EVPN routes for - * this ESI. - type4 routes - */ - struct bgp_table *route_table; + /* List of local ESs */ + struct list *local_es_evi_list; QOBJ_FIELDS }; -DECLARE_QOBJ_TYPE(evpnes) +DECLARE_QOBJ_TYPE(bgpevpn) /* Mapping of Import RT to VNIs. * The Import RTs of all VNIs are maintained in a hash table with each @@ -330,6 +308,16 @@ static inline void encode_es_rt_extcomm(struct ecommunity_val *eval, memcpy(&eval->val[2], mac, ETH_ALEN); } +static inline void encode_esi_label_extcomm(struct ecommunity_val *eval, + bool single_active) +{ + memset(eval, 0, sizeof(struct ecommunity_val)); + eval->val[0] = ECOMMUNITY_ENCODE_EVPN; + eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL; + if (single_active) + eval->val[2] |= (1 << 0); +} + static inline void encode_rmac_extcomm(struct ecommunity_val *eval, struct ethaddr *rmac) { @@ -361,13 +349,15 @@ static inline void encode_mac_mobility_extcomm(int static_mac, uint32_t seq, } static inline void encode_na_flag_extcomm(struct ecommunity_val *eval, - uint8_t na_flag) + uint8_t na_flag, bool proxy) { memset(eval, 0, sizeof(*eval)); eval->val[0] = ECOMMUNITY_ENCODE_EVPN; eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_ND; if (na_flag) eval->val[2] |= ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG; + if (proxy) + eval->val[2] |= ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG; } static inline void ip_prefix_from_type5_prefix(const struct prefix_evpn *evp, @@ -487,6 +477,44 @@ static inline void build_evpn_type4_prefix(struct prefix_evpn *p, memcpy(&p->prefix.es_addr.esi, esi, sizeof(esi_t)); } +static inline void build_evpn_type1_prefix(struct prefix_evpn *p, + uint32_t eth_tag, + esi_t *esi, + struct in_addr originator_ip) +{ + memset(p, 0, sizeof(struct prefix_evpn)); + p->family = AF_EVPN; + p->prefixlen = EVPN_ROUTE_PREFIXLEN; + p->prefix.route_type = BGP_EVPN_AD_ROUTE; 
+ p->prefix.ead_addr.eth_tag = eth_tag; + p->prefix.ead_addr.ip.ipa_type = IPADDR_V4; + p->prefix.ead_addr.ip.ipaddr_v4 = originator_ip; + memcpy(&p->prefix.ead_addr.esi, esi, sizeof(esi_t)); +} + +static inline void evpn_type1_prefix_global_copy(struct prefix_evpn *global_p, + const struct prefix_evpn *vni_p) +{ + memcpy(global_p, vni_p, sizeof(*global_p)); + global_p->prefix.ead_addr.ip.ipa_type = 0; + global_p->prefix.ead_addr.ip.ipaddr_v4.s_addr = 0; +} + +/* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ +static inline struct prefix_evpn *evpn_type1_prefix_vni_copy( + struct prefix_evpn *vni_p, + const struct prefix_evpn *global_p, + struct in_addr originator_ip) +{ + memcpy(vni_p, global_p, sizeof(*vni_p)); + vni_p->prefix.ead_addr.ip.ipa_type = IPADDR_V4; + vni_p->prefix.ead_addr.ip.ipaddr_v4 = originator_ip; + + return vni_p; +} + static inline int evpn_default_originate_set(struct bgp *bgp, afi_t afi, safi_t safi) { @@ -511,11 +539,6 @@ static inline void es_get_system_mac(esi_t *esi, memcpy(mac, &esi->val[1], ETH_ALEN); } -static inline int is_es_local(struct evpnes *es) -{ - return CHECK_FLAG(es->flags, EVPNES_LOCAL) ? 
1 : 0; -} - static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn) { struct bgp *bgp_evpn = NULL; @@ -526,6 +549,16 @@ static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn) vpn->advertise_svi_macip); } +static inline bool bgp_evpn_is_path_local(struct bgp *bgp, + struct bgp_path_info *pi) +{ + return (pi->peer == bgp->peer_self + && pi->type == ZEBRA_ROUTE_BGP + && pi->sub_type == BGP_ROUTE_STATIC); +} + +extern struct zclient *zclient; + extern void bgp_evpn_install_uninstall_default_route(struct bgp *bgp_vrf, afi_t afi, safi_t safi, bool add); @@ -563,10 +596,18 @@ extern struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, vrf_id_t tenant_vrf_id, struct in_addr mcast_grp); extern void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn); -extern struct evpnes *bgp_evpn_lookup_es(struct bgp *bgp, esi_t *esi); -extern struct evpnes *bgp_evpn_es_new(struct bgp *bgp, esi_t *esi, - struct ipaddr *originator_ip); -extern void bgp_evpn_es_free(struct bgp *bgp, struct evpnes *es); extern bool bgp_evpn_lookup_l3vni_l2vni_table(vni_t vni); extern int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn); +extern void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi, + struct bgp_dest *dest, + struct bgp_path_info **pi); +int vni_list_cmp(void *p1, void *p2); +extern int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, + struct bgp_node *rn); +extern struct bgp_node *bgp_global_evpn_node_get( + struct bgp_table *table, afi_t afi, safi_t safi, + const struct prefix_evpn *evp, struct prefix_rd *prd); +extern struct bgp_node *bgp_global_evpn_node_lookup( + struct bgp_table *table, afi_t afi, safi_t safi, + const struct prefix_evpn *evp, struct prefix_rd *prd); #endif /* _BGP_EVPN_PRIVATE_H */ diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c index 2584939378..3a198b20f6 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -33,6 +33,7 @@ #include "bgpd/bgp_evpn_vty.h" #include "bgpd/bgp_evpn.h" 
#include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_zebra.h" #include "bgpd/bgp_vty.h" #include "bgpd/bgp_errors.h" @@ -348,6 +349,8 @@ static void bgp_evpn_show_route_header(struct vty *vty, struct bgp *bgp, "Status codes: s suppressed, d damped, h history, * valid, > best, i - internal\n"); vty_out(vty, "Origin codes: i - IGP, e - EGP, ? - incomplete\n"); vty_out(vty, + "EVPN type-1 prefix: [1]:[ESI]:[EthTag]:[IPlen]:[VTEP-IP]\n"); + vty_out(vty, "EVPN type-2 prefix: [2]:[EthTag]:[MAClen]:[MAC]:[IPlen]:[IP]\n"); vty_out(vty, "EVPN type-3 prefix: [3]:[EthTag]:[IPlen]:[OrigIP]\n"); vty_out(vty, "EVPN type-4 prefix: [4]:[ESI]:[IPlen]:[OrigIP]\n"); @@ -461,47 +464,6 @@ static void display_l3vni(struct vty *vty, struct bgp *bgp_vrf, json_object_object_add(json, "exportRts", json_export_rtl); } -static void display_es(struct vty *vty, struct evpnes *es, json_object *json) -{ - struct in_addr *vtep; - char buf[ESI_STR_LEN]; - char buf1[RD_ADDRSTRLEN]; - char buf2[INET6_ADDRSTRLEN]; - struct listnode *node = NULL; - json_object *json_vteps = NULL; - - if (json) { - json_vteps = json_object_new_array(); - json_object_string_add(json, "esi", - esi_to_str(&es->esi, buf, sizeof(buf))); - json_object_string_add(json, "rd", - prefix_rd2str(&es->prd, buf1, - sizeof(buf1))); - json_object_string_add( - json, "originatorIp", - ipaddr2str(&es->originator_ip, buf2, sizeof(buf2))); - if (es->vtep_list) { - for (ALL_LIST_ELEMENTS_RO(es->vtep_list, node, vtep)) - json_object_array_add( - json_vteps, json_object_new_string( - inet_ntoa(*vtep))); - } - json_object_object_add(json, "vteps", json_vteps); - } else { - vty_out(vty, "ESI: %s\n", - esi_to_str(&es->esi, buf, sizeof(buf))); - vty_out(vty, " RD: %s\n", prefix_rd2str(&es->prd, buf1, - sizeof(buf1))); - vty_out(vty, " Originator-IP: %s\n", - ipaddr2str(&es->originator_ip, buf2, sizeof(buf2))); - if (es->vtep_list) { - vty_out(vty, " VTEP List:\n"); - for (ALL_LIST_ELEMENTS_RO(es->vtep_list, node, 
vtep)) - vty_out(vty, " %s\n", inet_ntoa(*vtep)); - } - } -} - static void display_vni(struct vty *vty, struct bgpevpn *vpn, json_object *json) { char buf1[RD_ADDRSTRLEN]; @@ -628,7 +590,7 @@ static void display_vni(struct vty *vty, struct bgpevpn *vpn, json_object *json) } static void show_esi_routes(struct bgp *bgp, - struct evpnes *es, + struct bgp_evpn_es *es, struct vty *vty, json_object *json) { @@ -979,48 +941,6 @@ static void show_l3vni_entry(struct vty *vty, struct bgp *bgp, } } -static void show_es_entry(struct hash_bucket *bucket, void *args[]) -{ - char buf[ESI_STR_LEN]; - char buf1[RD_ADDRSTRLEN]; - char buf2[INET6_ADDRSTRLEN]; - struct in_addr *vtep = NULL; - struct vty *vty = args[0]; - json_object *json = args[1]; - json_object *json_vteps = NULL; - struct listnode *node = NULL; - struct evpnes *es = (struct evpnes *)bucket->data; - - if (json) { - json_vteps = json_object_new_array(); - json_object_string_add(json, "esi", - esi_to_str(&es->esi, buf, sizeof(buf))); - json_object_string_add(json, "type", - is_es_local(es) ? "Local" : "Remote"); - json_object_string_add(json, "rd", - prefix_rd2str(&es->prd, buf1, - sizeof(buf1))); - json_object_string_add( - json, "originatorIp", - ipaddr2str(&es->originator_ip, buf2, sizeof(buf2))); - if (es->vtep_list) { - for (ALL_LIST_ELEMENTS_RO(es->vtep_list, node, vtep)) - json_object_array_add(json_vteps, - json_object_new_string( - inet_ntoa(*vtep))); - } - json_object_object_add(json, "vteps", json_vteps); - } else { - vty_out(vty, "%-30s %-6s %-21s %-15s %-6d\n", - esi_to_str(&es->esi, buf, sizeof(buf)), - is_es_local(es) ? "Local" : "Remote", - prefix_rd2str(&es->prd, buf1, sizeof(buf1)), - ipaddr2str(&es->originator_ip, buf2, - sizeof(buf2)), - es->vtep_list ? 
listcount(es->vtep_list) : 0); - } -} - static void show_vni_entry(struct hash_bucket *bucket, void *args[]) { struct vty *vty; @@ -2454,10 +2374,10 @@ static void evpn_show_route_vni_macip(struct vty *vty, struct bgp *bgp, static void evpn_show_routes_esi(struct vty *vty, struct bgp *bgp, esi_t *esi, json_object *json) { - struct evpnes *es = NULL; + struct bgp_evpn_es *es = NULL; /* locate the ES */ - es = bgp_evpn_lookup_es(bgp, esi); + es = bgp_evpn_es_find(esi); if (!es) { if (!json) vty_out(vty, "ESI not found\n"); @@ -2863,43 +2783,6 @@ static void evpn_show_all_routes(struct vty *vty, struct bgp *bgp, int type, } } -/* Display specific ES */ -static void evpn_show_es(struct vty *vty, struct bgp *bgp, esi_t *esi, - json_object *json) -{ - struct evpnes *es = NULL; - - es = bgp_evpn_lookup_es(bgp, esi); - if (es) { - display_es(vty, es, json); - } else { - if (json) { - vty_out(vty, "{}\n"); - } else { - vty_out(vty, "ESI not found\n"); - return; - } - } -} - -/* Display all ESs */ -static void evpn_show_all_es(struct vty *vty, struct bgp *bgp, - json_object *json) -{ - void *args[2]; - - if (!json) - vty_out(vty, "%-30s %-6s %-21s %-15s %-6s\n", - "ESI", "Type", "RD", "Originator-IP", "#VTEPs"); - - /* print all ESs */ - args[0] = vty; - args[1] = json; - hash_iterate(bgp->esihash, - (void (*)(struct hash_bucket *, void *))show_es_entry, - args); -} - /* * Display specified VNI (vty handler) */ @@ -4022,55 +3905,50 @@ DEFUN(show_bgp_l2vpn_evpn_vni, return CMD_SUCCESS; } -/* Disaply ES */ -DEFUN(show_bgp_l2vpn_evpn_es, +DEFPY(show_bgp_l2vpn_evpn_es_evi, + show_bgp_l2vpn_evpn_es_evi_cmd, + "show bgp l2vpn evpn es-evi [vni (1-16777215)$vni] [json$uj] [detail$detail]", + SHOW_STR + BGP_STR + L2VPN_HELP_STR + EVPN_HELP_STR + "ES per EVI\n" + "VxLAN Network Identifier\n" + "VNI\n" + JSON_STR + "Detailed information\n") +{ + if (vni) + bgp_evpn_es_evi_show_vni(vty, vni, !!uj, !!detail); + else + bgp_evpn_es_evi_show(vty, !!uj, !!detail); + + return CMD_SUCCESS; +} 
+ +DEFPY(show_bgp_l2vpn_evpn_es, show_bgp_l2vpn_evpn_es_cmd, - "show bgp l2vpn evpn es [ESI] [json]", + "show bgp l2vpn evpn es [NAME$esi_str|detail$detail] [json$uj]", SHOW_STR BGP_STR L2VPN_HELP_STR EVPN_HELP_STR - "ethernet-Segment\n" - "Ethernet-Segment Identifier\n" + "Ethernet Segment\n" + "ES ID\n" + "Detailed information\n" JSON_STR) { - int idx = 0; - bool uj = false; esi_t esi; - json_object *json = NULL; - struct bgp *bgp = NULL; - - memset(&esi, 0, sizeof(esi)); - uj = use_json(argc, argv); - bgp = bgp_get_evpn(); - if (!bgp) - return CMD_WARNING; - - if (!argv_find(argv, argc, "evpn", &idx)) - return CMD_WARNING; - - if ((uj && argc == ((idx + 1) + 2)) || - (!uj && argc == (idx + 1) + 1)) { - - /* show all ESs */ - evpn_show_all_es(vty, bgp, json); - } else { - - /* show a specific ES */ - - /* get the ESI - ESI-ID is at argv[5] */ - if (!str_to_esi(argv[idx + 2]->arg, &esi)) { - vty_out(vty, "%% Malformed ESI\n"); + if (esi_str) { + if (!str_to_esi(esi_str, &esi)) { + vty_out(vty, "%%Malformed ESI\n"); return CMD_WARNING; } - evpn_show_es(vty, bgp, &esi, json); - } + bgp_evpn_es_show_esi(vty, &esi, uj); + } else { - if (uj) { - vty_out(vty, "%s\n", json_object_to_json_string_ext( - json, JSON_C_TO_STRING_PRETTY)); - json_object_free(json); + bgp_evpn_es_show(vty, uj, !!detail); } return CMD_SUCCESS; @@ -4115,7 +3993,7 @@ DEFUN(show_bgp_l2vpn_evpn_summary, */ DEFUN(show_bgp_l2vpn_evpn_route, show_bgp_l2vpn_evpn_route_cmd, - "show bgp l2vpn evpn route [detail] [type <macip|2|multicast|3|es|4|prefix|5>] [json]", + "show bgp l2vpn evpn route [detail] [type <ead|1|macip|2|multicast|3|es|4|prefix|5>] [json]", SHOW_STR BGP_STR L2VPN_HELP_STR @@ -4123,6 +4001,7 @@ DEFUN(show_bgp_l2vpn_evpn_route, "EVPN route information\n" "Display Detailed Information\n" "Specify Route type\n" + "EAD (Type-1) route\n" "MAC-IP (Type-2) route\n" "MAC-IP (Type-2) route\n" "Multicast (Type-3) route\n" @@ -4158,9 +4037,12 @@ DEFUN(show_bgp_l2vpn_evpn_route, else if 
((strncmp(argv[type_idx + 1]->arg, "mu", 2) == 0) || (strmatch(argv[type_idx + 1]->arg, "3"))) type = BGP_EVPN_IMET_ROUTE; - else if ((strncmp(argv[type_idx + 1]->arg, "e", 1) == 0) + else if ((strncmp(argv[type_idx + 1]->arg, "es", 2) == 0) || (strmatch(argv[type_idx + 1]->arg, "4"))) type = BGP_EVPN_ES_ROUTE; + else if ((strncmp(argv[type_idx + 1]->arg, "ea", 2) == 0) + || (strmatch(argv[type_idx + 1]->arg, "1"))) + type = BGP_EVPN_AD_ROUTE; else if ((strncmp(argv[type_idx + 1]->arg, "p", 1) == 0) || (strmatch(argv[type_idx + 1]->arg, "5"))) type = BGP_EVPN_IP_PREFIX_ROUTE; @@ -4186,7 +4068,7 @@ DEFUN(show_bgp_l2vpn_evpn_route, */ DEFUN(show_bgp_l2vpn_evpn_route_rd, show_bgp_l2vpn_evpn_route_rd_cmd, - "show bgp l2vpn evpn route rd ASN:NN_OR_IP-ADDRESS:NN [type <macip|multicast|es|prefix>] [json]", + "show bgp l2vpn evpn route rd ASN:NN_OR_IP-ADDRESS:NN [type <ead|macip|multicast|es|prefix>] [json]", SHOW_STR BGP_STR L2VPN_HELP_STR @@ -4195,6 +4077,7 @@ DEFUN(show_bgp_l2vpn_evpn_route_rd, "Route Distinguisher\n" "ASN:XX or A.B.C.D:XX\n" "Specify Route type\n" + "EAD (Type-1) route\n" "MAC-IP (Type-2) route\n" "Multicast (Type-3) route\n" "Ethernet Segment route\n" @@ -4236,6 +4119,10 @@ DEFUN(show_bgp_l2vpn_evpn_route_rd, type = BGP_EVPN_MAC_IP_ROUTE; else if (strncmp(argv[type_idx + 1]->arg, "mu", 2) == 0) type = BGP_EVPN_IMET_ROUTE; + else if (strncmp(argv[type_idx + 1]->arg, "es", 2) == 0) + type = BGP_EVPN_ES_ROUTE; + else if (strncmp(argv[type_idx + 1]->arg, "ea", 2) == 0) + type = BGP_EVPN_AD_ROUTE; else if (strncmp(argv[type_idx + 1]->arg, "pr", 2) == 0) type = BGP_EVPN_IP_PREFIX_ROUTE; else @@ -4380,7 +4267,7 @@ DEFUN(show_bgp_l2vpn_evpn_route_esi, * Display per-VNI EVPN routing table. 
*/ DEFUN(show_bgp_l2vpn_evpn_route_vni, show_bgp_l2vpn_evpn_route_vni_cmd, - "show bgp l2vpn evpn route vni " CMD_VNI_RANGE " [<type <macip|multicast> | vtep A.B.C.D>] [json]", + "show bgp l2vpn evpn route vni " CMD_VNI_RANGE " [<type <ead|macip|multicast> | vtep A.B.C.D>] [json]", SHOW_STR BGP_STR L2VPN_HELP_STR @@ -4389,6 +4276,7 @@ DEFUN(show_bgp_l2vpn_evpn_route_vni, show_bgp_l2vpn_evpn_route_vni_cmd, "VXLAN Network Identifier\n" "VNI number\n" "Specify Route type\n" + "EAD (Type-1) route\n" "MAC-IP (Type-2) route\n" "Multicast (Type-3) route\n" "Remote VTEP\n" @@ -4426,6 +4314,8 @@ DEFUN(show_bgp_l2vpn_evpn_route_vni, show_bgp_l2vpn_evpn_route_vni_cmd, type = BGP_EVPN_MAC_IP_ROUTE; else if (strncmp(argv[idx + 5]->arg, "mu", 2) == 0) type = BGP_EVPN_IMET_ROUTE; + else if (strncmp(argv[idx + 5]->arg, "ea", 2) == 0) + type = BGP_EVPN_AD_ROUTE; else return CMD_WARNING; } else if (strncmp(argv[idx + 4]->arg, "vtep", 4) == 0) { @@ -4711,17 +4601,22 @@ DEFUN(show_bgp_l2vpn_evpn_import_rt, return CMD_SUCCESS; } -DEFUN(test_adv_evpn_type4_route, - test_adv_evpn_type4_route_cmd, - "advertise es ESI", - "Advertise EVPN ES route\n" +DEFPY(test_es_add, + test_es_add_cmd, + "[no$no] test es NAME$esi_str [state NAME$state_str]", + NO_STR + "Test\n" "Ethernet-segment\n" - "Ethernet-Segment Identifier\n") + "Ethernet-Segment Identifier\n" + "ES link state\n" + "up|down\n" +) { int ret = 0; esi_t esi; struct bgp *bgp; - struct ipaddr vtep_ip; + struct in_addr vtep_ip; + bool oper_up; bgp = bgp_get_evpn(); if (!bgp) { @@ -4729,33 +4624,47 @@ DEFUN(test_adv_evpn_type4_route, return CMD_WARNING; } - if (!str_to_esi(argv[2]->arg, &esi)) { + if (!str_to_esi(esi_str, &esi)) { vty_out(vty, "%%Malformed ESI\n"); return CMD_WARNING; } - vtep_ip.ipa_type = IPADDR_V4; - vtep_ip.ipaddr_v4 = bgp->router_id; + if (no) { + ret = bgp_evpn_local_es_del(bgp, &esi); + if (ret == -1) { + vty_out(vty, "%%Failed to delete ES\n"); + return CMD_WARNING; + } + } else { + if (state_str && 
!strcmp(state_str, "up")) + oper_up = true; + else + oper_up = false; + vtep_ip = bgp->router_id; - ret = bgp_evpn_local_es_add(bgp, &esi, &vtep_ip); - if (ret == -1) { - vty_out(vty, "%%Failed to EVPN advertise type-4 route\n"); - return CMD_WARNING; + ret = bgp_evpn_local_es_add(bgp, &esi, vtep_ip, oper_up); + if (ret == -1) { + vty_out(vty, "%%Failed to add ES\n"); + return CMD_WARNING; + } } return CMD_SUCCESS; } -DEFUN(test_withdraw_evpn_type4_route, - test_withdraw_evpn_type4_route_cmd, - "withdraw es ESI", - "Advertise EVPN ES route\n" +DEFPY(test_es_vni_add, + test_es_vni_add_cmd, + "[no$no] test es NAME$esi_str vni (1-16777215)$vni", + NO_STR + "Test\n" "Ethernet-segment\n" - "Ethernet-Segment Identifier\n") + "Ethernet-Segment Identifier\n" + "VNI\n" + "1-16777215\n" +) { int ret = 0; esi_t esi; struct bgp *bgp; - struct ipaddr vtep_ip; bgp = bgp_get_evpn(); if (!bgp) { @@ -4763,22 +4672,23 @@ DEFUN(test_withdraw_evpn_type4_route, return CMD_WARNING; } - if (!bgp->peer_self) { - vty_out(vty, "%%BGP instance doesn't have self peer\n"); - return CMD_WARNING; - } - - if (!str_to_esi(argv[2]->arg, &esi)) { + if (!str_to_esi(esi_str, &esi)) { vty_out(vty, "%%Malformed ESI\n"); return CMD_WARNING; } - vtep_ip.ipa_type = IPADDR_V4; - vtep_ip.ipaddr_v4 = bgp->router_id; - ret = bgp_evpn_local_es_del(bgp, &esi, &vtep_ip); - if (ret == -1) { - vty_out(vty, "%%Failed to withdraw EVPN type-4 route\n"); - return CMD_WARNING; + if (no) { + ret = bgp_evpn_local_es_evi_del(bgp, &esi, vni); + if (ret == -1) { + vty_out(vty, "%%Failed to deref ES VNI\n"); + return CMD_WARNING; + } + } else { + ret = bgp_evpn_local_es_evi_add(bgp, &esi, vni); + if (ret == -1) { + vty_out(vty, "%%Failed to ref ES VNI\n"); + return CMD_WARNING; + } } return CMD_SUCCESS; } @@ -5836,11 +5746,12 @@ void bgp_ethernetvpn_init(void) install_element(BGP_EVPN_NODE, &bgp_evpn_advertise_pip_ip_mac_cmd); /* test commands */ - install_element(BGP_EVPN_NODE, &test_adv_evpn_type4_route_cmd); - 
install_element(BGP_EVPN_NODE, &test_withdraw_evpn_type4_route_cmd); + install_element(BGP_EVPN_NODE, &test_es_add_cmd); + install_element(BGP_EVPN_NODE, &test_es_vni_add_cmd); /* "show bgp l2vpn evpn" commands. */ install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_es_cmd); + install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_es_evi_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_vni_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_summary_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_route_cmd); diff --git a/bgpd/bgp_memory.c b/bgpd/bgp_memory.c index 41c4108c0a..8bdab16680 100644 --- a/bgpd/bgp_memory.c +++ b/bgpd/bgp_memory.c @@ -116,8 +116,11 @@ DEFINE_MTYPE(BGPD, LCOMMUNITY_STR, "Large Community display string") DEFINE_MTYPE(BGPD, LCOMMUNITY_VAL, "Large Community value") DEFINE_MTYPE(BGPD, BGP_EVPN, "BGP EVPN Information") -DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VTEP, "BGP EVPN ES VTEP Ip") +DEFINE_MTYPE(BGPD, BGP_EVPN_MH_INFO, "BGP EVPN Multihoming Information") +DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VTEP, "BGP EVPN ES VTEP") +DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI_VTEP, "BGP EVPN ES-EVI VTEP") DEFINE_MTYPE(BGPD, BGP_EVPN_ES, "BGP EVPN ESI Information") +DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI, "BGP EVPN ES-per-EVI Information") DEFINE_MTYPE(BGPD, BGP_EVPN_IMPORT_RT, "BGP EVPN Import RT") DEFINE_MTYPE(BGPD, BGP_EVPN_VRF_IMPORT_RT, "BGP EVPN VRF Import RT") DEFINE_MTYPE(BGPD, BGP_EVPN_MACIP, "BGP EVPN MAC IP") diff --git a/bgpd/bgp_memory.h b/bgpd/bgp_memory.h index 5428022551..d1ae392c65 100644 --- a/bgpd/bgp_memory.h +++ b/bgpd/bgp_memory.h @@ -111,8 +111,11 @@ DECLARE_MTYPE(LCOMMUNITY) DECLARE_MTYPE(LCOMMUNITY_STR) DECLARE_MTYPE(LCOMMUNITY_VAL) +DECLARE_MTYPE(BGP_EVPN_MH_INFO) DECLARE_MTYPE(BGP_EVPN_ES) +DECLARE_MTYPE(BGP_EVPN_ES_EVI) DECLARE_MTYPE(BGP_EVPN_ES_VTEP) +DECLARE_MTYPE(BGP_EVPN_ES_EVI_VTEP) DECLARE_MTYPE(BGP_EVPN) DECLARE_MTYPE(BGP_EVPN_IMPORT_RT) diff --git a/bgpd/bgp_rd.h b/bgpd/bgp_rd.h index b5ad9d624d..2aee44c721 100644 --- a/bgpd/bgp_rd.h +++ 
b/bgpd/bgp_rd.h @@ -33,6 +33,7 @@ #endif #define RD_ADDRSTRLEN 28 +#define RD_BYTES 8 struct rd_as { uint16_t type; diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 6b2a5f55b7..80ffa18424 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -79,6 +79,7 @@ #include "bgpd/bgp_encap_types.h" #include "bgpd/bgp_encap_tlv.h" #include "bgpd/bgp_evpn.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_evpn_vty.h" #include "bgpd/bgp_flowspec.h" #include "bgpd/bgp_flowspec_util.h" @@ -544,6 +545,11 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, uint32_t new_mm_seq; uint32_t exist_mm_seq; int nh_cmp; + esi_t *exist_esi; + esi_t *new_esi; + bool same_esi; + bool old_proxy; + bool new_proxy; *paths_eq = 0; @@ -620,6 +626,47 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, } } + new_esi = bgp_evpn_attr_get_esi(newattr); + exist_esi = bgp_evpn_attr_get_esi(existattr); + if (bgp_evpn_is_esi_valid(new_esi) && + !memcmp(new_esi, exist_esi, sizeof(esi_t))) { + same_esi = true; + } else { + same_esi = false; + } + + /* If both paths have the same non-zero ES and + * one path is local it wins. + * PS: Note the local path wins even if the remote + * has the higher MM seq. The local path's + * MM seq will be fixed up to match the highest + * rem seq, subsequently. 
+ */ + if (same_esi) { + char esi_buf[ESI_STR_LEN]; + + if (bgp_evpn_is_path_local(bgp, new)) { + *reason = bgp_path_selection_evpn_local_path; + if (debug) + zlog_debug( + "%s: %s wins over %s as ES %s is same and local", + pfx_buf, new_buf, exist_buf, + esi_to_str(new_esi, esi_buf, + sizeof(esi_buf))); + return 1; + } + if (bgp_evpn_is_path_local(bgp, exist)) { + *reason = bgp_path_selection_evpn_local_path; + if (debug) + zlog_debug( + "%s: %s loses to %s as ES %s is same and local", + pfx_buf, new_buf, exist_buf, + esi_to_str(new_esi, esi_buf, + sizeof(esi_buf))); + return 0; + } + } + new_mm_seq = mac_mobility_seqnum(newattr); exist_mm_seq = mac_mobility_seqnum(existattr); @@ -643,6 +690,30 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, return 0; } + /* if the sequence numbers and ESI are the same and one path + * is non-proxy it wins (over proxy) + */ + new_proxy = bgp_evpn_attr_is_proxy(newattr); + old_proxy = bgp_evpn_attr_is_proxy(existattr); + if (same_esi && bgp_evpn_attr_is_local_es(newattr) && + old_proxy != new_proxy) { + if (!new_proxy) { + *reason = bgp_path_selection_evpn_non_proxy; + if (debug) + zlog_debug( + "%s: %s wins over %s, same seq/es and non-proxy", + pfx_buf, new_buf, exist_buf); + return 1; + } + + *reason = bgp_path_selection_evpn_non_proxy; + if (debug) + zlog_debug( + "%s: %s loses to %s, same seq/es and non-proxy", + pfx_buf, new_buf, exist_buf); + return 0; + } + /* * if sequence numbers are the same path with the lowest IP * wins @@ -1175,6 +1246,17 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, return 1; } + +int bgp_evpn_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, + struct bgp_path_info *exist, int *paths_eq) +{ + enum bgp_path_selection_reason reason; + char pfx_buf[PREFIX2STR_BUFFER]; + + return bgp_path_info_cmp(bgp, new, exist, paths_eq, NULL, 0, pfx_buf, + AFI_L2VPN, SAFI_EVPN, &reason); +} + /* Compare two bgp route entity. 
Return -1 if new is preferred, 1 if exist * is preferred, or 0 if they are the same (usually will only occur if * multipath is enabled @@ -3172,19 +3254,10 @@ struct bgp_path_info *info_make(int type, int sub_type, unsigned short instance, } static void overlay_index_update(struct attr *attr, - struct eth_segment_id *eth_s_id, union gw_addr *gw_ip) { if (!attr) return; - - if (eth_s_id == NULL) { - memset(&(attr->evpn_overlay.eth_s_id), 0, - sizeof(struct eth_segment_id)); - } else { - memcpy(&(attr->evpn_overlay.eth_s_id), eth_s_id, - sizeof(struct eth_segment_id)); - } if (gw_ip == NULL) { memset(&(attr->evpn_overlay.gw_ip), 0, sizeof(union gw_addr)); } else { @@ -3194,20 +3267,17 @@ static void overlay_index_update(struct attr *attr, } static bool overlay_index_equal(afi_t afi, struct bgp_path_info *path, - struct eth_segment_id *eth_s_id, union gw_addr *gw_ip) { - struct eth_segment_id *path_eth_s_id, *path_eth_s_id_remote; union gw_addr *path_gw_ip, *path_gw_ip_remote; union { - struct eth_segment_id esi; + esi_t esi; union gw_addr ip; } temp; if (afi != AFI_L2VPN) return true; - path_eth_s_id = &(path->attr->evpn_overlay.eth_s_id); path_gw_ip = &(path->attr->evpn_overlay.gw_ip); if (gw_ip == NULL) { @@ -3216,17 +3286,7 @@ static bool overlay_index_equal(afi_t afi, struct bgp_path_info *path, } else path_gw_ip_remote = gw_ip; - if (eth_s_id == NULL) { - memset(&temp, 0, sizeof(temp)); - path_eth_s_id_remote = &temp.esi; - } else - path_eth_s_id_remote = eth_s_id; - - if (!memcmp(path_gw_ip, path_gw_ip_remote, sizeof(union gw_addr))) - return false; - - return !memcmp(path_eth_s_id, path_eth_s_id_remote, - sizeof(struct eth_segment_id)); + return !!memcmp(path_gw_ip, path_gw_ip_remote, sizeof(union gw_addr)); } /* Check if received nexthop is valid or not. */ @@ -3521,7 +3581,7 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, num_labels * sizeof(mpls_label_t)) == 0) && (overlay_index_equal( - afi, pi, evpn == NULL ? 
NULL : &evpn->eth_s_id, + afi, pi, evpn == NULL ? NULL : &evpn->gw_ip))) { if (CHECK_FLAG(bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING) @@ -3746,7 +3806,7 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, /* Update Overlay Index */ if (afi == AFI_L2VPN) { overlay_index_update( - pi->attr, evpn == NULL ? NULL : &evpn->eth_s_id, + pi->attr, evpn == NULL ? NULL : &evpn->gw_ip); } @@ -3912,7 +3972,6 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, /* Update Overlay Index */ if (afi == AFI_L2VPN) { overlay_index_update(new->attr, - evpn == NULL ? NULL : &evpn->eth_s_id, evpn == NULL ? NULL : &evpn->gw_ip); } /* Nexthop reachability check. */ @@ -5301,7 +5360,7 @@ static void bgp_static_update_safi(struct bgp *bgp, const struct prefix *p, else if (bgp_static->gatewayIp.family == AF_INET6) memcpy(&(add.ipv6), &(bgp_static->gatewayIp.u.prefix6), sizeof(struct in6_addr)); - overlay_index_update(&attr, bgp_static->eth_s_id, &add); + memcpy(&attr.esi, bgp_static->eth_s_id, sizeof(esi_t)); if (bgp_static->encap_tunneltype == BGP_ENCAP_TYPE_VXLAN) { struct bgp_encap_type_vxlan bet; memset(&bet, 0, sizeof(struct bgp_encap_type_vxlan)); @@ -5352,7 +5411,7 @@ static void bgp_static_update_safi(struct bgp *bgp, const struct prefix *p, if (pi) { memset(&add, 0, sizeof(union gw_addr)); if (attrhash_cmp(pi->attr, attr_new) - && overlay_index_equal(afi, pi, bgp_static->eth_s_id, &add) + && overlay_index_equal(afi, pi, &add) && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { bgp_dest_unlock_node(dest); bgp_attr_unintern(&attr_new); @@ -5856,7 +5915,7 @@ int bgp_static_set_safi(afi_t afi, safi_t safi, struct vty *vty, if (esi) { bgp_static->eth_s_id = XCALLOC(MTYPE_ATTR, - sizeof(struct eth_segment_id)); + sizeof(esi_t)); str2esi(esi, bgp_static->eth_s_id); } if (routermac) { @@ -7582,6 +7641,7 @@ void route_vty_out(struct vty *vty, const struct prefix *p, const char *nexthop_vrfname = VRF_DEFAULT_NAME; char *nexthop_hostname 
= bgp_nexthop_hostname(path->peer, path->nexthop); + char esi_buf[ESI_STR_LEN]; if (json_paths) json_path = json_object_new_object(); @@ -7959,6 +8019,11 @@ void route_vty_out(struct vty *vty, const struct prefix *p, vty_out(vty, "%s", bgp_origin_str[attr->origin]); if (json_paths) { + if (bgp_evpn_is_esi_valid(&attr->esi)) { + json_object_string_add(json_path, "esi", + esi_to_str(&attr->esi, + esi_buf, sizeof(esi_buf))); + } if (safi == SAFI_EVPN && attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)) { json_ext_community = json_object_new_object(); @@ -8004,10 +8069,18 @@ void route_vty_out(struct vty *vty, const struct prefix *p, } else { vty_out(vty, "\n"); - if (safi == SAFI_EVPN && - attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)) { - vty_out(vty, "%*s", 20, " "); - vty_out(vty, "%s\n", attr->ecommunity->str); + if (safi == SAFI_EVPN) { + if (bgp_evpn_is_esi_valid(&attr->esi)) { + vty_out(vty, "%*s", 20, " "); + vty_out(vty, "ESI:%s\n", + esi_to_str(&attr->esi, + esi_buf, sizeof(esi_buf))); + } + if (attr->flag & + ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)) { + vty_out(vty, "%*s", 20, " "); + vty_out(vty, "%s\n", attr->ecommunity->str); + } } #ifdef ENABLE_BGP_VNC @@ -8340,15 +8413,6 @@ void route_vty_out_overlay(struct vty *vty, const struct prefix *p, } } - char *str = esi2str(&(attr->evpn_overlay.eth_s_id)); - - if (!json_path) - vty_out(vty, "%s", str); - else - json_object_string_add(json_overlay, "esi", str); - - XFREE(MTYPE_TMP, str); - if (is_evpn_prefix_ipaddr_v4((struct prefix_evpn *)p)) { inet_ntop(AF_INET, &(attr->evpn_overlay.gw_ip.ipv4), buf, BUFSIZ); @@ -8632,6 +8696,10 @@ static const char *bgp_path_selection_reason2str( return "EVPN sequence number"; case bgp_path_selection_evpn_lower_ip: return "EVPN lower IP"; + case bgp_path_selection_evpn_local_path: + return "EVPN local ES path"; + case bgp_path_selection_evpn_non_proxy: + return "EVPN non proxy"; case bgp_path_selection_weight: return "Weight"; case bgp_path_selection_local_pref: 
@@ -8670,9 +8738,67 @@ static const char *bgp_path_selection_reason2str( return "Invalid (internal error)"; } -void route_vty_out_detail(struct vty *vty, struct bgp *bgp, struct bgp_dest *bn, - struct bgp_path_info *path, afi_t afi, safi_t safi, - json_object *json_paths) +static void route_vty_out_detail_es_info(struct vty *vty, + struct attr *attr, json_object *json_path) +{ + char esi_buf[ESI_STR_LEN]; + bool es_local = !!CHECK_FLAG(attr->es_flags, ATTR_ES_IS_LOCAL); + bool peer_router = !!CHECK_FLAG(attr->es_flags, + ATTR_ES_PEER_ROUTER); + bool peer_active = !!CHECK_FLAG(attr->es_flags, + ATTR_ES_PEER_ACTIVE); + bool peer_proxy = !!CHECK_FLAG(attr->es_flags, + ATTR_ES_PEER_PROXY); + + esi_to_str(&attr->esi, esi_buf, sizeof(esi_buf)); + if (json_path) { + json_object *json_es_info = NULL; + + json_object_string_add( + json_path, "esi", + esi_buf); + if (es_local || bgp_evpn_attr_is_sync(attr)) { + json_es_info = json_object_new_object(); + if (es_local) + json_object_boolean_true_add( + json_es_info, "localEs"); + if (peer_active) + json_object_boolean_true_add( + json_es_info, "peerActive"); + if (peer_proxy) + json_object_boolean_true_add( + json_es_info, "peerProxy"); + if (peer_router) + json_object_boolean_true_add( + json_es_info, "peerRouter"); + if (attr->mm_sync_seqnum) + json_object_int_add( + json_es_info, "peerSeq", + attr->mm_sync_seqnum); + json_object_object_add( + json_path, "es_info", + json_es_info); + } + } else { + if (bgp_evpn_attr_is_sync(attr)) + vty_out(vty, + " ESI %s %s peer-info: (%s%s%sMM: %d)\n", + esi_buf, + es_local ? "local-es":"", + peer_proxy ? "proxy " : "", + peer_active ? "active ":"", + peer_router ? "router ":"", + attr->mm_sync_seqnum); + else + vty_out(vty, " ESI %s %s\n", + esi_buf, + es_local ? 
"local-es":""); + } +} + +void route_vty_out_detail(struct vty *vty, struct bgp *bgp, + struct bgp_dest *bn, struct bgp_path_info *path, + afi_t afi, safi_t safi, json_object *json_paths) { char buf[INET6_ADDRSTRLEN]; char buf1[BUFSIZ]; @@ -9142,6 +9268,11 @@ void route_vty_out_detail(struct vty *vty, struct bgp *bgp, struct bgp_dest *bn, "used"); } + if (safi == SAFI_EVPN && + bgp_evpn_is_esi_valid(&attr->esi)) { + route_vty_out_detail_es_info(vty, attr, json_path); + } + /* Line 3 display Origin, Med, Locpref, Weight, Tag, valid, * Int/Ext/Local, Atomic, best */ if (json_paths) @@ -13316,6 +13447,7 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp, char buf[PREFIX_STRLEN * 2]; char buf2[SU_ADDRSTRLEN]; char rdbuf[RD_ADDRSTRLEN]; + char esi_buf[ESI_BYTES]; /* Network configuration. */ for (pdest = bgp_table_top(bgp->route[afi][safi]); pdest; @@ -13331,13 +13463,13 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp, continue; char *macrouter = NULL; - char *esi = NULL; if (bgp_static->router_mac) macrouter = prefix_mac2str( bgp_static->router_mac, NULL, 0); if (bgp_static->eth_s_id) - esi = esi2str(bgp_static->eth_s_id); + esi_to_str(bgp_static->eth_s_id, + esi_buf, sizeof(esi_buf)); p = bgp_dest_get_prefix(dest); prd = (struct prefix_rd *)bgp_dest_get_prefix(pdest); @@ -13368,11 +13500,10 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp, " network %s rd %s ethtag %u label %u esi %s gwip %s routermac %s\n", buf, rdbuf, p->u.prefix_evpn.prefix_addr.eth_tag, - decode_label(&bgp_static->label), esi, buf2, + decode_label(&bgp_static->label), esi_buf, buf2, macrouter); XFREE(MTYPE_TMP, macrouter); - XFREE(MTYPE_TMP, esi); } } } diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 32c65c8fac..3f734d2672 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -99,6 +99,7 @@ enum bgp_show_adj_route_type { #define BGP_NLRI_PARSE_ERROR_FLOWSPEC_NLRI_SIZELIMIT -12 #define 
BGP_NLRI_PARSE_ERROR_FLOWSPEC_BAD_FORMAT -13 #define BGP_NLRI_PARSE_ERROR_ADDRESS_FAMILY -14 +#define BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE -15 #define BGP_NLRI_PARSE_ERROR -32 /* Ancillary information to struct bgp_path_info, @@ -303,7 +304,7 @@ struct bgp_static { mpls_label_t label; /* EVPN */ - struct eth_segment_id *eth_s_id; + esi_t *eth_s_id; struct ethaddr *router_mac; uint16_t encap_tunneltype; struct prefix gatewayIp; @@ -681,4 +682,6 @@ extern int bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi); extern bool bgp_update_martian_nexthop(struct bgp *bgp, afi_t afi, safi_t safi, uint8_t type, uint8_t stype, struct attr *attr, struct bgp_dest *dest); +extern int bgp_evpn_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, + struct bgp_path_info *exist, int *paths_eq); #endif /* _QUAGGA_BGP_ROUTE_H */ diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h index a9ec36d29b..cf0086b52e 100644 --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -51,6 +51,8 @@ enum bgp_path_selection_reason { bgp_path_selection_first, bgp_path_selection_evpn_sticky_mac, bgp_path_selection_evpn_seq, + bgp_path_selection_evpn_local_path, + bgp_path_selection_evpn_non_proxy, bgp_path_selection_evpn_lower_ip, bgp_path_selection_weight, bgp_path_selection_local_pref, diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index c53286cb36..0268b7ec9d 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -65,6 +65,7 @@ #include "bgpd/bgp_io.h" #include "bgpd/bgp_evpn.h" #include "bgpd/bgp_evpn_vty.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_addpath.h" #include "bgpd/bgp_mac.h" #include "bgpd/bgp_flowspec.h" @@ -1093,7 +1094,8 @@ DEFUN_HIDDEN (bgp_local_mac, return CMD_WARNING; } - rv = bgp_evpn_local_macip_add(bgp, vni, &mac, &ip, 0 /* flags */, seq); + rv = bgp_evpn_local_macip_add(bgp, vni, &mac, &ip, 0 /* flags */, seq, + zero_esi); if (rv < 0) { vty_out(vty, "Internal error\n"); return CMD_WARNING; diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 
0a55a46ed4..87936f1dd6 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -60,6 +60,7 @@ #include "bgpd/bgp_labelpool.h" #include "bgpd/bgp_pbr.h" #include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_mac.h" /* All information about zebra. */ @@ -1143,8 +1144,7 @@ static bool update_ipv6nh_for_route_install(int nh_othervrf, struct bgp *nh_bgp, api_nh->ifindex = 0; } } - if (nexthop) - api_nh->gate.ipv6 = *nexthop; + api_nh->gate.ipv6 = *nexthop; return true; } @@ -2499,17 +2499,66 @@ static void bgp_zebra_connected(struct zclient *zclient) BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer); } -static int bgp_zebra_process_local_es(ZAPI_CALLBACK_ARGS) +static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS) +{ + esi_t esi; + struct bgp *bgp = NULL; + struct stream *s = NULL; + char buf[ESI_STR_LEN]; + struct in_addr originator_ip; + uint8_t active; + + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) + return 0; + + s = zclient->ibuf; + stream_get(&esi, s, sizeof(esi_t)); + originator_ip.s_addr = stream_get_ipv4(s); + active = stream_getc(s); + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Rx add ESI %s originator-ip %s active %u", + esi_to_str(&esi, buf, sizeof(buf)), + inet_ntoa(originator_ip), + active); + + bgp_evpn_local_es_add(bgp, &esi, originator_ip, active); + + return 0; +} + +static int bgp_zebra_process_local_es_del(ZAPI_CALLBACK_ARGS) { esi_t esi; struct bgp *bgp = NULL; struct stream *s = NULL; char buf[ESI_STR_LEN]; - char buf1[INET6_ADDRSTRLEN]; - struct ipaddr originator_ip; memset(&esi, 0, sizeof(esi_t)); - memset(&originator_ip, 0, sizeof(struct ipaddr)); + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) + return 0; + + s = zclient->ibuf; + stream_get(&esi, s, sizeof(esi_t)); + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Rx del ESI %s", + esi_to_str(&esi, buf, sizeof(buf))); + + bgp_evpn_local_es_del(bgp, &esi); + + return 0; +} + +static int bgp_zebra_process_local_es_evi(ZAPI_CALLBACK_ARGS) 
+{ + esi_t esi; + vni_t vni; + struct bgp *bgp; + struct stream *s; + char buf[ESI_STR_LEN]; bgp = bgp_lookup_by_vrf_id(vrf_id); if (!bgp) @@ -2517,18 +2566,18 @@ static int bgp_zebra_process_local_es(ZAPI_CALLBACK_ARGS) s = zclient->ibuf; stream_get(&esi, s, sizeof(esi_t)); - stream_get(&originator_ip, s, sizeof(struct ipaddr)); + vni = stream_getl(s); if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("Rx %s ESI %s originator-ip %s", - (cmd == ZEBRA_LOCAL_ES_ADD) ? "add" : "del", - esi_to_str(&esi, buf, sizeof(buf)), - ipaddr2str(&originator_ip, buf1, sizeof(buf1))); + zlog_debug("Rx %s ESI %s VNI %u", + ZEBRA_VNI_ADD ? "add" : "del", + esi_to_str(&esi, buf, sizeof(buf)), vni); - if (cmd == ZEBRA_LOCAL_ES_ADD) - bgp_evpn_local_es_add(bgp, &esi, &originator_ip); + if (cmd == ZEBRA_LOCAL_ES_EVI_ADD) + bgp_evpn_local_es_evi_add(bgp, &esi, vni); else - bgp_evpn_local_es_del(bgp, &esi, &originator_ip); + bgp_evpn_local_es_evi_del(bgp, &esi, vni); + return 0; } @@ -2628,6 +2677,8 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS) uint8_t flags = 0; uint32_t seqnum = 0; int state = 0; + char buf2[ESI_STR_LEN]; + esi_t esi; memset(&ip, 0, sizeof(ip)); s = zclient->ibuf; @@ -2651,6 +2702,7 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS) if (cmd == ZEBRA_MACIP_ADD) { flags = stream_getc(s); seqnum = stream_getl(s); + stream_get(&esi, s, sizeof(esi_t)); } else { state = stream_getl(s); } @@ -2660,15 +2712,15 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS) return 0; if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("%u:Recv MACIP %s flags 0x%x MAC %s IP %s VNI %u seq %u state %d", + zlog_debug("%u:Recv MACIP %s f 0x%x MAC %s IP %s VNI %u seq %u state %d ESI %s", vrf_id, (cmd == ZEBRA_MACIP_ADD) ? 
"Add" : "Del", flags, prefix_mac2str(&mac, buf, sizeof(buf)), ipaddr2str(&ip, buf1, sizeof(buf1)), vni, seqnum, - state); + state, esi_to_str(&esi, buf2, sizeof(buf2))); if (cmd == ZEBRA_MACIP_ADD) return bgp_evpn_local_macip_add(bgp, vni, &mac, &ip, - flags, seqnum); + flags, seqnum, &esi); else return bgp_evpn_local_macip_del(bgp, vni, &mac, &ip, state); } @@ -2801,9 +2853,11 @@ void bgp_zebra_init(struct thread_master *master, unsigned short instance) zclient->nexthop_update = bgp_read_nexthop_update; zclient->import_check_update = bgp_read_import_check_update; zclient->fec_update = bgp_read_fec_update; - zclient->local_es_add = bgp_zebra_process_local_es; - zclient->local_es_del = bgp_zebra_process_local_es; + zclient->local_es_add = bgp_zebra_process_local_es_add; + zclient->local_es_del = bgp_zebra_process_local_es_del; zclient->local_vni_add = bgp_zebra_process_local_vni; + zclient->local_es_evi_add = bgp_zebra_process_local_es_evi; + zclient->local_es_evi_del = bgp_zebra_process_local_es_evi; zclient->local_vni_del = bgp_zebra_process_local_vni; zclient->local_macip_add = bgp_zebra_process_local_macip; zclient->local_macip_del = bgp_zebra_process_local_macip; diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index f0ee800287..c9e6fd2ac0 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -87,6 +87,7 @@ #include "bgpd/bgp_pbr.h" #include "bgpd/bgp_addpath.h" #include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_mac.h" DEFINE_MTYPE_STATIC(BGPD, PEER_TX_SHUTDOWN_MSG, "Peer shutdown message (TX)"); @@ -1227,6 +1228,10 @@ struct peer *peer_new(struct bgp *bgp) peer->addpath_type[afi][safi] = BGP_ADDPATH_NONE; } + /* set nexthop-unchanged for l2vpn evpn by default */ + SET_FLAG(peer->af_flags[AFI_L2VPN][SAFI_EVPN], + PEER_FLAG_NEXTHOP_UNCHANGED); + SET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN); /* Initialize per peer bgp GR FSM */ @@ -6935,6 +6940,7 @@ void bgp_master_init(struct thread_master *master, const int buffer_size) /* mpls label 
dynamic allocation pool */ bgp_lp_init(bm->master, &bm->labelpool); + bgp_evpn_mh_init(); QOBJ_REG(bm, bgp_master); } @@ -7134,6 +7140,7 @@ void bgp_terminate(void) BGP_TIMER_OFF(bm->t_rmap_update); bgp_mac_finish(); + bgp_evpn_mh_finish(); } struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp, diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 8eea2a5f60..966de87830 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -165,6 +165,9 @@ struct bgp_master { /* How big should we set the socket buffer size */ uint32_t socket_buffer; + /* EVPN multihoming */ + struct bgp_evpn_mh_info *mh_info; + bool terminating; /* global flag that sigint terminate seen */ QOBJ_FIELDS }; @@ -661,9 +664,6 @@ struct bgp { struct bgp_pbr_config *bgp_pbr_cfg; - /* local esi hash table */ - struct hash *esihash; - /* Count of peers in established state */ uint32_t established_peers; diff --git a/bgpd/subdir.am b/bgpd/subdir.am index 6b5c0fe719..a5393e25ac 100644 --- a/bgpd/subdir.am +++ b/bgpd/subdir.am @@ -15,6 +15,7 @@ vtysh_scan += \ bgpd/bgp_bfd.c \ bgpd/bgp_debug.c \ bgpd/bgp_dump.c \ + bgpd/bgp_evpn_mh.c \ bgpd/bgp_evpn_vty.c \ bgpd/bgp_filter.c \ bgpd/bgp_mplsvpn.c \ @@ -65,6 +66,7 @@ bgpd_libbgp_a_SOURCES = \ bgpd/bgp_encap_tlv.c \ bgpd/bgp_errors.c \ bgpd/bgp_evpn.c \ + bgpd/bgp_evpn_mh.c \ bgpd/bgp_evpn_vty.c \ bgpd/bgp_filter.c \ bgpd/bgp_flowspec.c \ @@ -139,6 +141,7 @@ noinst_HEADERS += \ bgpd/bgp_encap_types.h \ bgpd/bgp_errors.h \ bgpd/bgp_evpn.h \ + bgpd/bgp_evpn_mh.h \ bgpd/bgp_evpn_private.h \ bgpd/bgp_evpn_vty.h \ bgpd/bgp_filter.h \ diff --git a/doc/developer/building-frr-for-openwrt.rst b/doc/developer/building-frr-for-openwrt.rst index 5d8f82f27e..9bd1296dad 100644 --- a/doc/developer/building-frr-for-openwrt.rst +++ b/doc/developer/building-frr-for-openwrt.rst @@ -1,6 +1,8 @@ -OpenWRT +OpenWrt ======= +General info about OpenWrt buildsystem: `link <https://openwrt.org/docs/guide-developer/build-system/start>`_. 
+ Prepare build environment ------------------------- @@ -13,16 +15,16 @@ For Debian based distributions, run: For other environments, instructions can be found in the `official documentation -<https://wiki.openwrt.org/doc/howto/buildroot.exigence#examples_of_package_installations>`_. +<https://openwrt.org/docs/guide-developer/build-system/install-buildsystem#examples_of_package_installations>`_. -Get OpenWRT Sources (from Git) +Get OpenWrt Sources (from Git) ------------------------------ .. note:: - The OpenWRT build will fail if you run it as root. So take care to run it as a nonprivileged user. + The OpenWrt build will fail if you run it as root. So take care to run it as a nonprivileged user. -Clone the OpenWRT sources and retrieve the package feeds +Clone the OpenWrt sources and retrieve the package feeds :: @@ -30,21 +32,15 @@ Clone the OpenWRT sources and retrieve the package feeds cd openwrt ./scripts/feeds update -a ./scripts/feeds install -a - cd feeds/routing - git fetch origin pull/319/head - git read-tree --prefix=frr/ -u FETCH_HEAD:frr - cd ../../package/feeds/routing/ - ln -sv ../../../feeds/routing/frr . - cd ../../.. - -Configure OpenWRT for your target and select the needed FRR packages in Network -> Routing and Redirection -> frr, + +Configure OpenWrt for your target and select the needed FRR packages in Network -> Routing and Redirection -> frr, exit and save :: make menuconfig -Then, to compile either a complete OpenWRT image, or the FRR packages, run: +Then, to compile either a complete OpenWrt image, or the FRR packages, run: :: @@ -54,10 +50,16 @@ It may be possible that on first build ``make package/frr/compile`` not to work and it may be needed to run a ``make`` for the entire build environment. Add ``V=s`` to get more debugging output. +More information about OpenWrt buildsystem can be found `here +<https://openwrt.org/docs/guide-developer/build-system/use-buildsystem>`_. 
+ Work with sources ----------------- -To update to a newer version, or change other options, you need to edit the ``feeds/routing/frr/Makefile``. +To update to a newer version, or change other options, you need to edit the ``feeds/packages/frr/Makefile``. + +More information about working with patches in OpenWrt buildsystem can be found `here +<https://openwrt.org/docs/guide-developer/build-system/use-patches-with-buildsystem>`_. Usage ----- diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst index cc27bc202d..db0776adba 100644 --- a/doc/user/bgp.rst +++ b/doc/user/bgp.rst @@ -2465,26 +2465,6 @@ the same behavior of using same next-hop and RMAC values. Enables or disables advertise-pip feature, specifiy system-IP and/or system-MAC parameters. -Support with VRF network namespace backend -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -It is possible to separate overlay networks contained in VXLAN interfaces from -underlay networks by using VRFs. VRF-lite and VRF-netns backends can be used for -that. In the latter case, this is necessary to set both bridge and vxlan interface -on the same network namespace, as below example illustrates: - -.. code-block:: shell - - # linux shell - ip netns add vrf1 - ip link add name vxlan101 type vxlan id 101 dstport 4789 dev eth0 local 10.1.1.1 - ip link set dev vxlan101 netns vrf1 - ip netns exec vrf1 ip link set dev lo up - ip netns exec vrf1 brctl addbr bridge101 - ip netns exec vrf1 brctl addif bridge101 vxlan101 - -This makes possible to separate not only layer 3 networks like VRF-lite networks. -Also, VRF netns based make possible to separate layer 2 networks on separate VRF -instances. .. _bgp-debugging: diff --git a/doc/user/sharp.rst b/doc/user/sharp.rst index 1c474193f2..76bdc48dc0 100644 --- a/doc/user/sharp.rst +++ b/doc/user/sharp.rst @@ -88,13 +88,13 @@ keyword. At present, no sharp commands will be preserved in the config. may have been turned on. .. index:: sharp lsp -.. 
clicmd:: sharp lsp (0-100000) nexthop-group NAME [prefix A.B.C.D/M TYPE [instance (0-255)]] +.. clicmd:: sharp lsp [update] (0-100000) nexthop-group NAME [prefix A.B.C.D/M TYPE [instance (0-255)]] Install an LSP using the specified in-label, with nexthops as - listed in nexthop-group ``NAME``. The LSP is installed as type - ZEBRA_LSP_SHARP. If ``prefix`` is specified, an existing route with - type ``TYPE`` (and optional ``instance`` id) will be updated to use - the LSP. + listed in nexthop-group ``NAME``. If ``update`` is included, the + update path is used. The LSP is installed as type ZEBRA_LSP_SHARP. + If ``prefix`` is specified, an existing route with type ``TYPE`` + (and optional ``instance`` id) will be updated to use the LSP. .. index:: sharp remove lsp .. clicmd:: sharp remove lsp (0-100000) nexthop-group NAME [prefix A.B.C.D/M TYPE [instance (0-255)]] diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index fb79481cb2..50011d55ec 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -293,4 +293,15 @@ struct br_mcast_stats { __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; __u64 mcast_packets[BR_MCAST_DIR_SIZE]; }; + +/* FDB notification bits for NDA_NOTIFY: + * - BR_FDB_NFY_STATIC - notify on activity/expire even for a static entry + * - BR_FDB_NFY_INACTIVE - mark as inactive to avoid double notification, + * used with BR_FDB_NFY_STATIC (kernel controlled) + */ +enum { + BR_FDB_NFY_STATIC, + BR_FDB_NFY_INACTIVE, + BR_FDB_NFY_MAX +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index cd144e3099..33c17af1cc 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -29,6 +29,8 @@ enum { NDA_LINK_NETNSID, NDA_SRC_VNI, NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, + NDA_NOTIFY, __NDA_MAX }; diff --git a/include/linux/net_namespace.h b/include/linux/net_namespace.h index 0ed9dd61d3..0187c74d88 100644 --- a/include/linux/net_namespace.h +++ 
b/include/linux/net_namespace.h @@ -16,7 +16,6 @@ enum { NETNSA_NSID, NETNSA_PID, NETNSA_FD, - NETNSA_TARGET_NSID, __NETNSA_MAX, }; diff --git a/include/linux/nexthop.h b/include/linux/nexthop.h index e4d6e256ef..ee2a15b9c7 100644 --- a/include/linux/nexthop.h +++ b/include/linux/nexthop.h @@ -50,6 +50,7 @@ enum { */ NHA_GROUPS, /* flag; only return nexthop groups in dump */ NHA_MASTER, /* u32; only return nexthops with given master dev */ + NHA_FDB, /* nexthop belongs to a bridge fdb */ __NHA_MAX, }; diff --git a/lib/bitfield.h b/lib/bitfield.h index 72980165f9..244938933b 100644 --- a/lib/bitfield.h +++ b/lib/bitfield.h @@ -58,7 +58,7 @@ typedef unsigned int word_t; * @n: The current word number that is being used. * @m: total number of words in 'data' */ -#define bitfield_t struct { word_t *data; size_t n, m; } +typedef struct {word_t *data; size_t n, m; } bitfield_t; /** * Initialize the bits. @@ -97,6 +97,16 @@ typedef unsigned int word_t; #define bf_release_index(v, id) \ (v).data[bf_index(id)] &= ~(1 << (bf_offset(id))) +/* check if an id is in use */ +#define bf_test_index(v, id) \ + ((v).data[bf_index(id)] & (1 << (bf_offset(id)))) + +/* check if the bit field has been setup */ +#define bf_is_inited(v) ((v).data) + +/* compare two bitmaps of the same length */ +#define bf_cmp(v1, v2) (memcmp((v1).data, (v2).data, ((v1).m * sizeof(word_t)))) + /* * return 0th index back to bitfield */ @@ -146,6 +156,37 @@ typedef unsigned int word_t; (b) += (w * WORD_SIZE); \ } while (0) +static inline unsigned int bf_find_next_set_bit(bitfield_t v, + word_t start_index) +{ + int start_bit; + unsigned long i, offset; + + start_bit = start_index & (WORD_SIZE - 1); + + for (i = bf_index(start_index); i < v.m; ++i) { + if (v.data[i] == 0) { + /* if the whole word is empty move to the next */ + start_bit = 0; + continue; + } + /* scan one word for set bits */ + for (offset = start_bit; offset < WORD_SIZE; ++offset) { + if ((v.data[i] >> offset) & 1) + return ((i * WORD_SIZE) + 
offset); + } + /* move to the next word */ + start_bit = 0; + } + return WORD_MAX; +} + +/* iterate through all the set bits */ +#define bf_for_each_set_bit(v, b, max) \ + for ((b) = bf_find_next_set_bit((v), 0); \ + (b) < max; \ + (b) = bf_find_next_set_bit((v), (b) + 1)) + /* * Free the allocated memory for data * @v: an instance of bitfield_t struct. @@ -217,16 +217,14 @@ struct interface *if_create_name(const char *name, vrf_id_t vrf_id) return ifp; } -struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id, - char *optional_name) +struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id) { struct interface *ifp; ifp = if_new(vrf_id); if_set_index(ifp, ifindex); - if (optional_name) - if_set_name(ifp, optional_name); + hook_call(if_add, ifp); return ifp; } @@ -573,8 +571,7 @@ struct interface *if_get_by_name(const char *name, vrf_id_t vrf_id) return NULL; } -struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id, - char *optional_name) +struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id) { struct interface *ifp; @@ -584,7 +581,7 @@ struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id, ifp = if_lookup_by_ifindex(ifindex, vrf_id); if (ifp) return ifp; - return if_create_ifindex(ifindex, vrf_id, optional_name); + return if_create_ifindex(ifindex, vrf_id); case VRF_BACKEND_VRF_LITE: ifp = if_lookup_by_index_all_vrf(ifindex); if (ifp) { @@ -596,7 +593,7 @@ struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id, if_update_to_new_vrf(ifp, vrf_id); return ifp; } - return if_create_ifindex(ifindex, vrf_id, optional_name); + return if_create_ifindex(ifindex, vrf_id); } return NULL; @@ -509,8 +509,7 @@ extern void if_update_to_new_vrf(struct interface *, vrf_id_t vrf_id); extern struct interface *if_create_name(const char *name, vrf_id_t vrf_id); /* Create new interface, adds to index list only */ -extern struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t 
vrf_id, - char *name); +extern struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id); extern struct interface *if_lookup_by_index(ifindex_t, vrf_id_t vrf_id); extern struct interface *if_lookup_by_index_all_vrf(ifindex_t); extern struct interface *if_lookup_exact_address(const void *matchaddr, @@ -527,8 +526,8 @@ extern struct interface *if_lookup_by_name_all_vrf(const char *ifname); extern struct interface *if_lookup_by_name_vrf(const char *name, struct vrf *vrf); extern struct interface *if_lookup_by_name(const char *ifname, vrf_id_t vrf_id); extern struct interface *if_get_by_name(const char *ifname, vrf_id_t vrf_id); -extern struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id, - char *optional_name); +extern struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id); + /* Sets the index and adds to index list */ extern int if_set_index(struct interface *ifp, ifindex_t ifindex); /* Sets the name and adds to name list */ diff --git a/lib/linklist.c b/lib/linklist.c index 272e153276..2936c5b502 100644 --- a/lib/linklist.c +++ b/lib/linklist.c @@ -38,16 +38,30 @@ static void list_free_internal(struct list *l) XFREE(MTYPE_LINK_LIST, l); } + /* Allocate new listnode. Internal use only. */ -static struct listnode *listnode_new(void) +static struct listnode *listnode_new(struct list *list, void *val) { - return XCALLOC(MTYPE_LINK_NODE, sizeof(struct listnode)); + struct listnode *node; + + /* if listnode memory is managed by the app then the val + * passed in is the listnode + */ + if (list->flags & LINKLIST_FLAG_NODE_MEM_BY_APP) { + node = val; + node->prev = node->next = NULL; + } else { + node = XCALLOC(MTYPE_LINK_NODE, sizeof(struct listnode)); + node->data = val; + } + return node; } /* Free listnode. 
*/ -static void listnode_free(struct listnode *node) +static void listnode_free(struct list *list, struct listnode *node) { - XFREE(MTYPE_LINK_NODE, node); + if (!(list->flags & LINKLIST_FLAG_NODE_MEM_BY_APP)) + XFREE(MTYPE_LINK_NODE, node); } struct listnode *listnode_add(struct list *list, void *val) @@ -56,10 +70,9 @@ struct listnode *listnode_add(struct list *list, void *val) assert(val != NULL); - node = listnode_new(); + node = listnode_new(list, val); node->prev = list->tail; - node->data = val; if (list->head == NULL) list->head = node; @@ -78,10 +91,9 @@ void listnode_add_head(struct list *list, void *val) assert(val != NULL); - node = listnode_new(); + node = listnode_new(list, val); node->next = list->head; - node->data = val; if (list->head == NULL) list->head = node; @@ -97,15 +109,22 @@ bool listnode_add_sort_nodup(struct list *list, void *val) struct listnode *n; struct listnode *new; int ret; + void *data; assert(val != NULL); + if (list->flags & LINKLIST_FLAG_NODE_MEM_BY_APP) { + n = val; + data = n->data; + } else { + data = val; + } + if (list->cmp) { for (n = list->head; n; n = n->next) { - ret = (*list->cmp)(val, n->data); + ret = (*list->cmp)(data, n->data); if (ret < 0) { - new = listnode_new(); - new->data = val; + new = listnode_new(list, val); new->next = n; new->prev = n->prev; @@ -124,8 +143,7 @@ bool listnode_add_sort_nodup(struct list *list, void *val) } } - new = listnode_new(); - new->data = val; + new = listnode_new(list, val); LISTNODE_ATTACH(list, new); @@ -139,8 +157,8 @@ void listnode_add_sort(struct list *list, void *val) assert(val != NULL); - new = listnode_new(); - new->data = val; + new = listnode_new(list, val); + val = new->data; if (list->cmp) { for (n = list->head; n; n = n->next) { @@ -177,8 +195,7 @@ struct listnode *listnode_add_after(struct list *list, struct listnode *pp, assert(val != NULL); - nn = listnode_new(); - nn->data = val; + nn = listnode_new(list, val); if (pp == NULL) { if (list->head) @@ -212,8 +229,7 
@@ struct listnode *listnode_add_before(struct list *list, struct listnode *pp, assert(val != NULL); - nn = listnode_new(); - nn->data = val; + nn = listnode_new(list, val); if (pp == NULL) { if (list->tail) @@ -276,7 +292,7 @@ void list_delete_all_node(struct list *list) next = node->next; if (*list->del) (*list->del)(node->data); - listnode_free(node); + listnode_free(list, node); } list->head = list->tail = NULL; list->count = 0; @@ -336,7 +352,7 @@ void list_delete_node(struct list *list, struct listnode *node) else list->tail = node->prev; list->count--; - listnode_free(node); + listnode_free(list, node); } void list_sort(struct list *list, int (*cmp)(const void **, const void **)) diff --git a/lib/linklist.h b/lib/linklist.h index 00cb9f8714..94a1a1604a 100644 --- a/lib/linklist.h +++ b/lib/linklist.h @@ -43,6 +43,12 @@ struct list { /* invariant: count is the number of listnodes in the list */ unsigned int count; + uint8_t flags; +/* Indicates that listnode memory is managed by the application and + * doesn't need to be freed by this library via listnode_delete etc. + */ +#define LINKLIST_FLAG_NODE_MEM_BY_APP (1 << 0) + /* * Returns -1 if val1 < val2, 0 if equal?, 1 if val1 > val2. * Used as definition of sorted for listnode_add_sort @@ -60,10 +66,14 @@ struct list { #define listhead(X) ((X) ? ((X)->head) : NULL) #define listhead_unchecked(X) ((X)->head) #define listtail(X) ((X) ? ((X)->tail) : NULL) +#define listtail_unchecked(X) ((X)->tail) #define listcount(X) ((X)->count) #define list_isempty(X) ((X)->head == NULL && (X)->tail == NULL) /* return X->data only if X and X->data are not NULL */ #define listgetdata(X) (assert(X), assert((X)->data != NULL), (X)->data) +/* App is going to manage listnode memory */ +#define listset_app_node_mem(X) ((X)->flags |= LINKLIST_FLAG_NODE_MEM_BY_APP) +#define listnode_init(X, val) ((X)->data = (val)) /* * Create a new linked list. 
@@ -95,7 +105,7 @@ extern struct listnode *listnode_add(struct list *list, void *data); * list to operate on * * data - * element to add + * If MEM_BY_APP is set this is listnode. Otherwise it is element to add. */ extern void listnode_add_head(struct list *list, void *data); @@ -112,7 +122,7 @@ extern void listnode_add_head(struct list *list, void *data); * list to operate on * * val - * element to add + * If MEM_BY_APP is set this is listnode. Otherwise it is element to add. */ extern void listnode_add_sort(struct list *list, void *val); @@ -128,7 +138,7 @@ extern void listnode_add_sort(struct list *list, void *val); * listnode to insert after * * data - * data to insert + * If MEM_BY_APP is set this is listnode. Otherwise it is element to add. * * Returns: * pointer to newly created listnode that contains the inserted data @@ -148,7 +158,7 @@ extern struct listnode *listnode_add_after(struct list *list, * listnode to insert before * * data - * data to insert + * If MEM_BY_APP is set this is listnode. Otherwise it is element to add. * * Returns: * pointer to newly created listnode that contains the inserted data @@ -313,7 +323,7 @@ extern void list_filter_out_nodes(struct list *list, bool (*cond)(void *data)); * list to operate on * * val - * element to add + * If MEM_BY_APP is set this is listnode. Otherwise it is element to add. 
*/ extern bool listnode_add_sort_nodup(struct list *list, void *val); @@ -398,6 +398,10 @@ static const struct zebra_desc_table command_types[] = { DESC_ENTRY(ZEBRA_ADVERTISE_SUBNET), DESC_ENTRY(ZEBRA_LOCAL_ES_ADD), DESC_ENTRY(ZEBRA_LOCAL_ES_DEL), + DESC_ENTRY(ZEBRA_REMOTE_ES_VTEP_ADD), + DESC_ENTRY(ZEBRA_REMOTE_ES_VTEP_DEL), + DESC_ENTRY(ZEBRA_LOCAL_ES_EVI_ADD), + DESC_ENTRY(ZEBRA_LOCAL_ES_EVI_DEL), DESC_ENTRY(ZEBRA_VNI_ADD), DESC_ENTRY(ZEBRA_VNI_DEL), DESC_ENTRY(ZEBRA_L3VNI_ADD), diff --git a/lib/netns_linux.c b/lib/netns_linux.c index e1c0159fc5..98f359401e 100644 --- a/lib/netns_linux.c +++ b/lib/netns_linux.c @@ -379,20 +379,12 @@ struct ns *ns_lookup(ns_id_t ns_id) return ns_lookup_internal(ns_id); } -void ns_walk_func(int (*func)(struct ns *, - void *param_in, - void **param_out), - void *param_in, - void **param_out) +void ns_walk_func(int (*func)(struct ns *)) { struct ns *ns = NULL; - int ret; - RB_FOREACH (ns, ns_head, &ns_tree) { - ret = func(ns, param_in, param_out); - if (ret == NS_WALK_STOP) - return; - } + RB_FOREACH (ns, ns_head, &ns_tree) + func(ns); } const char *ns_get_name(struct ns *ns) @@ -592,33 +584,9 @@ int ns_socket(int domain, int type, int protocol, ns_id_t ns_id) return ret; } -/* if relative link_nsid matches default netns, - * then return default absolute netns value - * otherwise, return NS_UNKNOWN - */ -ns_id_t ns_id_get_absolute(ns_id_t ns_id_reference, ns_id_t link_nsid) -{ - struct ns *ns; - - ns = ns_lookup(ns_id_reference); - if (!ns) - return NS_UNKNOWN; - if (ns->relative_default_ns != link_nsid) - return NS_UNKNOWN; - ns = ns_get_default(); - assert(ns); - return ns->ns_id; -} - ns_id_t ns_get_default_id(void) { if (default_ns) return default_ns->ns_id; return NS_DEFAULT_INTERNAL; } - -struct ns *ns_get_default(void) -{ - return default_ns; -} - @@ -53,11 +53,6 @@ struct ns { /* Identifier, mapped on the NSID value */ ns_id_t internal_ns_id; - /* Identifier, value of NSID of default netns, - * relative value in that local 
netns - */ - ns_id_t relative_default_ns; - /* Name */ char *name; @@ -125,14 +120,7 @@ int ns_socket(int domain, int type, int protocol, ns_id_t ns_id); extern char *ns_netns_pathname(struct vty *vty, const char *name); /* Parse and execute a function on all the NETNS */ -#define NS_WALK_CONTINUE 0 -#define NS_WALK_STOP 1 - -extern void ns_walk_func(int (*func)(struct ns *, - void *, - void **), - void *param_in, - void **param_out); +extern void ns_walk_func(int (*func)(struct ns *)); /* API to get the NETNS name, from the ns pointer */ extern const char *ns_get_name(struct ns *ns); @@ -186,9 +174,7 @@ extern struct ns *ns_lookup_name(const char *name); */ extern int ns_enable(struct ns *ns, void (*func)(ns_id_t, void *)); extern struct ns *ns_get_created(struct ns *ns, char *name, ns_id_t ns_id); -extern ns_id_t ns_id_get_absolute(ns_id_t ns_id_reference, ns_id_t link_nsid); extern void ns_disable(struct ns *ns); -extern struct ns *ns_get_default(void); #ifdef __cplusplus } diff --git a/lib/prefix.h b/lib/prefix.h index 53e9dc3cb3..400f07386f 100644 --- a/lib/prefix.h +++ b/lib/prefix.h @@ -43,9 +43,36 @@ extern "C" { #define ETH_ALEN 6 #endif +/* EVPN route types. 
*/ +typedef enum { + BGP_EVPN_AD_ROUTE = 1, /* Ethernet Auto-Discovery (A-D) route */ + BGP_EVPN_MAC_IP_ROUTE, /* MAC/IP Advertisement route */ + BGP_EVPN_IMET_ROUTE, /* Inclusive Multicast Ethernet Tag route */ + BGP_EVPN_ES_ROUTE, /* Ethernet Segment route */ + BGP_EVPN_IP_PREFIX_ROUTE, /* IP Prefix route */ +} bgp_evpn_route_type; + +/* value of first byte of ESI */ +#define ESI_TYPE_ARBITRARY 0 /* */ +#define ESI_TYPE_LACP 1 /* <> */ +#define ESI_TYPE_BRIDGE 2 /* <Root bridge Mac-6B>:<Root Br Priority-2B>:00 */ +#define ESI_TYPE_MAC 3 /* <Syst Mac Add-6B>:<Local Discriminator Value-3B> */ +#define ESI_TYPE_ROUTER 4 /* <RouterId-4B>:<Local Discriminator Value-4B> */ +#define ESI_TYPE_AS 5 /* <AS-4B>:<Local Discriminator Value-4B> */ + +#define MAX_ESI {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} + + +#define EVPN_ETH_TAG_BYTES 4 #define ESI_BYTES 10 #define ESI_STR_LEN (3 * ESI_BYTES) +/* Maximum number of VTEPs per-ES - + * XXX - temporary limit for allocating strings etc. + */ +#define ES_VTEP_MAX_CNT 10 +#define ES_VTEP_LIST_STR_SZ (ES_VTEP_MAX_CNT * 16) + #define ETHER_ADDR_STRLEN (3*ETH_ALEN) /* * there isn't a portable ethernet address type. 
We define our @@ -64,12 +91,13 @@ struct ethaddr { #define PREFIX_LEN_ROUTE_TYPE_5_IPV6 (30*8) typedef struct esi_t_ { - uint8_t val[10]; + uint8_t val[ESI_BYTES]; } esi_t; struct evpn_ead_addr { esi_t esi; uint32_t eth_tag; + struct ipaddr ip; }; struct evpn_macip_addr { @@ -217,39 +245,45 @@ struct prefix_evpn { static inline int is_evpn_prefix_ipaddr_none(const struct prefix_evpn *evp) { - if (evp->prefix.route_type == 2) + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) + return IS_IPADDR_NONE(&(evp)->prefix.ead_addr.ip); + if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) return IS_IPADDR_NONE(&(evp)->prefix.macip_addr.ip); - if (evp->prefix.route_type == 3) + if (evp->prefix.route_type == BGP_EVPN_IMET_ROUTE) return IS_IPADDR_NONE(&(evp)->prefix.imet_addr.ip); - if (evp->prefix.route_type == 4) + if (evp->prefix.route_type == BGP_EVPN_ES_ROUTE) return IS_IPADDR_NONE(&(evp)->prefix.es_addr.ip); - if (evp->prefix.route_type == 5) + if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) return IS_IPADDR_NONE(&(evp)->prefix.prefix_addr.ip); return 0; } static inline int is_evpn_prefix_ipaddr_v4(const struct prefix_evpn *evp) { - if (evp->prefix.route_type == 2) + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) + return IS_IPADDR_V4(&(evp)->prefix.ead_addr.ip); + if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) return IS_IPADDR_V4(&(evp)->prefix.macip_addr.ip); - if (evp->prefix.route_type == 3) + if (evp->prefix.route_type == BGP_EVPN_IMET_ROUTE) return IS_IPADDR_V4(&(evp)->prefix.imet_addr.ip); - if (evp->prefix.route_type == 4) + if (evp->prefix.route_type == BGP_EVPN_ES_ROUTE) return IS_IPADDR_V4(&(evp)->prefix.es_addr.ip); - if (evp->prefix.route_type == 5) + if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) return IS_IPADDR_V4(&(evp)->prefix.prefix_addr.ip); return 0; } static inline int is_evpn_prefix_ipaddr_v6(const struct prefix_evpn *evp) { - if (evp->prefix.route_type == 2) + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) + return 
IS_IPADDR_V6(&(evp)->prefix.ead_addr.ip); + if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) return IS_IPADDR_V6(&(evp)->prefix.macip_addr.ip); - if (evp->prefix.route_type == 3) + if (evp->prefix.route_type == BGP_EVPN_IMET_ROUTE) return IS_IPADDR_V6(&(evp)->prefix.imet_addr.ip); - if (evp->prefix.route_type == 4) + if (evp->prefix.route_type == BGP_EVPN_ES_ROUTE) return IS_IPADDR_V6(&(evp)->prefix.es_addr.ip); - if (evp->prefix.route_type == 5) + if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) return IS_IPADDR_V6(&(evp)->prefix.prefix_addr.ip); return 0; } diff --git a/lib/thread.c b/lib/thread.c index 5c7c104842..1df4eee25c 100644 --- a/lib/thread.c +++ b/lib/thread.c @@ -634,6 +634,36 @@ struct timeval thread_timer_remain(struct thread *thread) return remain; } +static int time_hhmmss(char *buf, int buf_size, long sec) +{ + long hh; + long mm; + int wr; + + zassert(buf_size >= 8); + + hh = sec / 3600; + sec %= 3600; + mm = sec / 60; + sec %= 60; + + wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec); + + return wr != 8; +} + +char *thread_timer_to_hhmmss(char *buf, int buf_size, + struct thread *t_timer) +{ + if (t_timer) { + time_hhmmss(buf, buf_size, + thread_timer_remain_second(t_timer)); + } else { + snprintf(buf, buf_size, "--:--:--"); + } + return buf; +} + /* Get new thread. */ static struct thread *thread_get(struct thread_master *m, uint8_t type, int (*func)(struct thread *), void *arg, diff --git a/lib/thread.h b/lib/thread.h index 412a4d93bf..c22b2105cd 100644 --- a/lib/thread.h +++ b/lib/thread.h @@ -140,6 +140,8 @@ struct cpu_thread_history { /* Thread yield time. */ #define THREAD_YIELD_TIME_SLOT 10 * 1000L /* 10ms */ +#define THREAD_TIMER_STRLEN 12 + /* Macros. */ #define THREAD_ARG(X) ((X)->arg) #define THREAD_FD(X) ((X)->u.fd) @@ -228,6 +230,8 @@ extern unsigned long thread_consumed_time(RUSAGE_T *after, RUSAGE_T *before, /* only for use in logging functions! 
*/ extern pthread_key_t thread_current; +extern char *thread_timer_to_hhmmss(char *buf, int buf_size, + struct thread *t_timer); #ifdef __cplusplus } @@ -653,8 +653,7 @@ int vrf_handler_create(struct vty *vty, const char *vrfname, } int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname, - ns_id_t ns_id, ns_id_t internal_ns_id, - ns_id_t rel_def_ns_id) + ns_id_t ns_id, ns_id_t internal_ns_id) { struct ns *ns = NULL; @@ -701,7 +700,6 @@ int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname, } ns = ns_get_created(ns, pathname, ns_id); ns->internal_ns_id = internal_ns_id; - ns->relative_default_ns = rel_def_ns_id; ns->vrf_ctxt = (void *)vrf; vrf->ns_ctxt = (void *)ns; /* update VRF netns NAME */ @@ -797,9 +795,7 @@ DEFUN_NOSH (vrf_netns, frr_with_privs(vrf_daemon_privs) { ret = vrf_netns_handler_create(vty, vrf, pathname, - NS_UNKNOWN, - NS_UNKNOWN, - NS_UNKNOWN); + NS_UNKNOWN, NS_UNKNOWN); } return ret; } @@ -315,7 +315,7 @@ extern int vrf_handler_create(struct vty *vty, const char *name, */ extern int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname, ns_id_t ext_ns_id, - ns_id_t ns_id, ns_id_t rel_def_ns_id); + ns_id_t ns_id); /* used internally to enable or disable VRF. 
* Notify a change in the VRF ID of the VRF diff --git a/lib/zclient.c b/lib/zclient.c index 92ff2537d5..eb62350f4f 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -3519,6 +3519,16 @@ static int zclient_read(struct thread *thread) (*zclient->local_es_del)(command, zclient, length, vrf_id); break; + case ZEBRA_LOCAL_ES_EVI_ADD: + if (zclient->local_es_evi_add) + (*zclient->local_es_evi_add)(command, zclient, length, + vrf_id); + break; + case ZEBRA_LOCAL_ES_EVI_DEL: + if (zclient->local_es_evi_del) + (*zclient->local_es_evi_del)(command, zclient, length, + vrf_id); + break; case ZEBRA_VNI_ADD: if (zclient->local_vni_add) (*zclient->local_vni_add)(command, zclient, length, diff --git a/lib/zclient.h b/lib/zclient.h index 250824e612..da06239d01 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -157,6 +157,10 @@ typedef enum { ZEBRA_ADVERTISE_ALL_VNI, ZEBRA_LOCAL_ES_ADD, ZEBRA_LOCAL_ES_DEL, + ZEBRA_REMOTE_ES_VTEP_ADD, + ZEBRA_REMOTE_ES_VTEP_DEL, + ZEBRA_LOCAL_ES_EVI_ADD, + ZEBRA_LOCAL_ES_EVI_DEL, ZEBRA_VNI_ADD, ZEBRA_VNI_DEL, ZEBRA_L3VNI_ADD, @@ -321,6 +325,8 @@ struct zclient { int (*fec_update)(int, struct zclient *, uint16_t); int (*local_es_add)(ZAPI_CALLBACK_ARGS); int (*local_es_del)(ZAPI_CALLBACK_ARGS); + int (*local_es_evi_add)(ZAPI_CALLBACK_ARGS); + int (*local_es_evi_del)(ZAPI_CALLBACK_ARGS); int (*local_vni_add)(ZAPI_CALLBACK_ARGS); int (*local_vni_del)(ZAPI_CALLBACK_ARGS); int (*local_l3vni_add)(ZAPI_CALLBACK_ARGS); @@ -601,6 +607,11 @@ zapi_rule_notify_owner2str(enum zapi_rule_notify_owner note) #define ZEBRA_MACIP_TYPE_ROUTER_FLAG 0x04 /* Router Flag - proxy NA */ #define ZEBRA_MACIP_TYPE_OVERRIDE_FLAG 0x08 /* Override Flag */ #define ZEBRA_MACIP_TYPE_SVI_IP 0x10 /* SVI MAC-IP */ +#define ZEBRA_MACIP_TYPE_PROXY_ADVERT 0x20 /* Not locally active */ +#define ZEBRA_MACIP_TYPE_SYNC_PATH 0x40 /* sync path */ +/* XXX - flags is an u8; that needs to be changed to u32 if you need + * to allocate past 0x80 + */ enum zebra_neigh_state { ZEBRA_NEIGH_INACTIVE = 0, 
ZEBRA_NEIGH_ACTIVE = 1 }; diff --git a/sharpd/sharp_vty.c b/sharpd/sharp_vty.c index 1d2b87b9ba..6a120c8eff 100644 --- a/sharpd/sharp_vty.c +++ b/sharpd/sharp_vty.c @@ -394,27 +394,31 @@ DEFUN_NOSH (show_debugging_sharpd, return CMD_SUCCESS; } -DEFPY(sharp_lsp_prefix_v4, sharp_lsp_prefix_v4_cmd, - "sharp lsp (0-100000)$inlabel\ +DEFPY (sharp_lsp_prefix_v4, sharp_lsp_prefix_v4_cmd, + "sharp lsp [update]$update (0-100000)$inlabel\ nexthop-group NHGNAME$nhgname\ [prefix A.B.C.D/M$pfx\ " FRR_IP_REDIST_STR_ZEBRA "$type_str [instance (0-255)$instance]]", - "Sharp Routing Protocol\n" - "Add an LSP\n" - "The ingress label to use\n" - "Use nexthops from a nexthop-group\n" - "The nexthop-group name\n" - "Label a prefix\n" - "The v4 prefix to label\n" - FRR_IP_REDIST_HELP_STR_ZEBRA - "Instance to use\n" - "Instance\n") + "Sharp Routing Protocol\n" + "Add an LSP\n" + "Update an LSP\n" + "The ingress label to use\n" + "Use nexthops from a nexthop-group\n" + "The nexthop-group name\n" + "Label a prefix\n" + "The v4 prefix to label\n" + FRR_IP_REDIST_HELP_STR_ZEBRA + "Instance to use\n" + "Instance\n") { struct nexthop_group_cmd *nhgc = NULL; struct nexthop_group_cmd *backup_nhgc = NULL; struct nexthop_group *backup_nhg = NULL; struct prefix p = {}; int type = 0; + bool update_p; + + update_p = (update != NULL); /* We're offered a v4 prefix */ if (pfx->family > 0 && type_str) { @@ -458,7 +462,8 @@ DEFPY(sharp_lsp_prefix_v4, sharp_lsp_prefix_v4_cmd, backup_nhg = &(backup_nhgc->nhg); } - if (sharp_install_lsps_helper(true, pfx->family > 0 ? &p : NULL, + if (sharp_install_lsps_helper(true /*install*/, update_p, + pfx->family > 0 ? &p : NULL, type, instance, inlabel, &(nhgc->nhg), backup_nhg) == 0) return CMD_SUCCESS; @@ -523,7 +528,8 @@ DEFPY(sharp_remove_lsp_prefix_v4, sharp_remove_lsp_prefix_v4_cmd, nhg = &(nhgc->nhg); } - if (sharp_install_lsps_helper(false, pfx->family > 0 ? &p : NULL, + if (sharp_install_lsps_helper(false /*!install*/, false, + pfx->family > 0 ? 
&p : NULL, type, instance, inlabel, nhg, NULL) == 0) return CMD_SUCCESS; else { diff --git a/sharpd/sharp_zebra.c b/sharpd/sharp_zebra.c index 74e44014a9..e0f16d71f5 100644 --- a/sharpd/sharp_zebra.c +++ b/sharpd/sharp_zebra.c @@ -114,15 +114,16 @@ static int sharp_ifp_down(struct interface *ifp) return 0; } -int sharp_install_lsps_helper(bool install_p, const struct prefix *p, - uint8_t type, int instance, uint32_t in_label, +int sharp_install_lsps_helper(bool install_p, bool update_p, + const struct prefix *p, uint8_t type, + int instance, uint32_t in_label, const struct nexthop_group *nhg, const struct nexthop_group *backup_nhg) { struct zapi_labels zl = {}; struct zapi_nexthop *znh; const struct nexthop *nh; - int i, ret; + int i, cmd, ret; zl.type = ZEBRA_LSP_SHARP; zl.local_label = in_label; @@ -200,12 +201,17 @@ int sharp_install_lsps_helper(bool install_p, const struct prefix *p, zl.backup_nexthop_num = i; } - if (install_p) - ret = zebra_send_mpls_labels(zclient, ZEBRA_MPLS_LABELS_ADD, - &zl); - else - ret = zebra_send_mpls_labels(zclient, ZEBRA_MPLS_LABELS_DELETE, - &zl); + + if (install_p) { + if (update_p) + cmd = ZEBRA_MPLS_LABELS_REPLACE; + else + cmd = ZEBRA_MPLS_LABELS_ADD; + } else { + cmd = ZEBRA_MPLS_LABELS_DELETE; + } + + ret = zebra_send_mpls_labels(zclient, cmd, &zl); return ret; } diff --git a/sharpd/sharp_zebra.h b/sharpd/sharp_zebra.h index cb2f38a6ab..e40585aa6a 100644 --- a/sharpd/sharp_zebra.h +++ b/sharpd/sharp_zebra.h @@ -44,8 +44,9 @@ extern void sharp_install_routes_helper(struct prefix *p, vrf_id_t vrf_id, extern void sharp_remove_routes_helper(struct prefix *p, vrf_id_t vrf_id, uint8_t instance, uint32_t routes); -int sharp_install_lsps_helper(bool install_p, const struct prefix *p, - uint8_t type, int instance, uint32_t in_label, +int sharp_install_lsps_helper(bool install_p, bool update_p, + const struct prefix *p, uint8_t type, + int instance, uint32_t in_label, const struct nexthop_group *nhg, const struct nexthop_group 
*backup_nhg); diff --git a/tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf b/tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf Binary files differnew file mode 100644 index 0000000000..8858e21496 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf diff --git a/tests/topotests/bgp-evpn-mh/hostd11/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd11/evpn.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd11/evpn.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd11/pim.conf b/tests/topotests/bgp-evpn-mh/hostd11/pim.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd11/pim.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd11/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd11/zebra.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd11/zebra.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd12/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd12/evpn.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd12/evpn.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd12/pim.conf b/tests/topotests/bgp-evpn-mh/hostd12/pim.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd12/pim.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd12/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd12/zebra.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd12/zebra.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd21/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd21/evpn.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd21/evpn.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd21/pim.conf b/tests/topotests/bgp-evpn-mh/hostd21/pim.conf new file mode 100644 index 0000000000..e69de29bb2 --- 
/dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd21/pim.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd21/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd21/zebra.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd21/zebra.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd22/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd22/evpn.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd22/evpn.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd22/pim.conf b/tests/topotests/bgp-evpn-mh/hostd22/pim.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd22/pim.conf diff --git a/tests/topotests/bgp-evpn-mh/hostd22/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd22/zebra.conf new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/hostd22/zebra.conf diff --git a/tests/topotests/bgp-evpn-mh/spine1/evpn.conf b/tests/topotests/bgp-evpn-mh/spine1/evpn.conf new file mode 100644 index 0000000000..2e26f60f44 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/spine1/evpn.conf @@ -0,0 +1,17 @@ +frr defaults datacenter +! +router bgp 65001 + bgp router-id 192.168.100.13 + no bgp ebgp-requires-policy + neighbor 192.168.1.2 remote-as external + neighbor 192.168.2.2 remote-as external + neighbor 192.168.3.2 remote-as external + neighbor 192.168.4.2 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.1.2 activate + neighbor 192.168.2.2 activate + neighbor 192.168.3.2 activate + neighbor 192.168.4.2 activate + exit-address-family +! diff --git a/tests/topotests/bgp-evpn-mh/spine1/pim.conf b/tests/topotests/bgp-evpn-mh/spine1/pim.conf new file mode 100644 index 0000000000..68e686e8c7 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/spine1/pim.conf @@ -0,0 +1,18 @@ +ip pim rp 192.168.100.13 +ip pim spt-switchover infinity-and-beyond +! +int lo + ip pim +! 
+int spine1-eth0 + ip pim +! +int spine1-eth1 + ip pim +! +int spine1-eth2 + ip pim +! +int spine1-eth3 + ip pim +! diff --git a/tests/topotests/bgp-evpn-mh/spine1/zebra.conf b/tests/topotests/bgp-evpn-mh/spine1/zebra.conf new file mode 100644 index 0000000000..80e9e5a263 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/spine1/zebra.conf @@ -0,0 +1,15 @@ +int spine1-eth0 + ip addr 192.168.1.1/24 +! +int spine1-eth1 + ip addr 192.168.2.1/24 +! +int spine1-eth2 + ip addr 192.168.3.1/24 +! +int spine1-eth3 + ip addr 192.168.4.1/24 +! +int lo + ip addr 192.168.100.13/32 + ip addr 192.168.100.100/32 diff --git a/tests/topotests/bgp-evpn-mh/spine2/evpn.conf b/tests/topotests/bgp-evpn-mh/spine2/evpn.conf new file mode 100644 index 0000000000..ec2e789276 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/spine2/evpn.conf @@ -0,0 +1,17 @@ +frr defaults datacenter +! +router bgp 65001 + bgp router-id 192.168.100.14 + no bgp ebgp-requires-policy + neighbor 192.168.5.2 remote-as external + neighbor 192.168.6.2 remote-as external + neighbor 192.168.7.2 remote-as external + neighbor 192.168.8.2 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.5.2 activate + neighbor 192.168.6.2 activate + neighbor 192.168.7.2 activate + neighbor 192.168.8.2 activate + exit-address-family +! diff --git a/tests/topotests/bgp-evpn-mh/spine2/pim.conf b/tests/topotests/bgp-evpn-mh/spine2/pim.conf new file mode 100644 index 0000000000..c1566240e6 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/spine2/pim.conf @@ -0,0 +1,18 @@ +ip pim rp 192.168.100.13 +ip pim spt-switchover infinity-and-beyond +! +int lo + ip pim +! +int spine2-eth0 + ip pim +! +int spine2-eth1 + ip pim +! +int spine2-eth2 + ip pim +! +int spine2-eth3 + ip pim +! 
diff --git a/tests/topotests/bgp-evpn-mh/spine2/zebra.conf b/tests/topotests/bgp-evpn-mh/spine2/zebra.conf new file mode 100644 index 0000000000..1cd1df8c81 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/spine2/zebra.conf @@ -0,0 +1,15 @@ +int spine2-eth0 + ip addr 192.168.5.1/24 +! +int spine2-eth1 + ip addr 192.168.6.1/24 +! +int spine2-eth2 + ip addr 192.168.7.1/24 +! +int spine2-eth3 + ip addr 192.168.8.1/24 +! +int lo + ip addr 192.168.100.14/32 + ip addr 192.168.100.100/32 diff --git a/tests/topotests/bgp-evpn-mh/test_evpn_mh.py b/tests/topotests/bgp-evpn-mh/test_evpn_mh.py new file mode 100755 index 0000000000..fe28f79bd4 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/test_evpn_mh.py @@ -0,0 +1,651 @@ +#!/usr/bin/env python + +# +# test_evpn_mh.py +# +# Copyright (c) 2020 by +# Cumulus Networks, Inc. +# Anuradha Karuppiah +# +# Permission to use, copy, modify, and/or distribute this software +# for any purpose with or without fee is hereby granted, provided +# that the above copyright notice and this permission notice appear +# in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND NETDEF DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NETDEF BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# + +""" +test_evpn_mh.py: Testing EVPN multihoming + +""" + +import os +import re +import sys +import pytest +import json +import platform +from functools import partial + +# Save the Current Working Directory to find configuration files. 
+CWD = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(CWD, "../")) + +# pylint: disable=C0413 +# Import topogen and topotest helpers +from lib import topotest +from lib.topogen import Topogen, TopoRouter, get_topogen +from lib.topolog import logger + +# Required to instantiate the topology builder class. +from mininet.topo import Topo + +##################################################### +## +## Network Topology Definition +## +## See topology picture at evpn-mh-topo-tests.pdf +##################################################### + + +class NetworkTopo(Topo): + ''' + EVPN Multihoming Topology - + 1. Two level CLOS + 2. Two spine switches - spine1, spine2 + 3. Two racks with Top-of-Rack switches per rack - tormx1, tormx2 + 4. Two dual attached hosts per-rack - hostdx1, hostdx2 + ''' + + def build(self, **_opts): + "Build function" + + tgen = get_topogen(self) + + tgen.add_router("spine1") + tgen.add_router("spine2") + tgen.add_router("torm11") + tgen.add_router("torm12") + tgen.add_router("torm21") + tgen.add_router("torm22") + tgen.add_router("hostd11") + tgen.add_router("hostd12") + tgen.add_router("hostd21") + tgen.add_router("hostd22") + + # On main router + # First switch is for a dummy interface (for local network) + + + ##################### spine1 ######################## + # spine1-eth0 is connected to torm11-eth0 + switch = tgen.add_switch("sw1") + switch.add_link(tgen.gears["spine1"]) + switch.add_link(tgen.gears["torm11"]) + + # spine1-eth1 is connected to torm12-eth0 + switch = tgen.add_switch("sw2") + switch.add_link(tgen.gears["spine1"]) + switch.add_link(tgen.gears["torm12"]) + + # spine1-eth2 is connected to torm21-eth0 + switch = tgen.add_switch("sw3") + switch.add_link(tgen.gears["spine1"]) + switch.add_link(tgen.gears["torm21"]) + + # spine1-eth3 is connected to torm22-eth0 + switch = tgen.add_switch("sw4") + switch.add_link(tgen.gears["spine1"]) + switch.add_link(tgen.gears["torm22"]) + + ##################### 
spine2 ######################## + # spine2-eth0 is connected to torm11-eth1 + switch = tgen.add_switch("sw5") + switch.add_link(tgen.gears["spine2"]) + switch.add_link(tgen.gears["torm11"]) + + # spine2-eth1 is connected to torm12-eth1 + switch = tgen.add_switch("sw6") + switch.add_link(tgen.gears["spine2"]) + switch.add_link(tgen.gears["torm12"]) + + # spine2-eth2 is connected to torm21-eth1 + switch = tgen.add_switch("sw7") + switch.add_link(tgen.gears["spine2"]) + switch.add_link(tgen.gears["torm21"]) + + # spine2-eth3 is connected to torm22-eth1 + switch = tgen.add_switch("sw8") + switch.add_link(tgen.gears["spine2"]) + switch.add_link(tgen.gears["torm22"]) + + ##################### torm11 ######################## + # torm11-eth2 is connected to hostd11-eth0 + switch = tgen.add_switch("sw9") + switch.add_link(tgen.gears["torm11"]) + switch.add_link(tgen.gears["hostd11"]) + + # torm11-eth3 is connected to hostd12-eth0 + switch = tgen.add_switch("sw10") + switch.add_link(tgen.gears["torm11"]) + switch.add_link(tgen.gears["hostd12"]) + + ##################### torm12 ######################## + # torm12-eth2 is connected to hostd11-eth1 + switch = tgen.add_switch("sw11") + switch.add_link(tgen.gears["torm12"]) + switch.add_link(tgen.gears["hostd11"]) + + # torm12-eth3 is connected to hostd12-eth1 + switch = tgen.add_switch("sw12") + switch.add_link(tgen.gears["torm12"]) + switch.add_link(tgen.gears["hostd12"]) + + ##################### torm21 ######################## + # torm21-eth2 is connected to hostd21-eth0 + switch = tgen.add_switch("sw13") + switch.add_link(tgen.gears["torm21"]) + switch.add_link(tgen.gears["hostd21"]) + + # torm21-eth3 is connected to hostd22-eth0 + switch = tgen.add_switch("sw14") + switch.add_link(tgen.gears["torm21"]) + switch.add_link(tgen.gears["hostd22"]) + + ##################### torm22 ######################## + # torm22-eth2 is connected to hostd21-eth1 + switch = tgen.add_switch("sw15") + switch.add_link(tgen.gears["torm22"]) + 
switch.add_link(tgen.gears["hostd21"]) + + # torm22-eth3 is connected to hostd22-eth1 + switch = tgen.add_switch("sw16") + switch.add_link(tgen.gears["torm22"]) + switch.add_link(tgen.gears["hostd22"]) + + +##################################################### +## +## Tests starting +## +##################################################### + +tor_ips = {"torm11" : "192.168.100.15", \ + "torm12" : "192.168.100.16", \ + "torm21" : "192.168.100.17", \ + "torm22" : "192.168.100.18"} + +svi_ips = {"torm11" : "45.0.0.2", \ + "torm12" : "45.0.0.3", \ + "torm21" : "45.0.0.4", \ + "torm22" : "45.0.0.5"} + +tor_ips_rack_1 = {"torm11" : "192.168.100.15", \ + "torm12" : "192.168.100.16"} + +tor_ips_rack_2 = {"torm21" : "192.168.100.17", \ + "torm22" : "192.168.100.18"} + +host_es_map = {"hostd11" : "03:44:38:39:ff:ff:01:00:00:01", + "hostd12" : "03:44:38:39:ff:ff:01:00:00:02", + "hostd21" : "03:44:38:39:ff:ff:02:00:00:01", + "hostd22" : "03:44:38:39:ff:ff:02:00:00:02"} + +def config_bond(node, bond_name, bond_members, bond_ad_sys_mac, br): + ''' + Used to setup bonds on the TORs and hosts for MH + ''' + node.run("ip link add dev %s type bond mode 802.3ad" % bond_name) + node.run("ip link set dev %s type bond lacp_rate 1" % bond_name) + node.run("ip link set dev %s type bond miimon 100" % bond_name) + node.run("ip link set dev %s type bond xmit_hash_policy layer3+4" % bond_name) + node.run("ip link set dev %s type bond min_links 1" % bond_name) + node.run("ip link set dev %s type bond ad_actor_system %s" %\ + (bond_name, bond_ad_sys_mac)) + + for bond_member in bond_members: + node.run("ip link set dev %s down" % bond_member) + node.run("ip link set dev %s master %s" % (bond_member, bond_name)) + node.run("ip link set dev %s up" % bond_member) + + node.run("ip link set dev %s up" % bond_name) + + # if bridge is specified add the bond as a bridge member + if br: + node.run(" ip link set dev %s master bridge" % bond_name) + node.run("/sbin/bridge link set dev %s priority 8" % 
bond_name) + node.run("/sbin/bridge vlan del vid 1 dev %s" % bond_name) + node.run("/sbin/bridge vlan del vid 1 untagged pvid dev %s" % bond_name) + node.run("/sbin/bridge vlan add vid 1000 dev %s" % bond_name) + node.run("/sbin/bridge vlan add vid 1000 untagged pvid dev %s"\ + % bond_name) + + +def config_mcast_tunnel_termination_device(node): + ''' + The kernel requires a device to terminate VxLAN multicast tunnels + when EVPN-PIM is used for flooded traffic + ''' + node.run("ip link add dev ipmr-lo type dummy") + node.run("ip link set dev ipmr-lo mtu 16000") + node.run("ip link set dev ipmr-lo mode dormant") + node.run("ip link set dev ipmr-lo up") + + +def config_bridge(node): + ''' + Create a VLAN aware bridge + ''' + node.run("ip link add dev bridge type bridge stp_state 0") + node.run("ip link set dev bridge type bridge vlan_filtering 1") + node.run("ip link set dev bridge mtu 9216") + node.run("ip link set dev bridge type bridge ageing_time 1800") + node.run("ip link set dev bridge type bridge mcast_snooping 0") + node.run("ip link set dev bridge type bridge vlan_stats_enabled 1") + node.run("ip link set dev bridge up") + node.run("/sbin/bridge vlan add vid 1000 dev bridge") + + +def config_vxlan(node, node_ip): + ''' + Create a VxLAN device for VNI 1000 and add it to the bridge. + VLAN-1000 is mapped to VNI-1000. 
+ ''' + node.run("ip link add dev vx-1000 type vxlan id 1000 dstport 4789") + node.run("ip link set dev vx-1000 type vxlan nolearning") + node.run("ip link set dev vx-1000 type vxlan local %s" % node_ip) + node.run("ip link set dev vx-1000 type vxlan ttl 64") + node.run("ip link set dev vx-1000 mtu 9152") + node.run("ip link set dev vx-1000 type vxlan dev ipmr-lo group 239.1.1.100") + node.run("ip link set dev vx-1000 up") + + # bridge attrs + node.run("ip link set dev vx-1000 master bridge") + node.run("/sbin/bridge link set dev vx-1000 neigh_suppress on") + node.run("/sbin/bridge link set dev vx-1000 learning off") + node.run("/sbin/bridge link set dev vx-1000 priority 8") + node.run("/sbin/bridge vlan del vid 1 dev vx-1000") + node.run("/sbin/bridge vlan del vid 1 untagged pvid dev vx-1000") + node.run("/sbin/bridge vlan add vid 1000 dev vx-1000") + node.run("/sbin/bridge vlan add vid 1000 untagged pvid dev vx-1000") + + +def config_svi(node, svi_pip): + ''' + Create an SVI for VLAN 1000 + ''' + node.run("ip link add link bridge name vlan1000 type vlan id 1000 protocol 802.1q") + node.run("ip addr add %s/24 dev vlan1000" % svi_pip) + node.run("ip link set dev vlan1000 up") + node.run("/sbin/sysctl net.ipv4.conf.vlan1000.arp_accept=1") + node.run("ip link add link vlan1000 name vlan1000-v0 type macvlan mode private") + node.run("/sbin/sysctl net.ipv6.conf.vlan1000-v0.accept_dad=0") + node.run("/sbin/sysctl net.ipv6.conf.vlan1000-v0.dad_transmits") + node.run("/sbin/sysctl net.ipv6.conf.vlan1000-v0.dad_transmits=0") + node.run("ip link set dev vlan1000-v0 address 00:00:5e:00:01:01") + node.run("ip link set dev vlan1000-v0 up") + # metric 1024 is not working + node.run("ip addr add 45.0.0.1/24 dev vlan1000-v0") + + +def config_tor(tor_name, tor, tor_ip, svi_pip): + ''' + Create the bond/vxlan-bridge on the TOR which acts as VTEP and EVPN-PE + ''' + # create a device for terminating VxLAN multicast tunnels + config_mcast_tunnel_termination_device(tor) + + # create a 
vlan aware bridge + config_bridge(tor) + + # create vxlan device and add it to bridge + config_vxlan(tor, tor_ip) + + # create hostbonds and add them to the bridge + if "torm1" in tor_name: + sys_mac = "44:38:39:ff:ff:01" + else: + sys_mac = "44:38:39:ff:ff:02" + bond_member = tor_name + "-eth2" + config_bond(tor, "hostbond1", [bond_member], sys_mac, "bridge") + + bond_member = tor_name + "-eth3" + config_bond(tor, "hostbond2", [bond_member], sys_mac, "bridge") + + # create SVI + config_svi(tor, svi_pip) + + +def config_tors(tgen, tors): + for tor_name in tors: + tor = tgen.gears[tor_name] + config_tor(tor_name, tor, tor_ips.get(tor_name), svi_ips.get(tor_name)) + +def compute_host_ip_mac(host_name): + host_id = host_name.split("hostd")[1] + host_ip = "45.0.0."+ host_id + "/24" + host_mac = "00:00:00:00:00:" + host_id + + return host_ip, host_mac + +def config_host(host_name, host): + ''' + Create the dual-attached bond on host nodes for MH + ''' + bond_members = [] + bond_members.append(host_name + "-eth0") + bond_members.append(host_name + "-eth1") + bond_name = "torbond" + config_bond(host, bond_name, bond_members, "00:00:00:00:00:00", None) + + host_ip, host_mac = compute_host_ip_mac(host_name) + host.run("ip addr add %s dev %s" % (host_ip, bond_name)) + host.run("ip link set dev %s address %s" % (bond_name, host_mac)) + + +def config_hosts(tgen, hosts): + for host_name in hosts: + host = tgen.gears[host_name] + config_host(host_name, host) + + +def setup_module(module): + "Setup topology" + tgen = Topogen(NetworkTopo, module.__name__) + tgen.start_topology() + + krel = platform.release() + if topotest.version_cmp(krel, "4.19") < 0: + tgen.errors = "kernel 4.19 needed for multihoming tests" + pytest.skip(tgen.errors) + + tors = [] + tors.append("torm11") + tors.append("torm12") + tors.append("torm21") + tors.append("torm22") + config_tors(tgen, tors) + + hosts = [] + hosts.append("hostd11") + hosts.append("hostd12") + hosts.append("hostd21") + 
hosts.append("hostd22") + config_hosts(tgen, hosts) + + # tgen.mininet_cli() + # This is a sample of configuration loading. + router_list = tgen.routers() + for rname, router in router_list.iteritems(): + router.load_config( + TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname)) + ) + router.load_config( + TopoRouter.RD_PIM, os.path.join(CWD, "{}/pim.conf".format(rname)) + ) + router.load_config( + TopoRouter.RD_BGP, os.path.join(CWD, "{}/evpn.conf".format(rname)) + ) + tgen.start_router() + # tgen.mininet_cli() + + +def teardown_module(_mod): + "Teardown the pytest environment" + tgen = get_topogen() + + # This function tears down the whole topology. + tgen.stop_topology() + + +def check_local_es(esi, vtep_ips, dut_name, down_vteps): + ''' + Check if ES peers are setup correctly on local ESs + ''' + peer_ips = [] + if "torm1" in dut_name: + tor_ips_rack = tor_ips_rack_1 + else: + tor_ips_rack = tor_ips_rack_2 + + for tor_name, tor_ip in tor_ips_rack.iteritems(): + if dut_name not in tor_name: + peer_ips.append(tor_ip) + + # remove down VTEPs from the peer check list + peer_set = set(peer_ips) + down_vtep_set = set(down_vteps) + peer_set = peer_set - down_vtep_set + + vtep_set = set(vtep_ips) + diff = peer_set.symmetric_difference(vtep_set) + + return (esi, diff) if diff else None + + +def check_remote_es(esi, vtep_ips, dut_name, down_vteps): + ''' + Verify list of PEs associated with a remote ES + ''' + remote_ips = [] + + if "torm1" in dut_name: + tor_ips_rack = tor_ips_rack_2 + else: + tor_ips_rack = tor_ips_rack_1 + + for tor_name, tor_ip in tor_ips_rack.iteritems(): + remote_ips.append(tor_ip) + + # remove down VTEPs from the remote check list + remote_set = set(remote_ips) + down_vtep_set = set(down_vteps) + remote_set = remote_set - down_vtep_set + + vtep_set = set(vtep_ips) + diff = remote_set.symmetric_difference(vtep_set) + + return (esi, diff) if diff else None + +def check_es(dut): + ''' + Verify list of PEs associated all ESs, local 
and remote + ''' + bgp_es = dut.vtysh_cmd("show bgp l2vp evpn es json") + bgp_es_json = json.loads(bgp_es) + + result = None + + expected_es_set = set([v for k, v in host_es_map.iteritems()]) + curr_es_set = [] + + # check if ES content is correct + for es in bgp_es_json: + esi = es["esi"] + curr_es_set.append(esi) + types = es["type"] + vtep_ips = [] + for vtep in es["vteps"]: + vtep_ips.append(vtep["vtep_ip"]) + + if "local" in types: + result = check_local_es(esi, vtep_ips, dut.name, []) + else: + result = check_remote_es(esi, vtep_ips, dut.name, []) + + if result: + return result + + # check if all ESs are present + curr_es_set = set(curr_es_set) + result = curr_es_set.symmetric_difference(expected_es_set) + + return result if result else None + +def check_one_es(dut, esi, down_vteps): + ''' + Verify list of PEs associated with a single ES (local or remote) + ''' + bgp_es = dut.vtysh_cmd("show bgp l2vp evpn es %s json" % esi) + es = json.loads(bgp_es) + + if not es: + return "esi %s not found" % esi + + esi = es["esi"] + types = es["type"] + vtep_ips = [] + for vtep in es["vteps"]: + vtep_ips.append(vtep["vtep_ip"]) + + if "local" in types: + result = check_local_es(esi, vtep_ips, dut.name, down_vteps) + else: + result = check_remote_es(esi, vtep_ips, dut.name, down_vteps) + + return result + +def test_evpn_es(): + ''' + Two ES are setup on each rack. This test checks if - + 1. ES peer has been added to the local ES (via Type-1/EAD route) + 2. 
The remote ESs are setup with the right list of PEs (via Type-1) + ''' + + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + dut_name = "torm11" + dut = tgen.gears[dut_name] + test_fn = partial(check_es, dut) + _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3) + + assertmsg = '"{}" ES content incorrect'.format(dut_name) + assert result is None, assertmsg + # tgen.mininet_cli() + +def test_evpn_ead_update(): + ''' + Flap a host link on the remote rack and check if the EAD updates + are sent/processed for the corresponding ESI + ''' + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + # dut on rack1 and host link flap on rack2 + dut_name = "torm11" + dut = tgen.gears[dut_name] + + remote_tor_name = "torm21" + remote_tor = tgen.gears[remote_tor_name] + + host_name = "hostd21" + host = tgen.gears[host_name] + esi = host_es_map.get(host_name) + + # check if the VTEP list is right to start with + down_vteps = [] + test_fn = partial(check_one_es, dut, esi, down_vteps) + _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3) + assertmsg = '"{}" ES content incorrect'.format(dut_name) + assert result is None, assertmsg + + # down a remote host link and check if the EAD withdraw is rxed + # Note: LACP is not working as expected so I am temporarily shutting + # down the link on the remote TOR instead of the remote host + remote_tor.run("ip link set dev %s-%s down" % (remote_tor_name, "eth2")) + down_vteps.append(tor_ips.get(remote_tor_name)) + _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3) + assertmsg = '"{}" ES incorrect after remote link down'.format(dut_name) + assert result is None, assertmsg + + # bring up remote host link and check if the EAD update is rxed + down_vteps.remove(tor_ips.get(remote_tor_name)) + remote_tor.run("ip link set dev %s-%s up" % (remote_tor_name, "eth2")) + _, result = topotest.run_and_expect(test_fn, None, count=20, 
wait=3) + assertmsg = '"{}" ES incorrect after remote link flap'.format(dut_name) + assert result is None, assertmsg + + # tgen.mininet_cli() + +def check_mac(dut, vni, mac, m_type, esi, intf): + ''' + checks if mac is present and if its type, ESI and (for local MACs) access interface match the ones provided + ''' + + out = dut.vtysh_cmd("show evpn mac vni %d mac %s json" % (vni, mac)) + + mac_js = json.loads(out) + for mac, info in mac_js.iteritems(): + tmp_esi = info.get("esi", "") + tmp_m_type = info.get("type", "") + tmp_intf = info.get("intf", "") if tmp_m_type == "local" else "" + if tmp_esi == esi and tmp_m_type == m_type and tmp_intf == intf: + return None + + return "invalid vni %d mac %s out %s" % (vni, mac, mac_js) + +def test_evpn_mac(): + ''' + 1. Add a MAC on hostd11 and check if the MAC is synced between + torm11 and torm12. And installed as a local MAC. + 2. Add a MAC on hostd21 and check if the MAC is installed as a + remote MAC on torm11 and torm12 + ''' + + tgen = get_topogen() + + local_host = tgen.gears["hostd11"] + remote_host = tgen.gears["hostd21"] + tors = [] + tors.append(tgen.gears["torm11"]) + tors.append(tgen.gears["torm12"]) + + # ping the anycast gw from the local and remote hosts to populate + # the mac address on the PEs + local_host.run("arping -I torbond -c 1 45.0.0.1") + remote_host.run("arping -I torbond -c 1 45.0.0.1") + + vni = 1000 + + # check if the rack-1 host MAC is present on all rack-1 PEs + # and points to local access port + m_type = "local" + _, mac = compute_host_ip_mac(local_host.name) + esi = host_es_map.get(local_host.name) + intf = "hostbond1" + + for tor in tors: + test_fn = partial(check_mac, tor, vni, mac, m_type, esi, intf) + _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3) + assertmsg = '"{}" local MAC content incorrect'.format(tor.name) + assert result is None, assertmsg + + # check if the rack-2 host MAC is present on all rack-1 PEs + # and points to the remote ES destination + m_type = "remote" + _, mac = 
compute_host_ip_mac(remote_host.name) + esi = host_es_map.get(remote_host.name) + intf = "" + + for tor in tors: + test_fn = partial(check_mac, tor, vni, mac, m_type, esi, intf) + _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3) + assertmsg = '"{}" remote MAC content incorrect'.format(tor.name) + assert result is None, assertmsg + +if __name__ == "__main__": + args = ["-s"] + sys.argv[1:] + sys.exit(pytest.main(args)) diff --git a/tests/topotests/bgp-evpn-mh/torm11/evpn.conf b/tests/topotests/bgp-evpn-mh/torm11/evpn.conf new file mode 100644 index 0000000000..01f4b65704 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm11/evpn.conf @@ -0,0 +1,21 @@ +! +frr defaults datacenter +! +debug bgp evpn mh es +debug bgp evpn mh route +debug bgp zebra +! +! +router bgp 65002 + bgp router-id 192.168.100.15 + no bgp ebgp-requires-policy + neighbor 192.168.1.1 remote-as external + neighbor 192.168.5.1 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.1.1 activate + neighbor 192.168.5.1 activate + advertise-all-vni + advertise-svi-ip + exit-address-family +! diff --git a/tests/topotests/bgp-evpn-mh/torm11/pim.conf b/tests/topotests/bgp-evpn-mh/torm11/pim.conf new file mode 100644 index 0000000000..fbba735873 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm11/pim.conf @@ -0,0 +1,13 @@ +! +ip pim rp 192.168.100.13 239.1.1.0/24 +ip pim spt-switchover infinity-and-beyond +! +interface lo + ip igmp + ip pim +! +interface torm11-eth0 + ip pim +! +interface torm11-eth1 + ip pim diff --git a/tests/topotests/bgp-evpn-mh/torm11/zebra.conf b/tests/topotests/bgp-evpn-mh/torm11/zebra.conf new file mode 100644 index 0000000000..ee4e87e1c2 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm11/zebra.conf @@ -0,0 +1,23 @@ +debug zebra evpn mh es +debug zebra evpn mh mac +debug zebra evpn mh neigh +debug zebra evpn mh nh +debug zebra vxlan +! +int torm11-eth0 + ip addr 192.168.1.2/24 +! 
+int torm11-eth1 + ip addr 192.168.5.2/24 +! +int lo + ip addr 192.168.100.15/32 +! +interface hostbond1 + evpn mh es-id 1 + evpn mh es-sys-mac 44:38:39:ff:ff:01 +! +interface hostbond2 + evpn mh es-id 2 + evpn mh es-sys-mac 44:38:39:ff:ff:01 +! diff --git a/tests/topotests/bgp-evpn-mh/torm12/evpn.conf b/tests/topotests/bgp-evpn-mh/torm12/evpn.conf new file mode 100644 index 0000000000..2c13024bbc --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm12/evpn.conf @@ -0,0 +1,21 @@ +! +frr defaults datacenter +! +debug bgp evpn mh es +debug bgp evpn mh route +debug bgp zebra +! +! +router bgp 65003 + bgp router-id 192.168.100.16 + no bgp ebgp-requires-policy + neighbor 192.168.2.1 remote-as external + neighbor 192.168.6.1 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.2.1 activate + neighbor 192.168.6.1 activate + advertise-all-vni + advertise-svi-ip + exit-address-family +! diff --git a/tests/topotests/bgp-evpn-mh/torm12/pim.conf b/tests/topotests/bgp-evpn-mh/torm12/pim.conf new file mode 100644 index 0000000000..3dd63b44ca --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm12/pim.conf @@ -0,0 +1,13 @@ +! +ip pim rp 192.168.100.13 239.1.1.0/24 +ip pim spt-switchover infinity-and-beyond +! +interface lo + ip igmp + ip pim +! +interface torm12-eth0 + ip pim +! +interface torm12-eth1 + ip pim diff --git a/tests/topotests/bgp-evpn-mh/torm12/zebra.conf b/tests/topotests/bgp-evpn-mh/torm12/zebra.conf new file mode 100644 index 0000000000..736af4159e --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm12/zebra.conf @@ -0,0 +1,23 @@ +debug zebra evpn mh es +debug zebra evpn mh mac +debug zebra evpn mh neigh +debug zebra evpn mh nh +debug zebra vxlan +! +int torm12-eth0 + ip addr 192.168.2.2/24 +! +int torm12-eth1 + ip addr 192.168.6.2/24 +! +int lo + ip addr 192.168.100.16/32 +! +interface hostbond1 + evpn mh es-id 1 + evpn mh es-sys-mac 44:38:39:ff:ff:01 +! 
+interface hostbond2 + evpn mh es-id 2 + evpn mh es-sys-mac 44:38:39:ff:ff:01 +! diff --git a/tests/topotests/bgp-evpn-mh/torm21/evpn.conf b/tests/topotests/bgp-evpn-mh/torm21/evpn.conf new file mode 100644 index 0000000000..2a2ba061c6 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm21/evpn.conf @@ -0,0 +1,21 @@ +! +frr defaults datacenter +! +debug bgp evpn mh es +debug bgp evpn mh route +debug bgp zebra +! +! +router bgp 65004 + bgp router-id 192.168.100.17 + no bgp ebgp-requires-policy + neighbor 192.168.3.1 remote-as external + neighbor 192.168.7.1 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.3.1 activate + neighbor 192.168.7.1 activate + advertise-all-vni + advertise-svi-ip + exit-address-family +! diff --git a/tests/topotests/bgp-evpn-mh/torm21/pim.conf b/tests/topotests/bgp-evpn-mh/torm21/pim.conf new file mode 100644 index 0000000000..71aa91a06d --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm21/pim.conf @@ -0,0 +1,13 @@ +! +ip pim rp 192.168.100.13 239.1.1.0/24 +ip pim spt-switchover infinity-and-beyond +! +interface lo + ip igmp + ip pim +! +interface torm21-eth0 + ip pim +! +interface torm21-eth1 + ip pim diff --git a/tests/topotests/bgp-evpn-mh/torm21/zebra.conf b/tests/topotests/bgp-evpn-mh/torm21/zebra.conf new file mode 100644 index 0000000000..0ebe6f2d95 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm21/zebra.conf @@ -0,0 +1,23 @@ +debug zebra evpn mh es +debug zebra evpn mh mac +debug zebra evpn mh neigh +debug zebra evpn mh nh +debug zebra vxlan +! +int torm21-eth0 + ip addr 192.168.3.2/24 +! +int torm21-eth1 + ip addr 192.168.7.2/24 +! +int lo + ip addr 192.168.100.17/32 +! +interface hostbond1 + evpn mh es-id 1 + evpn mh es-sys-mac 44:38:39:ff:ff:02 +! +interface hostbond2 + evpn mh es-id 2 + evpn mh es-sys-mac 44:38:39:ff:ff:02 +! 
diff --git a/tests/topotests/bgp-evpn-mh/torm22/evpn.conf b/tests/topotests/bgp-evpn-mh/torm22/evpn.conf new file mode 100644 index 0000000000..b4f4f1dc25 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm22/evpn.conf @@ -0,0 +1,21 @@ +! +frr defaults datacenter +! +debug bgp evpn mh es +debug bgp evpn mh route +debug bgp zebra +! +! +router bgp 65005 + bgp router-id 192.168.100.18 + no bgp ebgp-requires-policy + neighbor 192.168.4.1 remote-as external + neighbor 192.168.8.1 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.4.1 activate + neighbor 192.168.8.1 activate + advertise-all-vni + advertise-svi-ip + exit-address-family +! diff --git a/tests/topotests/bgp-evpn-mh/torm22/pim.conf b/tests/topotests/bgp-evpn-mh/torm22/pim.conf new file mode 100644 index 0000000000..46f330f5cd --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm22/pim.conf @@ -0,0 +1,13 @@ +! +ip pim rp 192.168.100.13 239.1.1.0/24 +ip pim spt-switchover infinity-and-beyond +! +interface lo + ip igmp + ip pim +! +interface torm22-eth0 + ip pim +! +interface torm22-eth1 + ip pim diff --git a/tests/topotests/bgp-evpn-mh/torm22/zebra.conf b/tests/topotests/bgp-evpn-mh/torm22/zebra.conf new file mode 100644 index 0000000000..356d8a43e7 --- /dev/null +++ b/tests/topotests/bgp-evpn-mh/torm22/zebra.conf @@ -0,0 +1,23 @@ +debug zebra evpn mh es +debug zebra evpn mh mac +debug zebra evpn mh neigh +debug zebra evpn mh nh +debug zebra vxlan +! +int torm22-eth0 + ip addr 192.168.4.2/24 +! +int torm22-eth1 + ip addr 192.168.8.2/24 +! +int lo + ip addr 192.168.100.18/32 +! +interface hostbond1 + evpn mh es-id 1 + evpn mh es-sys-mac 44:38:39:ff:ff:02 +! +interface hostbond2 + evpn mh es-id 2 + evpn mh es-sys-mac 44:38:39:ff:ff:02 +! 
diff --git a/tests/topotests/lib/topogen.py b/tests/topotests/lib/topogen.py index efd5b90685..37b9715010 100644 --- a/tests/topotests/lib/topogen.py +++ b/tests/topotests/lib/topogen.py @@ -819,7 +819,9 @@ class TopoRouter(TopoGear): if memleak_file is None: return - self.stop() + self.stop(False, False) + self.stop(wait=True) + self.logger.info("running memory leak report") self.tgen.net[self.name].report_memory_leaks(memleak_file, testname) diff --git a/zebra/debug.c b/zebra/debug.c index c920fca5ff..8c53ab73e4 100644 --- a/zebra/debug.c +++ b/zebra/debug.c @@ -40,6 +40,7 @@ unsigned long zebra_debug_pw; unsigned long zebra_debug_dplane; unsigned long zebra_debug_mlag; unsigned long zebra_debug_nexthop; +unsigned long zebra_debug_evpn_mh; DEFINE_HOOK(zebra_debug_show_debugging, (struct vty *vty), (vty)); @@ -109,6 +110,18 @@ DEFUN_NOSH (show_debugging_zebra, else if (IS_ZEBRA_DEBUG_NHG) vty_out(vty, " Zebra nexthop debugging is on\n"); + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + vty_out(vty, " Zebra EVPN-MH ethernet segment debugging is on\n"); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + vty_out(vty, " Zebra EVPN-MH nexthop debugging is on\n"); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + vty_out(vty, " Zebra EVPN-MH MAC debugging is on\n"); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + vty_out(vty, " Zebra EVPN-MH Neigh debugging is on\n"); + hook_call(zebra_debug_show_debugging, vty); return CMD_SUCCESS; } @@ -320,6 +333,53 @@ DEFPY (debug_zebra_mlag, return CMD_SUCCESS; } +DEFPY (debug_zebra_evpn_mh, + debug_zebra_evpn_mh_cmd, + "[no$no] debug zebra evpn mh <es$es|mac$mac|neigh$neigh|nh$nh>", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "EVPN\n" + "Multihoming\n" + "Ethernet Segment Debugging\n" + "MAC Debugging\n" + "Neigh Debugging\n" + "Nexthop Debugging\n") +{ + if (es) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES); + else + SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES); + } + + if (mac) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, + 
ZEBRA_DEBUG_EVPN_MH_MAC); + else + SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_MAC); + } + + if (neigh) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, + ZEBRA_DEBUG_EVPN_MH_NEIGH); + else + SET_FLAG(zebra_debug_evpn_mh, + ZEBRA_DEBUG_EVPN_MH_NEIGH); + } + + if (nh) { + if (no) + UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH); + else + SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH); + } + + return CMD_SUCCESS; +} + DEFUN (no_debug_zebra_events, no_debug_zebra_events_cmd, "no debug zebra events", @@ -553,6 +613,22 @@ static int config_write_debug(struct vty *vty) vty_out(vty, "debug zebra mlag\n"); write++; } + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) { + vty_out(vty, "debug zebra evpn mh es\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) { + vty_out(vty, "debug zebra evpn mh nh\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + vty_out(vty, "debug zebra evpn mh mac\n"); + write++; + } + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) { + vty_out(vty, "debug zebra evpn mh neigh\n"); + write++; + } if (IS_ZEBRA_DEBUG_PW) { vty_out(vty, "debug zebra pseudowires\n"); write++; @@ -589,6 +665,7 @@ void zebra_debug_init(void) zebra_debug_pw = 0; zebra_debug_dplane = 0; zebra_debug_mlag = 0; + zebra_debug_evpn_mh = 0; zebra_debug_nht = 0; zebra_debug_nexthop = 0; @@ -619,6 +696,7 @@ void zebra_debug_init(void) install_element(ENABLE_NODE, &no_debug_zebra_rib_cmd); install_element(ENABLE_NODE, &no_debug_zebra_fpm_cmd); install_element(ENABLE_NODE, &no_debug_zebra_dplane_cmd); + install_element(ENABLE_NODE, &debug_zebra_evpn_mh_cmd); install_element(CONFIG_NODE, &debug_zebra_events_cmd); install_element(CONFIG_NODE, &debug_zebra_nht_cmd); @@ -643,4 +721,5 @@ void zebra_debug_init(void) install_element(CONFIG_NODE, &no_debug_zebra_fpm_cmd); install_element(CONFIG_NODE, &no_debug_zebra_dplane_cmd); install_element(CONFIG_NODE, &debug_zebra_mlag_cmd); + install_element(CONFIG_NODE, &debug_zebra_evpn_mh_cmd); } diff --git a/zebra/debug.h b/zebra/debug.h index 
e513f8865d..8402224f19 100644 --- a/zebra/debug.h +++ b/zebra/debug.h @@ -62,6 +62,11 @@ extern "C" { #define ZEBRA_DEBUG_NHG 0x01 #define ZEBRA_DEBUG_NHG_DETAILED 0x02 +#define ZEBRA_DEBUG_EVPN_MH_ES 0x01 +#define ZEBRA_DEBUG_EVPN_MH_NH 0x02 +#define ZEBRA_DEBUG_EVPN_MH_MAC 0x04 +#define ZEBRA_DEBUG_EVPN_MH_NEIGH 0x08 + /* Debug related macro. */ #define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT) @@ -100,6 +105,15 @@ extern "C" { #define IS_ZEBRA_DEBUG_NHG_DETAIL \ (zebra_debug_nexthop & ZEBRA_DEBUG_NHG_DETAILED) +#define IS_ZEBRA_DEBUG_EVPN_MH_ES \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_ES) +#define IS_ZEBRA_DEBUG_EVPN_MH_NH \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NH) +#define IS_ZEBRA_DEBUG_EVPN_MH_MAC \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_MAC) +#define IS_ZEBRA_DEBUG_EVPN_MH_NEIGH \ + (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NEIGH) + extern unsigned long zebra_debug_event; extern unsigned long zebra_debug_packet; extern unsigned long zebra_debug_kernel; @@ -112,6 +126,7 @@ extern unsigned long zebra_debug_pw; extern unsigned long zebra_debug_dplane; extern unsigned long zebra_debug_mlag; extern unsigned long zebra_debug_nexthop; +extern unsigned long zebra_debug_evpn_mh; extern void zebra_debug_init(void); diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index c81d451693..4165fa1b3a 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -1015,7 +1015,8 @@ static void fpm_enqueue_rmac_table(struct hash_bucket *backet, void *arg) dplane_ctx_set_op(fra->ctx, DPLANE_OP_MAC_INSTALL); dplane_mac_init(fra->ctx, fra->zl3vni->vxlan_if, zif->brslave_info.br_if, vid, - &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, sticky); + &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, sticky, + 0 /*nhg*/, 0 /*update_flags*/); if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) { thread_add_timer(zrouter.master, fpm_rmac_send, fra->fnc, 1, &fra->fnc->t_rmacwalk); diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index 
a15f932451..81f77d4f9b 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -70,6 +70,7 @@ #include "zebra/if_netlink.h" #include "zebra/zebra_errors.h" #include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" extern struct zebra_privs_t zserv_privs; @@ -245,6 +246,26 @@ static enum zebra_link_type netlink_to_zebra_link_type(unsigned int hwt) } } +static inline void zebra_if_set_ziftype(struct interface *ifp, + zebra_iftype_t zif_type, + zebra_slave_iftype_t zif_slave_type) +{ + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + zif->zif_slave_type = zif_slave_type; + + if (zif->zif_type != zif_type) { + zif->zif_type = zif_type; + /* If the if_type has been set to bond initialize ES info + * against it. XXX - note that we don't handle the case where + * a zif changes from bond to non-bond; it is really + * an unexpected/error condition. + */ + zebra_evpn_if_init(zif); + } +} + static void netlink_determine_zebra_iftype(const char *kind, zebra_iftype_t *zif_type) { @@ -557,6 +578,74 @@ static void netlink_interface_update_l2info(struct interface *ifp, } } +static int netlink_bridge_vxlan_update(struct interface *ifp, + struct rtattr *af_spec) +{ + struct rtattr *aftb[IFLA_BRIDGE_MAX + 1]; + struct bridge_vlan_info *vinfo; + vlanid_t access_vlan; + + /* There is a 1-to-1 mapping of VLAN to VxLAN - hence + * only 1 access VLAN is accepted. 
+ */ + memset(aftb, 0, sizeof(aftb)); + parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, af_spec); + if (!aftb[IFLA_BRIDGE_VLAN_INFO]) + return 0; + + vinfo = RTA_DATA(aftb[IFLA_BRIDGE_VLAN_INFO]); + if (!(vinfo->flags & BRIDGE_VLAN_INFO_PVID)) + return 0; + + access_vlan = (vlanid_t)vinfo->vid; + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Access VLAN %u for VxLAN IF %s(%u)", access_vlan, + ifp->name, ifp->ifindex); + zebra_l2_vxlanif_update_access_vlan(ifp, access_vlan); + return 0; +} + +static void netlink_bridge_vlan_update(struct interface *ifp, + struct rtattr *af_spec) +{ + struct rtattr *i; + int rem; + uint16_t vid_range_start = 0; + struct zebra_if *zif; + bitfield_t old_vlan_bitmap; + struct bridge_vlan_info *vinfo; + + zif = (struct zebra_if *)ifp->info; + + /* cache the old bitmap addrs */ + old_vlan_bitmap = zif->vlan_bitmap; + /* create a new bitmap space for re-eval */ + bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX); + + for (i = RTA_DATA(af_spec), rem = RTA_PAYLOAD(af_spec); + RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + + if (i->rta_type != IFLA_BRIDGE_VLAN_INFO) + continue; + + vinfo = RTA_DATA(i); + + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + vid_range_start = vinfo->vid; + continue; + } + + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + vid_range_start = vinfo->vid; + + zebra_vlan_bitmap_compute(ifp, vid_range_start, vinfo->vid); + } + + zebra_vlan_mbr_re_eval(ifp, old_vlan_bitmap); + + bf_free(old_vlan_bitmap); +} + static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id, int startup) { @@ -564,12 +653,8 @@ static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id, struct ifinfomsg *ifi; struct rtattr *tb[IFLA_MAX + 1]; struct interface *ifp; - struct rtattr *aftb[IFLA_BRIDGE_MAX + 1]; - struct { - uint16_t flags; - uint16_t vid; - } * vinfo; - vlanid_t access_vlan; + struct zebra_if *zif; + struct rtattr *af_spec; /* Fetch name and ifindex */ ifi = NLMSG_DATA(h); @@ -587,30 +672,22 @@ static 
int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id, ifi->ifi_index); return 0; } - if (!IS_ZEBRA_IF_VXLAN(ifp)) - return 0; /* We are only interested in the access VLAN i.e., AF_SPEC */ - if (!tb[IFLA_AF_SPEC]) - return 0; + af_spec = tb[IFLA_AF_SPEC]; + if (!af_spec) + return 0; - /* There is a 1-to-1 mapping of VLAN to VxLAN - hence - * only 1 access VLAN is accepted. - */ - memset(aftb, 0, sizeof(aftb)); - parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, tb[IFLA_AF_SPEC]); - if (!aftb[IFLA_BRIDGE_VLAN_INFO]) - return 0; + if (IS_ZEBRA_IF_VXLAN(ifp)) + return netlink_bridge_vxlan_update(ifp, af_spec); - vinfo = RTA_DATA(aftb[IFLA_BRIDGE_VLAN_INFO]); - if (!(vinfo->flags & BRIDGE_VLAN_INFO_PVID)) - return 0; + /* build vlan bitmap associated with this interface if that + * device type is interested in the vlans + */ + zif = (struct zebra_if *)ifp->info; + if (bf_is_inited(zif->vlan_bitmap)) + netlink_bridge_vlan_update(ifp, af_spec); - access_vlan = (vlanid_t)vinfo->vid; - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug("Access VLAN %u for VxLAN IF %s(%u)", access_vlan, - name, ifi->ifi_index); - zebra_l2_vxlanif_update_access_vlan(ifp, access_vlan); return 0; } @@ -721,10 +798,8 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) if (tb[IFLA_LINK]) link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]); - if (tb[IFLA_LINK_NETNSID]) { + if (tb[IFLA_LINK_NETNSID]) link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]); - link_nsid = ns_id_get_absolute(ns_id, link_nsid); - } /* Add interface. * We add by index first because in some cases such as the master @@ -732,9 +807,11 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) * back references on the slave interfaces is painful if not done * this way, i.e. by creating by ifindex. 
*/ - ifp = if_get_by_ifindex(ifi->ifi_index, vrf_id, name); + ifp = if_get_by_ifindex(ifi->ifi_index, vrf_id); set_ifindex(ifp, ifi->ifi_index, zns); /* add it to ns struct */ + if_set_name(ifp, name); + ifp->flags = ifi->ifi_flags & 0x0000fffff; ifp->mtu6 = ifp->mtu = *(uint32_t *)RTA_DATA(tb[IFLA_MTU]); ifp->metric = 0; @@ -771,7 +848,7 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) netlink_interface_update_l2info(ifp, linkinfo[IFLA_INFO_DATA], 1, link_nsid); if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) - zebra_l2if_update_bridge_slave(ifp, bridge_ifindex, ns_id); + zebra_l2if_update_bridge_slave(ifp, bridge_ifindex); else if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) zebra_l2if_update_bond_slave(ifp, bond_ifindex); @@ -1263,10 +1340,9 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) if (tb[IFLA_LINK]) link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]); - if (tb[IFLA_LINK_NETNSID]) { + if (tb[IFLA_LINK_NETNSID]) link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]); - link_nsid = ns_id_get_absolute(ns_id, link_nsid); - } + if (tb[IFLA_IFALIAS]) { desc = (char *)RTA_DATA(tb[IFLA_IFALIAS]); } @@ -1354,8 +1430,7 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) 1, link_nsid); if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) zebra_l2if_update_bridge_slave(ifp, - bridge_ifindex, - ns_id); + bridge_ifindex); else if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) zebra_l2if_update_bond_slave(ifp, bond_ifindex); } else if (ifp->vrf_id != vrf_id) { @@ -1456,8 +1531,7 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) 0, link_nsid); if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || was_bridge_slave) zebra_l2if_update_bridge_slave(ifp, - bridge_ifindex, - ns_id); + bridge_ifindex); else if (IS_ZEBRA_IF_BOND_SLAVE(ifp) || was_bond_slave) zebra_l2if_update_bond_slave(ifp, bond_ifindex); } diff --git a/zebra/interface.c b/zebra/interface.c index 53ae1d2089..b824e313ec 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -51,6 
+51,7 @@ #include "zebra/interface.h" #include "zebra/zebra_vxlan.h" #include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h" DEFINE_MTYPE_STATIC(ZEBRA, ZINFO, "Zebra Interface Information") @@ -127,6 +128,7 @@ static int if_zebra_new_hook(struct interface *ifp) struct zebra_if *zebra_if; zebra_if = XCALLOC(MTYPE_ZINFO, sizeof(struct zebra_if)); + zebra_if->ifp = ifp; zebra_if->multicast = IF_ZEBRA_MULTICAST_UNSPEC; zebra_if->shutdown = IF_ZEBRA_SHUTDOWN_OFF; @@ -238,6 +240,8 @@ static int if_zebra_delete_hook(struct interface *ifp) list_delete(&rtadv->AdvDNSSLList); #endif /* HAVE_RTADV */ + zebra_evpn_if_cleanup(zebra_if); + if_nhg_dependents_release(ifp); zebra_if_nhg_dependents_free(zebra_if); @@ -831,6 +835,7 @@ void if_delete_update(struct interface *ifp) memset(&zif->l2info, 0, sizeof(union zebra_l2if_info)); memset(&zif->brslave_info, 0, sizeof(struct zebra_l2info_brslave)); + zebra_evpn_if_cleanup(zif); } if (!ifp->configured) { @@ -1072,6 +1077,8 @@ void if_up(struct interface *ifp) } else if (IS_ZEBRA_IF_MACVLAN(ifp)) zebra_vxlan_macvlan_up(ifp); + if (zif->es_info.es) + zebra_evpn_es_if_oper_state_change(zif, true /*up*/); } /* Interface goes down. We have to manage different behavior of based @@ -1106,6 +1113,8 @@ void if_down(struct interface *ifp) } else if (IS_ZEBRA_IF_MACVLAN(ifp)) zebra_vxlan_macvlan_down(ifp); + if (zif->es_info.es) + zebra_evpn_es_if_oper_state_change(zif, false /*up*/); /* Notify to the protocol daemons. 
*/ zebra_interface_down_update(ifp); @@ -1233,23 +1242,6 @@ static void nbr_connected_dump_vty(struct vty *vty, vty_out(vty, "\n"); } -static const char *zebra_zifslavetype_2str(zebra_slave_iftype_t zif_slave_type) -{ - switch (zif_slave_type) { - case ZEBRA_IF_SLAVE_BRIDGE: - return "Bridge"; - case ZEBRA_IF_SLAVE_VRF: - return "Vrf"; - case ZEBRA_IF_SLAVE_BOND: - return "Bond"; - case ZEBRA_IF_SLAVE_OTHER: - return "Other"; - case ZEBRA_IF_SLAVE_NONE: - return "None"; - } - return "None"; -} - static const char *zebra_ziftype_2str(zebra_iftype_t zif_type) { switch (zif_type) { @@ -1477,9 +1469,6 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp) vty_out(vty, " Interface Type %s\n", zebra_ziftype_2str(zebra_if->zif_type)); - vty_out(vty, " Interface Slave Type %s\n", - zebra_zifslavetype_2str(zebra_if->zif_slave_type)); - if (IS_ZEBRA_IF_BRIDGE(ifp)) { struct zebra_l2info_bridge *bridge_info; @@ -1547,6 +1536,8 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp) } } + zebra_evpn_if_es_print(vty, zebra_if); + if (zebra_if->link_ifindex != IFINDEX_INTERNAL) { if (zebra_if->link) vty_out(vty, " Parent interface: %s\n", zebra_if->link->name); @@ -3588,7 +3579,7 @@ static int if_config_write(struct vty *vty) } hook_call(zebra_if_config_wr, vty, ifp); - + zebra_evpn_mh_if_write(vty, ifp); link_params_config_write(vty, ifp); vty_endframe(vty, "!\n"); @@ -3664,4 +3655,7 @@ void zebra_if_init(void) install_element(LINK_PARAMS_NODE, &link_params_use_bw_cmd); install_element(LINK_PARAMS_NODE, &no_link_params_use_bw_cmd); install_element(LINK_PARAMS_NODE, &exit_link_params_cmd); + + /* setup EVPN MH elements */ + zebra_evpn_interface_init(); } diff --git a/zebra/interface.h b/zebra/interface.h index 2dad0c3bb2..1a8e3caed5 100644 --- a/zebra/interface.h +++ b/zebra/interface.h @@ -25,6 +25,7 @@ #include "redistribute.h" #include "vrf.h" #include "hook.h" +#include "bitfield.h" #include "zebra/zebra_l2.h" #include "zebra/zebra_nhg_private.h" @@ 
-42,6 +43,8 @@ extern "C" { #define IF_ZEBRA_SHUTDOWN_OFF 0 #define IF_ZEBRA_SHUTDOWN_ON 1 +#define IF_VLAN_BITMAP_MAX 4096 + #if defined(HAVE_RTADV) /* Router advertisement parameter. From RFC4861, RFC6275 and RFC4191. */ struct rtadvconf { @@ -272,8 +275,19 @@ typedef enum { struct irdp_interface; +/* Ethernet segment info used for setting up EVPN multihoming */ +struct zebra_evpn_es; +struct zebra_es_if_info { + struct ethaddr sysmac; + uint32_t lid; /* local-id; has to be unique per-ES-sysmac */ + struct zebra_evpn_es *es; /* local ES */ +}; + /* `zebra' daemon local interface structure. */ struct zebra_if { + /* back pointer to the interface */ + struct interface *ifp; + /* Shutdown configuration. */ uint8_t shutdown; @@ -347,6 +361,12 @@ struct zebra_if { struct zebra_l2info_bondslave bondslave_info; + /* ethernet segment */ + struct zebra_es_if_info es_info; + + /* bitmap of vlans associated with this interface */ + bitfield_t vlan_bitmap; + /* Link fields - for sub-interfaces. */ ifindex_t link_ifindex; struct interface *link; @@ -370,17 +390,6 @@ DECLARE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp), DECLARE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp), (vty, ifp)) -static inline void zebra_if_set_ziftype(struct interface *ifp, - zebra_iftype_t zif_type, - zebra_slave_iftype_t zif_slave_type) -{ - struct zebra_if *zif; - - zif = (struct zebra_if *)ifp->info; - zif->zif_type = zif_type; - zif->zif_slave_type = zif_slave_type; -} - #define IS_ZEBRA_IF_VRF(ifp) \ (((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VRF) diff --git a/zebra/main.c b/zebra/main.c index 9c5a1ef9b6..92e94c2a2a 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -186,7 +186,7 @@ static void sigint(void) vrf_terminate(); rtadv_terminate(); - ns_walk_func(zebra_ns_early_shutdown, NULL, NULL); + ns_walk_func(zebra_ns_early_shutdown); zebra_ns_notify_close(); access_list_reset(); @@ -217,7 +217,7 @@ int zebra_finalize(struct thread *dummy) 
zlog_info("Zebra final shutdown"); /* Final shutdown of ns resources */ - ns_walk_func(zebra_ns_final_shutdown, NULL, NULL); + ns_walk_func(zebra_ns_final_shutdown); /* Stop dplane thread and finish any cleanup */ zebra_dplane_shutdown(); diff --git a/zebra/rt.h b/zebra/rt.h index 4b9a3f83fe..143e16b3ea 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -91,6 +91,11 @@ extern void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *ifp); extern void neigh_read_specific_ip(struct ipaddr *ip, struct interface *vlan_if); extern void route_read(struct zebra_ns *zns); +extern int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip); +extern int kernel_del_mac_nh(uint32_t nh_id); +extern int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids); +extern int kernel_del_mac_nhg(uint32_t nhg_id); #ifdef __cplusplus } diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 8d38b6defe..4daef42d7a 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -68,11 +68,27 @@ #include "zebra/zebra_mroute.h" #include "zebra/zebra_vxlan.h" #include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h" #ifndef AF_MPLS #define AF_MPLS 28 #endif +/* Re-defining as I am unable to include <linux/if_bridge.h> which has the + * UAPI for MAC sync. 
*/ +#ifndef _UAPI_LINUX_IF_BRIDGE_H +/* FDB notification bits for NDA_NOTIFY: + * - BR_FDB_NFY_STATIC - notify on activity/expire even for a static entry + * - BR_FDB_NFY_INACTIVE - mark as inactive to avoid double notification, + * used with BR_FDB_NFY_STATIC (kernel controlled) + */ +enum { + BR_FDB_NFY_STATIC, + BR_FDB_NFY_INACTIVE, + BR_FDB_NFY_MAX +}; +#endif + static vlanid_t filter_vlan = 0; /* We capture whether the current kernel supports nexthop ids; by @@ -2521,6 +2537,15 @@ int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) /* We use the ID key'd nhg table for kernel updates */ id = *((uint32_t *)RTA_DATA(tb[NHA_ID])); + if (zebra_evpn_mh_is_fdb_nh(id)) { + /* If this is a L2 NH just ignore it */ + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x", + h->nlmsg_type, id); + } + return 0; + } + family = nhm->nh_family; afi = family2afi(family); @@ -2676,7 +2701,9 @@ int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, static ssize_t netlink_neigh_update_msg_encode( const struct zebra_dplane_ctx *ctx, int cmd, const struct ethaddr *mac, const struct ipaddr *ip, bool replace_obj, uint8_t family, uint8_t type, - uint8_t flags, uint16_t state, void *data, size_t datalen) + uint8_t flags, uint16_t state, uint32_t nhg_id, + bool nfy, uint8_t nfy_flags, + void *data, size_t datalen) { uint8_t protocol = RTPROT_ZEBRA; struct { @@ -2715,6 +2742,16 @@ static ssize_t netlink_neigh_update_msg_encode( return 0; } + if (nhg_id) { + if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id)) + return 0; + } + if (nfy) { + if (!nl_attr_put(&req->n, datalen, NDA_NOTIFY, + &nfy_flags, sizeof(nfy_flags))) + return 0; + } + ipa_len = IS_IPADDR_V4(ip) ? 
IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN; if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr, ipa_len)) return 0; @@ -2747,8 +2784,9 @@ static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, if (netlink_neigh_update_msg_encode( ctx, cmd, &dst_mac, dplane_ctx_neigh_get_ipaddr(ctx), false, - PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT), nl_pkt, - sizeof(nl_pkt)) + PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT), + 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, + nl_pkt, sizeof(nl_pkt)) <= 0) return -1; @@ -2777,6 +2815,9 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) char vid_buf[20]; char dst_buf[30]; bool sticky; + bool local_inactive = false; + bool dp_static = false; + uint32_t nhg_id = 0; ndm = NLMSG_DATA(h); @@ -2824,13 +2865,29 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) inet_ntoa(vtep_ip)); } + if (tb[NDA_NH_ID]) + nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]); + + if (ndm->ndm_state & NUD_STALE) + local_inactive = true; + + if (tb[NDA_NOTIFY]) { + uint8_t nfy_flags; + + dp_static = true; + nfy_flags = *(uint8_t *)RTA_DATA(tb[NDA_NOTIFY]); + /* local activity has not been detected on the entry */ + if (nfy_flags & (1 << BR_FDB_NFY_INACTIVE)) + local_inactive = true; + } + if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s", + zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s nhg %u", nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex, vid_present ? vid_buf : "", ndm->ndm_state, ndm->ndm_flags, prefix_mac2str(&mac, buf, sizeof(buf)), - dst_present ? dst_buf : ""); + dst_present ? dst_buf : "", nhg_id); /* The interface should exist. 
*/ ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), @@ -2853,7 +2910,7 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) return 0; } - sticky = !!(ndm->ndm_state & NUD_NOARP); + sticky = !!(ndm->ndm_flags & NTF_STICKY); if (filter_vlan && vid != filter_vlan) { if (IS_ZEBRA_DEBUG_KERNEL) @@ -2881,7 +2938,7 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) vid); return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid, - sticky); + sticky, local_inactive, dp_static); } /* This is a delete notification. @@ -2894,6 +2951,9 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id) * Note: We will get notifications from both bridge driver and VxLAN * driver. */ + if (nhg_id) + return 0; + if (dst_present) { u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; @@ -3091,18 +3151,43 @@ netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data, int cmd; uint8_t flags; uint16_t state; + uint32_t nhg_id; + uint32_t update_flags; + bool nfy = false; + uint8_t nfy_flags = 0; cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL ? 
RTM_NEWNEIGH : RTM_DELNEIGH; - flags = (NTF_SELF | NTF_MASTER); + flags = NTF_MASTER; state = NUD_REACHABLE; - if (dplane_ctx_mac_is_sticky(ctx)) - state |= NUD_NOARP; - else - flags |= NTF_EXT_LEARNED; + update_flags = dplane_ctx_mac_get_update_flags(ctx); + if (update_flags & DPLANE_MAC_REMOTE) { + flags |= NTF_SELF; + if (dplane_ctx_mac_is_sticky(ctx)) + flags |= NTF_STICKY; + else + flags |= NTF_EXT_LEARNED; + /* if it was static-local previously we need to clear the + * notify flags on replace with remote + */ + if (update_flags & DPLANE_MAC_WAS_STATIC) + nfy = true; + } else { + /* local mac */ + if (update_flags & DPLANE_MAC_SET_STATIC) { + nfy_flags |= (1 << BR_FDB_NFY_STATIC); + state |= NUD_NOARP; + } + + if (update_flags & DPLANE_MAC_SET_INACTIVE) + nfy_flags |= (1 << BR_FDB_NFY_INACTIVE); + + nfy = true; + } + nhg_id = dplane_ctx_mac_get_nhg_id(ctx); vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx)); SET_IPADDR_V4(&vtep_ip); @@ -3110,6 +3195,7 @@ netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data, char ipbuf[PREFIX_STRLEN]; char buf[ETHER_ADDR_STRLEN]; char vid_buf[20]; + const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx); vid = dplane_ctx_mac_get_vlan(ctx); if (vid > 0) @@ -3117,20 +3203,30 @@ netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data, else vid_buf[0] = '\0'; - const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx); - - zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s dst %s", + zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s dst %s nhg %u%s%s%s%s%s", nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE), dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx), vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "", prefix_mac2str(mac, buf, sizeof(buf)), - ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf))); + ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf)), + nhg_id, + (update_flags & + DPLANE_MAC_REMOTE) ? " rem" : "", + (update_flags & + DPLANE_MAC_WAS_STATIC) ? 
" clr_sync" : "", + (update_flags & + DPLANE_MAC_SET_STATIC) ? " static" : "", + (update_flags & + DPLANE_MAC_SET_INACTIVE) ? " inactive" : "", + /* nfy is a bool, not a flag mask */ + nfy ? " nfy" : ""); } total = netlink_neigh_update_msg_encode( ctx, cmd, dplane_ctx_mac_get_addr(ctx), &vtep_ip, true, - AF_BRIDGE, 0, flags, state, data, datalen); + AF_BRIDGE, 0, flags, state, nhg_id, nfy, nfy_flags, + data, datalen); return total; } @@ -3164,6 +3260,8 @@ static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif, #define NUD_VALID \ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE \ | NUD_DELAY) +#define NUD_LOCAL_ACTIVE \ + (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE) static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) { @@ -3180,6 +3278,7 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) int mac_present = 0; bool is_ext; bool is_router; + bool local_inactive; ndm = NLMSG_DATA(h); @@ -3289,10 +3388,17 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) * result * in re-adding the neighbor if it is a valid "remote" neighbor. 
*/ - if (ndm->ndm_state & NUD_VALID) + if (ndm->ndm_state & NUD_VALID) { + local_inactive = !(ndm->ndm_state & NUD_LOCAL_ACTIVE); + + /* XXX - populate dp-static based on the sync flags + * in the kernel + */ return zebra_vxlan_handle_kernel_neigh_update( ifp, link_if, &ip, &mac, ndm->ndm_state, - is_ext, is_router); + is_ext, is_router, local_inactive, + false /* dp_static */); + } return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip); } @@ -3547,8 +3653,9 @@ static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, } if (netlink_neigh_update_msg_encode(ctx, cmd, mac, ip, true, family, - RTN_UNICAST, flags, state, nl_pkt, - sizeof(nl_pkt)) + RTN_UNICAST, flags, state, + 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, + nl_pkt, sizeof(nl_pkt)) <= 0) return -1; @@ -3757,4 +3864,172 @@ ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, return NLMSG_ALIGN(req->n.nlmsg_len); } + +/**************************************************************************** +* This code was developed in a branch that didn't have dplane APIs for +* MAC updates. Hence the use of the legacy style. It will be moved to +* the new dplane style pre-merge to master. 
XXX +*/ +static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_NEWNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = zebra_vrf_get_evpn(); + if (!zvrf) + return -1; + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_INET; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY, + &vtep_ip, IPV4_MAX_BYTELEN)) + return -1; + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Tx %s fdb-nh 0x%x %s", + nl_msg_type_to_str(cmd), nh_id, inet_ntoa(vtep_ip)); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + 0); +} + +static int netlink_fdb_nh_del(uint32_t nh_id) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int cmd = RTM_DELNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + + zvrf = zebra_vrf_get_evpn(); + if (!zvrf) + return -1; + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_UNSPEC; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)) + return -1; + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + zlog_debug("Tx %s fdb-nh 0x%x", + nl_msg_type_to_str(cmd), nh_id); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + 0); +} + +static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[256]; + } req; + int 
cmd = RTM_NEWNEXTHOP; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + struct nexthop_grp grp[nh_cnt]; + uint32_t i; + + zvrf = zebra_vrf_get_evpn(); + if (!zvrf) + return -1; + zns = zvrf->zns; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + req.n.nlmsg_type = cmd; + req.nhm.nh_family = AF_UNSPEC; + + if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id)) + return -1; + if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)) + return -1; + memset(&grp, 0, sizeof(grp)); + for (i = 0; i < nh_cnt; ++i) { + grp[i].id = nh_ids[i].id; + grp[i].weight = nh_ids[i].weight; + } + if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP, + grp, nh_cnt * sizeof(struct nexthop_grp))) + return -1; + + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) { + char vtep_str[ES_VTEP_LIST_STR_SZ]; + char nh_buf[16]; + + vtep_str[0] = '\0'; + for (i = 0; i < nh_cnt; ++i) { + snprintf(nh_buf, sizeof(nh_buf), "%u ", + grp[i].id); + strlcat(vtep_str, nh_buf, sizeof(vtep_str)); + } + + zlog_debug("Tx %s fdb-nhg 0x%x %s", + nl_msg_type_to_str(cmd), nhg_id, vtep_str); + } + + return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, + 0); +} + +static int netlink_fdb_nhg_del(uint32_t nhg_id) +{ + return netlink_fdb_nh_del(nhg_id); +} + +int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip) +{ + return netlink_fdb_nh_update(nh_id, vtep_ip); +} + +int kernel_del_mac_nh(uint32_t nh_id) +{ + return netlink_fdb_nh_del(nh_id); +} + +int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + return netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids); +} + +int kernel_del_mac_nhg(uint32_t nhg_id) +{ + return netlink_fdb_nhg_del(nhg_id); +} + #endif /* HAVE_NETLINK */ diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c index 2eadaf48f4..0271dc7f41 100644 --- a/zebra/rt_socket.c +++ b/zebra/rt_socket.c @@ -417,4 
+417,25 @@ uint32_t kernel_get_speed(struct interface *ifp, int *error) return ifp->speed; } +int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip) +{ + return 0; +} + +int kernel_del_mac_nh(uint32_t nh_id) +{ + return 0; +} + +int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt, + struct nh_grp *nh_ids) +{ + return 0; +} + +int kernel_del_mac_nhg(uint32_t nhg_id) +{ + return 0; +} + #endif /* !HAVE_NETLINK */ diff --git a/zebra/subdir.am b/zebra/subdir.am index 49e60820bc..c552ca513e 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -12,6 +12,7 @@ vtysh_scan += \ zebra/rtadv.c \ zebra/zebra_gr.c \ zebra/zebra_mlag_vty.c \ + zebra/zebra_evpn_mh.c \ zebra/zebra_mpls_vty.c \ zebra/zebra_ptm.c \ zebra/zebra_pw.c \ @@ -108,6 +109,7 @@ zebra_zebra_SOURCES = \ zebra/zebra_vrf.c \ zebra/zebra_vty.c \ zebra/zebra_vxlan.c \ + zebra/zebra_evpn_mh.c \ zebra/zserv.c \ # end @@ -115,6 +117,7 @@ clippy_scan += \ zebra/debug.c \ zebra/interface.c \ zebra/rtadv.c \ + zebra/zebra_evpn_mh.c \ zebra/zebra_mlag_vty.c \ zebra/zebra_routemap.c \ zebra/zebra_vty.c \ @@ -167,6 +170,7 @@ noinst_HEADERS += \ zebra/zebra_vrf.h \ zebra/zebra_vxlan.h \ zebra/zebra_vxlan_private.h \ + zebra/zebra_evpn_mh.h \ zebra/zserv.h \ # end diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 2ca3e82fac..0a459b4d0a 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -51,6 +51,7 @@ #include "zebra/zebra_mpls.h" #include "zebra/zebra_mroute.h" #include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" #include "zebra/rt.h" #include "zebra/zebra_pbr.h" #include "zebra/table_manager.h" @@ -2892,6 +2893,8 @@ void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = { [ZEBRA_ADVERTISE_SVI_MACIP] = zebra_vxlan_advertise_svi_macip, [ZEBRA_ADVERTISE_SUBNET] = zebra_vxlan_advertise_subnet, [ZEBRA_ADVERTISE_ALL_VNI] = zebra_vxlan_advertise_all_vni, + [ZEBRA_REMOTE_ES_VTEP_ADD] = zebra_evpn_proc_remote_es, + [ZEBRA_REMOTE_ES_VTEP_DEL] = zebra_evpn_proc_remote_es, [ZEBRA_REMOTE_VTEP_ADD] = 
zebra_vxlan_remote_vtep_add, [ZEBRA_REMOTE_VTEP_DEL] = zebra_vxlan_remote_vtep_del, [ZEBRA_REMOTE_MACIP_ADD] = zebra_vxlan_remote_macip_add, diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 53956e3aec..5dcf76db15 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -180,6 +180,8 @@ struct dplane_mac_info { struct ethaddr mac; struct in_addr vtep_ip; bool is_sticky; + uint32_t nhg_id; + uint32_t update_flags; }; /* @@ -190,6 +192,7 @@ struct dplane_neigh_info { struct ethaddr mac; uint32_t flags; uint16_t state; + uint32_t update_flags; }; /* @@ -441,13 +444,14 @@ static enum zebra_dplane_result mac_update_common( enum dplane_op_e op, const struct interface *ifp, const struct interface *br_ifp, vlanid_t vid, const struct ethaddr *mac, - struct in_addr vtep_ip, bool sticky); + struct in_addr vtep_ip, bool sticky, uint32_t nhg_id, + uint32_t update_flags); static enum zebra_dplane_result neigh_update_internal( enum dplane_op_e op, const struct interface *ifp, const struct ethaddr *mac, const struct ipaddr *ip, - uint32_t flags, uint16_t state); + uint32_t flags, uint16_t state, uint32_t update_flags); /* * Public APIs @@ -1552,6 +1556,18 @@ bool dplane_ctx_mac_is_sticky(const struct zebra_dplane_ctx *ctx) return ctx->u.macinfo.is_sticky; } +uint32_t dplane_ctx_mac_get_nhg_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.nhg_id; +} + +uint32_t dplane_ctx_mac_get_update_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.macinfo.update_flags; +} + const struct ethaddr *dplane_ctx_mac_get_addr( const struct zebra_dplane_ctx *ctx) { @@ -1599,6 +1615,12 @@ uint16_t dplane_ctx_neigh_get_state(const struct zebra_dplane_ctx *ctx) return ctx->u.neigh.state; } +uint32_t dplane_ctx_neigh_get_update_flags(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.neigh.update_flags; +} + /* Accessors for PBR rule information */ int 
dplane_ctx_rule_get_sock(const struct zebra_dplane_ctx *ctx) { @@ -2542,8 +2564,8 @@ dplane_route_notif_update(struct route_node *rn, done: if (ret == AOK) result = ZEBRA_DPLANE_REQUEST_QUEUED; - else if (ctx) - dplane_ctx_free(&ctx); + else if (new_ctx) + dplane_ctx_free(&new_ctx); return result; } @@ -2895,35 +2917,75 @@ static enum zebra_dplane_result intf_addr_update_internal( /* * Enqueue vxlan/evpn mac add (or update). */ -enum zebra_dplane_result dplane_mac_add(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_mac_add(const struct interface *ifp, const struct interface *bridge_ifp, vlanid_t vid, const struct ethaddr *mac, struct in_addr vtep_ip, - bool sticky) + bool sticky, + uint32_t nhg_id, + bool was_static) { enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_MAC_REMOTE; + if (was_static) + update_flags |= DPLANE_MAC_WAS_STATIC; /* Use common helper api */ result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp, - vid, mac, vtep_ip, sticky); + vid, mac, vtep_ip, sticky, nhg_id, update_flags); return result; } /* * Enqueue vxlan/evpn mac delete. */ -enum zebra_dplane_result dplane_mac_del(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_mac_del(const struct interface *ifp, const struct interface *bridge_ifp, vlanid_t vid, const struct ethaddr *mac, struct in_addr vtep_ip) { enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_MAC_REMOTE; /* Use common helper api */ result = mac_update_common(DPLANE_OP_MAC_DELETE, ifp, bridge_ifp, - vid, mac, vtep_ip, false); + vid, mac, vtep_ip, false, 0, update_flags); + return result; +} + +/* + * Enqueue local mac add (or update). 
+ */ +enum zebra_dplane_result dplane_local_mac_add(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + bool sticky, + uint32_t set_static, + uint32_t set_inactive) +{ + enum zebra_dplane_result result; + uint32_t update_flags = 0; + struct in_addr vtep_ip; + + if (set_static) + update_flags |= DPLANE_MAC_SET_STATIC; + + if (set_inactive) + update_flags |= DPLANE_MAC_SET_INACTIVE; + + vtep_ip.s_addr = 0; + + /* Use common helper api */ + result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp, + vid, mac, vtep_ip, sticky, 0, + update_flags); return result; } @@ -2937,7 +2999,9 @@ void dplane_mac_init(struct zebra_dplane_ctx *ctx, vlanid_t vid, const struct ethaddr *mac, struct in_addr vtep_ip, - bool sticky) + bool sticky, + uint32_t nhg_id, + uint32_t update_flags) { struct zebra_ns *zns; @@ -2958,6 +3022,8 @@ void dplane_mac_init(struct zebra_dplane_ctx *ctx, ctx->u.macinfo.mac = *mac; ctx->u.macinfo.vid = vid; ctx->u.macinfo.is_sticky = sticky; + ctx->u.macinfo.nhg_id = nhg_id; + ctx->u.macinfo.update_flags = update_flags; } /* @@ -2970,7 +3036,9 @@ mac_update_common(enum dplane_op_e op, vlanid_t vid, const struct ethaddr *mac, struct in_addr vtep_ip, - bool sticky) + bool sticky, + uint32_t nhg_id, + uint32_t update_flags) { enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; int ret; @@ -2990,7 +3058,8 @@ mac_update_common(enum dplane_op_e op, ctx->zd_op = op; /* Common init for the ctx */ - dplane_mac_init(ctx, ifp, br_ifp, vid, mac, vtep_ip, sticky); + dplane_mac_init(ctx, ifp, br_ifp, vid, mac, vtep_ip, sticky, + nhg_id, update_flags); /* Enqueue for processing on the dplane pthread */ ret = dplane_update_enqueue(ctx); @@ -3014,15 +3083,56 @@ mac_update_common(enum dplane_op_e op, /* * Enqueue evpn neighbor add for the dataplane. 
*/ -enum zebra_dplane_result dplane_neigh_add(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_neigh_add(const struct interface *ifp, const struct ipaddr *ip, const struct ethaddr *mac, - uint32_t flags) + uint32_t flags, bool was_static) { enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + uint32_t update_flags = 0; + + update_flags |= DPLANE_NEIGH_REMOTE; + + if (was_static) + update_flags |= DPLANE_NEIGH_WAS_STATIC; result = neigh_update_internal(DPLANE_OP_NEIGH_INSTALL, - ifp, mac, ip, flags, DPLANE_NUD_NOARP); + ifp, mac, ip, flags, DPLANE_NUD_NOARP, + update_flags); + + return result; +} + +/* + * Enqueue local neighbor add for the dataplane. + */ +enum zebra_dplane_result dplane_local_neigh_add(const struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *mac, + bool set_router, bool set_static, + bool set_inactive) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + uint32_t update_flags = 0; + uint32_t ntf = 0; + uint16_t state; + + if (set_static) + update_flags |= DPLANE_NEIGH_SET_STATIC; + + if (set_inactive) { + update_flags |= DPLANE_NEIGH_SET_INACTIVE; + state = DPLANE_NUD_STALE; + } else { + state = DPLANE_NUD_REACHABLE; + } + + if (set_router) + ntf |= DPLANE_NTF_ROUTER; + + result = neigh_update_internal(DPLANE_OP_NEIGH_INSTALL, + ifp, mac, ip, ntf, + state, update_flags); return result; } @@ -3030,14 +3140,18 @@ enum zebra_dplane_result dplane_neigh_add(const struct interface *ifp, /* * Enqueue evpn neighbor update for the dataplane. 
*/ -enum zebra_dplane_result dplane_neigh_update(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_neigh_update(const struct interface *ifp, const struct ipaddr *ip, const struct ethaddr *mac) { enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_NEIGH_REMOTE; result = neigh_update_internal(DPLANE_OP_NEIGH_UPDATE, - ifp, mac, ip, 0, DPLANE_NUD_PROBE); + ifp, mac, ip, 0, DPLANE_NUD_PROBE, + update_flags); return result; } @@ -3045,13 +3159,16 @@ enum zebra_dplane_result dplane_neigh_update(const struct interface *ifp, /* * Enqueue evpn neighbor delete for the dataplane. */ -enum zebra_dplane_result dplane_neigh_delete(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_neigh_delete(const struct interface *ifp, const struct ipaddr *ip) { enum zebra_dplane_result result; + uint32_t update_flags = 0; + + update_flags |= DPLANE_NEIGH_REMOTE; result = neigh_update_internal(DPLANE_OP_NEIGH_DELETE, - ifp, NULL, ip, 0, 0); + ifp, NULL, ip, 0, 0, update_flags); return result; } @@ -3075,7 +3192,7 @@ enum zebra_dplane_result dplane_vtep_add(const struct interface *ifp, addr.ipaddr_v4 = *ip; result = neigh_update_internal(DPLANE_OP_VTEP_ADD, - ifp, &mac, &addr, 0, 0); + ifp, &mac, &addr, 0, 0, 0); return result; } @@ -3100,7 +3217,7 @@ enum zebra_dplane_result dplane_vtep_delete(const struct interface *ifp, addr.ipaddr_v4 = *ip; result = neigh_update_internal(DPLANE_OP_VTEP_DELETE, - ifp, &mac, &addr, 0, 0); + ifp, &mac, &addr, 0, 0, 0); return result; } @@ -3113,7 +3230,8 @@ neigh_update_internal(enum dplane_op_e op, const struct interface *ifp, const struct ethaddr *mac, const struct ipaddr *ip, - uint32_t flags, uint16_t state) + uint32_t flags, uint16_t state, + uint32_t update_flags) { enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; int ret; @@ -3150,6 +3268,7 @@ neigh_update_internal(enum dplane_op_e op, ctx->u.neigh.mac = *mac; ctx->u.neigh.flags = flags; ctx->u.neigh.state = state; + 
ctx->u.neigh.update_flags = update_flags; /* Enqueue for processing on the dplane pthread */ ret = dplane_update_enqueue(ctx); diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 0fa21f620d..32032ed77d 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -168,6 +168,18 @@ enum dplane_op_e { #define DPLANE_NUD_NOARP 0x04 #define DPLANE_NUD_PROBE 0x08 +/* MAC update flags - dplane_mac_info.update_flags */ +#define DPLANE_MAC_REMOTE (1 << 0) +#define DPLANE_MAC_WAS_STATIC (1 << 1) +#define DPLANE_MAC_SET_STATIC (1 << 2) +#define DPLANE_MAC_SET_INACTIVE (1 << 3) + +/* Neigh update flags - dplane_neigh_info.update_flags */ +#define DPLANE_NEIGH_REMOTE (1 << 0) +#define DPLANE_NEIGH_WAS_STATIC (1 << 1) +#define DPLANE_NEIGH_SET_STATIC (1 << 2) +#define DPLANE_NEIGH_SET_INACTIVE (1 << 3) + /* Enable system route notifications */ void dplane_enable_sys_route_notifs(void); @@ -386,6 +398,8 @@ const char *dplane_ctx_get_intf_label(const struct zebra_dplane_ctx *ctx); /* Accessors for MAC information */ vlanid_t dplane_ctx_mac_get_vlan(const struct zebra_dplane_ctx *ctx); bool dplane_ctx_mac_is_sticky(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_mac_get_update_flags(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_mac_get_nhg_id(const struct zebra_dplane_ctx *ctx); const struct ethaddr *dplane_ctx_mac_get_addr( const struct zebra_dplane_ctx *ctx); const struct in_addr *dplane_ctx_mac_get_vtep_ip( @@ -399,6 +413,7 @@ const struct ethaddr *dplane_ctx_neigh_get_mac( const struct zebra_dplane_ctx *ctx); uint32_t dplane_ctx_neigh_get_flags(const struct zebra_dplane_ctx *ctx); uint16_t dplane_ctx_neigh_get_state(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_neigh_get_update_flags(const struct zebra_dplane_ctx *ctx); /* Accessors for policy based routing rule information */ int dplane_ctx_rule_get_sock(const struct zebra_dplane_ctx *ctx); @@ -497,20 +512,24 @@ enum zebra_dplane_result dplane_intf_addr_unset(const struct 
interface *ifp, /* * Enqueue evpn mac operations for the dataplane. */ -extern struct zebra_dplane_ctx *mac_update_internal( - enum dplane_op_e op, const struct interface *ifp, - const struct interface *br_ifp, - vlanid_t vid, const struct ethaddr *mac, - struct in_addr vtep_ip, bool sticky); - -enum zebra_dplane_result dplane_mac_add(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_mac_add(const struct interface *ifp, const struct interface *bridge_ifp, vlanid_t vid, const struct ethaddr *mac, struct in_addr vtep_ip, - bool sticky); + bool sticky, + uint32_t nhg_id, + bool was_static); + +enum zebra_dplane_result dplane_local_mac_add(const struct interface *ifp, + const struct interface *bridge_ifp, + vlanid_t vid, + const struct ethaddr *mac, + bool sticky, + uint32_t set_static, + uint32_t set_inactive); -enum zebra_dplane_result dplane_mac_del(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_mac_del(const struct interface *ifp, const struct interface *bridge_ifp, vlanid_t vid, const struct ethaddr *mac, @@ -523,19 +542,25 @@ void dplane_mac_init(struct zebra_dplane_ctx *ctx, vlanid_t vid, const struct ethaddr *mac, struct in_addr vtep_ip, - bool sticky); + bool sticky, + uint32_t nhg_id, uint32_t update_flags); /* * Enqueue evpn neighbor updates for the dataplane. 
*/ -enum zebra_dplane_result dplane_neigh_add(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_neigh_add(const struct interface *ifp, + const struct ipaddr *ip, + const struct ethaddr *mac, + uint32_t flags, bool was_static); +enum zebra_dplane_result dplane_local_neigh_add(const struct interface *ifp, const struct ipaddr *ip, const struct ethaddr *mac, - uint32_t flags); -enum zebra_dplane_result dplane_neigh_update(const struct interface *ifp, + bool set_router, bool set_static, + bool set_inactive); +enum zebra_dplane_result dplane_rem_neigh_update(const struct interface *ifp, const struct ipaddr *ip, const struct ethaddr *mac); -enum zebra_dplane_result dplane_neigh_delete(const struct interface *ifp, +enum zebra_dplane_result dplane_rem_neigh_delete(const struct interface *ifp, const struct ipaddr *ip); /* diff --git a/zebra/zebra_errors.h b/zebra/zebra_errors.h index 5f2a7a12c6..03953ed17f 100644 --- a/zebra/zebra_errors.h +++ b/zebra/zebra_errors.h @@ -134,6 +134,7 @@ enum zebra_log_refs { EC_ZEBRA_BAD_NHG_MESSAGE, EC_ZEBRA_DUPLICATE_NHG_MESSAGE, EC_ZEBRA_VRF_MISCONFIGURED, + EC_ZEBRA_ES_CREATE, }; void zebra_error_init(void); diff --git a/zebra/zebra_evpn_mh.c b/zebra/zebra_evpn_mh.c new file mode 100644 index 0000000000..fae36ec6fa --- /dev/null +++ b/zebra/zebra_evpn_mh.c @@ -0,0 +1,2145 @@ +/* + * Zebra EVPN multihoming code + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <zebra.h> + +#include "command.h" +#include "hash.h" +#include "if.h" +#include "jhash.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "stream.h" +#include "table.h" +#include "vlan.h" +#include "vxlan.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_l2.h" +#include "zebra/zebra_memory.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_nhg.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZACC_BD, "Access Broadcast Domain"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES, "Ethernet Segment"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES_EVI, "ES info per-EVI"); +DEFINE_MTYPE_STATIC(ZEBRA, ZMH_INFO, "MH global info"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES_VTEP, "VTEP attached to the ES"); + +static void zebra_evpn_es_get_one_base_vni(void); +static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es, + zebra_vni_t *vni, bool add); +static void zebra_evpn_local_es_del(struct zebra_evpn_es *es); +static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, + struct ethaddr *sysmac); + +esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; + +/*****************************************************************************/ +/* Ethernet Segment to EVI association - + * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI + * (zebra_vni_t.es_evi_rb_tree). + * 2. Each local ES-EVI entry is sent to BGP which advertises it as an + * EAD-EVI (Type-1 EVPN) route + * 3. Local ES-EVI setup is re-evaluated on the following triggers - + * a. When an ESI is set or cleared on an access port. + * b. When an access port associated with an ESI is deleted. + * c. 
When VLAN member ship changes on an access port. + * d. When a VXLAN_IF is set or cleared on an access broadcast domain. + * e. When a L2-VNI is added or deleted for a VxLAN_IF. + * 4. Currently zebra doesn't remote ES-EVIs. Those are managed and maintained + * entirely in BGP which consolidates them into a remote ES. The remote ES + * is then sent to zebra which allocates a NHG for it. + */ + +/* compare ES-IDs for the ES-EVI RB tree maintained per-VNI */ +static int zebra_es_evi_rb_cmp(const struct zebra_evpn_es_evi *es_evi1, + const struct zebra_evpn_es_evi *es_evi2) +{ + return memcmp(&es_evi1->es->esi, &es_evi2->es->esi, ESI_BYTES); +} +RB_GENERATE(zebra_es_evi_rb_head, zebra_evpn_es_evi, + rb_node, zebra_es_evi_rb_cmp); + +/* allocate a new ES-EVI and insert it into the per-L2-VNI and per-ES + * tables. + */ +static struct zebra_evpn_es_evi *zebra_evpn_es_evi_new(struct zebra_evpn_es *es, + zebra_vni_t *zvni) +{ + struct zebra_evpn_es_evi *es_evi; + + es_evi = XCALLOC(MTYPE_ZES_EVI, sizeof(struct zebra_evpn_es_evi)); + + es_evi->es = es; + es_evi->zvni = zvni; + + /* insert into the VNI-ESI rb tree */ + if (RB_INSERT(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, es_evi)) { + XFREE(MTYPE_ZES_EVI, es_evi); + return NULL; + } + + /* add to the ES's VNI list */ + listnode_init(&es_evi->es_listnode, es_evi); + listnode_add(es->es_evi_list, &es_evi->es_listnode); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s evi %d new", + es_evi->es->esi_str, es_evi->zvni->vni); + + return es_evi; +} + +/* returns TRUE if the VNI is ready to be sent to BGP */ +static inline bool zebra_evpn_vni_send_to_client_ok(zebra_vni_t *zvni) +{ + return !!(zvni->flags & ZVNI_READY_FOR_BGP); +} + +/* Evaluate if the es_evi is ready to be sent BGP - + * 1. If it is ready an add is sent to BGP + * 2. If it is not ready a del is sent (if the ES had been previously added + * to BGP). 
+ */ +static void zebra_evpn_es_evi_re_eval_send_to_client( + struct zebra_evpn_es_evi *es_evi) +{ + bool old_ready; + bool new_ready; + + old_ready = !!(es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP); + + /* ES and L2-VNI have to be individually ready for BGP */ + if ((es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) && + (es_evi->es->flags & ZEBRA_EVPNES_READY_FOR_BGP) && + zebra_evpn_vni_send_to_client_ok(es_evi->zvni)) + es_evi->flags |= ZEBRA_EVPNES_EVI_READY_FOR_BGP; + else + es_evi->flags &= ~ZEBRA_EVPNES_EVI_READY_FOR_BGP; + + new_ready = !!(es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP); + + if (old_ready == new_ready) + return; + + if (new_ready) + zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zvni, + true /* add */); + else + zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zvni, + false /* add */); +} + +/* remove the ES-EVI from the per-L2-VNI and per-ES tables and free + * up the memory. + */ +static void zebra_evpn_es_evi_free(struct zebra_evpn_es_evi *es_evi) +{ + struct zebra_evpn_es *es = es_evi->es; + zebra_vni_t *zvni = es_evi->zvni; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s evi %d free", + es_evi->es->esi_str, es_evi->zvni->vni); + + /* remove from the ES's VNI list */ + list_delete_node(es->es_evi_list, &es_evi->es_listnode); + + /* remove from the VNI-ESI rb tree */ + RB_REMOVE(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, es_evi); + + /* remove from the VNI-ESI rb tree */ + XFREE(MTYPE_ZES_EVI, es_evi); +} + +/* find the ES-EVI in the per-L2-VNI RB tree */ +static struct zebra_evpn_es_evi *zebra_evpn_es_evi_find( + struct zebra_evpn_es *es, zebra_vni_t *zvni) +{ + struct zebra_evpn_es_evi es_evi; + + es_evi.es = es; + + return RB_FIND(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, &es_evi); +} + +/* Tell BGP about an ES-EVI deletion and then delete it */ +static void zebra_evpn_local_es_evi_do_del(struct zebra_evpn_es_evi *es_evi) +{ + if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + 
zlog_debug("local es %s evi %d del", + es_evi->es->esi_str, es_evi->zvni->vni); + + if (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP) { + /* send a del only if add was sent for it earlier */ + zebra_evpn_es_evi_send_to_client(es_evi->es, + es_evi->zvni, false /* add */); + } + + /* delete it from the VNI's local list */ + list_delete_node(es_evi->zvni->local_es_evi_list, + &es_evi->l2vni_listnode); + + es_evi->flags &= ~ZEBRA_EVPNES_EVI_LOCAL; + zebra_evpn_es_evi_free(es_evi); +} +static void zebra_evpn_local_es_evi_del(struct zebra_evpn_es *es, + zebra_vni_t *zvni) +{ + struct zebra_evpn_es_evi *es_evi; + + es_evi = zebra_evpn_es_evi_find(es, zvni); + if (es_evi) + zebra_evpn_local_es_evi_do_del(es_evi); +} + +/* Create an ES-EVI if it doesn't already exist and tell BGP */ +static void zebra_evpn_local_es_evi_add(struct zebra_evpn_es *es, + zebra_vni_t *zvni) +{ + struct zebra_evpn_es_evi *es_evi; + + es_evi = zebra_evpn_es_evi_find(es, zvni); + if (!es_evi) { + es_evi = zebra_evpn_es_evi_new(es, zvni); + if (!es_evi) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("local es %s evi %d add", + es_evi->es->esi_str, es_evi->zvni->vni); + es_evi->flags |= ZEBRA_EVPNES_EVI_LOCAL; + /* add to the VNI's local list */ + listnode_init(&es_evi->l2vni_listnode, es_evi); + listnode_add(zvni->local_es_evi_list, &es_evi->l2vni_listnode); + + zebra_evpn_es_evi_re_eval_send_to_client(es_evi); + } +} + +static void zebra_evpn_es_evi_show_entry(struct vty *vty, + struct zebra_evpn_es_evi *es_evi, json_object *json) +{ + char type_str[4]; + + if (json) { + /* XXX */ + } else { + type_str[0] = '\0'; + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + + vty_out(vty, "%-8d %-30s %-4s\n", + es_evi->zvni->vni, es_evi->es->esi_str, + type_str); + } +} + +static void zebra_evpn_es_evi_show_entry_detail(struct vty *vty, + struct zebra_evpn_es_evi *es_evi, json_object *json) +{ + char type_str[4]; + + if (json) { + /* XXX */ + } else { + 
type_str[0] = '\0'; + if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + + vty_out(vty, "VNI %d ESI: %s\n", + es_evi->zvni->vni, es_evi->es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " Ready for BGP: %s\n", + (es_evi->flags & + ZEBRA_EVPNES_EVI_READY_FOR_BGP) ? + "yes" : "no"); + vty_out(vty, "\n"); + } +} + +static void zebra_evpn_es_evi_show_one_vni(zebra_vni_t *zvni, + struct vty *vty, json_object *json, int detail) +{ + struct zebra_evpn_es_evi *es_evi; + + RB_FOREACH(es_evi, zebra_es_evi_rb_head, &zvni->es_evi_rb_tree) { + if (detail) + zebra_evpn_es_evi_show_entry_detail(vty, es_evi, json); + else + zebra_evpn_es_evi_show_entry(vty, es_evi, json); + } +} + +struct evpn_mh_show_ctx { + struct vty *vty; + json_object *json; + int detail; +}; + +static void zebra_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket, + void *ctxt) +{ + zebra_vni_t *zvni = (zebra_vni_t *)bucket->data; + struct evpn_mh_show_ctx *wctx = (struct evpn_mh_show_ctx *)ctxt; + + zebra_evpn_es_evi_show_one_vni(zvni, wctx->vty, + wctx->json, wctx->detail); +} + +void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail) +{ + json_object *json = NULL; + struct zebra_vrf *zvrf; + struct evpn_mh_show_ctx wctx; + + zvrf = zebra_vrf_get_evpn(); + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json; + wctx.detail = detail; + + if (!detail && !json) { + vty_out(vty, "Type: L local, R remote\n"); + vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); + } + /* Display all L2-VNIs */ + hash_iterate(zvrf->vni_table, zebra_evpn_es_evi_show_one_vni_hash_cb, + &wctx); +} + +void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, vni_t vni, int detail) +{ + json_object *json = NULL; + zebra_vni_t *zvni; + + zvni = zvni_lookup(vni); + if (zvni) { + if (!detail && !json) { + vty_out(vty, "Type: L local, R remote\n"); + vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); + } + } else { + if (!uj) + 
vty_out(vty, "VNI %d doesn't exist\n", vni); + } + zebra_evpn_es_evi_show_one_vni(zvni, vty, json, detail); +} + +/* Initialize the ES tables maintained per-L2_VNI */ +void zebra_evpn_vni_es_init(zebra_vni_t *zvni) +{ + /* Initialize the ES-EVI RB tree */ + RB_INIT(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree); + + /* Initialize the local and remote ES lists maintained for quick + * walks by type + */ + zvni->local_es_evi_list = list_new(); + listset_app_node_mem(zvni->local_es_evi_list); +} + +/* Cleanup the ES info maintained per-L2_VNI */ +void zebra_evpn_vni_es_cleanup(zebra_vni_t *zvni) +{ + struct zebra_evpn_es_evi *es_evi; + struct zebra_evpn_es_evi *es_evi_next; + + RB_FOREACH_SAFE(es_evi, zebra_es_evi_rb_head, + &zvni->es_evi_rb_tree, es_evi_next) { + zebra_evpn_local_es_evi_do_del(es_evi); + } + + list_delete(&zvni->local_es_evi_list); + zebra_evpn_es_clear_base_vni(zvni); +} + +/* called when the oper state or bridge membership changes for the + * vxlan device + */ +void zebra_evpn_vni_update_all_es(zebra_vni_t *zvni) +{ + struct zebra_evpn_es_evi *es_evi; + struct listnode *node; + + /* the VNI is now elgible as a base for EVPN-MH */ + if (zebra_evpn_vni_send_to_client_ok(zvni)) + zebra_evpn_es_set_base_vni(zvni); + else + zebra_evpn_es_clear_base_vni(zvni); + + for (ALL_LIST_ELEMENTS_RO(zvni->local_es_evi_list, node, es_evi)) + zebra_evpn_es_evi_re_eval_send_to_client(es_evi); +} + +/*****************************************************************************/ +/* Access broadcast domains (BD) + * 1. These broadcast domains can be VLAN aware (in which case + * the key is VID) or VLAN unaware (in which case the key is + * 2. A VID-BD is created when a VLAN is associated with an access port or + * when the VLAN is associated with VXLAN_IF + * 3. 
A BD is translated into ES-EVI entries when a VNI is associated + * with the broadcast domain + */ +/* Hash key for VLAN based broadcast domains */ +static unsigned int zebra_evpn_acc_vl_hash_keymake(const void *p) +{ + const struct zebra_evpn_access_bd *acc_bd = p; + + return jhash_1word(acc_bd->vid, 0); +} + +/* Compare two VLAN based broadcast domains */ +static bool zebra_evpn_acc_vl_cmp(const void *p1, const void *p2) +{ + const struct zebra_evpn_access_bd *acc_bd1 = p1; + const struct zebra_evpn_access_bd *acc_bd2 = p2; + + if (acc_bd1 == NULL && acc_bd2 == NULL) + return true; + + if (acc_bd1 == NULL || acc_bd2 == NULL) + return false; + + return (acc_bd1->vid == acc_bd2->vid); +} + +/* Lookup VLAN based broadcast domain */ +static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_find(vlanid_t vid) +{ + struct zebra_evpn_access_bd *acc_bd; + struct zebra_evpn_access_bd tmp; + + tmp.vid = vid; + acc_bd = hash_lookup(zmh_info->evpn_vlan_table, &tmp); + + return acc_bd; +} + +/* A new broadcast domain can be created when a VLAN member or VLAN<=>VxLAN_IF + * mapping is added. + */ +static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_new(vlanid_t vid) +{ + struct zebra_evpn_access_bd *acc_bd; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d add", vid); + + acc_bd = XCALLOC(MTYPE_ZACC_BD, sizeof(struct zebra_evpn_access_bd)); + + acc_bd->vid = vid; + + /* Initialize the mbr list */ + acc_bd->mbr_zifs = list_new(); + + /* Add to hash */ + if (!hash_get(zmh_info->evpn_vlan_table, acc_bd, hash_alloc_intern)) { + XFREE(MTYPE_ZACC_BD, acc_bd); + return NULL; + } + + return acc_bd; +} + +/* Free VLAN based broadcast domain - + * This just frees appropriate memory, caller should have taken other + * needed actions. 
+ */ +static void zebra_evpn_acc_vl_free(struct zebra_evpn_access_bd *acc_bd) +{ + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d del", acc_bd->vid); + + /* cleanup resources maintained against the ES */ + list_delete(&acc_bd->mbr_zifs); + + /* remove EVI from various tables */ + hash_release(zmh_info->evpn_vlan_table, acc_bd); + + XFREE(MTYPE_ZACC_BD, acc_bd); +} + +static void zebra_evpn_acc_vl_cleanup_all(struct hash_bucket *bucket, void *arg) +{ + struct zebra_evpn_access_bd *acc_bd = bucket->data; + + zebra_evpn_acc_vl_free(acc_bd); +} + +/* called when a bd mbr is removed or VxLAN_IF is diassociated from the access + * VLAN + */ +static void zebra_evpn_acc_bd_free_on_deref(struct zebra_evpn_access_bd *acc_bd) +{ + if (!list_isempty(acc_bd->mbr_zifs) || acc_bd->vxlan_zif) + return; + + /* if there are no references free the EVI */ + zebra_evpn_acc_vl_free(acc_bd); +} + +/* called when a EVPN-L2VNI is set or cleared against a BD */ +static void zebra_evpn_acc_bd_vni_set(struct zebra_evpn_access_bd *acc_bd, + zebra_vni_t *zvni, zebra_vni_t *old_zvni) +{ + struct zebra_if *zif; + struct listnode *node; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d l2-vni %u set", + acc_bd->vid, zvni ? 
zvni->vni : 0); + + for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) { + if (!zif->es_info.es) + continue; + + if (zvni) + zebra_evpn_local_es_evi_add(zif->es_info.es, zvni); + else if (old_zvni) + zebra_evpn_local_es_evi_del(zif->es_info.es, old_zvni); + } +} + +/* handle VLAN->VxLAN_IF association */ +void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif) +{ + struct zebra_evpn_access_bd *acc_bd; + struct zebra_if *old_vxlan_zif; + zebra_vni_t *old_zvni; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + acc_bd = zebra_evpn_acc_vl_new(vid); + + old_vxlan_zif = acc_bd->vxlan_zif; + acc_bd->vxlan_zif = vxlan_zif; + if (vxlan_zif == old_vxlan_zif) + return; + + old_zvni = acc_bd->zvni; + acc_bd->zvni = zvni_lookup(vxlan_zif->l2info.vxl.vni); + if (acc_bd->zvni == old_zvni) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d vni %u ref", + acc_bd->vid, vxlan_zif->l2info.vxl.vni); + + if (old_zvni) + zebra_evpn_acc_bd_vni_set(acc_bd, NULL, old_zvni); + + if (acc_bd->zvni) + zebra_evpn_acc_bd_vni_set(acc_bd, acc_bd->zvni, NULL); +} + +/* handle VLAN->VxLAN_IF deref */ +void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif) +{ + struct zebra_evpn_access_bd *acc_bd; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + return; + + /* clear vxlan_if only if it matches */ + if (acc_bd->vxlan_zif != vxlan_zif) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d vni %u deref", + acc_bd->vid, vxlan_zif->l2info.vxl.vni); + + if (acc_bd->zvni) + zebra_evpn_acc_bd_vni_set(acc_bd, NULL, acc_bd->zvni); + + acc_bd->zvni = NULL; + acc_bd->vxlan_zif = NULL; + + /* if there are no other references the access_bd can be freed */ + zebra_evpn_acc_bd_free_on_deref(acc_bd); +} + +/* handle EVPN L2VNI add/del */ +void zebra_evpn_vxl_vni_set(struct zebra_if *zif, zebra_vni_t *zvni, + bool set) +{ + struct zebra_l2info_vxlan *vxl; + struct 
zebra_evpn_access_bd *acc_bd; + + if (!zif) + return; + + /* locate access_bd associated with the vxlan device */ + vxl = &zif->l2info.vxl; + acc_bd = zebra_evpn_acc_vl_find(vxl->access_vlan); + if (!acc_bd) + return; + + if (set) { + zebra_evpn_es_set_base_vni(zvni); + if (acc_bd->zvni != zvni) { + acc_bd->zvni = zvni; + zebra_evpn_acc_bd_vni_set(acc_bd, zvni, NULL); + } + } else { + if (acc_bd->zvni) { + zebra_vni_t *old_zvni = acc_bd->zvni; + acc_bd->zvni = NULL; + zebra_evpn_acc_bd_vni_set(acc_bd, NULL, old_zvni); + } + } +} + +/* handle addition of new VLAN members */ +void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif) +{ + struct zebra_evpn_access_bd *acc_bd; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + acc_bd = zebra_evpn_acc_vl_new(vid); + + if (listnode_lookup(acc_bd->mbr_zifs, zif)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d mbr %s ref", + vid, zif->ifp->name); + + listnode_add(acc_bd->mbr_zifs, zif); + if (acc_bd->zvni && zif->es_info.es) + zebra_evpn_local_es_evi_add(zif->es_info.es, acc_bd->zvni); +} + +/* handle deletion of VLAN members */ +void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif) +{ + struct zebra_evpn_access_bd *acc_bd; + struct listnode *node; + + if (!vid) + return; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) + return; + + node = listnode_lookup(acc_bd->mbr_zifs, zif); + if (!node) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("access vlan %d mbr %s deref", + vid, zif->ifp->name); + + list_delete_node(acc_bd->mbr_zifs, node); + + if (acc_bd->zvni && zif->es_info.es) + zebra_evpn_local_es_evi_del(zif->es_info.es, acc_bd->zvni); + + /* if there are no other references the access_bd can be freed */ + zebra_evpn_acc_bd_free_on_deref(acc_bd); +} + +static void zebra_evpn_acc_vl_show_entry_detail(struct vty *vty, + struct zebra_evpn_access_bd *acc_bd, json_object *json) +{ + struct zebra_if *zif; + struct listnode *node; + + 
if (json) { + /* XXX */ + } else { + vty_out(vty, "VLAN: %u\n", acc_bd->vid); + vty_out(vty, " VxLAN Interface: %s\n", + acc_bd->vxlan_zif ? + acc_bd->vxlan_zif->ifp->name : "-"); + vty_out(vty, " L2-VNI: %d\n", + acc_bd->zvni ? acc_bd->zvni->vni : 0); + vty_out(vty, " Member Count: %d\n", + listcount(acc_bd->mbr_zifs)); + vty_out(vty, " Members: \n"); + for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) + vty_out(vty, " %s\n", zif->ifp->name); + vty_out(vty, "\n"); + } +} + +static void zebra_evpn_acc_vl_show_entry(struct vty *vty, + struct zebra_evpn_access_bd *acc_bd, json_object *json) +{ + if (!json) + vty_out(vty, "%-5u %21s %-8d %u\n", + acc_bd->vid, + acc_bd->vxlan_zif ? + acc_bd->vxlan_zif->ifp->name : "-", + acc_bd->zvni ? acc_bd->zvni->vni : 0, + listcount(acc_bd->mbr_zifs)); +} + +static void zebra_evpn_acc_vl_show_hash(struct hash_bucket *bucket, void *ctxt) +{ + struct evpn_mh_show_ctx *wctx = ctxt; + struct zebra_evpn_access_bd *acc_bd = bucket->data; + + if (wctx->detail) + zebra_evpn_acc_vl_show_entry_detail(wctx->vty, + acc_bd, wctx->json); + else + zebra_evpn_acc_vl_show_entry(wctx->vty, + acc_bd, wctx->json); +} + +void zebra_evpn_acc_vl_show(struct vty *vty, bool uj) +{ + json_object *json = NULL; + struct evpn_mh_show_ctx wctx; + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json; + wctx.detail = false; + + if (!json) + vty_out(vty, "%-5s %21s %-8s %s\n", + "VLAN", "VxLAN-IF", "L2-VNI", "# Members"); + + hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash, + &wctx); +} + +void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj) +{ + json_object *json = NULL; + struct evpn_mh_show_ctx wctx; + + memset(&wctx, 0, sizeof(wctx)); + wctx.vty = vty; + wctx.json = json; + wctx.detail = true; + + hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash, + &wctx); +} + +void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid) +{ + json_object *json = NULL; + struct 
zebra_evpn_access_bd *acc_bd; + + acc_bd = zebra_evpn_acc_vl_find(vid); + if (!acc_bd) { + if (!json) { + vty_out(vty, "VLAN %u not present\n", vid); + return; + } + } + zebra_evpn_acc_vl_show_entry_detail(vty, acc_bd, json); +} + +/* Initialize VLAN member bitmap on an interface. Although VLAN membership + * is independent of EVPN we only process it if its of interest to EVPN-MH + * i.e. on access ports that can be setup as Ethernet Segments. And that is + * intended as an optimization. + */ +void zebra_evpn_if_init(struct zebra_if *zif) +{ + if (!zebra_evpn_is_if_es_capable(zif)) + return; + + if (!bf_is_inited(zif->vlan_bitmap)) + bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX); + + /* if an es_id and sysmac are already present against the interface + * activate it + */ + zebra_evpn_local_es_update(zif, zif->es_info.lid, &zif->es_info.sysmac); +} + +/* handle deletion of an access port by removing it from all associated + * broadcast domains. + */ +void zebra_evpn_if_cleanup(struct zebra_if *zif) +{ + vlanid_t vid; + + if (!bf_is_inited(zif->vlan_bitmap)) + return; + + bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { + zebra_evpn_vl_mbr_deref(vid, zif); + } + + bf_free(zif->vlan_bitmap); + + /* Delete associated Ethernet Segment */ + if (zif->es_info.es) + zebra_evpn_local_es_del(zif->es_info.es); +} + +/***************************************************************************** + * L2 NH/NHG Management + * A L2 NH entry is programmed in the kernel for every ES-VTEP entry. This + * NH is then added to the L2-ECMP-NHG associated with the ES. + */ +static uint32_t zebra_evpn_nhid_alloc(bool is_nhg) +{ + uint32_t id; + int type; + + bf_assign_index(zmh_info->nh_id_bitmap, id); + + if (!id) + return 0; + + type = is_nhg ? 
EVPN_NHG_ID_TYPE_BIT : EVPN_NH_ID_TYPE_BIT; + return (id | type); +} + +static void zebra_evpn_nhid_free(uint32_t nh_id) +{ + uint32_t id = (nh_id & EVPN_NH_ID_VAL_MASK); + + if (!id) + return; + + bf_release_index(zmh_info->nh_id_bitmap, id); +} + +/* The MAC ECMP group is activated on the first VTEP */ +static void zebra_evpn_nhg_update(struct zebra_evpn_es *es) +{ + uint32_t nh_cnt = 0; + struct nh_grp nh_ids[ES_VTEP_MAX_CNT]; + struct zebra_evpn_es_vtep *es_vtep; + struct listnode *node; + + if (!es->nhg_id) + return; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (!es_vtep->nh_id) + continue; + + if (nh_cnt >= ES_VTEP_MAX_CNT) + break; + + memset(&nh_ids[nh_cnt], 0, sizeof(struct nh_grp)); + nh_ids[nh_cnt].id = es_vtep->nh_id; + ++nh_cnt; + } + + if (nh_cnt) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) { + char nh_str[ES_VTEP_LIST_STR_SZ]; + uint32_t i; + char nh_buf[16]; + + nh_str[0] = '\0'; + for (i = 0; i < nh_cnt; ++i) { + snprintf(nh_buf, sizeof(nh_buf), "%u ", + nh_ids[i].id); + strlcat(nh_str, nh_buf, sizeof(nh_str)); + } + zlog_debug("es %s nhg 0x%x add %s", + es->esi_str, es->nhg_id, nh_str); + } + + es->flags |= ZEBRA_EVPNES_NHG_ACTIVE; + kernel_upd_mac_nhg(es->nhg_id, nh_cnt, nh_ids); + } else { + if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + zlog_debug("es %s nhg 0x%x del", + es->esi_str, es->nhg_id); + es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE; + kernel_del_mac_nhg(es->nhg_id); + } + } + + /* XXX - update remote macs associated with the ES */ +} + +static void zebra_evpn_nh_add(struct zebra_evpn_es_vtep *es_vtep) +{ + if (es_vtep->nh_id) + return; + + es_vtep->nh_id = zebra_evpn_nhid_alloc(false); + + if (!es_vtep->nh_id) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + zlog_debug("es %s vtep %s nh 0x%x add", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), es_vtep->nh_id); + /* install the NH */ + kernel_upd_mac_nh(es_vtep->nh_id, es_vtep->vtep_ip); + /* add the NH to the parent NHG */ + 
zebra_evpn_nhg_update(es_vtep->es); +} + +static void zebra_evpn_nh_del(struct zebra_evpn_es_vtep *es_vtep) +{ + uint32_t nh_id; + + if (!es_vtep->nh_id) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NH) + zlog_debug("es %s vtep %s nh 0x%x del", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), es_vtep->nh_id); + + nh_id = es_vtep->nh_id; + es_vtep->nh_id = 0; + + /* remove the NH from the parent NHG */ + zebra_evpn_nhg_update(es_vtep->es); + /* uninstall the NH */ + kernel_del_mac_nh(nh_id); + zebra_evpn_nhid_free(nh_id); + +} + +/*****************************************************************************/ +/* Ethernet Segment Management + * 1. Ethernet Segment is a collection of links attached to the same + * server (MHD) or switch (MHN) + * 2. An Ethernet Segment can span multiple PEs and is identified by the + * 10-byte ES-ID. + * 3. Zebra manages the local ESI configuration. + * 4. It also maintains the aliasing that maps an ESI (local or remote) + * to one or more PEs/VTEPs. + * 5. remote ESs are added by BGP (on rxing EAD Type-1 routes) + */ +/* A list of remote VTEPs is maintained for each ES. This list includes - + * 1. VTEPs for which we have imported the ESR i.e. ES-peers + * 2. VTEPs that have an "active" ES-EVI VTEP i.e. 
EAD-per-ES and EAD-per-EVI + * have been imported into one or more VNIs + */ +static int zebra_evpn_es_vtep_cmp(void *p1, void *p2) +{ + const struct zebra_evpn_es_vtep *es_vtep1 = p1; + const struct zebra_evpn_es_vtep *es_vtep2 = p2; + + return es_vtep1->vtep_ip.s_addr - es_vtep2->vtep_ip.s_addr; +} + +static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_new( + struct zebra_evpn_es *es, struct in_addr vtep_ip) +{ + struct zebra_evpn_es_vtep *es_vtep; + + es_vtep = XCALLOC(MTYPE_ZES_VTEP, sizeof(*es_vtep)); + + es_vtep->es = es; + es_vtep->vtep_ip.s_addr = vtep_ip.s_addr; + listnode_init(&es_vtep->es_listnode, es_vtep); + listnode_add_sort(es->es_vtep_list, &es_vtep->es_listnode); + + return es_vtep; +} + +static void zebra_evpn_es_vtep_free(struct zebra_evpn_es_vtep *es_vtep) +{ + struct zebra_evpn_es *es = es_vtep->es; + + list_delete_node(es->es_vtep_list, &es_vtep->es_listnode); + /* update the L2-NHG associated with the ES */ + zebra_evpn_nh_del(es_vtep); + XFREE(MTYPE_ZES_VTEP, es_vtep); +} + + +/* check if VTEP is already part of the list */ +static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_find( + struct zebra_evpn_es *es, struct in_addr vtep_ip) +{ + struct listnode *node = NULL; + struct zebra_evpn_es_vtep *es_vtep; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (es_vtep->vtep_ip.s_addr == vtep_ip.s_addr) + return es_vtep; + } + return NULL; +} + +static void zebra_evpn_es_vtep_add(struct zebra_evpn_es *es, + struct in_addr vtep_ip) +{ + struct zebra_evpn_es_vtep *es_vtep; + + es_vtep = zebra_evpn_es_vtep_find(es, vtep_ip); + + if (!es_vtep) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s vtep %s add", + es->esi_str, inet_ntoa(vtep_ip)); + es_vtep = zebra_evpn_es_vtep_new(es, vtep_ip); + /* update the L2-NHG associated with the ES */ + zebra_evpn_nh_add(es_vtep); + } +} + +static void zebra_evpn_es_vtep_del(struct zebra_evpn_es *es, + struct in_addr vtep_ip) +{ + struct zebra_evpn_es_vtep *es_vtep; + + es_vtep = 
zebra_evpn_es_vtep_find(es, vtep_ip); + + if (es_vtep) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s vtep %s del", + es->esi_str, inet_ntoa(vtep_ip)); + zebra_evpn_es_vtep_free(es_vtep); + } +} + +/* compare ES-IDs for the global ES RB tree */ +static int zebra_es_rb_cmp(const struct zebra_evpn_es *es1, + const struct zebra_evpn_es *es2) +{ + return memcmp(&es1->esi, &es2->esi, ESI_BYTES); +} +RB_GENERATE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); + +/* Lookup ES */ +struct zebra_evpn_es *zebra_evpn_es_find(esi_t *esi) +{ + struct zebra_evpn_es tmp; + + memcpy(&tmp.esi, esi, sizeof(esi_t)); + return RB_FIND(zebra_es_rb_head, &zmh_info->es_rb_tree, &tmp); +} + +/* A new local es is created when a local-es-id and sysmac is configured + * against an interface. + */ +static struct zebra_evpn_es *zebra_evpn_es_new(esi_t *esi) +{ + struct zebra_evpn_es *es; + + es = XCALLOC(MTYPE_ZES, sizeof(struct zebra_evpn_es)); + + /* fill in ESI */ + memcpy(&es->esi, esi, sizeof(esi_t)); + esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str)); + + /* Add to rb_tree */ + if (RB_INSERT(zebra_es_rb_head, &zmh_info->es_rb_tree, es)) { + XFREE(MTYPE_ZES, es); + return NULL; + } + + /* Initialise the ES-EVI list */ + es->es_evi_list = list_new(); + listset_app_node_mem(es->es_evi_list); + + /* Initialise the VTEP list */ + es->es_vtep_list = list_new(); + listset_app_node_mem(es->es_vtep_list); + es->es_vtep_list->cmp = zebra_evpn_es_vtep_cmp; + + /* mac entries associated with the ES */ + es->mac_list = list_new(); + listset_app_node_mem(es->mac_list); + + /* reserve a NHG */ + es->nhg_id = zebra_evpn_nhid_alloc(true); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s nhg 0x%x new", es->esi_str, es->nhg_id); + + return es; +} + +/* Free a given ES - + * This just frees appropriate memory, caller should have taken other + * needed actions. 
+ */ +static struct zebra_evpn_es *zebra_evpn_es_free(struct zebra_evpn_es *es) +{ + /* If the ES has a local or remote reference it cannot be freed. + * Free is also prevented if there are MAC entries referencing + * it. + */ + if ((es->flags & (ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_REMOTE)) || + listcount(es->mac_list)) + return es; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s free", es->esi_str); + + /* If the NHG is still installed uninstall it and free the id */ + if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { + es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE; + kernel_del_mac_nhg(es->nhg_id); + } + zebra_evpn_nhid_free(es->nhg_id); + + /* cleanup resources maintained against the ES */ + list_delete(&es->es_evi_list); + list_delete(&es->es_vtep_list); + list_delete(&es->mac_list); + + /* remove from the VNI-ESI rb tree */ + RB_REMOVE(zebra_es_rb_head, &zmh_info->es_rb_tree, es); + + XFREE(MTYPE_ZES, es); + + return NULL; +} + +/* Inform BGP about local ES addition */ +static int zebra_evpn_es_send_add_to_client(struct zebra_evpn_es *es) +{ + struct zserv *client; + struct stream *s; + uint8_t oper_up; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_LOCAL_ES_ADD, zebra_vrf_get_evpn_id()); + stream_put(s, &es->esi, sizeof(esi_t)); + stream_put_ipv4(s, zmh_info->es_originator_ip.s_addr); + oper_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP); + stream_putc(s, oper_up); + + /* Write packet size. 
*/ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("send add local es %s %s to %s", + es->esi_str, + inet_ntoa(zmh_info->es_originator_ip), + zebra_route_string(client->proto)); + + client->local_es_add_cnt++; + return zserv_send_message(client, s); +} + +/* Inform BGP about local ES deletion */ +static int zebra_evpn_es_send_del_to_client(struct zebra_evpn_es *es) +{ + struct zserv *client; + struct stream *s; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + stream_reset(s); + + zclient_create_header(s, ZEBRA_LOCAL_ES_DEL, zebra_vrf_get_evpn_id()); + stream_put(s, &es->esi, sizeof(esi_t)); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("send del local es %s to %s", es->esi_str, + zebra_route_string(client->proto)); + + client->local_es_del_cnt++; + return zserv_send_message(client, s); +} + +/* XXX - call any time ZEBRA_EVPNES_LOCAL gets set or cleared */ +static void zebra_evpn_es_re_eval_send_to_client(struct zebra_evpn_es *es, + bool es_evi_re_reval) +{ + bool old_ready; + bool new_ready; + struct listnode *node; + struct zebra_evpn_es_evi *es_evi; + + old_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP); + + if ((es->flags & ZEBRA_EVPNES_LOCAL) && + zmh_info->es_originator_ip.s_addr) + es->flags |= ZEBRA_EVPNES_READY_FOR_BGP; + else + es->flags &= ~ZEBRA_EVPNES_READY_FOR_BGP; + + new_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP); + if (old_ready == new_ready) + return; + + if (new_ready) + zebra_evpn_es_send_add_to_client(es); + else + zebra_evpn_es_send_del_to_client(es); + + /* re-eval associated EVIs */ + if (es_evi_re_reval) { + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) { + if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)) + continue; + zebra_evpn_es_evi_re_eval_send_to_client(es_evi); + } + } +} + +void 
zebra_evpn_es_send_all_to_client(bool add) +{ + struct listnode *es_node; + struct listnode *evi_node; + struct zebra_evpn_es *es; + struct zebra_evpn_es_evi *es_evi; + + if (!zmh_info) + return; + + for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, es_node, es)) { + if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) { + if (add) + zebra_evpn_es_send_add_to_client(es); + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, + evi_node, es_evi)) { + if (!(es_evi->flags & + ZEBRA_EVPNES_EVI_READY_FOR_BGP)) + continue; + + if (add) + zebra_evpn_es_evi_send_to_client( + es, es_evi->zvni, + true /* add */); + else + zebra_evpn_es_evi_send_to_client( + es, es_evi->zvni, + false /* add */); + } + if (!add) + zebra_evpn_es_send_del_to_client(es); + } + } +} + +/* walk the vlan bitmap associated with the zif and create or delete + * es_evis for all vlans associated with a VNI. + * XXX: This API is really expensive. optimize later if possible. + */ +static void zebra_evpn_es_setup_evis(struct zebra_evpn_es *es) +{ + struct zebra_if *zif = es->zif; + uint16_t vid; + struct zebra_evpn_access_bd *acc_bd; + + + bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { + acc_bd = zebra_evpn_acc_vl_find(vid); + if (acc_bd->zvni) + zebra_evpn_local_es_evi_add(es, acc_bd->zvni); + } +} + +static void zebra_evpn_es_local_mac_update(struct zebra_evpn_es *es, + bool force_clear_static) +{ + zebra_mac_t *mac; + struct listnode *node; + + for (ALL_LIST_ELEMENTS_RO(es->mac_list, node, mac)) { + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) { + zebra_vxlan_sync_mac_dp_install(mac, + false /* set_inactive */, + force_clear_static, __func__); + } + } +} + +static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, + struct zebra_if *zif) +{ + if (es->flags & ZEBRA_EVPNES_LOCAL) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("local es %s add; nhg 0x%x if %s", + es->esi_str, es->nhg_id, zif->ifp->name); + + es->flags |= ZEBRA_EVPNES_LOCAL; + 
listnode_init(&es->local_es_listnode, es); + listnode_add(zmh_info->local_es_list, &es->local_es_listnode); + + /* attach es to interface */ + zif->es_info.es = es; + + /* attach interface to es */ + es->zif = zif; + if (if_is_operative(zif->ifp)) + es->flags |= ZEBRA_EVPNES_OPER_UP; + + /* setup base-vni if one doesn't already exist; the ES will get sent + * to BGP as a part of that process + */ + if (!zmh_info->es_base_vni) + zebra_evpn_es_get_one_base_vni(); + else + /* send notification to bgp */ + zebra_evpn_es_re_eval_send_to_client(es, + false /* es_evi_re_reval */); + + /* Setup ES-EVIs for all VxLAN stretched VLANs associated with + * the zif + */ + zebra_evpn_es_setup_evis(es); + /* if there any local macs referring to the ES as dest we + * need to set the static reference on them if the MAC is + * synced from an ES peer + */ + zebra_evpn_es_local_mac_update(es, + false /* force_clear_static */); +} + +static void zebra_evpn_es_local_info_clear(struct zebra_evpn_es *es) +{ + struct zebra_if *zif; + + if (!(es->flags & ZEBRA_EVPNES_LOCAL)) + return; + + es->flags &= ~ZEBRA_EVPNES_LOCAL; + /* if there any local macs referring to the ES as dest we + * need to clear the static reference on them + */ + zebra_evpn_es_local_mac_update(es, + true /* force_clear_static */); + + /* clear the es from the parent interface */ + zif = es->zif; + zif->es_info.es = NULL; + es->zif = NULL; + + /* remove from the ES list */ + list_delete_node(zmh_info->local_es_list, &es->local_es_listnode); + + /* free up the ES if there is no remote reference */ + zebra_evpn_es_free(es); +} + +/* Delete an ethernet segment and inform BGP */ +static void zebra_evpn_local_es_del(struct zebra_evpn_es *es) +{ + struct zebra_evpn_es_evi *es_evi; + struct listnode *node = NULL; + struct listnode *nnode = NULL; + struct zebra_if *zif; + + if (!CHECK_FLAG(es->flags, ZEBRA_EVPNES_LOCAL)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) { + zif = es->zif; + zlog_debug("local es %s del; nhg 0x%x if 
%s", + es->esi_str, es->nhg_id, + zif ? zif->ifp->name : "-"); + } + + /* remove all ES-EVIs associated with the ES */ + for (ALL_LIST_ELEMENTS(es->es_evi_list, node, nnode, es_evi)) + zebra_evpn_local_es_evi_do_del(es_evi); + + /* send a del if the ES had been sent to BGP earlier */ + if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) + zebra_evpn_es_send_del_to_client(es); + + zebra_evpn_es_local_info_clear(es); +} + +/* eval remote info associated with the ES */ +static void zebra_evpn_es_remote_info_re_eval(struct zebra_evpn_es *es) +{ + /* if there are remote VTEPs the ES-EVI is classified as "remote" */ + if (listcount(es->es_vtep_list)) { + if (!(es->flags & ZEBRA_EVPNES_REMOTE)) { + es->flags |= ZEBRA_EVPNES_REMOTE; + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("remote es %s add; nhg 0x%x", + es->esi_str, es->nhg_id); + } + } else { + if (es->flags & ZEBRA_EVPNES_REMOTE) { + es->flags &= ~ZEBRA_EVPNES_REMOTE; + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("remote es %s del; nhg 0x%x", + es->esi_str, es->nhg_id); + zebra_evpn_es_free(es); + } + } +} + +/* A new local es is created when a local-es-id and sysmac is configured + * against an interface. 
+ */ +static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, + struct ethaddr *sysmac) +{ + struct zebra_evpn_es *old_es = zif->es_info.es; + struct zebra_evpn_es *es; + esi_t esi; + int offset = 0; + int field_bytes = 0; + + /* Complete config of the ES-ID bootstraps the ES */ + if (!lid || is_zero_mac(sysmac)) { + /* if in ES is attached to zif delete it */ + if (old_es) + zebra_evpn_local_es_del(old_es); + return 0; + } + + /* build 10-byte type-3-ESI - + * Type(1-byte), MAC(6-bytes), ES-LID (3-bytes) + */ + field_bytes = 1; + esi.val[offset] = ESI_TYPE_MAC; + offset += field_bytes; + + field_bytes = ETH_ALEN; + memcpy(&esi.val[offset], (uint8_t *)sysmac, field_bytes); + offset += field_bytes; + + esi.val[offset++] = (uint8_t)(lid >> 16); + esi.val[offset++] = (uint8_t)(lid >> 8); + esi.val[offset++] = (uint8_t)lid; + + if (old_es && !memcmp(&old_es->esi, &esi, sizeof(esi_t))) + /* dup - nothing to be done */ + return 0; + + /* release the old_es against the zif */ + if (old_es) + zebra_evpn_local_es_del(old_es); + + es = zebra_evpn_es_find(&esi); + if (es) { + /* if it exists against another interface flag an error */ + if (es->zif && es->zif != zif) + return -1; + } else { + /* create new es */ + es = zebra_evpn_es_new(&esi); + } + + zebra_evpn_es_local_info_set(es, zif); + + return 0; +} + +static int zebra_evpn_remote_es_del(esi_t *esi, struct in_addr vtep_ip) +{ + char buf[ESI_STR_LEN]; + struct zebra_evpn_es *es; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("remote es %s vtep %s del", + esi_to_str(esi, buf, sizeof(buf)), + inet_ntoa(vtep_ip)); + + es = zebra_evpn_es_find(esi); + if (!es) { + /* XXX - error log */ + return -1; + } + + zebra_evpn_es_vtep_del(es, vtep_ip); + zebra_evpn_es_remote_info_re_eval(es); + + return 0; +} + +/* force delete a remote ES on the way down */ +static void zebra_evpn_remote_es_flush(struct zebra_evpn_es *es) +{ + struct zebra_evpn_es_vtep *es_vtep; + struct listnode *node; + struct listnode *nnode; + + 
for (ALL_LIST_ELEMENTS(es->es_vtep_list, node, nnode, es_vtep)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s vtep %s flush", + es->esi_str, + inet_ntoa(es_vtep->vtep_ip)); + zebra_evpn_es_vtep_free(es_vtep); + zebra_evpn_es_remote_info_re_eval(es); + } +} + +static int zebra_evpn_remote_es_add(esi_t *esi, struct in_addr vtep_ip) +{ + char buf[ESI_STR_LEN]; + struct zebra_evpn_es *es; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("remote es %s vtep %s add", + esi_to_str(esi, buf, sizeof(buf)), + inet_ntoa(vtep_ip)); + + es = zebra_evpn_es_find(esi); + if (!es) { + es = zebra_evpn_es_new(esi); + if (!es) { + /* XXX - error log */ + return -1; + } + } + + zebra_evpn_es_vtep_add(es, vtep_ip); + zebra_evpn_es_remote_info_re_eval(es); + + return 0; +} + +void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct in_addr vtep_ip; + esi_t esi; + + if (!is_evpn_enabled()) { + zlog_debug( + "%s: EVPN not enabled yet we received a es_add zapi call", + __PRETTY_FUNCTION__); + return; + } + + memset(&esi, 0, sizeof(esi_t)); + s = msg; + + stream_get(&esi, s, sizeof(esi_t)); + vtep_ip.s_addr = stream_get_ipv4(s); + + if (hdr->command == ZEBRA_REMOTE_ES_VTEP_ADD) + zebra_evpn_remote_es_add(&esi, vtep_ip); + else + zebra_evpn_remote_es_del(&esi, vtep_ip); +} + +void zebra_evpn_es_mac_deref_entry(zebra_mac_t *mac) +{ + struct zebra_evpn_es *es = mac->es; + + mac->es = NULL; + if (!es) + return; + + list_delete_node(es->mac_list, &mac->es_listnode); + if (!listcount(es->mac_list)) + zebra_evpn_es_free(es); +} + +/* Associate a MAC entry with a local or remote ES. Returns false if there + * was no ES change. 
+ */ +bool zebra_evpn_es_mac_ref_entry(zebra_mac_t *mac, struct zebra_evpn_es *es) +{ + if (mac->es == es) + return false; + + if (mac->es) + zebra_evpn_es_mac_deref_entry(mac); + + if (!es) + return true; + + mac->es = es; + listnode_init(&mac->es_listnode, mac); + listnode_add(es->mac_list, &mac->es_listnode); + + return true; +} + +bool zebra_evpn_es_mac_ref(zebra_mac_t *mac, esi_t *esi) +{ + struct zebra_evpn_es *es; + + es = zebra_evpn_es_find(esi); + if (!es) { + es = zebra_evpn_es_new(esi); + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("auto es %s add on mac ref", es->esi_str); + } + + return zebra_evpn_es_mac_ref_entry(mac, es); +} + +/* Inform BGP about local ES-EVI add or del */ +static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es, + zebra_vni_t *zvni, bool add) +{ + struct zserv *client; + struct stream *s; + + client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); + /* BGP may not be running. */ + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, + add ? ZEBRA_LOCAL_ES_EVI_ADD : ZEBRA_LOCAL_ES_EVI_DEL, + zebra_vrf_get_evpn_id()); + stream_put(s, &es->esi, sizeof(esi_t)); + stream_putl(s, zvni->vni); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("send %s local es %s evi %u to %s", + add ? 
"add" : "del", + es->esi_str, zvni->vni, + zebra_route_string(client->proto)); + + client->local_es_add_cnt++; + return zserv_send_message(client, s); +} + +/* sysmac part of a local ESI has changed */ +static int zebra_evpn_es_sys_mac_update(struct zebra_if *zif, + struct ethaddr *sysmac) +{ + int rv; + + rv = zebra_evpn_local_es_update(zif, zif->es_info.lid, sysmac); + if (!rv) + memcpy(&zif->es_info.sysmac, sysmac, sizeof(struct ethaddr)); + + return rv; +} + +/* local-ID part of ESI has changed */ +static int zebra_evpn_es_lid_update(struct zebra_if *zif, uint32_t lid) +{ + int rv; + + rv = zebra_evpn_local_es_update(zif, lid, &zif->es_info.sysmac); + if (!rv) + zif->es_info.lid = lid; + + return rv; +} + +void zebra_evpn_es_cleanup(void) +{ + struct zebra_evpn_es *es; + struct zebra_evpn_es *es_next; + + RB_FOREACH_SAFE(es, zebra_es_rb_head, + &zmh_info->es_rb_tree, es_next) { + zebra_evpn_local_es_del(es); + zebra_evpn_remote_es_flush(es); + } +} + +/* Only certain types of access ports can be setup as an Ethernet Segment */ +bool zebra_evpn_is_if_es_capable(struct zebra_if *zif) +{ + if (zif->zif_type == ZEBRA_IF_BOND) + return true; + + /* XXX: allow swpX i.e. a regular ethernet port to be an ES link too */ + return false; +} + +void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif) +{ + char buf[ETHER_ADDR_STRLEN]; + + if (zif->es_info.lid || !is_zero_mac(&zif->es_info.sysmac)) + vty_out(vty, " EVPN MH: ES id %u ES sysmac %s\n", + zif->es_info.lid, + prefix_mac2str(&zif->es_info.sysmac, + buf, sizeof(buf))); +} + +void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up) +{ + struct zebra_evpn_es *es = zif->es_info.es; + bool old_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP); + + if (old_up == up) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s state changed to %s ", + es->esi_str, + up ? 
"up" : "down"); + if (up) + es->flags |= ZEBRA_EVPNES_OPER_UP; + else + es->flags &= ~ZEBRA_EVPNES_OPER_UP; + + /* inform BGP of the ES oper state change */ + if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) + zebra_evpn_es_send_add_to_client(es); +} + +static char *zebra_evpn_es_vtep_str(char *vtep_str, struct zebra_evpn_es *es, + uint8_t vtep_str_size) +{ + struct zebra_evpn_es_vtep *zvtep; + struct listnode *node; + bool first = true; + + vtep_str[0] = '\0'; + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep)) { + if (first) { + first = false; + strlcat(vtep_str, inet_ntoa(zvtep->vtep_ip), + vtep_str_size); + } else { + strlcat(vtep_str, ",", vtep_str_size); + strlcat(vtep_str, inet_ntoa(zvtep->vtep_ip), + vtep_str_size); + } + } + return vtep_str; +} + +static void zebra_evpn_es_show_entry(struct vty *vty, + struct zebra_evpn_es *es, json_object *json) +{ + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ]; + + if (json) { + /* XXX */ + } else { + type_str[0] = '\0'; + if (es->flags & ZEBRA_EVPNES_LOCAL) + strlcat(type_str, "L", sizeof(type_str)); + if (es->flags & ZEBRA_EVPNES_REMOTE) + strlcat(type_str, "R", sizeof(type_str)); + + zebra_evpn_es_vtep_str(vtep_str, es, sizeof(vtep_str)); + + vty_out(vty, "%-30s %-4s %-21s %s\n", + es->esi_str, type_str, + es->zif ? 
es->zif->ifp->name : "-", + vtep_str); + } +} + +static void zebra_evpn_es_show_entry_detail(struct vty *vty, + struct zebra_evpn_es *es, json_object *json) +{ + char type_str[80]; + struct zebra_evpn_es_vtep *zvtep; + struct listnode *node; + + if (json) { + /* XXX */ + } else { + type_str[0] = '\0'; + if (es->flags & ZEBRA_EVPNES_LOCAL) + strlcat(type_str, "Local", sizeof(type_str)); + if (es->flags & ZEBRA_EVPNES_REMOTE) { + if (strnlen(type_str, sizeof(type_str))) + strlcat(type_str, ",", sizeof(type_str)); + strlcat(type_str, "Remote", sizeof(type_str)); + } + + vty_out(vty, "ESI: %s\n", es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " Interface: %s\n", + (es->zif) ? + es->zif->ifp->name : "-"); + vty_out(vty, " State: %s\n", + (es->flags & ZEBRA_EVPNES_OPER_UP) ? + "up" : "down"); + vty_out(vty, " Ready for BGP: %s\n", + (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) ? + "yes" : "no"); + vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list)); + vty_out(vty, " MAC Count: %d\n", listcount(es->mac_list)); + vty_out(vty, " Nexthop group: 0x%x\n", es->nhg_id); + vty_out(vty, " VTEPs:\n"); + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep)) + vty_out(vty, " %s nh: 0x%x\n", + inet_ntoa(zvtep->vtep_ip), + zvtep->nh_id); + + vty_out(vty, "\n"); + } +} + +void zebra_evpn_es_show(struct vty *vty, bool uj) +{ + struct zebra_evpn_es *es; + json_object *json = NULL; + + if (uj) { + /* XXX */ + } else { + vty_out(vty, "Type: L local, R remote\n"); + vty_out(vty, "%-30s %-4s %-21s %s\n", + "ESI", "Type", "ES-IF", "VTEPs"); + } + + RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree) + zebra_evpn_es_show_entry(vty, es, json); +} + +void zebra_evpn_es_show_detail(struct vty *vty, bool uj) +{ + struct zebra_evpn_es *es; + json_object *json = NULL; + + RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree) + zebra_evpn_es_show_entry_detail(vty, es, json); +} + +void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi) +{ + struct 
zebra_evpn_es *es; + char esi_str[ESI_STR_LEN]; + json_object *json = NULL; + + es = zebra_evpn_es_find(esi); + + if (!es) { + esi_to_str(esi, esi_str, sizeof(esi_str)); + vty_out(vty, "ESI %s does not exist\n", esi_str); + return; + } + + zebra_evpn_es_show_entry_detail(vty, es, json); +} + +int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp) +{ + struct zebra_if *zif = ifp->info; + char buf[ETHER_ADDR_STRLEN]; + + if (zif->es_info.lid) + vty_out(vty, " evpn mh es-id %u\n", zif->es_info.lid); + + if (!is_zero_mac(&zif->es_info.sysmac)) + vty_out(vty, " evpn mh es-sys-mac %s\n", + prefix_mac2str(&zif->es_info.sysmac, + buf, sizeof(buf))); + return 0; +} + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/zebra_evpn_mh_clippy.c" +#endif +/* CLI for setting up sysmac part of ESI on an access port */ +DEFPY(zebra_evpn_es_sys_mac, + zebra_evpn_es_sys_mac_cmd, + "[no$no] evpn mh es-sys-mac [X:X:X:X:X:X$mac]", + NO_STR + "EVPN\n" + EVPN_MH_VTY_STR + "Ethernet segment system MAC\n" + MAC_STR +) +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif; + int ret = 0; + + zif = ifp->info; + + if (no) { + static struct ethaddr zero_mac; + + ret = zebra_evpn_es_sys_mac_update(zif, &zero_mac); + if (ret == -1) { + vty_out(vty, "%%Failed to clear ES sysmac\n"); + return CMD_WARNING; + } + } else { + + if (!zebra_evpn_is_if_es_capable(zif)) { + vty_out(vty, + "%%ESI cannot be associated with this interface type\n"); + return CMD_WARNING; + } + + if (!mac || is_zero_mac(&mac->eth_addr)) { + vty_out(vty, "%%ES sysmac value is invalid\n"); + return CMD_WARNING; + } + + ret = zebra_evpn_es_sys_mac_update(zif, &mac->eth_addr); + if (ret == -1) { + vty_out(vty, "%%ESI already exists on a different interface\n"); + return CMD_WARNING; + } + } + return CMD_SUCCESS; +} + +/* CLI for setting up local-ID part of ESI on an access port */ +DEFPY(zebra_evpn_es_id, + zebra_evpn_es_id_cmd, + "[no$no] evpn mh es-id [(1-16777215)$es_lid]", + NO_STR + "EVPN\n" + EVPN_MH_VTY_STR + 
"Ethernet segment local identifier\n" + "ID\n" +) +{ + VTY_DECLVAR_CONTEXT(interface, ifp); + struct zebra_if *zif; + int ret; + + zif = ifp->info; + + if (no) { + ret = zebra_evpn_es_lid_update(zif, 0); + if (ret == -1) { + vty_out(vty, "%%Failed to clear ES local id\n"); + return CMD_WARNING; + } + } else { + if (!zebra_evpn_is_if_es_capable(zif)) { + vty_out(vty, + "%%ESI cannot be associated with this interface type\n"); + return CMD_WARNING; + } + + if (!es_lid) { + vty_out(vty, "%%Specify local ES ID\n"); + return CMD_WARNING; + } + ret = zebra_evpn_es_lid_update(zif, es_lid); + if (ret == -1) { + vty_out(vty, + "%%ESI already exists on a different interface\n"); + return CMD_WARNING; + } + } + return CMD_SUCCESS; +} + +/*****************************************************************************/ +/* A base L2-VNI is maintained to derive parameters such as ES originator-IP. + * XXX: once single vxlan device model becomes available this will not be + * necessary + */ +/* called when a new vni is added or becomes oper up or becomes a bridge port */ +void zebra_evpn_es_set_base_vni(zebra_vni_t *zvni) +{ + struct listnode *node; + struct zebra_evpn_es *es; + + if (zmh_info->es_base_vni) { + if (zmh_info->es_base_vni != zvni) { + /* unrelated VNI; ignore it */ + return; + } + /* check if the local vtep-ip has changed */ + } else { + /* check if the VNI can be used as base VNI */ + if (!zebra_evpn_vni_send_to_client_ok(zvni)) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es base vni set to %d", + zvni->vni); + zmh_info->es_base_vni = zvni; + } + + /* update local VTEP-IP */ + if (zmh_info->es_originator_ip.s_addr == + zmh_info->es_base_vni->local_vtep_ip.s_addr) + return; + + zmh_info->es_originator_ip.s_addr = + zmh_info->es_base_vni->local_vtep_ip.s_addr; + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es originator ip set to %s", + inet_ntoa(zmh_info->es_base_vni->local_vtep_ip)); + + /* if originator ip changes we need to update bgp */ + for 
(ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) { + if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) + zebra_evpn_es_send_add_to_client(es); + else + zebra_evpn_es_re_eval_send_to_client(es, + true /* es_evi_re_reval */); + } +} + +/* called when a vni is removed or becomes oper down or is removed from a + * bridge + */ +void zebra_evpn_es_clear_base_vni(zebra_vni_t *zvni) +{ + struct listnode *node; + struct zebra_evpn_es *es; + + if (zmh_info->es_base_vni != zvni) + return; + + zmh_info->es_base_vni = NULL; + /* lost current base VNI; try to find a new one */ + zebra_evpn_es_get_one_base_vni(); + + /* couldn't locate an eligible base vni */ + if (!zmh_info->es_base_vni && zmh_info->es_originator_ip.s_addr) { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es originator ip cleared"); + + zmh_info->es_originator_ip.s_addr = 0; + /* lost originator ip */ + for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) { + zebra_evpn_es_re_eval_send_to_client(es, + true /* es_evi_re_reval */); + } + } +} + +/* Locate an "eligible" L2-VNI to follow */ +static int zebra_evpn_es_get_one_base_vni_cb(struct hash_bucket *b, void *data) +{ + zebra_vni_t *zvni = b->data; + + zebra_evpn_es_set_base_vni(zvni); + + if (zmh_info->es_base_vni) + return HASHWALK_ABORT; + + return HASHWALK_CONTINUE; +} + +/* locate a base_vni to follow for the purposes of common params like + * originator IP + */ +static void zebra_evpn_es_get_one_base_vni(void) +{ + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + hash_walk(zvrf->vni_table, zebra_evpn_es_get_one_base_vni_cb, NULL); +} + +/*****************************************************************************/ +void zebra_evpn_mh_config_write(struct vty *vty) +{ + if (zmh_info->mac_hold_time != EVPN_MH_MAC_HOLD_TIME_DEF) + vty_out(vty, "evpn mh mac-holdtime %ld\n", + zmh_info->mac_hold_time); + + if (zmh_info->neigh_hold_time != EVPN_MH_NEIGH_HOLD_TIME_DEF) + vty_out(vty, "evpn mh neigh-holdtime %ld\n", + 
zmh_info->neigh_hold_time); +} + +/* Set the EVPN-MH neigh hold time. + * When set_default is true the passed-in duration is ignored and + * EVPN_MH_NEIGH_HOLD_TIME_DEF is applied instead. + * vty is not used here; always returns 0. + */ +int zebra_evpn_mh_neigh_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default) +{ + if (set_default) + duration = EVPN_MH_NEIGH_HOLD_TIME_DEF; + + zmh_info->neigh_hold_time = duration; + + return 0; +} + +/* Set the EVPN-MH MAC hold time. + * When set_default is true the passed-in duration is ignored and + * EVPN_MH_MAC_HOLD_TIME_DEF is applied instead. + * vty is not used here; always returns 0. + */ +int zebra_evpn_mh_mac_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default) +{ + if (set_default) + duration = EVPN_MH_MAC_HOLD_TIME_DEF; + + zmh_info->mac_hold_time = duration; + + return 0; +} + +/* Install the per-interface "evpn mh es-id" and "evpn mh es-sys-mac" CLIs */ +void zebra_evpn_interface_init(void) +{ + install_element(INTERFACE_NODE, &zebra_evpn_es_id_cmd); + install_element(INTERFACE_NODE, &zebra_evpn_es_sys_mac_cmd); +} + +/* Allocate zrouter.mh_info and set up the hold-time defaults, the ES + * RB tree and local ES list, the NH id bitmap and the access VLAN table + */ +void zebra_evpn_mh_init(void) +{ + zrouter.mh_info = XCALLOC(MTYPE_ZMH_INFO, sizeof(*zrouter.mh_info)); + + zmh_info->mac_hold_time = EVPN_MH_MAC_HOLD_TIME_DEF; + zmh_info->neigh_hold_time = EVPN_MH_NEIGH_HOLD_TIME_DEF; + /* setup ES tables */ + RB_INIT(zebra_es_rb_head, &zmh_info->es_rb_tree); + zmh_info->local_es_list = list_new(); + listset_app_node_mem(zmh_info->local_es_list); + + bf_init(zmh_info->nh_id_bitmap, EVPN_NH_ID_MAX); + bf_assign_zero_index(zmh_info->nh_id_bitmap); + + /* setup broadcast domain tables */ + zmh_info->evpn_vlan_table = hash_create(zebra_evpn_acc_vl_hash_keymake, + zebra_evpn_acc_vl_cmp, "access VLAN hash table"); +} + +/* Delete the local ES list, run the per-entry cleanup over the access + * VLAN table and free that table + */ +void zebra_evpn_mh_terminate(void) +{ + list_delete(&zmh_info->local_es_list); + + hash_iterate(zmh_info->evpn_vlan_table, + zebra_evpn_acc_vl_cleanup_all, NULL); + hash_free(zmh_info->evpn_vlan_table); +} diff --git a/zebra/zebra_evpn_mh.h b/zebra/zebra_evpn_mh.h new file mode 100644 index 0000000000..46c25a04bc --- /dev/null +++ b/zebra/zebra_evpn_mh.h @@ -0,0 +1,239 @@ +/* + * Zebra EVPN MH Data structures and definitions + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ZEBRA_EVPN_MH_H +#define _ZEBRA_EVPN_MH_H + +#include <zebra.h> + +#include "if.h" +#include "linklist.h" +#include "bitfield.h" +#include "zebra_vxlan.h" +#include "zebra_vxlan_private.h" + +#define EVPN_MH_VTY_STR "Multihoming\n" + +/* Ethernet Segment entry - + * - Local and remote ESs are maintained in a global RB tree, + * zmh_info->es_rb_tree using ESI as key + * - Local ESs are added via zebra config (ZEBRA_EVPNES_LOCAL) when an + * access port is associated with an ES-ID + * - Remote ESs are added by BGP based on received/remote EAD/Type-1 routes + * (ZEBRA_EVPNES_REMOTE) + * - An ES can be simultaneously LOCAL and REMOTE; in fact all LOCAL ESs are + * expected to have REMOTE ES peers. 
+ */ +struct zebra_evpn_es { + esi_t esi; + char esi_str[ESI_STR_LEN]; + + /* ES flags */ + uint32_t flags; +#define ZEBRA_EVPNES_LOCAL (1 << 0) /* configured in zebra */ +#define ZEBRA_EVPNES_REMOTE (1 << 1) /* added by bgp */ +#define ZEBRA_EVPNES_OPER_UP (1 << 2) /* es->zif is oper-up */ +#define ZEBRA_EVPNES_READY_FOR_BGP (1 << 3) /* ready to be sent to BGP */ +#define ZEBRA_EVPNES_NHG_ACTIVE (1 << 4) /* NHG has been installed */ + + /* memory used for adding the es to zmh_info->es_rb_tree */ + RB_ENTRY(zebra_evpn_es) rb_node; + + /* [EVPNES_LOCAL] memory used for linking the es to + * zmh_info->local_es_list + */ + struct listnode local_es_listnode; + + /* [EVPNES_LOCAL] corresponding interface */ + struct zebra_if *zif; + + /* list of ES-EVIs associated with the ES */ + struct list *es_evi_list; + + /* [!EVPNES_LOCAL] List of remote VTEPs (zebra_evpn_es_vtep) */ + struct list *es_vtep_list; + + /* list of zebra_mac entries using this ES as destination */ + struct list *mac_list; + + /* Nexthop group id */ + uint32_t nhg_id; +}; +RB_HEAD(zebra_es_rb_head, zebra_evpn_es); +RB_PROTOTYPE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); + +/* ES per-EVI info + * - ES-EVIs are maintained per-VNI (vni->es_evi_rb_tree) + * - Local ES-EVIs are linked to per-VNI list for quick access + * - Although some infrastructure is present for remote ES-EVIs, currently + * BGP does NOT send remote ES-EVIs to zebra. 
This may change in the + * future (but must be changed thoughtfully and only if needed as ES-EVI + * can get prolific and come in the way of rapid failovers) + */ +struct zebra_evpn_es_evi { + struct zebra_evpn_es *es; + zebra_vni_t *zvni; + + /* ES-EVI flags */ + uint32_t flags; + /* local ES-EVI */ +#define ZEBRA_EVPNES_EVI_LOCAL (1 << 0) /* created by zebra */ +#define ZEBRA_EVPNES_EVI_READY_FOR_BGP (1 << 1) /* ready to be sent to BGP */ + + /* memory used for adding the es_evi to + * es_evi->zvni->es_evi_rb_tree + */ + RB_ENTRY(zebra_evpn_es_evi) rb_node; + /* memory used for linking the es_evi to + * es_evi->zvni->local_es_evi_list + */ + struct listnode l2vni_listnode; + /* memory used for linking the es_evi to + * es_evi->es->es_evi_list + */ + struct listnode es_listnode; +}; + +/* PE attached to an ES */ +struct zebra_evpn_es_vtep { + struct zebra_evpn_es *es; /* parent ES */ + struct in_addr vtep_ip; + + /* memory used for adding the entry to es->es_vtep_list */ + struct listnode es_listnode; + + /* MAC nexthop */ + uint32_t nh_id; + + /* XXX - maintain a backpointer to zebra_vtep_t */ +}; + +/* Local/access-side broadcast domain - zebra_evpn_access_bd is added to - + * zrouter->evpn_vlan_table (for VLAN aware bridges) OR + * zrouter->evpn_bridge_table (for VLAN unaware bridges) + * XXX - support for VLAN unaware bridges is yet to be fleshed out + * NOTE(review): zebra_evpn_mh_init() creates the VLAN table as + * zmh_info->evpn_vlan_table; confirm whether "zrouter->" above should + * read "zmh_info->" + */ +struct zebra_evpn_access_bd { + vlanid_t vid; + + struct zebra_if *vxlan_zif; /* vxlan device */ + /* list of members associated with the BD i.e. 
(potential) ESs */ + struct list *mbr_zifs; + /* presence of zvni activates the EVI on all the ESs in mbr_zifs */ + zebra_vni_t *zvni; +}; + +/* multihoming information stored in zrouter */ +#define zmh_info (zrouter.mh_info) +struct zebra_evpn_mh_info { + /* RB tree of Ethernet segments (used for EVPN-MH) */ + struct zebra_es_rb_head es_rb_tree; + /* List of local ESs */ + struct list *local_es_list; + + /* EVPN MH broadcast domains indexed by the VID */ + struct hash *evpn_vlan_table; + + /* A base L2-VNI is maintained to derive parameters such as + * ES originator-IP. + * XXX: once single vxlan device model becomes available this will + * not be necessary + */ + zebra_vni_t *es_base_vni; + struct in_addr es_originator_ip; + + /* L2 NH and NHG ids - + * Most significant 8 bits is type. Lower 24 bits is the value + * allocated from the nh_id_bitmap. + */ + bitfield_t nh_id_bitmap; +#define EVPN_NH_ID_MAX (16*1024) +#define EVPN_NH_ID_VAL_MASK 0xffffff +#define EVPN_NH_ID_TYPE_POS 24 +/* The purpose of using different types for NHG and NH is NOT to manage the + * id space separately. It is simply to make debugging easier. 
+ */ +#define EVPN_NH_ID_TYPE_BIT (1 << EVPN_NH_ID_TYPE_POS) +#define EVPN_NHG_ID_TYPE_BIT (2 << EVPN_NH_ID_TYPE_POS) + + /* XXX - re-visit the default hold timer value */ +#define EVPN_MH_MAC_HOLD_TIME_DEF (18 * 60) + long mac_hold_time; +#define EVPN_MH_NEIGH_HOLD_TIME_DEF (18 * 60) + long neigh_hold_time; +}; + +static inline bool zebra_evpn_mac_is_es_local(zebra_mac_t *mac) +{ + return mac->es && (mac->es->flags & ZEBRA_EVPNES_LOCAL); +} + +/* Returns true if the id is of L2-NHG or L2-NH type */ +static inline bool zebra_evpn_mh_is_fdb_nh(uint32_t id) +{ + return ((id & EVPN_NHG_ID_TYPE_BIT) || + (id & EVPN_NH_ID_TYPE_BIT)); +} + +/*****************************************************************************/ +extern esi_t *zero_esi; +extern void zebra_evpn_mh_init(void); +extern void zebra_evpn_mh_terminate(void); +extern bool zebra_evpn_is_if_es_capable(struct zebra_if *zif); +extern void zebra_evpn_if_init(struct zebra_if *zif); +extern void zebra_evpn_if_cleanup(struct zebra_if *zif); +extern void zebra_evpn_vni_es_init(zebra_vni_t *zvni); +extern void zebra_evpn_vni_es_cleanup(zebra_vni_t *zvni); +extern void zebra_evpn_vxl_vni_set(struct zebra_if *zif, zebra_vni_t *zvni, + bool set); +extern void zebra_evpn_es_set_base_vni(zebra_vni_t *zvni); +extern void zebra_evpn_es_clear_base_vni(zebra_vni_t *zvni); +extern void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif); +extern void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif); +extern void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif); +extern void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif); +extern void zebra_evpn_es_send_all_to_client(bool add); +extern void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up); +extern void zebra_evpn_es_show(struct vty *vty, bool uj); +extern void zebra_evpn_es_show_detail(struct vty *vty, bool uj); +extern void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi); +extern void 
zebra_evpn_vni_update_all_es(zebra_vni_t *zvni); +extern void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS); +extern void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail); +extern void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, + vni_t vni, int detail); +extern void zebra_evpn_es_mac_deref_entry(zebra_mac_t *mac); +extern bool zebra_evpn_es_mac_ref_entry(zebra_mac_t *mac, + struct zebra_evpn_es *es); +extern bool zebra_evpn_es_mac_ref(zebra_mac_t *mac, esi_t *esi); +extern struct zebra_evpn_es *zebra_evpn_es_find(esi_t *esi); +extern void zebra_evpn_interface_init(void); +extern int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp); +extern void zebra_evpn_acc_vl_show(struct vty *vty, bool uj); +extern void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj); +extern void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid); +extern void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif); +extern void zebra_evpn_es_cleanup(void); +extern int zebra_evpn_mh_mac_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default); +void zebra_evpn_mh_config_write(struct vty *vty); +int zebra_evpn_mh_neigh_holdtime_update(struct vty *vty, + uint32_t duration, bool set_default); + +#endif /* _ZEBRA_EVPN_MH_H */ diff --git a/zebra/zebra_l2.c b/zebra/zebra_l2.c index 4c0cc62fbf..1758c8f96a 100644 --- a/zebra/zebra_l2.c +++ b/zebra/zebra_l2.c @@ -43,6 +43,7 @@ #include "zebra/rt_netlink.h" #include "zebra/zebra_l2.h" #include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" /* definitions */ @@ -53,13 +54,7 @@ static void map_slaves_to_bridge(struct interface *br_if, int link) { struct vrf *vrf; struct interface *ifp; - struct zebra_vrf *zvrf; - struct zebra_ns *zns; - zvrf = zebra_vrf_lookup_by_id(br_if->vrf_id); - assert(zvrf); - zns = zvrf->zns; - assert(zns); RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { FOR_ALL_INTERFACES (vrf, ifp) { struct zebra_if *zif; @@ -78,8 +73,7 @@ static void 
map_slaves_to_bridge(struct interface *br_if, int link) br_slave = &zif->brslave_info; if (link) { - if (br_slave->bridge_ifindex == br_if->ifindex && - br_slave->ns_id == zns->ns_id) + if (br_slave->bridge_ifindex == br_if->ifindex) br_slave->br_if = br_if; } else { if (br_slave->br_if == br_if) @@ -90,14 +84,12 @@ static void map_slaves_to_bridge(struct interface *br_if, int link) } /* Public functions */ -void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave, - struct zebra_ns *zns) +void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave) { struct interface *br_if; /* TODO: Handle change of master */ - assert(zns); - br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(zns->ns_id), + br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), br_slave->bridge_ifindex); if (br_if) br_slave->br_if = br_if; @@ -119,7 +111,7 @@ void zebra_l2_map_slave_to_bond(struct zebra_l2info_bondslave *bond_slave, bond_slave->bond_if = bond_if; else bond_slave->bond_if = if_create_ifindex(bond_slave->bond_ifindex, - vrf_id, NULL); + vrf_id); } void zebra_l2_unmap_slave_from_bond(struct zebra_l2info_bondslave *bond_slave) @@ -191,6 +183,7 @@ void zebra_l2_vxlanif_add_update(struct interface *ifp, if (add) { memcpy(&zif->l2info.vxl, vxlan_info, sizeof(*vxlan_info)); + zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif); zebra_vxlan_if_add(ifp); return; } @@ -229,6 +222,9 @@ void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp, return; zif->l2info.vxl.access_vlan = access_vlan; + + zebra_evpn_vl_vxl_deref(old_access_vlan, zif); + zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif); zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_VLAN_CHANGE); } @@ -237,6 +233,12 @@ void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp, */ void zebra_l2_vxlanif_del(struct interface *ifp) { + struct zebra_if *zif; + + zif = ifp->info; + assert(zif); + + zebra_evpn_vl_vxl_deref(zif->l2info.vxl.access_vlan, zif); zebra_vxlan_if_del(ifp); } @@ -246,32 
+248,23 @@ void zebra_l2_vxlanif_del(struct interface *ifp) * from a bridge before it can be mapped to another bridge. */ void zebra_l2if_update_bridge_slave(struct interface *ifp, - ifindex_t bridge_ifindex, - ns_id_t ns_id) + ifindex_t bridge_ifindex) { struct zebra_if *zif; ifindex_t old_bridge_ifindex; - ns_id_t old_ns_id; - struct zebra_vrf *zvrf; zif = ifp->info; assert(zif); - zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id); - if (!zvrf) - return; - old_bridge_ifindex = zif->brslave_info.bridge_ifindex; - old_ns_id = zif->brslave_info.ns_id; - if (old_bridge_ifindex == bridge_ifindex && - old_ns_id == zif->brslave_info.ns_id) + if (old_bridge_ifindex == bridge_ifindex) return; - zif->brslave_info.ns_id = ns_id; zif->brslave_info.bridge_ifindex = bridge_ifindex; + /* Set up or remove link with master */ if (bridge_ifindex != IFINDEX_INTERNAL) { - zebra_l2_map_slave_to_bridge(&zif->brslave_info, zvrf->zns); + zebra_l2_map_slave_to_bridge(&zif->brslave_info); /* In the case of VxLAN, invoke the handler for EVPN. 
*/ if (zif->zif_type == ZEBRA_IF_VXLAN) zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_MASTER_CHANGE); @@ -307,3 +300,43 @@ void zebra_l2if_update_bond_slave(struct interface *ifp, ifindex_t bond_ifindex) else if (old_bond_ifindex != IFINDEX_INTERNAL) zebra_l2_unmap_slave_from_bond(&zif->bondslave_info); } + +void zebra_vlan_bitmap_compute(struct interface *ifp, + uint32_t vid_start, uint16_t vid_end) +{ + uint32_t vid; + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + assert(zif); + + for (vid = vid_start; vid <= vid_end; ++vid) + bf_set_bit(zif->vlan_bitmap, vid); +} + +void zebra_vlan_mbr_re_eval(struct interface *ifp, bitfield_t old_vlan_bitmap) +{ + uint32_t vid; + struct zebra_if *zif; + + zif = (struct zebra_if *)ifp->info; + assert(zif); + + if (!bf_cmp(zif->vlan_bitmap, old_vlan_bitmap)) + /* no change */ + return; + + bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { + /* if not already set create new reference */ + if (!bf_test_index(old_vlan_bitmap, vid)) + zebra_evpn_vl_mbr_ref(vid, zif); + + /* also clear from the old vlan bitmap */ + bf_release_index(old_vlan_bitmap, vid); + } + + /* any bits remaining in the old vlan bitmap are stale references */ + bf_for_each_set_bit(old_vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { + zebra_evpn_vl_mbr_deref(vid, zif); + } +} diff --git a/zebra/zebra_l2.h b/zebra/zebra_l2.h index a3c780ee09..2735d915ec 100644 --- a/zebra/zebra_l2.h +++ b/zebra/zebra_l2.h @@ -37,7 +37,6 @@ extern "C" { struct zebra_l2info_brslave { ifindex_t bridge_ifindex; /* Bridge Master */ struct interface *br_if; /* Pointer to master */ - ns_id_t ns_id; /* network namespace where bridge is */ }; /* zebra L2 interface information - bridge interface */ @@ -82,8 +81,7 @@ union zebra_l2if_info { #define IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) ((zif)->l2info.br.vlan_aware == 1) -extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave, - struct zebra_ns *zns); +extern void zebra_l2_map_slave_to_bridge(struct 
zebra_l2info_brslave *br_slave); extern void zebra_l2_unmap_slave_from_bridge(struct zebra_l2info_brslave *br_slave); extern void @@ -103,11 +101,14 @@ extern void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp, vlanid_t access_vlan); extern void zebra_l2_vxlanif_del(struct interface *ifp); extern void zebra_l2if_update_bridge_slave(struct interface *ifp, - ifindex_t bridge_ifindex, - ns_id_t ns_id); + ifindex_t bridge_ifindex); extern void zebra_l2if_update_bond_slave(struct interface *ifp, ifindex_t bond_ifindex); +extern void zebra_vlan_bitmap_compute(struct interface *ifp, + uint32_t vid_start, uint16_t vid_end); +extern void zebra_vlan_mbr_re_eval(struct interface *ifp, + bitfield_t vlan_bitmap); #ifdef __cplusplus } diff --git a/zebra/zebra_memory.c b/zebra/zebra_memory.c index a9c2c5fe58..da8121774e 100644 --- a/zebra/zebra_memory.c +++ b/zebra/zebra_memory.c @@ -28,3 +28,5 @@ DEFINE_MGROUP(ZEBRA, "zebra") DEFINE_MTYPE(ZEBRA, RE, "Route Entry") DEFINE_MTYPE(ZEBRA, RIB_DEST, "RIB destination") +DEFINE_MTYPE(ZEBRA, ZVLAN, "VLAN") +DEFINE_MTYPE(ZEBRA, ZVLAN_BITMAP, "VLAN bitmap") diff --git a/zebra/zebra_netns_id.c b/zebra/zebra_netns_id.c index 79121bb086..8de4daf439 100644 --- a/zebra/zebra_netns_id.c +++ b/zebra/zebra_netns_id.c @@ -159,34 +159,27 @@ static ns_id_t extract_nsid(struct nlmsghdr *nlh, char *buf) return ns_id; } -/* fd_param = -1 is ignored. - * netnspath set to null is ignored. - * one of the 2 params is mandatory. 
netnspath is looked in priority - */ -ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param) +ns_id_t zebra_ns_id_get(const char *netnspath) { int ns_id = -1; struct sockaddr_nl snl; - int fd = -1, sock, ret; + int fd, sock, ret; unsigned int seq; ns_id_t return_nsid = NS_UNKNOWN; /* netns path check */ - if (!netnspath && fd_param == -1) + if (!netnspath) return NS_UNKNOWN; - if (netnspath) { - fd = open(netnspath, O_RDONLY); - if (fd == -1) - return NS_UNKNOWN; - } else if (fd_param != -1) - fd = fd_param; + fd = open(netnspath, O_RDONLY); + if (fd == -1) + return NS_UNKNOWN; + /* netlink socket */ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock < 0) { flog_err_sys(EC_LIB_SOCKET, "netlink( %u) socket() error: %s", sock, safe_strerror(errno)); - if (fd_param == -1) - close(fd); + close(fd); return NS_UNKNOWN; } memset(&snl, 0, sizeof(snl)); @@ -199,8 +192,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param) "netlink( %u) socket() bind error: %s", sock, safe_strerror(errno)); close(sock); - if (fd_param == -1) - close(fd); + close(fd); return NS_UNKNOWN; } @@ -222,8 +214,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param) ret = send_receive(sock, nlh, seq, buf); if (ret < 0) { close(sock); - if (fd_param == -1) - close(fd); + close(fd); return NS_UNKNOWN; } nlh = (struct nlmsghdr *)buf; @@ -267,8 +258,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param) "netlink( %u) recvfrom() error 2 when reading: %s", fd, safe_strerror(errno)); close(sock); - if (fd_param == -1) - close(fd); + close(fd); if (errno == ENOTSUP) { zlog_debug("NEWNSID locally generated"); return zebra_ns_id_get_fallback(netnspath); @@ -289,8 +279,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param) ret = send_receive(sock, nlh, seq, buf); if (ret < 0) { close(sock); - if (fd_param == -1) - close(fd); + close(fd); return NS_UNKNOWN; } nlh = (struct nlmsghdr *)buf; @@ -321,18 +310,16 @@ ns_id_t zebra_ns_id_get(const char 
*netnspath, int fd_param) } while (len != 0 && ret == 0); } - if (fd_param == -1) - close(fd); + close(fd); close(sock); return return_nsid; } #else -ns_id_t zebra_ns_id_get(const char *netnspath, int fd __attribute__ ((unused))) +ns_id_t zebra_ns_id_get(const char *netnspath) { return zebra_ns_id_get_fallback(netnspath); } - #endif /* ! defined(HAVE_NETLINK) */ #ifdef HAVE_NETNS @@ -368,7 +355,7 @@ ns_id_t zebra_ns_id_get_default(void) return NS_DEFAULT_INTERNAL; } close(fd); - return zebra_ns_id_get((char *)NS_DEFAULT_NAME, -1); + return zebra_ns_id_get((char *)NS_DEFAULT_NAME); #else /* HAVE_NETNS */ return NS_DEFAULT_INTERNAL; #endif /* !HAVE_NETNS */ diff --git a/zebra/zebra_netns_id.h b/zebra/zebra_netns_id.h index dd9eab18e0..7a5f6851f4 100644 --- a/zebra/zebra_netns_id.h +++ b/zebra/zebra_netns_id.h @@ -24,7 +24,7 @@ extern "C" { #endif -extern ns_id_t zebra_ns_id_get(const char *netnspath, int fd); +extern ns_id_t zebra_ns_id_get(const char *netnspath); extern ns_id_t zebra_ns_id_get_default(void); #ifdef __cplusplus diff --git a/zebra/zebra_netns_notify.c b/zebra/zebra_netns_notify.c index 995fa6fb5a..ec7681bf23 100644 --- a/zebra/zebra_netns_notify.c +++ b/zebra/zebra_netns_notify.c @@ -72,14 +72,13 @@ static void zebra_ns_notify_create_context_from_entry_name(const char *name) char *netnspath = ns_netns_pathname(NULL, name); struct vrf *vrf; int ret; - ns_id_t ns_id, ns_id_external, ns_id_relative = NS_UNKNOWN; - struct ns *default_ns; + ns_id_t ns_id, ns_id_external; if (netnspath == NULL) return; frr_with_privs(&zserv_privs) { - ns_id = zebra_ns_id_get(netnspath, -1); + ns_id = zebra_ns_id_get(netnspath); } if (ns_id == NS_UNKNOWN) return; @@ -98,21 +97,9 @@ static void zebra_ns_notify_create_context_from_entry_name(const char *name) ns_map_nsid_with_external(ns_id, false); return; } - - default_ns = ns_get_default(); - - /* force kernel ns_id creation in that new vrf */ - frr_with_privs(&zserv_privs) { - ns_switch_to_netns(netnspath); - 
ns_id_relative = zebra_ns_id_get(NULL, default_ns->fd); - ns_switchback_to_initial(); - } - frr_with_privs(&zserv_privs) { ret = vrf_netns_handler_create(NULL, vrf, netnspath, - ns_id_external, - ns_id, - ns_id_relative); + ns_id_external, ns_id); } if (ret != CMD_SUCCESS) { flog_warn(EC_ZEBRA_NS_VRF_CREATION_FAILED, diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c index 6462daf687..4e51437337 100644 --- a/zebra/zebra_ns.c +++ b/zebra/zebra_ns.c @@ -153,25 +153,20 @@ static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete) /* During zebra shutdown, do partial cleanup while the async dataplane * is still running. */ -int zebra_ns_early_shutdown(struct ns *ns, - void *param_in __attribute__((unused)), - void **param_out __attribute__((unused))) +int zebra_ns_early_shutdown(struct ns *ns) { struct zebra_ns *zns = ns->info; if (zns == NULL) return 0; - zebra_ns_disable_internal(zns, false); - return NS_WALK_CONTINUE; + return zebra_ns_disable_internal(zns, false); } /* During zebra shutdown, do final cleanup * after all dataplane work is complete. 
*/ -int zebra_ns_final_shutdown(struct ns *ns, - void *param_in __attribute__((unused)), - void **param_out __attribute__((unused))) +int zebra_ns_final_shutdown(struct ns *ns) { struct zebra_ns *zns = ns->info; @@ -180,7 +175,7 @@ int zebra_ns_final_shutdown(struct ns *ns, kernel_terminate(zns, true); - return NS_WALK_CONTINUE; + return 0; } int zebra_ns_init(const char *optional_default_name) @@ -188,16 +183,12 @@ int zebra_ns_init(const char *optional_default_name) struct ns *default_ns; ns_id_t ns_id; ns_id_t ns_id_external; - struct ns *ns; frr_with_privs(&zserv_privs) { ns_id = zebra_ns_id_get_default(); } ns_id_external = ns_map_nsid_with_external(ns_id, true); ns_init_management(ns_id_external, ns_id); - ns = ns_get_default(); - if (ns) - ns->relative_default_ns = ns_id; default_ns = ns_lookup(ns_get_default_id()); if (!default_ns) { diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h index f7d1f40782..dc79a83db0 100644 --- a/zebra/zebra_ns.h +++ b/zebra/zebra_ns.h @@ -67,12 +67,9 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id); int zebra_ns_init(const char *optional_default_name); int zebra_ns_enable(ns_id_t ns_id, void **info); int zebra_ns_disabled(struct ns *ns); -int zebra_ns_early_shutdown(struct ns *ns, - void *param_in __attribute__((unused)), - void **param_out __attribute__((unused))); -int zebra_ns_final_shutdown(struct ns *ns, - void *param_in __attribute__((unused)), - void **param_out __attribute__((unused))); +int zebra_ns_early_shutdown(struct ns *ns); +int zebra_ns_final_shutdown(struct ns *ns); + int zebra_ns_config_write(struct vty *vty, struct ns *ns); #ifdef __cplusplus diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h index 863c5fa71c..f73a8f2d59 100644 --- a/zebra/zebra_router.h +++ b/zebra/zebra_router.h @@ -125,6 +125,12 @@ struct zebra_router { /* L3-VNI hash table (for EVPN). 
Only in default instance */ struct hash *l3vni_table; + /* Tables and other global info maintained for EVPN multihoming */ + struct zebra_evpn_mh_info *mh_info; + + /* EVPN MH broadcast domains indexed by the VID */ + struct hash *evpn_vlan_table; + struct hash *rules_hash; struct hash *ipset_hash; diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 2ca57f1c56..2ea04eee2e 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -44,6 +44,7 @@ #include "zebra/zebra_routemap.h" #include "lib/json.h" #include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h" #ifndef VTYSH_EXTRACT_PL #include "zebra/zebra_vty_clippy.c" #endif @@ -2516,6 +2517,81 @@ DEFUN (show_evpn_global, return CMD_SUCCESS; } +DEFPY(show_evpn_es, + show_evpn_es_cmd, + "show evpn es [NAME$esi_str] [json$json] [detail$detail]", + SHOW_STR + "EVPN\n" + "Ethernet Segment\n" + "ES ID\n" + JSON_STR + "Detailed information\n") +{ + esi_t esi; + bool uj = !!json; + + if (esi_str) { + if (!str_to_esi(esi_str, &esi)) { + vty_out(vty, "%% Malformed ESI\n"); + return CMD_WARNING; + } + zebra_evpn_es_show_esi(vty, uj, &esi); + } else { + if (detail) + zebra_evpn_es_show_detail(vty, uj); + else + zebra_evpn_es_show(vty, uj); + } + + return CMD_SUCCESS; +} + +DEFPY(show_evpn_es_evi, + show_evpn_es_evi_cmd, + "show evpn es-evi [vni (1-16777215)$vni] [json$json] [detail$detail]", + SHOW_STR + "EVPN\n" + "Ethernet Segment per EVI\n" + "VxLAN Network Identifier\n" + "VNI\n" + JSON_STR + "Detailed information\n") +{ + bool uj = !!json; + bool ud = !!detail; + + if (vni) + zebra_evpn_es_evi_show_vni(vty, uj, vni, ud); + else + zebra_evpn_es_evi_show(vty, uj, ud); + + return CMD_SUCCESS; +} + +DEFPY(show_evpn_access_vlan, + show_evpn_access_vlan_cmd, + "show evpn access-vlan [(1-4094)$vid] [json$json] [detail$detail]", + SHOW_STR + "EVPN\n" + "Access VLANs\n" + "VLAN ID\n" + JSON_STR + "Detailed information\n") +{ + bool uj = !!json; + + if (vid) { + zebra_evpn_acc_vl_show_vid(vty, uj, vid); + } else { + if 
(detail) + zebra_evpn_acc_vl_show_detail(vty, uj); + else + zebra_evpn_acc_vl_show(vty, uj); + } + + return CMD_SUCCESS; +} + DEFUN (show_evpn_vni, show_evpn_vni_cmd, "show evpn vni [json]", @@ -3734,6 +3810,9 @@ void zebra_vty_init(void) install_element(VIEW_NODE, &show_evpn_vni_cmd); install_element(VIEW_NODE, &show_evpn_vni_detail_cmd); install_element(VIEW_NODE, &show_evpn_vni_vni_cmd); + install_element(VIEW_NODE, &show_evpn_es_cmd); + install_element(VIEW_NODE, &show_evpn_es_evi_cmd); + install_element(VIEW_NODE, &show_evpn_access_vlan_cmd); install_element(VIEW_NODE, &show_evpn_rmac_vni_mac_cmd); install_element(VIEW_NODE, &show_evpn_rmac_vni_cmd); install_element(VIEW_NODE, &show_evpn_rmac_vni_all_cmd); diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index 1bb673c940..ff09b48dcf 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -50,6 +50,7 @@ #include "zebra/zebra_vrf.h" #include "zebra/zebra_vxlan.h" #include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_evpn_mh.h" #include "zebra/zebra_router.h" DEFINE_MTYPE_STATIC(ZEBRA, HOST_PREFIX, "host prefix"); @@ -74,7 +75,6 @@ static const struct message zvtep_flood_str[] = { {0} }; - /* static function declarations */ static int ip_prefix_send_to_client(vrf_id_t vrf_id, struct prefix *p, uint16_t cmd); @@ -95,22 +95,26 @@ static void zvni_print_hash(struct hash_bucket *bucket, void *ctxt[]); static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr, struct ipaddr *ip, uint8_t flags, - uint32_t seq, int state, uint16_t cmd); + uint32_t seq, int state, + struct zebra_evpn_es *es, + uint16_t cmd); static unsigned int neigh_hash_keymake(const void *p); static void *zvni_neigh_alloc(void *p); static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip, - struct ethaddr *mac); + struct ethaddr *mac, zebra_mac_t *zmac, + uint32_t n_flags); static int zvni_neigh_del(zebra_vni_t *zvni, zebra_neigh_t *n); static void zvni_neigh_del_all(zebra_vni_t *zvni, int 
uninstall, int upd_client, uint32_t flags); static zebra_neigh_t *zvni_neigh_lookup(zebra_vni_t *zvni, struct ipaddr *ip); static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip, - struct ethaddr *macaddr, - uint8_t flags, uint32_t seq); + struct ethaddr *mac, zebra_mac_t *zmac, + uint32_t flags, uint32_t seq); static int zvni_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip, - struct ethaddr *macaddr, - uint8_t flags, int state); -static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n); + struct ethaddr *mac, + uint32_t flags, int state, bool force); +static int zvni_rem_neigh_install(zebra_vni_t *zvni, + zebra_neigh_t *n, bool was_static); static int zvni_neigh_uninstall(zebra_vni_t *zvni, zebra_neigh_t *n); static int zvni_neigh_probe(zebra_vni_t *zvni, zebra_neigh_t *n); static zebra_vni_t *zvni_from_svi(struct interface *ifp, @@ -157,21 +161,22 @@ static void zvni_mac_del_all(zebra_vni_t *zvni, int uninstall, int upd_client, uint32_t flags); static zebra_mac_t *zvni_mac_lookup(zebra_vni_t *zvni, struct ethaddr *macaddr); static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr, - uint8_t flags, uint32_t seq); -static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr); + uint32_t flags, uint32_t seq, struct zebra_evpn_es *es); +static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr, + uint32_t flags, bool force); static zebra_vni_t *zvni_map_vlan(struct interface *ifp, struct interface *br_if, vlanid_t vid); -static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac); -static int zvni_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac); +static int zvni_rem_mac_install(zebra_vni_t *zvni, + zebra_mac_t *mac, bool was_static); +static int zvni_rem_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac); static void zvni_install_mac_hash(struct hash_bucket *bucket, void *ctxt); static unsigned int vni_hash_keymake(const void *p); static void *zvni_alloc(void *p); -static 
zebra_vni_t *zvni_lookup(vni_t vni); static zebra_vni_t *zvni_add(vni_t vni); static int zvni_del(zebra_vni_t *zvni); static int zvni_send_add_to_client(zebra_vni_t *zvni); -static int zvni_send_del_to_client(vni_t vni); +static int zvni_send_del_to_client(zebra_vni_t *zvni); static void zvni_build_hash_table(void); static int zvni_vtep_match(struct in_addr *vtep_ip, zebra_vtep_t *zvtep); static zebra_vtep_t *zvni_vtep_find(zebra_vni_t *zvni, struct in_addr *vtep_ip); @@ -224,6 +229,22 @@ static void zebra_vxlan_sg_cleanup(struct hash_bucket *bucket, void *arg); static void zvni_send_mac_to_client(zebra_vni_t *zvn); static void zvni_send_neigh_to_client(zebra_vni_t *zvni); +static void zebra_vxlan_rem_mac_del(zebra_vni_t *zvni, + zebra_mac_t *zmac); +static inline void zebra_vxlan_mac_stop_hold_timer(zebra_mac_t *mac); +static inline bool zebra_vxlan_mac_is_static(zebra_mac_t *mac); +static void zebra_vxlan_local_neigh_ref_mac(zebra_neigh_t *n, + struct ethaddr *macaddr, zebra_mac_t *mac, + bool send_mac_update); +static void zebra_vxlan_local_neigh_deref_mac(zebra_neigh_t *n, + bool send_mac_update); +static inline bool zebra_vxlan_neigh_is_ready_for_bgp(zebra_neigh_t *n); +static inline bool zebra_vxlan_neigh_clear_sync_info(zebra_neigh_t *n); +static void zebra_vxlan_sync_neigh_dp_install(zebra_neigh_t *n, + bool set_inactive, bool force_clear_static, const char *caller); +static inline bool zebra_vxlan_neigh_is_static(zebra_neigh_t *neigh); +static void zebra_vxlan_neigh_send_add_del_to_client(zebra_neigh_t *n, + bool old_bgp_ready, bool new_bgp_ready); /* Private functions */ static int host_rb_entry_compare(const struct host_rb_entry *hle1, @@ -730,6 +751,7 @@ static void zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json) struct zebra_vrf *zvrf = NULL; struct timeval detect_start_time = {0, 0}; char timebuf[MONOTIME_STRLEN]; + char thread_buf[THREAD_TIMER_STRLEN]; zvrf = zebra_vrf_get_evpn(); if (!zvrf) @@ -742,25 +764,75 @@ static void 
zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json) state_str = IS_ZEBRA_NEIGH_ACTIVE(n) ? "active" : "inactive"; vty = (struct vty *)ctxt; if (json == NULL) { + bool sync_info = false; + vty_out(vty, "IP: %s\n", - ipaddr2str(&n->ip, buf2, sizeof(buf2))); + ipaddr2str(&n->ip, buf2, sizeof(buf2))); vty_out(vty, " Type: %s\n", type_str); vty_out(vty, " State: %s\n", state_str); vty_out(vty, " MAC: %s\n", - prefix_mac2str(&n->emac, buf1, sizeof(buf1))); + prefix_mac2str(&n->emac, buf1, sizeof(buf1))); + vty_out(vty, " Sync-info:"); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) { + vty_out(vty, " local-inactive"); + sync_info = true; + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY)) { + vty_out(vty, " peer-proxy"); + sync_info = true; + } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) { + vty_out(vty, " peer-active"); + sync_info = true; + } + if (n->hold_timer) { + vty_out(vty, " (ht: %s)", + thread_timer_to_hhmmss( + thread_buf, + sizeof(thread_buf), + n->hold_timer)); + sync_info = true; + } + if (!sync_info) + vty_out(vty, " -"); + vty_out(vty, "\n"); } else { json_object_string_add(json, "ip", buf2); json_object_string_add(json, "type", type_str); json_object_string_add(json, "state", state_str); json_object_string_add(json, "mac", buf1); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) + json_object_boolean_true_add(json, + "localInactive"); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY)) + json_object_boolean_true_add(json, + "peerProxy"); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + json_object_boolean_true_add(json, + "peerActive"); + if (n->hold_timer) + json_object_string_add(json, "peerActiveHold", + thread_timer_to_hhmmss( + thread_buf, + sizeof(thread_buf), + n->hold_timer)); } if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { - if (json == NULL) { - vty_out(vty, " Remote VTEP: %s\n", - inet_ntoa(n->r_vtep_ip)); - } else - json_object_string_add(json, "remoteVtep", - 
inet_ntoa(n->r_vtep_ip)); + if (n->mac->es) { + if (json) + json_object_string_add(json, "remoteEs", + n->mac->es->esi_str); + else + vty_out(vty, " Remote ES: %s\n", + n->mac->es->esi_str); + } else { + if (json) + json_object_string_add(json, "remoteVtep", + inet_ntoa(n->r_vtep_ip)); + else + vty_out(vty, " Remote VTEP: %s\n", + inet_ntoa(n->r_vtep_ip)); + } } if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) { if (!json) { @@ -811,6 +883,30 @@ static void zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json) } } +static void zvni_print_neigh_hdr(struct vty *vty, + struct neigh_walk_ctx *wctx) +{ + vty_out(vty, + "Flags: I=local-inactive, P=peer-active, X=peer-proxy\n"); + vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %s\n", + -wctx->addr_width, "Neighbor", "Type", "Flags", + "State", "MAC", "Remote ES/VTEP", "Seq #'s"); +} + +static char *zvni_print_neigh_flags(zebra_neigh_t *n, char *flags_buf, + uint32_t flags_buf_sz) +{ + snprintf(flags_buf, flags_buf_sz, "%s%s%s", + (n->flags & ZEBRA_NEIGH_ES_PEER_ACTIVE) ? + "P" : "", + (n->flags & ZEBRA_NEIGH_ES_PEER_PROXY) ? + "X" : "", + (n->flags & ZEBRA_NEIGH_LOCAL_INACTIVE) ? + "I" : ""); + + return flags_buf; +} + /* * Print neighbor hash entry - called for display of all neighbors. 
*/ @@ -823,6 +919,7 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt) char buf2[INET6_ADDRSTRLEN]; struct neigh_walk_ctx *wctx = ctxt; const char *state_str; + char flags_buf[6]; vty = wctx->vty; json_vni = wctx->json; @@ -839,9 +936,11 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt) return; if (json_vni == NULL) { - vty_out(vty, "%*s %-6s %-8s %-17s %u/%u\n", + vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %u/%u\n", -wctx->addr_width, buf2, "local", - state_str, buf1, n->loc_seq, n->rem_seq); + zvni_print_neigh_flags(n, flags_buf, + sizeof(flags_buf)), state_str, + buf1, "", n->loc_seq, n->rem_seq); } else { json_object_string_add(json_row, "type", "local"); json_object_string_add(json_row, "state", state_str); @@ -871,19 +970,25 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt) if (json_vni == NULL) { if ((wctx->flags & SHOW_REMOTE_NEIGH_FROM_VTEP) && (wctx->count == 0)) - vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", - -wctx->addr_width, "Neighbor", "Type", - "State", "MAC", "Remote VTEP", - "Seq #'s"); - vty_out(vty, "%*s %-6s %-8s %-17s %-21s %u/%u\n", - -wctx->addr_width, buf2, "remote", state_str, - buf1, inet_ntoa(n->r_vtep_ip), n->loc_seq, n->rem_seq); + zvni_print_neigh_hdr(vty, wctx); + vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %u/%u\n", + -wctx->addr_width, buf2, "remote", + zvni_print_neigh_flags(n, flags_buf, + sizeof(flags_buf)), + state_str, buf1, + n->mac->es ? 
n->mac->es->esi_str : + inet_ntoa(n->r_vtep_ip), + n->loc_seq, n->rem_seq); } else { json_object_string_add(json_row, "type", "remote"); json_object_string_add(json_row, "state", state_str); json_object_string_add(json_row, "mac", buf1); - json_object_string_add(json_row, "remoteVtep", - inet_ntoa(n->r_vtep_ip)); + if (n->mac->es) + json_object_string_add(json_row, "remoteEs", + n->mac->es->esi_str); + else + json_object_string_add(json_row, "remoteVtep", + inet_ntoa(n->r_vtep_ip)); if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) json_object_boolean_true_add(json_row, "defaultGateway"); @@ -986,11 +1091,9 @@ static void zvni_print_neigh_hash_all_vni(struct hash_bucket *bucket, wctx.json = json_vni; hash_iterate(zvni->neigh_table, zvni_find_neigh_addr_width, &wctx); - if (json == NULL) { - vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", - -wctx.addr_width, "IP", "Type", - "State", "MAC", "Remote VTEP", "Seq #'s"); - } + if (json == NULL) + zvni_print_neigh_hdr(vty, &wctx); + if (print_dup) hash_iterate(zvni->neigh_table, zvni_print_dad_neigh_hash, &wctx); @@ -1163,6 +1266,35 @@ static void zl3vni_print_rmac(zebra_mac_t *zrmac, struct vty *vty, } } +static void +zebra_vxlan_mac_get_access_info(zebra_mac_t *mac, + struct interface **ifpP, vlanid_t *vid) +{ + /* if the mac is associated with an ES we must get the access + * info from the ES + */ + if (mac->es) { + struct zebra_if *zif; + + /* get the access port from the es */ + *ifpP = mac->es->zif ? mac->es->zif->ifp : NULL; + /* get the vlan from the VNI */ + if (mac->zvni->vxlan_if) { + zif = mac->zvni->vxlan_if->info; + *vid = zif->l2info.vxl.access_vlan; + } else { + *vid = 0; + } + } else { + struct zebra_ns *zns; + + *vid = mac->fwd_info.local.vid; + zns = zebra_ns_lookup(NS_DEFAULT); + *ifpP = if_lookup_by_index_per_ns(zns, + mac->fwd_info.local.ifindex); + } +} + /* * Print a specific MAC entry. 
*/ @@ -1176,6 +1308,7 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) struct zebra_vrf *zvrf; struct timeval detect_start_time = {0, 0}; char timebuf[MONOTIME_STRLEN]; + char thread_buf[THREAD_TIMER_STRLEN]; zvrf = zebra_vrf_get_evpn(); if (!zvrf) @@ -1188,21 +1321,21 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) json_object *json_mac = json_object_new_object(); if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { - struct zebra_ns *zns; struct interface *ifp; - ifindex_t ifindex; + vlanid_t vid; - ifindex = mac->fwd_info.local.ifindex; - zns = zebra_ns_lookup(mac->fwd_info.local.ns_id); - ifp = if_lookup_by_index_per_ns(zns, ifindex); - if (!ifp) - return; + zebra_vxlan_mac_get_access_info(mac, + &ifp, &vid); json_object_string_add(json_mac, "type", "local"); - json_object_string_add(json_mac, "intf", ifp->name); - json_object_int_add(json_mac, "ifindex", ifindex); - if (mac->fwd_info.local.vid) + if (ifp) { + json_object_string_add(json_mac, + "intf", ifp->name); + json_object_int_add(json_mac, + "ifindex", ifp->ifindex); + } + if (vid) json_object_int_add(json_mac, "vlan", - mac->fwd_info.local.vid); + vid); } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { json_object_string_add(json_mac, "type", "remote"); json_object_string_add( @@ -1231,6 +1364,25 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) else json_object_boolean_false_add(json_mac, "isDuplicate"); + json_object_int_add(json_mac, "syncNeighCount", mac->sync_neigh_cnt); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) + json_object_boolean_true_add(json_mac, + "localInactive"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY)) + json_object_boolean_true_add(json_mac, + "peerProxy"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + json_object_boolean_true_add(json_mac, + "peerActive"); + if (mac->hold_timer) + json_object_string_add(json_mac, "peerActiveHold", + thread_timer_to_hhmmss( + 
thread_buf, + sizeof(thread_buf), + mac->hold_timer)); + if (mac->es) + json_object_string_add(json_mac, "esi", + mac->es->esi_str); /* print all the associated neigh */ if (!listcount(mac->neigh_list)) json_object_string_add(json_mac, "neighbors", "none"); @@ -1270,22 +1422,28 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) vty_out(vty, "MAC: %s\n", buf1); if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { - struct zebra_ns *zns; struct interface *ifp; - ifindex_t ifindex; + vlanid_t vid; - ifindex = mac->fwd_info.local.ifindex; - zns = zebra_ns_lookup(mac->fwd_info.local.ns_id); - ifp = if_lookup_by_index_per_ns(zns, ifindex); - if (!ifp) - return; - vty_out(vty, " Intf: %s(%u)", ifp->name, ifindex); - if (mac->fwd_info.local.vid) - vty_out(vty, " VLAN: %u", - mac->fwd_info.local.vid); + zebra_vxlan_mac_get_access_info(mac, + &ifp, &vid); + + if (mac->es) + vty_out(vty, " ESI: %s\n", mac->es->esi_str); + + if (ifp) + vty_out(vty, " Intf: %s(%u)", + ifp->name, ifp->ifindex); + else + vty_out(vty, " Intf: -"); + vty_out(vty, " VLAN: %u", vid); } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { - vty_out(vty, " Remote VTEP: %s", - inet_ntoa(mac->fwd_info.r_vtep_ip)); + if (mac->es) + vty_out(vty, " Remote ES: %s", + mac->es->esi_str); + else + vty_out(vty, " Remote VTEP: %s", + inet_ntoa(mac->fwd_info.r_vtep_ip)); } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) { vty_out(vty, " Auto Mac "); } @@ -1300,8 +1458,22 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) vty_out(vty, " Remote-gateway Mac "); vty_out(vty, "\n"); - vty_out(vty, " Local Seq: %u Remote Seq: %u", mac->loc_seq, - mac->rem_seq); + vty_out(vty, " Sync-info: neigh#: %u", mac->sync_neigh_cnt); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) + vty_out(vty, " local-inactive"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY)) + vty_out(vty, " peer-proxy"); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + vty_out(vty, " 
peer-active"); + if (mac->hold_timer) + vty_out(vty, " (ht: %s)", + thread_timer_to_hhmmss( + thread_buf, + sizeof(thread_buf), + mac->hold_timer)); + vty_out(vty, "\n"); + vty_out(vty, " Local Seq: %u Remote Seq: %u", + mac->loc_seq, mac->rem_seq); vty_out(vty, "\n"); if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) { @@ -1338,6 +1510,22 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) } } +static char *zvni_print_mac_flags(zebra_mac_t *mac, char *flags_buf, + uint32_t flags_buf_sz) +{ + snprintf(flags_buf, flags_buf_sz, "%s%s%s%s", + mac->sync_neigh_cnt ? + "N" : "", + (mac->flags & ZEBRA_MAC_ES_PEER_ACTIVE) ? + "P" : "", + (mac->flags & ZEBRA_MAC_ES_PEER_PROXY) ? + "X" : "", + (mac->flags & ZEBRA_MAC_LOCAL_INACTIVE) ? + "I" : ""); + + return flags_buf; +} + /* * Print MAC hash entry - called for display of all MACs. */ @@ -1348,6 +1536,7 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt) zebra_mac_t *mac; char buf1[ETHER_ADDR_STRLEN]; struct mac_walk_ctx *wctx = ctxt; + char flags_buf[6]; vty = wctx->vty; json_mac_hdr = wctx->json; @@ -1359,26 +1548,24 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt) json_mac = json_object_new_object(); if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { - struct zebra_ns *zns; - ifindex_t ifindex; struct interface *ifp; vlanid_t vid; if (wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) return; - zns = zebra_ns_lookup(mac->fwd_info.local.ns_id); - ifindex = mac->fwd_info.local.ifindex; - ifp = if_lookup_by_index_per_ns(zns, ifindex); - if (!ifp) // unexpected - return; - vid = mac->fwd_info.local.vid; - if (json_mac_hdr == NULL) - vty_out(vty, "%-17s %-6s %-21s", buf1, "local", - ifp->name); - else { + zebra_vxlan_mac_get_access_info(mac, + &ifp, &vid); + if (json_mac_hdr == NULL) { + vty_out(vty, "%-17s %-6s %-5s %-30s", buf1, "local", + zvni_print_mac_flags(mac, flags_buf, + sizeof(flags_buf)), + ifp ? 
ifp->name : "-"); + } else { json_object_string_add(json_mac, "type", "local"); - json_object_string_add(json_mac, "intf", ifp->name); + if (ifp) + json_object_string_add(json_mac, + "intf", ifp->name); } if (vid) { if (json_mac_hdr == NULL) @@ -1418,14 +1605,19 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt) if (json_mac_hdr == NULL) { if ((wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) && - (wctx->count == 0)) { + (wctx->count == 0)) { vty_out(vty, "\nVNI %u\n\n", wctx->zvni->vni); - vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", - "MAC", "Type", "Intf/Remote VTEP", + vty_out(vty, "%-17s %-6s %-5s%-30s %-5s %s\n", + "MAC", "Type", "Flags", + "Intf/Remote ES/VTEP", "VLAN", "Seq #'s"); } - vty_out(vty, "%-17s %-6s %-21s %-5s %u/%u\n", buf1, - "remote", inet_ntoa(mac->fwd_info.r_vtep_ip), + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %u/%u\n", buf1, + "remote", + zvni_print_mac_flags(mac, flags_buf, + sizeof(flags_buf)), + mac->es ? mac->es->esi_str : + inet_ntoa(mac->fwd_info.r_vtep_ip), "", mac->loc_seq, mac->rem_seq); } else { json_object_string_add(json_mac, "type", "remote"); @@ -1540,8 +1732,11 @@ static void zvni_print_mac_hash_all_vni(struct hash_bucket *bucket, void *ctxt) if (json == NULL) { vty_out(vty, "\nVNI %u #MACs (local and remote) %u\n\n", zvni->vni, num_macs); - vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", "MAC", - "Type", "Intf/Remote VTEP", "VLAN", "Seq #'s"); + vty_out(vty, + "Flags: N=sync-neighs, I=local-inactive, P=peer-active, X=peer-proxy\n"); + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %s\n", "MAC", + "Type", "Flags", "Intf/Remote ES/VTEP", + "VLAN", "Seq #'s"); } else json_object_int_add(json_vni, "numMacs", num_macs); } @@ -2106,13 +2301,16 @@ static void zvni_print_hash_detail(struct hash_bucket *bucket, void *data) */ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr, struct ipaddr *ip, uint8_t flags, - uint32_t seq, int state, uint16_t cmd) + uint32_t seq, int state, + struct zebra_evpn_es *es, + 
uint16_t cmd) { char buf[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; int ipa_len; struct zserv *client = NULL; struct stream *s = NULL; + esi_t *esi = es ? &es->esi : zero_esi; client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); /* BGP may not be running. */ @@ -2140,6 +2338,7 @@ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr, if (cmd == ZEBRA_MACIP_ADD) { stream_putc(s, flags); /* sticky mac/gateway mac */ stream_putl(s, seq); /* sequence number */ + stream_put(s, esi, sizeof(esi_t)); } else { stream_putl(s, state); /* state - active/inactive */ } @@ -2150,10 +2349,11 @@ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr, if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - "Send MACIP %s flags 0x%x MAC %s IP %s seq %u L2-VNI %u to %s", + "Send MACIP %s f 0x%x MAC %s IP %s seq %u L2-VNI %u ESI %s to %s", (cmd == ZEBRA_MACIP_ADD) ? "Add" : "Del", flags, prefix_mac2str(macaddr, buf, sizeof(buf)), ipaddr2str(ip, buf2, sizeof(buf2)), seq, vni, + es ? es->esi_str : "-", zebra_route_string(client->proto)); if (cmd == ZEBRA_MACIP_ADD) @@ -2222,26 +2422,26 @@ static void *zvni_neigh_alloc(void *p) * Add neighbor entry. 
*/ static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip, - struct ethaddr *mac) + struct ethaddr *mac, zebra_mac_t *zmac, + uint32_t n_flags) { zebra_neigh_t tmp_n; zebra_neigh_t *n = NULL; - zebra_mac_t *zmac = NULL; memset(&tmp_n, 0, sizeof(zebra_neigh_t)); memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr)); n = hash_get(zvni->neigh_table, &tmp_n, zvni_neigh_alloc); assert(n); - memcpy(&n->emac, mac, ETH_ALEN); n->state = ZEBRA_NEIGH_INACTIVE; n->zvni = zvni; n->dad_ip_auto_recovery_timer = NULL; + n->flags = n_flags; - /* Associate the neigh to mac */ - zmac = zvni_mac_lookup(zvni, mac); - if (zmac) - listnode_add_sort(zmac->neigh_list, n); + if (!zmac) + zmac = zvni_mac_lookup(zvni, mac); + zebra_vxlan_local_neigh_ref_mac(n, mac, + zmac, false /* send_mac_update */); return n; } @@ -2252,11 +2452,9 @@ static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip, static int zvni_neigh_del(zebra_vni_t *zvni, zebra_neigh_t *n) { zebra_neigh_t *tmp_n; - zebra_mac_t *zmac = NULL; - zmac = zvni_mac_lookup(zvni, &n->emac); - if (zmac) - listnode_delete(zmac->neigh_list, n); + if (n->mac) + listnode_delete(n->mac->neigh_list, n); /* Cancel auto recovery */ THREAD_OFF(n->dad_ip_auto_recovery_timer); @@ -2284,10 +2482,18 @@ static void zvni_neigh_del_hash_entry(struct hash_bucket *bucket, void *arg) && IPV4_ADDR_SAME(&n->r_vtep_ip, &wctx->r_vtep_ip))) { if (wctx->upd_client && (n->flags & ZEBRA_NEIGH_LOCAL)) zvni_neigh_send_del_to_client(wctx->zvni->vni, &n->ip, - &n->emac, 0, n->state); - - if (wctx->uninstall) - zvni_neigh_uninstall(wctx->zvni, n); + &n->emac, n->flags, n->state, + false /*force*/); + + if (wctx->uninstall) { + if (zebra_vxlan_neigh_is_static(n)) + zebra_vxlan_sync_neigh_dp_install(n, + false /* set_inactive */, + true /* force_clear_static */, + __func__); + if ((n->flags & ZEBRA_NEIGH_REMOTE)) + zvni_neigh_uninstall(wctx->zvni, n); + } zvni_neigh_del(wctx->zvni, n); } @@ -2335,8 +2541,7 @@ static zebra_neigh_t 
*zvni_neigh_lookup(zebra_vni_t *zvni, struct ipaddr *ip) * locally or undergoing any other change (such as sequence number). */ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni, - zebra_mac_t *zmac, - bool seq_change) + zebra_mac_t *zmac, bool seq_change, bool es_change) { zebra_neigh_t *n = NULL; struct listnode *node = NULL; @@ -2358,7 +2563,8 @@ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni, */ for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) { if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { - if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change) { + if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change || + es_change) { ZEBRA_NEIGH_SET_ACTIVE(n); n->loc_seq = zmac->loc_seq; if (!(zvrf->dup_addr_detect && @@ -2366,7 +2572,7 @@ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni, ZEBRA_NEIGH_DUPLICATE))) zvni_neigh_send_add_to_client( zvni->vni, &n->ip, &n->emac, - n->flags, n->loc_seq); + n->mac, n->flags, n->loc_seq); } } } @@ -2400,7 +2606,9 @@ static void zvni_process_neigh_on_local_mac_del(zebra_vni_t *zvni, ZEBRA_NEIGH_SET_INACTIVE(n); n->loc_seq = 0; zvni_neigh_send_del_to_client(zvni->vni, &n->ip, - &n->emac, 0, ZEBRA_NEIGH_ACTIVE); + &n->emac, n->flags, + ZEBRA_NEIGH_ACTIVE, + false /*force*/); } } } @@ -2431,7 +2639,9 @@ static void zvni_process_neigh_on_remote_mac_add(zebra_vni_t *zvni, ZEBRA_NEIGH_SET_INACTIVE(n); n->loc_seq = 0; zvni_neigh_send_del_to_client(zvni->vni, &n->ip, - &n->emac, 0, ZEBRA_NEIGH_ACTIVE); + &n->emac, n->flags, + ZEBRA_NEIGH_ACTIVE, + false /* force */); } } } @@ -2464,11 +2674,27 @@ static void zvni_probe_neigh_on_mac_add(zebra_vni_t *zvni, zebra_mac_t *zmac) */ static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip, struct ethaddr *macaddr, - uint8_t neigh_flags, + zebra_mac_t *zmac, + uint32_t neigh_flags, uint32_t seq) { uint8_t flags = 0; + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) { + /* host reachability has not been verified locally */ + + /* 
if no ES peer is claiming reachability we can't advertise + * the entry + */ + if (!CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + return 0; + + /* ES peers are claiming reachability; we will + * advertise the entry but with a proxy flag + */ + SET_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT); + } + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_DEF_GW)) SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); /* Set router flag (R-bit) based on local neigh entry add */ @@ -2478,24 +2704,34 @@ static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip, SET_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP); return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags, - seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD); + seq, ZEBRA_NEIGH_ACTIVE, + zmac ? zmac->es : NULL, + ZEBRA_MACIP_ADD); } /* * Inform BGP about local neighbor deletion. */ static int zvni_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip, - struct ethaddr *macaddr, uint8_t flags, - int state) + struct ethaddr *macaddr, uint32_t flags, + int state, bool force) { + if (!force) { + if (CHECK_FLAG(flags, ZEBRA_NEIGH_LOCAL_INACTIVE) && + !CHECK_FLAG(flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + /* the neigh was not advertised - nothing to delete */ + return 0; + } + return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags, - 0, state, ZEBRA_MACIP_DEL); + 0, state, NULL, ZEBRA_MACIP_DEL); } /* * Install remote neighbor into the kernel. 
*/ -static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n) +static int zvni_rem_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n, + bool was_static) { struct zebra_if *zif; struct zebra_l2info_vxlan *vxl; @@ -2520,7 +2756,8 @@ static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n) flags |= DPLANE_NTF_ROUTER; ZEBRA_NEIGH_SET_ACTIVE(n); - dplane_neigh_add(vlan_if, &n->ip, &n->emac, flags); + dplane_rem_neigh_add(vlan_if, &n->ip, &n->emac, flags, + was_static); return ret; } @@ -2555,7 +2792,7 @@ static int zvni_neigh_uninstall(zebra_vni_t *zvni, zebra_neigh_t *n) ZEBRA_NEIGH_SET_INACTIVE(n); n->loc_seq = 0; - dplane_neigh_delete(vlan_if, &n->ip); + dplane_rem_neigh_delete(vlan_if, &n->ip); return 0; } @@ -2578,7 +2815,7 @@ static int zvni_neigh_probe(zebra_vni_t *zvni, zebra_neigh_t *n) if (!vlan_if) return -1; - dplane_neigh_update(vlan_if, &n->ip, &n->emac); + dplane_rem_neigh_update(vlan_if, &n->ip, &n->emac); return 0; } @@ -2594,7 +2831,7 @@ static void zvni_install_neigh_hash(struct hash_bucket *bucket, void *ctxt) n = (zebra_neigh_t *)bucket->data; if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) - zvni_neigh_install(wctx->zvni, n); + zvni_rem_neigh_install(wctx->zvni, n, false /*was_static*/); } /* Get the VRR interface for SVI if any */ @@ -2729,12 +2966,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, zebra_mac_t *mac = NULL; struct zebra_if *zif = NULL; struct zebra_l2info_vxlan *vxl = NULL; - struct zebra_vrf *zvrf; - ns_id_t local_ns_id = NS_DEFAULT; - zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id); - if (zvrf && zvrf->zns) - local_ns_id = zvrf->zns->ns_id; zif = zvni->vxlan_if->info; if (!zif) return -1; @@ -2759,12 +2991,11 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, SET_FLAG(mac->flags, ZEBRA_MAC_DEF_GW); memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); mac->fwd_info.local.ifindex = ifp->ifindex; - mac->fwd_info.local.ns_id = local_ns_id; mac->fwd_info.local.vid = 
vxl->access_vlan; n = zvni_neigh_lookup(zvni, ip); if (!n) { - n = zvni_neigh_add(zvni, ip, macaddr); + n = zvni_neigh_add(zvni, ip, macaddr, mac, 0); if (!n) { flog_err( EC_ZEBRA_MAC_ADD_FAILED, @@ -2798,7 +3029,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, prefix_mac2str(macaddr, buf, sizeof(buf)), ipaddr2str(ip, buf2, sizeof(buf2)), n->flags); - zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr, + zvni_neigh_send_add_to_client(zvni->vni, ip, &n->emac, n->mac, n->flags, n->loc_seq); } else if (advertise_svi_macip_enabled(zvni)) { @@ -2810,7 +3041,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, prefix_mac2str(macaddr, buf, sizeof(buf)), ipaddr2str(ip, buf2, sizeof(buf2)), n->flags); - zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr, + zvni_neigh_send_add_to_client(zvni->vni, ip, &n->emac, n->mac, n->flags, n->loc_seq); } @@ -2859,7 +3090,8 @@ static int zvni_gw_macip_del(struct interface *ifp, zebra_vni_t *zvni, /* Remove neighbor from BGP. */ zvni_neigh_send_del_to_client(zvni->vni, &n->ip, &n->emac, - ZEBRA_MACIP_TYPE_GW, ZEBRA_NEIGH_ACTIVE); + n->flags, ZEBRA_NEIGH_ACTIVE, + false /*force*/); /* Delete this neighbor entry. */ zvni_neigh_del(zvni, n); @@ -3007,11 +3239,36 @@ static void zvni_svi_macip_del_for_vni_hash(struct hash_bucket *bucket, return; } +static inline void zvni_local_neigh_update_log(const char *pfx, + zebra_neigh_t *n, bool is_router, bool local_inactive, + bool old_bgp_ready, bool new_bgp_ready, + bool inform_dataplane, bool inform_bgp, const char *sfx) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + + if (!IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + return; + + zlog_debug("%s neigh vni %u ip %s mac %s f 0x%x%s%s%s%s%s%s %s", + pfx, n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, sizeof(macbuf)), + n->flags, is_router ? " router" : "", + local_inactive ? " local-inactive" : "", + old_bgp_ready ? 
" old_bgp_ready" : "", + new_bgp_ready ? " new_bgp_ready" : "", + inform_dataplane ? " inform_dp" : "", + inform_bgp ? " inform_bgp" : "", + sfx); +} + static int zvni_local_neigh_update(zebra_vni_t *zvni, struct interface *ifp, struct ipaddr *ip, struct ethaddr *macaddr, - bool is_router) + bool is_router, + bool local_inactive, bool dp_static) { char buf[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; @@ -3025,6 +3282,11 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, bool neigh_was_remote = false; bool do_dad = false; struct in_addr vtep_ip = {.s_addr = 0}; + bool inform_dataplane = false; + bool created = false; + bool new_static = false; + bool old_bgp_ready = false; + bool new_bgp_ready; /* Check if the MAC exists. */ zmac = zvni_mac_lookup(zvni, macaddr); @@ -3072,7 +3334,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, n = zvni_neigh_lookup(zvni, ip); if (!n) { /* New neighbor - create */ - n = zvni_neigh_add(zvni, ip, macaddr); + n = zvni_neigh_add(zvni, ip, macaddr, zmac, 0); if (!n) { flog_err( EC_ZEBRA_MAC_ADD_FAILED, @@ -3085,17 +3347,28 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, /* Set "local" forwarding info. */ SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); n->ifindex = ifp->ifindex; + created = true; } else { if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { bool mac_different; bool cur_is_router; + bool old_local_inactive; + + old_local_inactive = !!CHECK_FLAG(n->flags, + ZEBRA_NEIGH_LOCAL_INACTIVE); + + old_bgp_ready = + zebra_vxlan_neigh_is_ready_for_bgp(n); /* Note any changes and see if of interest to BGP. */ - mac_different = (memcmp(n->emac.octet, - macaddr->octet, ETH_ALEN) != 0) ? 
1 : 0; + mac_different = !!memcmp(&n->emac, + macaddr, ETH_ALEN); cur_is_router = !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); - if (!mac_different && is_router == cur_is_router) { + new_static = zebra_vxlan_neigh_is_static(n); + if (!mac_different && is_router == cur_is_router && + old_local_inactive == local_inactive && + dp_static != new_static) { if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( " Ignoring entry mac is the same and is_router == cur_is_router"); @@ -3103,7 +3376,9 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, return 0; } + old_zmac = n->mac; if (!mac_different) { + /* XXX - cleanup this code duplication */ bool is_neigh_freezed = false; /* Only the router flag has changed. */ @@ -3114,6 +3389,15 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + if (local_inactive) + SET_FLAG(n->flags, + ZEBRA_NEIGH_LOCAL_INACTIVE); + else + UNSET_FLAG(n->flags, + ZEBRA_NEIGH_LOCAL_INACTIVE); + new_bgp_ready = + zebra_vxlan_neigh_is_ready_for_bgp(n); + /* Neigh is in freeze state and freeze action * is enabled, do not send update to client. 
*/ @@ -3122,13 +3406,20 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, CHECK_FLAG(n->flags, ZEBRA_NEIGH_DUPLICATE)); - if (IS_ZEBRA_NEIGH_ACTIVE(n) && - !is_neigh_freezed) - return zvni_neigh_send_add_to_client( - zvni->vni, ip, macaddr, - n->flags, n->loc_seq); - else { - if (IS_ZEBRA_DEBUG_VXLAN) + zvni_local_neigh_update_log("local", n, + is_router, local_inactive, + old_bgp_ready, new_bgp_ready, + false, false, "flag-update"); + + /* if the neigh can no longer be advertised + * remove it from bgp + */ + if (!is_neigh_freezed) { + zebra_vxlan_neigh_send_add_del_to_client( + n, old_bgp_ready, new_bgp_ready); + } else { + if (IS_ZEBRA_DEBUG_VXLAN && + IS_ZEBRA_NEIGH_ACTIVE(n)) zlog_debug( " Neighbor active and frozen"); } @@ -3141,25 +3432,32 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, * We also need to update the MAC's sequence number * in different situations. */ - if (IS_ZEBRA_NEIGH_ACTIVE(n)) + if (old_bgp_ready) { zvni_neigh_send_del_to_client(zvni->vni, &n->ip, - &n->emac, 0, n->state); - old_zmac = zvni_mac_lookup(zvni, &n->emac); + &n->emac, n->flags, n->state, + false /*force*/); + old_bgp_ready = false; + } if (old_zmac) { old_mac_seq = CHECK_FLAG(old_zmac->flags, ZEBRA_MAC_REMOTE) ? old_zmac->rem_seq : old_zmac->loc_seq; neigh_mac_change = upd_mac_seq = true; - listnode_delete(old_zmac->neigh_list, n); - zvni_deref_ip2mac(zvni, old_zmac); + zebra_vxlan_local_neigh_deref_mac(n, + true /* send_mac_update */); } + /* if mac changes abandon peer flags and tell + * dataplane to clear the static flag + */ + if (zebra_vxlan_neigh_clear_sync_info(n)) + inform_dataplane = true; /* Update the forwarding info. */ n->ifindex = ifp->ifindex; - memcpy(&n->emac, macaddr, ETH_ALEN); /* Link to new MAC */ - listnode_add_sort(zmac->neigh_list, n); + zebra_vxlan_local_neigh_ref_mac(n, macaddr, zmac, + true /* send_mac_update */); } else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { /* * Neighbor has moved from remote to local. 
Its @@ -3167,7 +3465,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, */ if (memcmp(n->emac.octet, macaddr->octet, ETH_ALEN) != 0) { - old_zmac = zvni_mac_lookup(zvni, &n->emac); + old_zmac = n->mac; if (old_zmac) { old_mac_seq = CHECK_FLAG( old_zmac->flags, @@ -3175,14 +3473,13 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, old_zmac->rem_seq : old_zmac->loc_seq; neigh_mac_change = upd_mac_seq = true; - listnode_delete(old_zmac->neigh_list, - n); - zvni_deref_ip2mac(zvni, old_zmac); + zebra_vxlan_local_neigh_deref_mac(n, + true /* send_update */); } /* Link to new MAC */ - memcpy(&n->emac, macaddr, ETH_ALEN); - listnode_add_sort(zmac->neigh_list, n); + zebra_vxlan_local_neigh_ref_mac(n, macaddr, + zmac, true /*send_update*/); } /* Based on Mobility event Scenario-B from the * draft, neigh's previous state was remote treat this @@ -3211,12 +3508,27 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, MAX(seq1, seq2) : zmac->loc_seq; } + if (local_inactive) + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + else + UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + /* Mark Router flag (R-bit) */ if (is_router) SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); else UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + /* if the dataplane thinks that this is a sync entry but + * zebra doesn't we need to re-concile the diff + * by re-installing the dataplane entry + */ + if (dp_static) { + new_static = zebra_vxlan_neigh_is_static(n); + if (!new_static) + inform_dataplane = true; + } + /* Check old and/or new MAC detected as duplicate mark * the neigh as duplicate */ @@ -3239,16 +3551,28 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, zebra_vxlan_dup_addr_detect_for_neigh(zvrf, n, vtep_ip, do_dad, &neigh_on_hold, true); + if (inform_dataplane) + zebra_vxlan_sync_neigh_dp_install(n, false /* set_inactive */, + false /* force_clear_static */, __func__); + /* Before we program this in BGP, we need to check if MAC is locally * learnt. 
If not, force neighbor to be inactive and reset its seq. */ if (!CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL)) { + zvni_local_neigh_update_log("local", + n, is_router, local_inactive, + false, false, inform_dataplane, false, + "auto-mac"); ZEBRA_NEIGH_SET_INACTIVE(n); n->loc_seq = 0; zmac->loc_seq = mac_new_seq; return 0; } + zvni_local_neigh_update_log("local", + n, is_router, local_inactive, false, false, inform_dataplane, + true, created ? "created" : "updated"); + /* If the MAC's sequence number has changed, inform the MAC and all * neighbors associated with the MAC to BGP, else just inform this * neighbor. @@ -3260,9 +3584,10 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, zvni->vni, zmac->loc_seq, mac_new_seq); zmac->loc_seq = mac_new_seq; if (zvni_mac_send_add_to_client(zvni->vni, macaddr, - zmac->flags, zmac->loc_seq)) + zmac->flags, zmac->loc_seq, zmac->es)) return -1; - zvni_process_neigh_on_local_mac_change(zvni, zmac, 1); + zvni_process_neigh_on_local_mac_change(zvni, zmac, 1, + 0 /*es_change*/); return 0; } @@ -3270,9 +3595,10 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni, if (!neigh_on_hold) { ZEBRA_NEIGH_SET_ACTIVE(n); - - return zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr, - n->flags, n->loc_seq); + new_bgp_ready = + zebra_vxlan_neigh_is_ready_for_bgp(n); + zebra_vxlan_neigh_send_add_del_to_client(n, + old_bgp_ready, new_bgp_ready); } else { if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug(" Neighbor on hold not sending"); @@ -3300,7 +3626,7 @@ static int zvni_remote_neigh_update(zebra_vni_t *zvni, if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { #ifdef GNU_LINUX if (state & NUD_STALE) - zvni_neigh_install(zvni, n); + zvni_rem_neigh_install(zvni, n, false /*was_static*/); #endif } else { /* We got a "remote" neighbor notification for an entry @@ -3318,7 +3644,7 @@ static int zvni_remote_neigh_update(zebra_vni_t *zvni, return -1; } - UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_LOCAL_FLAGS); 
SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE); ZEBRA_NEIGH_SET_ACTIVE(n); n->r_vtep_ip = zmac->fwd_info.r_vtep_ip; @@ -3389,6 +3715,15 @@ static zebra_mac_t *zvni_mac_add(zebra_vni_t *zvni, struct ethaddr *macaddr) mac->neigh_list = list_new(); mac->neigh_list->cmp = neigh_list_cmp; + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char buf[ETHER_ADDR_STRLEN]; + + zlog_debug("%s: MAC %s flags 0x%x", + __func__, + prefix_mac2str(&mac->macaddr, + buf, sizeof(buf)), + mac->flags); + } return mac; } @@ -3399,6 +3734,22 @@ static int zvni_mac_del(zebra_vni_t *zvni, zebra_mac_t *mac) { zebra_mac_t *tmp_mac; + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) { + char buf[ETHER_ADDR_STRLEN]; + + zlog_debug("%s: MAC %s flags 0x%x", + __func__, + prefix_mac2str(&mac->macaddr, + buf, sizeof(buf)), + mac->flags); + } + + /* force de-ref any ES entry linked to the MAC */ + zebra_evpn_es_mac_deref_entry(mac); + + /* Cancel proxy hold timer */ + zebra_vxlan_mac_stop_hold_timer(mac); + /* Cancel auto recovery */ THREAD_OFF(mac->dad_mac_auto_recovery_timer); @@ -3454,10 +3805,18 @@ static void zvni_mac_del_hash_entry(struct hash_bucket *bucket, void *arg) if (zvni_check_mac_del_from_db(wctx, mac)) { if (wctx->upd_client && (mac->flags & ZEBRA_MAC_LOCAL)) { zvni_mac_send_del_to_client(wctx->zvni->vni, - &mac->macaddr); + &mac->macaddr, mac->flags, false); + } + if (wctx->uninstall) { + if (zebra_vxlan_mac_is_static(mac)) + zebra_vxlan_sync_mac_dp_install(mac, + false /* set_inactive */, + true /* force_clear_static */, + __func__); + + if (mac->flags & ZEBRA_MAC_REMOTE) + zvni_rem_mac_uninstall(wctx->zvni, mac); } - if (wctx->uninstall) - zvni_mac_uninstall(wctx->zvni, mac); zvni_mac_del(wctx->zvni, mac); } @@ -3504,88 +3863,51 @@ static zebra_mac_t *zvni_mac_lookup(zebra_vni_t *zvni, struct ethaddr *mac) * Inform BGP about local MAC addition. 
*/ static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr, - uint8_t mac_flags, uint32_t seq) + uint32_t mac_flags, uint32_t seq, struct zebra_evpn_es *es) { uint8_t flags = 0; + if (CHECK_FLAG(mac_flags, ZEBRA_MAC_LOCAL_INACTIVE)) { + /* host reachability has not been verified locally */ + + /* if no ES peer is claiming reachability we can't advertise the + * entry + */ + if (!CHECK_FLAG(mac_flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + return 0; + + /* ES peers are claiming reachability; we will + * advertise the entry but with a proxy flag + */ + SET_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT); + } + if (CHECK_FLAG(mac_flags, ZEBRA_MAC_STICKY)) SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); if (CHECK_FLAG(mac_flags, ZEBRA_MAC_DEF_GW)) SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); return zvni_macip_send_msg_to_client(vni, macaddr, NULL, flags, - seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD); + seq, ZEBRA_NEIGH_ACTIVE, es, + ZEBRA_MACIP_ADD); } /* * Inform BGP about local MAC deletion. */ -static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr) +static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr, + uint32_t flags, bool force) { - return zvni_macip_send_msg_to_client(vni, macaddr, NULL, 0 /* flags */, - 0 /* seq */, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_DEL); -} - -struct zvni_from_svi_param { - struct interface *br_if; - struct interface *svi_if; - struct zebra_if *zif; - uint8_t bridge_vlan_aware; - vlanid_t vid; -}; - -static int zvni_map_vlan_ns(struct ns *ns, - void *_in_param, - void **_p_zvni) -{ - struct zebra_ns *zns = ns->info; - struct route_node *rn; - struct interface *br_if; - zebra_vni_t **p_zvni = (zebra_vni_t **)_p_zvni; - zebra_vni_t *zvni; - struct interface *tmp_if = NULL; - struct zebra_if *zif; - struct zebra_l2info_vxlan *vxl = NULL; - struct zvni_from_svi_param *in_param = - (struct zvni_from_svi_param *)_in_param; - int found = 0; - - if (!in_param) - return NS_WALK_STOP; - br_if = in_param->br_if; - zif = 
in_param->zif; - assert(zif); - assert(br_if); - - /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ - /* TODO: Optimize with a hash. */ - for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { - tmp_if = (struct interface *)rn->info; - if (!tmp_if) - continue; - zif = tmp_if->info; - if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) - continue; - if (!if_is_operative(tmp_if)) - continue; - vxl = &zif->l2info.vxl; - - if (zif->brslave_info.br_if != br_if) - continue; - - if (!in_param->bridge_vlan_aware - || vxl->access_vlan == in_param->vid) { - found = 1; - break; - } + if (!force) { + if (CHECK_FLAG(flags, ZEBRA_MAC_LOCAL_INACTIVE) && + !CHECK_FLAG(flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + /* the host was not advertised - nothing to delete */ + return 0; } - if (!found) - return NS_WALK_CONTINUE; - zvni = zvni_lookup(vxl->vni); - if (p_zvni) - *p_zvni = zvni; - return NS_WALK_STOP; + return zvni_macip_send_msg_to_client(vni, macaddr, NULL, 0 /* flags */, + 0 /* seq */, ZEBRA_NEIGH_ACTIVE, NULL, + ZEBRA_MACIP_DEL); } /* @@ -3595,51 +3917,25 @@ static int zvni_map_vlan_ns(struct ns *ns, static zebra_vni_t *zvni_map_vlan(struct interface *ifp, struct interface *br_if, vlanid_t vid) { - struct zebra_if *zif; - struct zebra_l2info_bridge *br; - zebra_vni_t **p_zvni; - zebra_vni_t *zvni = NULL; - struct zvni_from_svi_param in_param; - - /* Determine if bridge is VLAN-aware or not */ - zif = br_if->info; - assert(zif); - br = &zif->l2info.br; - in_param.bridge_vlan_aware = br->vlan_aware; - in_param.vid = vid; - in_param.br_if = br_if; - in_param.zif = zif; - p_zvni = &zvni; - - ns_walk_func(zvni_map_vlan_ns, - (void *)&in_param, - (void **)p_zvni); - return zvni; -} - -static int zvni_from_svi_ns(struct ns *ns, - void *_in_param, - void **_p_zvni) -{ - struct zebra_ns *zns = ns->info; + struct zebra_ns *zns; struct route_node *rn; - struct interface *br_if; - zebra_vni_t **p_zvni = (zebra_vni_t **)_p_zvni; - zebra_vni_t *zvni; struct interface 
*tmp_if = NULL; struct zebra_if *zif; + struct zebra_l2info_bridge *br; struct zebra_l2info_vxlan *vxl = NULL; - struct zvni_from_svi_param *in_param = - (struct zvni_from_svi_param *)_in_param; + uint8_t bridge_vlan_aware; + zebra_vni_t *zvni; int found = 0; - if (!in_param) - return NS_WALK_STOP; - br_if = in_param->br_if; - zif = in_param->zif; + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; assert(zif); + br = &zif->l2info.br; + bridge_vlan_aware = br->vlan_aware; + /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ /* TODO: Optimize with a hash. */ + zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { tmp_if = (struct interface *)rn->info; if (!tmp_if) @@ -3654,20 +3950,17 @@ static int zvni_from_svi_ns(struct ns *ns, if (zif->brslave_info.br_if != br_if) continue; - if (!in_param->bridge_vlan_aware - || vxl->access_vlan == !in_param->vid) { + if (!bridge_vlan_aware || vxl->access_vlan == vid) { found = 1; break; } } if (!found) - return NS_WALK_CONTINUE; + return NULL; zvni = zvni_lookup(vxl->vni); - if (p_zvni) - *p_zvni = zvni; - return NS_WALK_STOP; + return zvni; } /* @@ -3677,11 +3970,16 @@ static int zvni_from_svi_ns(struct ns *ns, static zebra_vni_t *zvni_from_svi(struct interface *ifp, struct interface *br_if) { - struct zebra_l2info_bridge *br; - zebra_vni_t *zvni = NULL; - zebra_vni_t **p_zvni; + struct zebra_ns *zns; + struct route_node *rn; + struct interface *tmp_if = NULL; struct zebra_if *zif; - struct zvni_from_svi_param in_param; + struct zebra_l2info_bridge *br; + struct zebra_l2info_vxlan *vxl = NULL; + uint8_t bridge_vlan_aware; + vlanid_t vid = 0; + zebra_vni_t *zvni; + int found = 0; if (!br_if) return NULL; @@ -3694,10 +3992,8 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp, zif = br_if->info; assert(zif); br = &zif->l2info.br; - in_param.bridge_vlan_aware = br->vlan_aware; - in_param.vid = 0; - - if (in_param.bridge_vlan_aware) { + 
bridge_vlan_aware = br->vlan_aware; + if (bridge_vlan_aware) { struct zebra_l2info_vlan *vl; if (!IS_ZEBRA_IF_VLAN(ifp)) @@ -3706,54 +4002,37 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp, zif = ifp->info; assert(zif); vl = &zif->l2info.vl; - in_param.vid = vl->vid; + vid = vl->vid; } - in_param.br_if = br_if; - in_param.zif = zif; - p_zvni = &zvni; /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ - ns_walk_func(zvni_from_svi_ns, - (void *)&in_param, - (void **)p_zvni); - return zvni; -} - -static int zvni_map_to_svi_ns(struct ns *ns, - void *_in_param, - void **_p_ifp) -{ - struct zebra_ns *zns = ns->info; - struct route_node *rn; - struct zvni_from_svi_param *in_param = - (struct zvni_from_svi_param *)_in_param; - struct zebra_l2info_vlan *vl; - struct interface *tmp_if = NULL; - struct interface **p_ifp = (struct interface **)_p_ifp; - struct zebra_if *zif; - - if (!in_param) - return NS_WALK_STOP; - /* TODO: Optimize with a hash. */ + zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { tmp_if = (struct interface *)rn->info; - /* Check oper status of the SVI. */ - if (!tmp_if || !if_is_operative(tmp_if)) + if (!tmp_if) continue; zif = tmp_if->info; - if (!zif || zif->zif_type != ZEBRA_IF_VLAN - || zif->link != in_param->br_if) + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + if (!if_is_operative(tmp_if)) + continue; + vxl = &zif->l2info.vxl; + + if (zif->brslave_info.br_if != br_if) continue; - vl = (struct zebra_l2info_vlan *)&zif->l2info.vl; - if (vl->vid == in_param->vid) { - if (p_ifp) - *p_ifp = tmp_if; - return NS_WALK_STOP; + if (!bridge_vlan_aware || vxl->access_vlan == vid) { + found = 1; + break; } } - return NS_WALK_CONTINUE; + + if (!found) + return NULL; + + zvni = zvni_lookup(vxl->vni); + return zvni; } /* Map to SVI on bridge corresponding to specified VLAN. 
This can be one @@ -3765,11 +4044,15 @@ static int zvni_map_to_svi_ns(struct ns *ns, */ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) { + struct zebra_ns *zns; + struct route_node *rn; struct interface *tmp_if = NULL; struct zebra_if *zif; struct zebra_l2info_bridge *br; - struct zvni_from_svi_param in_param; - struct interface **p_ifp; + struct zebra_l2info_vlan *vl; + uint8_t bridge_vlan_aware; + int found = 0; + /* Defensive check, caller expected to invoke only with valid bridge. */ if (!br_if) return NULL; @@ -3778,56 +4061,33 @@ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) zif = br_if->info; assert(zif); br = &zif->l2info.br; - in_param.bridge_vlan_aware = br->vlan_aware; + bridge_vlan_aware = br->vlan_aware; + /* Check oper status of the SVI. */ - if (!in_param.bridge_vlan_aware) + if (!bridge_vlan_aware) return if_is_operative(br_if) ? br_if : NULL; - in_param.vid = vid; - in_param.br_if = br_if; - in_param.zif = NULL; - p_ifp = &tmp_if; - /* Identify corresponding VLAN interface. */ - ns_walk_func(zvni_map_to_svi_ns, - (void *)&in_param, - (void **)p_ifp); - return tmp_if; -} - -static int zvni_map_to_macvlan_ns(struct ns *ns, - void *_in_param, - void **_p_ifp) -{ - struct zebra_ns *zns = ns->info; - struct zvni_from_svi_param *in_param = - (struct zvni_from_svi_param *)_in_param; - struct interface **p_ifp = (struct interface **)_p_ifp; - struct route_node *rn; - struct interface *tmp_if = NULL; - struct zebra_if *zif; - - if (!in_param) - return NS_WALK_STOP; - /* Identify corresponding VLAN interface. */ + /* TODO: Optimize with a hash. */ + zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { tmp_if = (struct interface *)rn->info; /* Check oper status of the SVI. 
*/ if (!tmp_if || !if_is_operative(tmp_if)) continue; zif = tmp_if->info; - - if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN) + if (!zif || zif->zif_type != ZEBRA_IF_VLAN + || zif->link != br_if) continue; + vl = &zif->l2info.vl; - if (zif->link == in_param->svi_if) { - if (p_ifp) - *p_ifp = tmp_if; - return NS_WALK_STOP; + if (vl->vid == vid) { + found = 1; + break; } } - return NS_WALK_CONTINUE; + return found ? tmp_if : NULL; } /* Map to MAC-VLAN interface corresponding to specified SVI interface. @@ -3835,10 +4095,11 @@ static int zvni_map_to_macvlan_ns(struct ns *ns, static struct interface *zvni_map_to_macvlan(struct interface *br_if, struct interface *svi_if) { + struct zebra_ns *zns; + struct route_node *rn; struct interface *tmp_if = NULL; struct zebra_if *zif; - struct interface **p_ifp; - struct zvni_from_svi_param in_param; + int found = 0; /* Defensive check, caller expected to invoke only with valid bridge. */ if (!br_if) @@ -3853,23 +4114,33 @@ static struct interface *zvni_map_to_macvlan(struct interface *br_if, zif = br_if->info; assert(zif); - in_param.vid = 0; - in_param.br_if = br_if; - in_param.zif = NULL; - in_param.svi_if = svi_if; - p_ifp = &tmp_if; - /* Identify corresponding VLAN interface. */ - ns_walk_func(zvni_map_to_macvlan_ns, - (void *)&in_param, - (void **)p_ifp); - return tmp_if; + zns = zebra_ns_lookup(NS_DEFAULT); + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + /* Check oper status of the SVI. */ + if (!tmp_if || !if_is_operative(tmp_if)) + continue; + zif = tmp_if->info; + + if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN) + continue; + + if (zif->link == svi_if) { + found = 1; + break; + } + } + + return found ? tmp_if : NULL; } + /* * Install remote MAC into the forwarding plane. 
*/ -static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac) +static int zvni_rem_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac, + bool was_static) { const struct zebra_if *zif, *br_zif; const struct zebra_l2info_vxlan *vxl; @@ -3877,6 +4148,8 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac) enum zebra_dplane_result res; const struct interface *br_ifp; vlanid_t vid; + uint32_t nhg_id; + struct in_addr vtep_ip; if (!(mac->flags & ZEBRA_MAC_REMOTE)) return 0; @@ -3894,6 +4167,19 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac) sticky = !!CHECK_FLAG(mac->flags, (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)); + /* If nexthop group for the FDB entry is inactive (not programmed in + * the dataplane) the MAC entry cannot be installed + */ + if (mac->es) { + if (!(mac->es->flags & ZEBRA_EVPNES_NHG_ACTIVE)) + return -1; + nhg_id = mac->es->nhg_id; + vtep_ip.s_addr = 0; + } else { + nhg_id = 0; + vtep_ip = mac->fwd_info.r_vtep_ip; + } + br_zif = (const struct zebra_if *)(br_ifp->info); if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) @@ -3901,8 +4187,9 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac) else vid = 0; - res = dplane_mac_add(zvni->vxlan_if, br_ifp, vid, - &mac->macaddr, mac->fwd_info.r_vtep_ip, sticky); + res = dplane_rem_mac_add(zvni->vxlan_if, br_ifp, vid, + &mac->macaddr, vtep_ip, sticky, + nhg_id, was_static); if (res != ZEBRA_DPLANE_REQUEST_FAILURE) return 0; else @@ -3912,7 +4199,7 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac) /* * Uninstall remote MAC from the forwarding plane. 
*/ -static int zvni_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac) +static int zvni_rem_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac) { const struct zebra_if *zif, *br_zif; const struct zebra_l2info_vxlan *vxl; @@ -3951,7 +4238,7 @@ static int zvni_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac) ifp = zvni->vxlan_if; vtep_ip = mac->fwd_info.r_vtep_ip; - res = dplane_mac_del(ifp, br_ifp, vid, &mac->macaddr, vtep_ip); + res = dplane_rem_mac_del(ifp, br_ifp, vid, &mac->macaddr, vtep_ip); if (res != ZEBRA_DPLANE_REQUEST_FAILURE) return 0; else @@ -3969,7 +4256,7 @@ static void zvni_install_mac_hash(struct hash_bucket *bucket, void *ctxt) mac = (zebra_mac_t *)bucket->data; if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) - zvni_mac_install(wctx->zvni, mac); + zvni_rem_mac_install(wctx->zvni, mac, false); } /* @@ -4003,7 +4290,8 @@ static void zvni_deref_ip2mac(zebra_vni_t *zvni, zebra_mac_t *mac) */ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) && remote_neigh_count(mac) == 0) { - zvni_mac_uninstall(zvni, mac); + zvni_rem_mac_uninstall(zvni, mac); + zebra_evpn_es_mac_deref_entry(mac); UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); } @@ -4018,7 +4306,6 @@ static void zvni_deref_ip2mac(zebra_vni_t *zvni, zebra_mac_t *mac) static void zvni_read_mac_neigh(zebra_vni_t *zvni, struct interface *ifp) { struct zebra_ns *zns; - struct zebra_vrf *zvrf; struct zebra_if *zif; struct interface *vlan_if; struct zebra_l2info_vxlan *vxl; @@ -4026,10 +4313,7 @@ static void zvni_read_mac_neigh(zebra_vni_t *zvni, struct interface *ifp) zif = ifp->info; vxl = &zif->l2info.vxl; - zvrf = zebra_vrf_lookup_by_id(zvni->vrf_id); - if (!zvrf || !zvrf->zns) - return; - zns = zvrf->zns; + zns = zebra_ns_lookup(NS_DEFAULT); if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( @@ -4074,7 +4358,7 @@ static bool vni_hash_cmp(const void *p1, const void *p2) return (zvni1->vni == zvni2->vni); } -static int vni_list_cmp(void *p1, void *p2) +int vni_list_cmp(void *p1, void *p2) { const zebra_vni_t *zvni1 = p1; const 
zebra_vni_t *zvni2 = p2; @@ -4100,7 +4384,7 @@ static void *zvni_alloc(void *p) /* * Look up VNI hash entry. */ -static zebra_vni_t *zvni_lookup(vni_t vni) +zebra_vni_t *zvni_lookup(vni_t vni) { struct zebra_vrf *zvrf; zebra_vni_t tmp_vni; @@ -4131,6 +4415,8 @@ static zebra_vni_t *zvni_add(vni_t vni) zvni = hash_get(zvrf->vni_table, &tmp_zvni, zvni_alloc); assert(zvni); + zebra_evpn_vni_es_init(zvni); + /* Create hash table for MAC */ zvni->mac_table = hash_create(mac_hash_keymake, mac_cmp, "Zebra VNI MAC Table"); @@ -4142,6 +4428,30 @@ static zebra_vni_t *zvni_add(vni_t vni) return zvni; } +/* vni<=>vxlan_zif association */ +static void zvni_vxlan_if_set(zebra_vni_t *zvni, struct interface *ifp, + bool set) +{ + struct zebra_if *zif; + + if (set) { + if (zvni->vxlan_if == ifp) + return; + zvni->vxlan_if = ifp; + } else { + if (!zvni->vxlan_if) + return; + zvni->vxlan_if = NULL; + } + + if (ifp) + zif = ifp->info; + else + zif = NULL; + + zebra_evpn_vxl_vni_set(zif, zvni, set); +} + /* * Delete VNI hash entry. */ @@ -4153,7 +4463,7 @@ static int zvni_del(zebra_vni_t *zvni) zvrf = zebra_vrf_get_evpn(); assert(zvrf); - zvni->vxlan_if = NULL; + zvni_vxlan_if_set(zvni, zvni->vxlan_if, false /* set */); /* Remove references to the BUM mcast grp */ zebra_vxlan_sg_deref(zvni->local_vtep_ip, zvni->mcast_grp); @@ -4166,6 +4476,8 @@ static int zvni_del(zebra_vni_t *zvni) hash_free(zvni->mac_table); zvni->mac_table = NULL; + zebra_evpn_vni_es_cleanup(zvni); + /* Free the VNI hash entry and allocated memory. */ tmp_zvni = hash_release(zvrf->vni_table, zvni); XFREE(MTYPE_ZVNI, tmp_zvni); @@ -4180,6 +4492,7 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni) { struct zserv *client; struct stream *s; + int rc; client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); /* BGP may not be running. 
*/ @@ -4204,13 +4517,22 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni) zebra_route_string(client->proto)); client->vniadd_cnt++; - return zserv_send_message(client, s); + rc = zserv_send_message(client, s); + + if (!(zvni->flags & ZVNI_READY_FOR_BGP)) { + zvni->flags |= ZVNI_READY_FOR_BGP; + /* once the VNI is sent the ES-EVIs can also be replayed + * to BGP + */ + zebra_evpn_vni_update_all_es(zvni); + } + return rc; } /* * Inform BGP about local VNI deletion. */ -static int zvni_send_del_to_client(vni_t vni) +static int zvni_send_del_to_client(zebra_vni_t *zvni) { struct zserv *client; struct stream *s; @@ -4220,38 +4542,41 @@ static int zvni_send_del_to_client(vni_t vni) if (!client) return 0; + if (zvni->flags & ZVNI_READY_FOR_BGP) { + zvni->flags &= ~ZVNI_READY_FOR_BGP; + /* the ES-EVIs must be removed from BGP before the VNI is */ + zebra_evpn_vni_update_all_es(zvni); + } + s = stream_new(ZEBRA_MAX_PACKET_SIZ); stream_reset(s); zclient_create_header(s, ZEBRA_VNI_DEL, zebra_vrf_get_evpn_id()); - stream_putl(s, vni); + stream_putl(s, zvni->vni); /* Write packet size. */ stream_putw_at(s, 0, stream_get_endp(s)); if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug("Send VNI_DEL %u to %s", vni, + zlog_debug("Send VNI_DEL %u to %s", zvni->vni, zebra_route_string(client->proto)); client->vnidel_cnt++; return zserv_send_message(client, s); } -static int zvni_build_hash_table_ns(struct ns *ns, - void *param_in __attribute__((unused)), - void **param_out __attribute__((unused))) +/* + * Build the VNI hash table by going over the VxLAN interfaces. This + * is called when EVPN (advertise-all-vni) is enabled. + */ +static void zvni_build_hash_table(void) { - struct zebra_ns *zns = ns->info; + struct zebra_ns *zns; struct route_node *rn; struct interface *ifp; - struct zebra_vrf *zvrf; - - zvrf = zebra_vrf_get_evpn(); - - if (!zvrf) - return NS_WALK_STOP; /* Walk VxLAN interfaces and create VNI hash. 
*/ + zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { vni_t vni; zebra_vni_t *zvni = NULL; @@ -4268,14 +4593,7 @@ static int zvni_build_hash_table_ns(struct ns *ns, vxl = &zif->l2info.vxl; vni = vxl->vni; - /* link of VXLAN interface should be in zebra_evpn_vrf */ - if (zvrf->zns->ns_id != vxl->link_nsid) { - if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug( - "Intf %s(%u) VNI %u, link not in same namespace than BGP EVPN core instance ", - ifp->name, ifp->ifindex, vni); - continue; - } + /* L3-VNI and L2-VNI are handled seperately */ zl3vni = zl3vni_lookup(vni); if (zl3vni) { @@ -4344,7 +4662,7 @@ static int zvni_build_hash_table_ns(struct ns *ns, zlog_debug( "Failed to add VNI hash, IF %s(%u) L2-VNI %u", ifp->name, ifp->ifindex, vni); - return NS_WALK_CONTINUE; + return; } if (zvni->local_vtep_ip.s_addr != @@ -4358,8 +4676,12 @@ static int zvni_build_hash_table_ns(struct ns *ns, vxl->mcast_grp); zvni->local_vtep_ip = vxl->vtep_ip; zvni->mcast_grp = vxl->mcast_grp; + /* on local vtep-ip check if ES + * orig-ip needs to be updated + */ + zebra_evpn_es_set_base_vni(zvni); } - zvni->vxlan_if = ifp; + zvni_vxlan_if_set(zvni, ifp, true /* set */); vlan_if = zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if); if (vlan_if) { @@ -4381,19 +4703,6 @@ static int zvni_build_hash_table_ns(struct ns *ns, } } } - return NS_WALK_CONTINUE; -} - -/* - * Build the VNI hash table by going over the VxLAN interfaces. This - * is called when EVPN (advertise-all-vni) is enabled. 
- */ - -static void zvni_build_hash_table(void) -{ - ns_walk_func(zvni_build_hash_table_ns, - (void *)NULL, - (void **)NULL); } /* @@ -4719,8 +5028,9 @@ static int zl3vni_rmac_install(zebra_l3vni_t *zl3vni, zebra_mac_t *zrmac) else vid = 0; - res = dplane_mac_add(zl3vni->vxlan_if, br_ifp, vid, - &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0); + res = dplane_rem_mac_add(zl3vni->vxlan_if, br_ifp, vid, + &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0, 0, + false /*was_static*/); if (res != ZEBRA_DPLANE_REQUEST_FAILURE) return 0; else @@ -4769,7 +5079,7 @@ static int zl3vni_rmac_uninstall(zebra_l3vni_t *zl3vni, zebra_mac_t *zrmac) else vid = 0; - res = dplane_mac_del(zl3vni->vxlan_if, br_ifp, vid, + res = dplane_rem_mac_del(zl3vni->vxlan_if, br_ifp, vid, &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip); if (res != ZEBRA_DPLANE_REQUEST_FAILURE) return 0; @@ -4948,7 +5258,8 @@ static int zl3vni_nh_install(zebra_l3vni_t *zl3vni, zebra_neigh_t *n) if (n->flags & ZEBRA_NEIGH_ROUTER_FLAG) flags |= DPLANE_NTF_ROUTER; - dplane_neigh_add(zl3vni->svi_if, &n->ip, &n->emac, flags); + dplane_rem_neigh_add(zl3vni->svi_if, &n->ip, &n->emac, flags, + false /*was_static*/); return ret; } @@ -4965,7 +5276,7 @@ static int zl3vni_nh_uninstall(zebra_l3vni_t *zl3vni, zebra_neigh_t *n) if (!zl3vni->svi_if || !if_is_operative(zl3vni->svi_if)) return 0; - dplane_neigh_delete(zl3vni->svi_if, &n->ip); + dplane_rem_neigh_delete(zl3vni->svi_if, &n->ip); return 0; } @@ -5178,22 +5489,14 @@ static int zl3vni_del(zebra_l3vni_t *zl3vni) return 0; } -static int zl3vni_map_to_vxlan_if_ns(struct ns *ns, - void *_zl3vni, - void **_pifp) +struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) { - struct zebra_ns *zns = ns->info; - zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)_zl3vni; + struct zebra_ns *zns = NULL; struct route_node *rn = NULL; struct interface *ifp = NULL; - struct zebra_vrf *zvrf; - - zvrf = zebra_vrf_get_evpn(); - - if (!zvrf) - return NS_WALK_STOP; /* loop through all vxlan-interface */ 
+ zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { struct zebra_if *zif = NULL; @@ -5208,38 +5511,13 @@ static int zl3vni_map_to_vxlan_if_ns(struct ns *ns, continue; vxl = &zif->l2info.vxl; - if (vxl->vni != zl3vni->vni) - continue; - - /* link of VXLAN interface should be in zebra_evpn_vrf */ - if (zvrf->zns->ns_id != vxl->link_nsid) { - if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug( - "Intf %s(%u) VNI %u, link not in same namespace than BGP EVPN core instance ", - ifp->name, ifp->ifindex, vxl->vni); - continue; + if (vxl->vni == zl3vni->vni) { + zl3vni->local_vtep_ip = vxl->vtep_ip; + return ifp; } - - - zl3vni->local_vtep_ip = vxl->vtep_ip; - if (_pifp) - *_pifp = (void *)ifp; - return NS_WALK_STOP; } - return NS_WALK_CONTINUE; -} - -struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) -{ - struct interface **p_ifp; - struct interface *ifp = NULL; - - p_ifp = &ifp; - - ns_walk_func(zl3vni_map_to_vxlan_if_ns, - (void *)zl3vni, (void **)p_ifp); - return ifp; + return NULL; } struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni) @@ -5524,7 +5802,7 @@ static int zebra_vxlan_handle_vni_transition(struct zebra_vrf *zvrf, vni_t vni, zlog_debug("Del L2-VNI %u - transition to L3-VNI", vni); /* Delete VNI from BGP. */ - zvni_send_del_to_client(zvni->vni); + zvni_send_del_to_client(zvni); /* Free up all neighbors and MAC, if any. */ zvni_neigh_del_all(zvni, 0, 0, DEL_ALL_NEIGH); @@ -5632,6 +5910,1167 @@ static int zebra_vxlan_readd_remote_rmac(zebra_l3vni_t *zl3vni, return 0; } +/**************************** SYNC MAC handling *****************************/ +/* if the mac has been added of a mac-route from the peer + * or if it is being referenced by a neigh added by the + * peer we cannot let it age out i.e. 
we set the static bit + * in the dataplane + */ +static inline bool zebra_vxlan_mac_is_static(zebra_mac_t *mac) +{ + return ((mac->flags & ZEBRA_MAC_ALL_PEER_FLAGS) || + mac->sync_neigh_cnt); +} + +/* mac needs to be locally active or active on an ES peer */ +static inline bool zebra_vxlan_mac_is_ready_for_bgp(uint32_t flags) +{ + return (flags & ZEBRA_MAC_LOCAL) && + (!(flags & ZEBRA_MAC_LOCAL_INACTIVE) || + (flags & ZEBRA_MAC_ES_PEER_ACTIVE)); +} + +/* program sync mac flags in the dataplane */ +void zebra_vxlan_sync_mac_dp_install(zebra_mac_t *mac, bool set_inactive, + bool force_clear_static, const char *caller) +{ + char macbuf[ETHER_ADDR_STRLEN]; + struct interface *ifp; + bool sticky; + bool set_static; + zebra_vni_t *zvni = mac->zvni; + vlanid_t vid; + struct zebra_if *zif; + struct interface *br_ifp; + + /* get the access vlan from the vxlan_device */ + zebra_vxlan_mac_get_access_info(mac, + &ifp, &vid); + + if (!ifp) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("%s: dp-install sync-mac vni %u mac %s es %s 0x%x %sskipped, no access-port", + caller, + zvni->vni, + prefix_mac2str(&mac->macaddr, macbuf, + sizeof(macbuf)), + mac->es ? + mac->es->esi_str : "-", + mac->flags, + set_inactive ? "inactive " : ""); + return; + } + + zif = ifp->info; + br_ifp = zif->brslave_info.br_if; + if (!br_ifp) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("%s: dp-install sync-mac vni %u mac %s es %s 0x%x %sskipped, no br", + caller, + zvni->vni, + prefix_mac2str(&mac->macaddr, macbuf, + sizeof(macbuf)), + mac->es ? + mac->es->esi_str : "-", + mac->flags, + set_inactive ? "inactive " : ""); + return; + } + + sticky = !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY); + if (force_clear_static) + set_static = false; + else + set_static = zebra_vxlan_mac_is_static(mac); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("dp-install sync-mac vni %u mac %s es %s 0x%x %s%s", + zvni->vni, + prefix_mac2str(&mac->macaddr, macbuf, + sizeof(macbuf)), + mac->es ? 
+ mac->es->esi_str : "-", mac->flags, + set_static ? "static " : "", + set_inactive ? "inactive " : ""); + + dplane_local_mac_add(ifp, br_ifp, vid, &mac->macaddr, sticky, + set_static, set_inactive); + +} + +static void zebra_vxlan_mac_send_add_del_to_client(zebra_mac_t *mac, + bool old_bgp_ready, bool new_bgp_ready) +{ + if (new_bgp_ready) + zvni_mac_send_add_to_client(mac->zvni->vni, + &mac->macaddr, mac->flags, + mac->loc_seq, mac->es); + else if (old_bgp_ready) + zvni_mac_send_del_to_client(mac->zvni->vni, + &mac->macaddr, mac->flags, + true /* force */); +} + +/* MAC hold timer is used to age out peer-active flag. + * + * During this wait time we expect the dataplane component or an + * external neighmgr daemon to probe existing hosts to independently + * establish their presence on the ES. + */ +static int zebra_vxlan_mac_hold_exp_cb(struct thread *t) +{ + zebra_mac_t *mac; + bool old_bgp_ready; + bool new_bgp_ready; + bool old_static; + bool new_static; + char macbuf[ETHER_ADDR_STRLEN]; + + mac = THREAD_ARG(t); + /* the purpose of the hold timer is to age out the peer-active + * flag + */ + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + return 0; + + old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + old_static = zebra_vxlan_mac_is_static(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE); + new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + new_static = zebra_vxlan_mac_is_static(mac); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-mac vni %u mac %s es %s 0x%x hold expired", + mac->zvni->vni, + prefix_mac2str(&mac->macaddr, macbuf, + sizeof(macbuf)), + mac->es ? 
+ mac->es->esi_str : "-", + mac->flags); + + /* re-program the local mac in the dataplane if the mac is no + * longer static + */ + if (old_static != new_static) + zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, __func__); + + /* inform bgp if needed */ + if (old_bgp_ready != new_bgp_ready) + zebra_vxlan_mac_send_add_del_to_client(mac, + old_bgp_ready, new_bgp_ready); + + return 0; +} + +static inline void zebra_vxlan_mac_start_hold_timer(zebra_mac_t *mac) +{ + char macbuf[ETHER_ADDR_STRLEN]; + + if (mac->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-mac vni %u mac %s es %s 0x%x hold started", + mac->zvni->vni, + prefix_mac2str(&mac->macaddr, macbuf, + sizeof(macbuf)), + mac->es ? + mac->es->esi_str : "-", + mac->flags); + thread_add_timer(zrouter.master, + zebra_vxlan_mac_hold_exp_cb, + mac, zmh_info->mac_hold_time, + &mac->hold_timer); +} + +static inline void zebra_vxlan_mac_stop_hold_timer(zebra_mac_t *mac) +{ + char macbuf[ETHER_ADDR_STRLEN]; + + if (!mac->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-mac vni %u mac %s es %s 0x%x hold stopped", + mac->zvni->vni, + prefix_mac2str(&mac->macaddr, macbuf, + sizeof(macbuf)), + mac->es ? + mac->es->esi_str : "-", + mac->flags); + THREAD_OFF(mac->hold_timer); +} + +static inline void zebra_vxlan_mac_clear_sync_info(zebra_mac_t *mac) +{ + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_PEER_FLAGS); + zebra_vxlan_mac_stop_hold_timer(mac); +} + +static void zebra_vxlan_sync_mac_del(zebra_mac_t *mac) +{ + char macbuf[ETHER_ADDR_STRLEN]; + bool old_static; + bool new_static; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-mac del vni %u mac %s es %s seq %d f 0x%x", + mac->zvni->vni, + prefix_mac2str(&mac->macaddr, + macbuf, sizeof(macbuf)), + mac->es ? 
mac->es->esi_str : "-", + mac->loc_seq, + mac->flags); + old_static = zebra_vxlan_mac_is_static(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE)) + zebra_vxlan_mac_start_hold_timer(mac); + new_static = zebra_vxlan_mac_is_static(mac); + + if (old_static != new_static) + /* program the local mac in the kernel */ + zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, __func__); +} + +static inline bool zebra_vxlan_mac_is_bgp_seq_ok(zebra_vni_t *zvni, + zebra_mac_t *mac, uint32_t seq, uint16_t ipa_len, + struct ipaddr *ipaddr) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + uint32_t tmp_seq; + + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) + tmp_seq = mac->loc_seq; + else + tmp_seq = mac->rem_seq; + + if (seq < tmp_seq) { + /* if the mac was never advertised to bgp we must accept + * whatever sequence number bgp sends + * XXX - check with Vivek + */ + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) && + !zebra_vxlan_mac_is_ready_for_bgp(mac->flags)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-macip accept vni %u mac %s%s%s lower seq %u f 0x%x", + zvni->vni, + prefix_mac2str(&mac->macaddr, + macbuf, sizeof(macbuf)), + ipa_len ? " IP " : "", + ipa_len ? + ipaddr2str(ipaddr, + ipbuf, sizeof(ipbuf)) : "", + tmp_seq, mac->flags); + return true; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-macip ignore vni %u mac %s%s%s as existing has higher seq %u f 0x%x", + zvni->vni, + prefix_mac2str(&mac->macaddr, + macbuf, sizeof(macbuf)), + ipa_len ? " IP " : "", + ipa_len ? 
+ ipaddr2str(ipaddr, + ipbuf, sizeof(ipbuf)) : "", + tmp_seq, mac->flags); + return false; + } + + return true; +} + +/* sync-path that is active on an ES peer */ +static zebra_mac_t *zebra_vxlan_proc_sync_mac_update(zebra_vni_t *zvni, + struct ethaddr *macaddr, uint16_t ipa_len, + struct ipaddr *ipaddr, uint8_t flags, + uint32_t seq, esi_t *esi, + struct sync_mac_ip_ctx *ctx) +{ + zebra_mac_t *mac; + bool inform_bgp = false; + bool inform_dataplane = false; + bool seq_change = false; + bool es_change = false; + uint32_t tmp_seq; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + bool old_local = false; + bool old_bgp_ready; + bool new_bgp_ready; + + mac = zvni_mac_lookup(zvni, macaddr); + if (!mac) { + /* if it is a new local path we need to inform both + * the control protocol and the data-plane + */ + inform_bgp = true; + inform_dataplane = true; + ctx->mac_created = true; + ctx->mac_inactive = true; + + /* create the MAC and associate it with the dest ES */ + mac = zvni_mac_add(zvni, macaddr); + zebra_evpn_es_mac_ref(mac, esi); + + /* local mac activated by an ES peer */ + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + /* if mac-only route setup peer flags */ + if (!ipa_len) { + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) + SET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY); + else + SET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE); + } + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + old_bgp_ready = false; + new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + } else { + uint32_t old_flags; + uint32_t new_flags; + bool old_static; + bool new_static; + bool sticky; + bool remote_gw; + + old_flags = mac->flags; + sticky = !!CHECK_FLAG(old_flags, ZEBRA_MAC_STICKY); + remote_gw = !!CHECK_FLAG(old_flags, ZEBRA_MAC_REMOTE_DEF_GW); + if (sticky || remote_gw) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("Ignore sync-macip vni %u mac %s%s%s%s%s", + zvni->vni, + prefix_mac2str(macaddr, + macbuf, sizeof(macbuf)), + ipa_len ? 
" IP " : "", + ipa_len ? + ipaddr2str(ipaddr, ipbuf, + sizeof(ipbuf)) : "", + sticky ? " sticky" : "", + remote_gw ? " remote_gw" : ""); + ctx->ignore_macip = true; + return NULL; + } + if (!zebra_vxlan_mac_is_bgp_seq_ok(zvni, mac, seq, + ipa_len, ipaddr)) { + ctx->ignore_macip = true; + return NULL; + } + + old_local = !!CHECK_FLAG(old_flags, ZEBRA_MAC_LOCAL); + old_static = zebra_vxlan_mac_is_static(mac); + + /* re-build the mac flags */ + new_flags = 0; + SET_FLAG(new_flags, ZEBRA_MAC_LOCAL); + /* retain old local activity flag */ + if (old_flags & ZEBRA_MAC_LOCAL) { + new_flags |= (old_flags & ZEBRA_MAC_LOCAL_INACTIVE); + } else { + new_flags |= ZEBRA_MAC_LOCAL_INACTIVE; + ctx->mac_inactive = true; + } + if (ipa_len) { + /* if mac-ip route do NOT update the peer flags + * i.e. retain only flags as is + */ + new_flags |= (old_flags & ZEBRA_MAC_ALL_PEER_FLAGS); + } else { + /* if mac-only route update peer flags */ + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) { + SET_FLAG(new_flags, ZEBRA_MAC_ES_PEER_PROXY); + /* if the mac was peer-active previously we + * need to keep the flag and start the + * holdtimer on it. the peer-active flag is + * cleared on holdtimer expiry. 
+ */ + if (CHECK_FLAG(old_flags, + ZEBRA_MAC_ES_PEER_ACTIVE)) { + SET_FLAG(new_flags, + ZEBRA_MAC_ES_PEER_ACTIVE); + zebra_vxlan_mac_start_hold_timer(mac); + } + } else { + SET_FLAG(new_flags, ZEBRA_MAC_ES_PEER_ACTIVE); + /* stop hold timer if a peer has verified + * reachability + */ + zebra_vxlan_mac_stop_hold_timer(mac); + } + } + mac->rem_seq = 0; + memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); + mac->flags = new_flags; + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC && + (old_flags != new_flags)) + zlog_debug("sync-mac vni %u mac %s old_f 0x%x new_f 0x%x", + zvni->vni, + prefix_mac2str(macaddr, + macbuf, sizeof(macbuf)), + old_flags, mac->flags); + + /* update es */ + es_change = zebra_evpn_es_mac_ref(mac, esi); + /* if mac dest change - inform both sides */ + if (es_change) { + inform_bgp = true; + inform_dataplane = true; + ctx->mac_inactive = true; + } + /* if peer-flag is being set notify dataplane that the + * entry must not be expired because of local inactivity + */ + new_static = zebra_vxlan_mac_is_static(mac); + if (old_static != new_static) + inform_dataplane = true; + + old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(old_flags); + new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + if (old_bgp_ready != new_bgp_ready) + inform_bgp = true; + } + + + /* update sequence number; if that results in a new local sequence + * inform bgp + */ + tmp_seq = MAX(mac->loc_seq, seq); + if (tmp_seq != mac->loc_seq) { + mac->loc_seq = tmp_seq; + seq_change = true; + inform_bgp = true; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-mac %s vni %u mac %s es %s seq %d f 0x%x%s%s", + ctx->mac_created ? + "created" : "updated", + zvni->vni, + prefix_mac2str(macaddr, + macbuf, sizeof(macbuf)), + mac->es ? mac->es->esi_str : "-", + mac->loc_seq, mac->flags, + inform_bgp ? " inform_bgp" : "", + inform_dataplane ? 
" inform_dp" : ""); + + if (inform_bgp) + zebra_vxlan_mac_send_add_del_to_client(mac, + old_bgp_ready, new_bgp_ready); + + /* neighs using the mac may need to be re-sent to + * bgp with updated info + */ + if (seq_change || es_change || !old_local) + zvni_process_neigh_on_local_mac_change(zvni, mac, + seq_change, es_change); + + if (inform_dataplane) { + if (ipa_len) + /* if the mac is being created as a part of MAC-IP + * route wait for the neigh to be updated or + * created before programming the mac + */ + ctx->mac_dp_update_deferred = true; + else + /* program the local mac in the kernel. when the ES + * change we need to force the dataplane to reset + * the activity as we are yet to establish activity + * locally + */ + zebra_vxlan_sync_mac_dp_install(mac, + ctx->mac_inactive, + false /* force_clear_static */, + __func__); + } + + return mac; +} + +/**************************** SYNC neigh handling **************************/ +static inline bool zebra_vxlan_neigh_is_static(zebra_neigh_t *neigh) +{ + return !!(neigh->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS); +} + +static inline bool zebra_vxlan_neigh_is_ready_for_bgp(zebra_neigh_t *n) +{ + bool mac_ready; + bool neigh_ready; + + mac_ready = !!(n->mac->flags & ZEBRA_MAC_LOCAL); + neigh_ready = ((n->flags & ZEBRA_NEIGH_LOCAL) && + IS_ZEBRA_NEIGH_ACTIVE(n) && + (!(n->flags & ZEBRA_NEIGH_LOCAL_INACTIVE) || + (n->flags & ZEBRA_NEIGH_ES_PEER_ACTIVE))) ? 
+ true : false; + + return mac_ready && neigh_ready; +} + +static void zebra_vxlan_sync_neigh_dp_install(zebra_neigh_t *n, + bool set_inactive, bool force_clear_static, const char *caller) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + struct zebra_ns *zns; + struct interface *ifp; + bool set_static; + bool set_router; + + zns = zebra_ns_lookup(NS_DEFAULT); + ifp = if_lookup_by_index_per_ns(zns, n->ifindex); + if (!ifp) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("%s: dp-install sync-neigh vni %u ip %s mac %s if %d f 0x%x skipped", + caller, n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->ifindex, n->flags); + return; + } + + if (force_clear_static) + set_static = false; + else + set_static = zebra_vxlan_neigh_is_static(n); + + set_router = !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + /* XXX - this will change post integration with the new kernel */ + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) + set_inactive = true; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("%s: dp-install sync-neigh vni %u ip %s mac %s if %s(%d) f 0x%x%s%s%s", + caller, n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + ifp->name, n->ifindex, n->flags, + set_router ? " router":"", + set_static ? " static":"", + set_inactive ? 
" inactive":""); + dplane_local_neigh_add(ifp, &n->ip, + &n->emac, set_router, set_static, set_inactive); +} + +static void zebra_vxlan_neigh_send_add_del_to_client(zebra_neigh_t *n, + bool old_bgp_ready, bool new_bgp_ready) +{ + if (new_bgp_ready) + zvni_neigh_send_add_to_client(n->zvni->vni, &n->ip, + &n->emac, n->mac, n->flags, n->loc_seq); + else if (old_bgp_ready) + zvni_neigh_send_del_to_client(n->zvni->vni, &n->ip, + &n->emac, n->flags, n->state, true /*force*/); +} + +/* if the static flag associated with the neigh changes we need + * to update the sync-neigh references against the MAC + * and inform the dataplane about the static flag changes. + */ +static void zebra_vxlan_sync_neigh_static_chg(zebra_neigh_t *n, + bool old_n_static, bool new_n_static, + bool defer_n_dp, bool defer_mac_dp, + const char *caller) +{ + zebra_mac_t *mac = n->mac; + bool old_mac_static; + bool new_mac_static; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + + if (old_n_static == new_n_static) + return; + + /* update the neigh sync references in the dataplane. 
if + * the neigh is in the middle of updates the caller can + * request for a defer + */ + if (!defer_n_dp) + zebra_vxlan_sync_neigh_dp_install(n, false /* set_inactive */, + false /* force_clear_static */, __func__); + + if (!mac) + return; + + /* update the mac sync ref cnt */ + old_mac_static = zebra_vxlan_mac_is_static(mac); + if (new_n_static) { + ++mac->sync_neigh_cnt; + } else if (old_n_static) { + if (mac->sync_neigh_cnt) + --mac->sync_neigh_cnt; + } + new_mac_static = zebra_vxlan_mac_is_static(mac); + + /* update the mac sync references in the dataplane */ + if ((old_mac_static != new_mac_static) && !defer_mac_dp) + zebra_vxlan_sync_mac_dp_install(mac, + false /* set_inactive */, + false /* force_clear_static */, + __func__); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh ref-chg vni %u ip %s mac %s f 0x%x %d%s%s%s%s by %s", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->flags, mac->sync_neigh_cnt, + old_n_static ? " old_n_static" : "", + new_n_static ? " new_n_static" : "", + old_mac_static ? " old_mac_static" : "", + new_mac_static ? " new_mac_static" : "", + caller); +} + +/* Neigh hold timer is used to age out peer-active flag. + * + * During this wait time we expect the dataplane component or an + * external neighmgr daemon to probe existing hosts to independently + * establish their presence on the ES. 
+ */ +static int zebra_vxlan_neigh_hold_exp_cb(struct thread *t) +{ + zebra_neigh_t *n; + bool old_bgp_ready; + bool new_bgp_ready; + bool old_n_static; + bool new_n_static; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + + n = THREAD_ARG(t); + /* the purpose of the hold timer is to age out the peer-active + * flag + */ + if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + return 0; + + old_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n); + old_n_static = zebra_vxlan_neigh_is_static(n); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + new_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n); + new_n_static = zebra_vxlan_neigh_is_static(n); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x hold expired", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->flags); + + /* re-program the local neigh in the dataplane if the neigh is no + * longer static + */ + if (old_n_static != new_n_static) + zebra_vxlan_sync_neigh_static_chg(n, old_n_static, + new_n_static, false /*defer_n_dp*/, + false /*defer_mac_dp*/, __func__); + + /* inform bgp if needed */ + if (old_bgp_ready != new_bgp_ready) + zebra_vxlan_neigh_send_add_del_to_client(n, + old_bgp_ready, new_bgp_ready); + + return 0; +} + +static inline void zebra_vxlan_neigh_start_hold_timer(zebra_neigh_t *n) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + + if (n->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x hold start", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->flags); + thread_add_timer(zrouter.master, + zebra_vxlan_neigh_hold_exp_cb, + n, zmh_info->neigh_hold_time, + &n->hold_timer); +} + +static inline void zebra_vxlan_neigh_stop_hold_timer(zebra_neigh_t *n) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; 
+ + if (!n->hold_timer) + return; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x hold stop", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->flags); + THREAD_OFF(n->hold_timer); +} + +static inline bool zebra_vxlan_neigh_clear_sync_info(zebra_neigh_t *n) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + bool old_n_static = false; + bool new_n_static = false; + + if (n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x clear", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->flags); + + old_n_static = zebra_vxlan_neigh_is_static(n); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_PEER_FLAGS); + new_n_static = zebra_vxlan_neigh_is_static(n); + if (old_n_static != new_n_static) + zebra_vxlan_sync_neigh_static_chg(n, old_n_static, + new_n_static, true /*defer_dp)*/, + false/*defer_mac_dp*/, __func__); + } + zebra_vxlan_neigh_stop_hold_timer(n); + + /* if the neigh static flag changed inform that a dp + * re-install maybe needed + */ + return old_n_static != new_n_static; +} + +static void zebra_vxlan_local_neigh_deref_mac(zebra_neigh_t *n, + bool send_mac_update) +{ + zebra_mac_t *mac = n->mac; + zebra_vni_t *zvni = n->zvni; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + bool old_static; + bool new_static; + + n->mac = NULL; + if (!mac) + return; + + if ((n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) && + mac->sync_neigh_cnt){ + old_static = zebra_vxlan_mac_is_static(mac); + --mac->sync_neigh_cnt; + new_static = zebra_vxlan_mac_is_static(mac); + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh deref mac vni %u ip %s mac %s ref %d", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, + sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + mac->sync_neigh_cnt); + if 
((old_static != new_static) && send_mac_update) + /* program the local mac in the kernel */ + zebra_vxlan_sync_mac_dp_install(mac, + false /* set_inactive */, + false /* force_clear_static */, + __func__); + } + + listnode_delete(mac->neigh_list, n); + zvni_deref_ip2mac(zvni, mac); +} + +static void zebra_vxlan_local_neigh_ref_mac(zebra_neigh_t *n, + struct ethaddr *macaddr, zebra_mac_t *mac, + bool send_mac_update) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + bool old_static; + bool new_static; + + memcpy(&n->emac, macaddr, ETH_ALEN); + n->mac = mac; + + /* Link to new MAC */ + if (!mac) + return; + + listnode_add_sort(mac->neigh_list, n); + if (n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) { + old_static = zebra_vxlan_mac_is_static(mac); + ++mac->sync_neigh_cnt; + new_static = zebra_vxlan_mac_is_static(mac); + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh ref mac vni %u ip %s mac %s ref %d", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, + sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + mac->sync_neigh_cnt); + if ((old_static != new_static) && send_mac_update) + /* program the local mac in the kernel */ + zebra_vxlan_sync_mac_dp_install(mac, + false /*set_inactive*/, + false /*force_clear_static*/, + __func__); + } +} + +static inline bool zebra_vxlan_neigh_is_bgp_seq_ok(zebra_vni_t *zvni, + zebra_neigh_t *n, struct ethaddr *macaddr, uint32_t seq) +{ + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + uint32_t tmp_seq; + + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) + tmp_seq = n->loc_seq; + else + tmp_seq = n->rem_seq; + + if (seq < tmp_seq) { + /* if the neigh was never advertised to bgp we must accept + * whatever sequence number bgp sends + * XXX - check with Vivek + */ + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) && + !zebra_vxlan_neigh_is_ready_for_bgp(n)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-macip accept vni %u mac %s IP %s lower seq %u f 0x%x", + zvni->vni, + 
prefix_mac2str(macaddr, + macbuf, sizeof(macbuf)), + ipaddr2str(&n->ip, + ipbuf, sizeof(ipbuf)), + tmp_seq, n->flags); + return true; + } + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-macip ignore vni %u mac %s IP %s as existing has higher seq %u f 0x%x", + zvni->vni, + prefix_mac2str(macaddr, + macbuf, sizeof(macbuf)), + ipaddr2str(&n->ip, + ipbuf, sizeof(ipbuf)), + tmp_seq, n->flags); + return false; + } + + return true; +} + +static void zebra_vxlan_sync_neigh_del(zebra_neigh_t *n) +{ + bool old_n_static; + bool new_n_static; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh del vni %u ip %s mac %s f 0x%x", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + n->flags); + + old_n_static = zebra_vxlan_neigh_is_static(n); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY); + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) + zebra_vxlan_neigh_start_hold_timer(n); + new_n_static = zebra_vxlan_neigh_is_static(n); + + if (old_n_static != new_n_static) + zebra_vxlan_sync_neigh_static_chg(n, old_n_static, + new_n_static, false /*defer-dp*/, + false /*defer_mac_dp*/, __func__); +} + +static zebra_neigh_t *zebra_vxlan_proc_sync_neigh_update(zebra_vni_t *zvni, + zebra_neigh_t *n, uint16_t ipa_len, + struct ipaddr *ipaddr, uint8_t flags, uint32_t seq, + esi_t *esi, struct sync_mac_ip_ctx *ctx) +{ + struct interface *ifp = NULL; + bool is_router; + zebra_mac_t *mac = ctx->mac; + uint32_t tmp_seq; + bool old_router = false; + bool old_bgp_ready = false; + bool new_bgp_ready; + bool inform_dataplane = false; + bool inform_bgp = false; + bool old_mac_static; + bool new_mac_static; + bool set_dp_inactive = false; + struct zebra_if *zif; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + bool created; + ifindex_t ifindex = 0; + + /* locate l3-svi */ + zif = zvni->vxlan_if->info; + if (zif) { + struct 
zebra_l2info_vxlan *vxl; + + vxl = &zif->l2info.vxl; + ifp = zvni_map_to_svi(vxl->access_vlan, + zif->brslave_info.br_if); + if (ifp) + ifindex = ifp->ifindex; + } + + is_router = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + old_mac_static = zebra_vxlan_mac_is_static(mac); + + if (!n) { + uint32_t n_flags = 0; + + /* New neighbor - create */ + SET_FLAG(n_flags, ZEBRA_NEIGH_LOCAL); + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) + SET_FLAG(n_flags, ZEBRA_NEIGH_ES_PEER_PROXY); + else + SET_FLAG(n_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + SET_FLAG(n_flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + + n = zvni_neigh_add(zvni, ipaddr, &mac->macaddr, mac, + n_flags); + n->ifindex = ifindex; + ZEBRA_NEIGH_SET_ACTIVE(n); + + created = true; + inform_dataplane = true; + inform_bgp = true; + set_dp_inactive = true; + } else { + bool mac_change; + uint32_t old_flags = n->flags; + bool old_n_static; + bool new_n_static; + + created = false; + old_n_static = zebra_vxlan_neigh_is_static(n); + old_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n); + old_router = !!CHECK_FLAG(n->flags, + ZEBRA_NEIGH_ROUTER_FLAG); + + mac_change = !!memcmp(&n->emac, &mac->macaddr, ETH_ALEN); + + /* deref and clear old info */ + if (mac_change) { + if (old_bgp_ready) { + zvni_neigh_send_del_to_client(zvni->vni, &n->ip, + &n->emac, n->flags, n->state, + false /*force*/); + old_bgp_ready = false; + } + if (n->mac) + zebra_vxlan_local_neigh_deref_mac(n, + false /*send_mac_update*/); + } + /* clear old fwd info */ + n->rem_seq = 0; + n->r_vtep_ip.s_addr = 0; + + /* setup new flags */ + n->flags = 0; + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + /* retain activity flag if the neigh was + * previously local + */ + if (old_flags & ZEBRA_NEIGH_LOCAL) { + n->flags |= (old_flags & ZEBRA_NEIGH_LOCAL_INACTIVE); + } else { + inform_dataplane = true; + set_dp_inactive = true; + n->flags |= ZEBRA_NEIGH_LOCAL_INACTIVE; + } + + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) + SET_FLAG(n->flags, 
ZEBRA_NEIGH_ES_PEER_PROXY); + else + SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) { + SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY); + /* if the neigh was peer-active previously we + * need to keep the flag and start the + * holdtimer on it. the peer-active flag is + * cleared on holdtimer expiry. + */ + if (CHECK_FLAG(old_flags, + ZEBRA_NEIGH_ES_PEER_ACTIVE)) { + SET_FLAG(n->flags, + ZEBRA_NEIGH_ES_PEER_ACTIVE); + zebra_vxlan_neigh_start_hold_timer(n); + } + } else { + SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE); + /* stop hold timer if a peer has verified + * reachability + */ + zebra_vxlan_neigh_stop_hold_timer(n); + } + ZEBRA_NEIGH_SET_ACTIVE(n); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH && + (old_flags != n->flags)) + zlog_debug("sync-neigh vni %u ip %s mac %s old_f 0x%x new_f 0x%x", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + old_flags, n->flags); + + new_n_static = zebra_vxlan_neigh_is_static(n); + if (mac_change) { + set_dp_inactive = true; + n->flags |= ZEBRA_NEIGH_LOCAL_INACTIVE; + inform_dataplane = true; + zebra_vxlan_local_neigh_ref_mac(n, &mac->macaddr, + mac, false /*send_mac_update*/); + } else if (old_n_static != new_n_static) { + inform_dataplane = true; + /* if static flags have changed without a mac change + * we need to create the correct sync-refs against + * the existing mac + */ + zebra_vxlan_sync_neigh_static_chg(n, + old_n_static, new_n_static, + true /*defer_dp*/, true /*defer_mac_dp*/, + __func__); + } + + /* Update the forwarding info. */ + if (n->ifindex != ifindex) { + n->ifindex = ifindex; + inform_dataplane = true; + } + } + + /* update the neigh seq. 
we don't bother with the mac seq as + * sync_mac_update already took care of that + */ + tmp_seq = MAX(n->loc_seq, seq); + if (tmp_seq != n->loc_seq) { + n->loc_seq = tmp_seq; + inform_bgp = true; + } + + /* Mark Router flag (R-bit) */ + if (is_router) + SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + else + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); + + if (old_router != is_router) + inform_dataplane = true; + + new_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n); + if (old_bgp_ready != new_bgp_ready) + inform_bgp = true; + + new_mac_static = zebra_vxlan_mac_is_static(mac); + if ((old_mac_static != new_mac_static) || + ctx->mac_dp_update_deferred) + zebra_vxlan_sync_mac_dp_install(mac, + ctx->mac_inactive, + false /* force_clear_static */, + __func__); + + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync-neigh %s vni %u ip %s mac %s if %s(%d) seq %d f 0x%x%s%s", + created ? + "created" : "updated", + n->zvni->vni, + ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), + prefix_mac2str(&n->emac, macbuf, + sizeof(macbuf)), + ifp ? ifp->name : "", ifindex, + n->loc_seq, n->flags, + inform_bgp ? " inform_bgp" : "", + inform_dataplane ? 
" inform_dp" : ""); + + if (inform_dataplane) + zebra_vxlan_sync_neigh_dp_install(n, set_dp_inactive, + false /* force_clear_static */, __func__); + + if (inform_bgp) + zebra_vxlan_neigh_send_add_del_to_client(n, + old_bgp_ready, new_bgp_ready); + + return n; +} + +static void zebra_vxlan_process_sync_macip_add(zebra_vni_t *zvni, + struct ethaddr *macaddr, + uint16_t ipa_len, + struct ipaddr *ipaddr, + uint8_t flags, + uint32_t seq, + esi_t *esi) +{ + struct sync_mac_ip_ctx ctx; + char macbuf[ETHER_ADDR_STRLEN]; + char ipbuf[INET6_ADDRSTRLEN]; + bool sticky; + bool remote_gw; + zebra_neigh_t *n = NULL; + + sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + remote_gw = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + /* if sticky or remote-gw ignore updates from the peer */ + if (sticky || remote_gw) { + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH || + IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("Ignore sync-macip vni %u mac %s%s%s%s%s", + zvni->vni, + prefix_mac2str(macaddr, macbuf, sizeof(macbuf)), + ipa_len ? " IP " : "", + ipa_len ? + ipaddr2str(ipaddr, ipbuf, sizeof(ipbuf)) : "", + sticky ? " sticky" : "", + remote_gw ? " remote_gw" : ""); + return; + } + + if (ipa_len) { + n = zvni_neigh_lookup(zvni, ipaddr); + if (n && + !zebra_vxlan_neigh_is_bgp_seq_ok(zvni, + n, macaddr, seq)) + return; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.mac = zebra_vxlan_proc_sync_mac_update(zvni, macaddr, ipa_len, + ipaddr, flags, seq, esi, &ctx); + if (ctx.ignore_macip || !ctx.mac || !ipa_len) + return; + + zebra_vxlan_proc_sync_neigh_update(zvni, n, ipa_len, + ipaddr, flags, seq, esi, &ctx); +} + +/************************** remote mac-ip handling **************************/ /* Process a remote MACIP add from BGP. 
*/ static void process_remote_macip_add(vni_t vni, struct ethaddr *macaddr, @@ -5639,7 +7078,8 @@ static void process_remote_macip_add(vni_t vni, struct ipaddr *ipaddr, uint8_t flags, uint32_t seq, - struct in_addr vtep_ip) + struct in_addr vtep_ip, + esi_t *esi) { zebra_vni_t *zvni; zebra_vtep_t *zvtep; @@ -5657,6 +7097,8 @@ static void process_remote_macip_add(vni_t vni, bool is_router; bool do_dad = false; bool is_dup_detect = false; + esi_t *old_esi; + bool old_static = false; /* Locate VNI hash entry - expected to exist. */ zvni = zvni_lookup(vni); @@ -5677,22 +7119,36 @@ static void process_remote_macip_add(vni_t vni, return; } + /* Type-2 routes from another PE can be interpreted as remote or + * SYNC based on the destination ES - + * SYNC - if ES is local + * REMOTE - if ES is not local + */ + if (flags & ZEBRA_MACIP_TYPE_SYNC_PATH) { + zebra_vxlan_process_sync_macip_add(zvni, macaddr, ipa_len, + ipaddr, flags, seq, esi); + return; + } + /* The remote VTEP specified should normally exist, but it is * possible that when peering comes up, peer may advertise MACIP * routes before advertising type-3 routes. 
*/ - zvtep = zvni_vtep_find(zvni, &vtep_ip); - if (!zvtep) { - zvtep = zvni_vtep_add(zvni, &vtep_ip, VXLAN_FLOOD_DISABLED); + if (vtep_ip.s_addr) { + zvtep = zvni_vtep_find(zvni, &vtep_ip); if (!zvtep) { - flog_err( - EC_ZEBRA_VTEP_ADD_FAILED, - "Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD", - vni, zvni); - return; - } + zvtep = zvni_vtep_add(zvni, &vtep_ip, + VXLAN_FLOOD_DISABLED); + if (!zvtep) { + flog_err( + EC_ZEBRA_VTEP_ADD_FAILED, + "Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD", + vni, zvni); + return; + } - zvni_vtep_install(zvni, zvtep); + zvni_vtep_install(zvni, zvtep); + } } sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); @@ -5715,10 +7171,12 @@ static void process_remote_macip_add(vni_t vni, return; } - zvrf = zebra_vrf_get_evpn(); + zvrf = vrf_info_lookup(zvni->vxlan_if->vrf_id); if (!zvrf) return; + old_esi = (mac && mac->es) ? &mac->es->esi : zero_esi; + /* check if the remote MAC is unknown or has a change. * If so, that needs to be updated first. Note that client could * install MAC and MACIP separately or just install the latter. @@ -5728,6 +7186,7 @@ static void process_remote_macip_add(vni_t vni, || sticky != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY) || remote_gw != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW) || !IPV4_ADDR_SAME(&mac->fwd_info.r_vtep_ip, &vtep_ip) + || memcmp(old_esi, esi, sizeof(esi_t)) || seq != mac->rem_seq) update_mac = 1; @@ -5743,10 +7202,14 @@ static void process_remote_macip_add(vni_t vni, return; } + zebra_evpn_es_mac_ref(mac, esi); + /* Is this MAC created for a MACIP? */ if (ipa_len) SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); } else { + zebra_evpn_es_mac_ref(mac, esi); + /* When host moves but changes its (MAC,IP) * binding, BGP may install a MACIP entry that * corresponds to "older" location of the host @@ -5793,11 +7256,25 @@ static void process_remote_macip_add(vni_t vni, do_dad = true; /* Remove local MAC from BGP. 
*/ - if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) - zvni_mac_send_del_to_client(zvni->vni, macaddr); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + /* force drop the sync flags */ + old_static = zebra_vxlan_mac_is_static(mac); + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("sync-mac->remote vni %u mac %s es %s seq %d f 0x%x", + zvni->vni, + prefix_mac2str(macaddr, + buf, sizeof(buf)), + mac->es ? + mac->es->esi_str : "-", + mac->loc_seq, + mac->flags); + zebra_vxlan_mac_clear_sync_info(mac); + zvni_mac_send_del_to_client(zvni->vni, macaddr, + mac->flags, false /* force */); + } /* Set "auto" and "remote" forwarding info. */ - UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS); memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); SET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); mac->fwd_info.r_vtep_ip = vtep_ip; @@ -5820,7 +7297,7 @@ static void process_remote_macip_add(vni_t vni, if (!is_dup_detect) { zvni_process_neigh_on_remote_mac_add(zvni, mac); /* Install the entry. */ - zvni_mac_install(zvni, mac); + zvni_rem_mac_install(zvni, mac, old_static); } } @@ -5835,6 +7312,7 @@ static void process_remote_macip_add(vni_t vni, /* Reset flag */ do_dad = false; + old_static = false; /* Check if the remote neighbor itself is unknown or has a * change. If so, create or update and then install the entry. 
@@ -5850,7 +7328,7 @@ static void process_remote_macip_add(vni_t vni, if (update_neigh) { if (!n) { - n = zvni_neigh_add(zvni, ipaddr, macaddr); + n = zvni_neigh_add(zvni, ipaddr, macaddr, mac, 0); if (!n) { zlog_warn( "Failed to add Neigh %s MAC %s VNI %u Remote VTEP %s", @@ -5892,22 +7370,31 @@ static void process_remote_macip_add(vni_t vni, tmp_seq); return; } + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + old_static = zebra_vxlan_neigh_is_static(n); + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("sync->remote neigh vni %u ip %s mac %s seq %d f0x%x", + n->zvni->vni, + ipaddr2str(&n->ip, buf1, + sizeof(buf1)), + prefix_mac2str(&n->emac, buf, + sizeof(buf)), + seq, n->flags); + zebra_vxlan_neigh_clear_sync_info(n); + if (IS_ZEBRA_NEIGH_ACTIVE(n)) + zvni_mac_send_del_to_client(zvni->vni, + macaddr, mac->flags, + false /*force*/); + } if (memcmp(&n->emac, macaddr, sizeof(*macaddr)) != 0) { - /* MAC change, send a delete for old - * neigh if learnt locally. - */ - if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) && - IS_ZEBRA_NEIGH_ACTIVE(n)) - zvni_neigh_send_del_to_client( - zvni->vni, &n->ip, - &n->emac, 0, n->state); - /* update neigh list for macs */ old_mac = zvni_mac_lookup(zvni, &n->emac); if (old_mac) { listnode_delete(old_mac->neigh_list, n); + n->mac = NULL; zvni_deref_ip2mac(zvni, old_mac); } + n->mac = mac; listnode_add_sort(mac->neigh_list, n); memcpy(&n->emac, macaddr, ETH_ALEN); @@ -5933,7 +7420,7 @@ static void process_remote_macip_add(vni_t vni, } /* Set "remote" forwarding info. */ - UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL); + UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_LOCAL_FLAGS); n->r_vtep_ip = vtep_ip; SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE); @@ -5963,7 +7450,7 @@ static void process_remote_macip_add(vni_t vni, false); /* Install the entry. 
*/ if (!is_dup_detect) - zvni_neigh_install(zvni, n); + zvni_rem_neigh_install(zvni, n, old_static); } zvni_probe_neigh_on_mac_add(zvni, mac); @@ -5972,6 +7459,32 @@ static void process_remote_macip_add(vni_t vni, n->rem_seq = seq; } +static void zebra_vxlan_rem_mac_del(zebra_vni_t *zvni, + zebra_mac_t *mac) +{ + zvni_process_neigh_on_remote_mac_del(zvni, mac); + /* the remote sequence number in the auto mac entry + * needs to be reset to 0 as the mac entry may have + * been removed on all VTEPs (including + * the originating one) + */ + mac->rem_seq = 0; + + /* If all remote neighbors referencing a remote MAC + * go away, we need to uninstall the MAC. + */ + if (remote_neigh_count(mac) == 0) { + zvni_rem_mac_uninstall(zvni, mac); + zebra_evpn_es_mac_deref_entry(mac); + UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); + } + + if (list_isempty(mac->neigh_list)) + zvni_mac_del(zvni, mac); + else + SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); +} + /* Process a remote MACIP delete from BGP. */ static void process_remote_macip_del(vni_t vni, struct ethaddr *macaddr, @@ -6013,11 +7526,6 @@ static void process_remote_macip_del(vni_t vni, zns = zebra_ns_lookup(NS_DEFAULT); vxl = &zif->l2info.vxl; - /* It is possible remote vtep del request is processed prior to - * remote macip route delete. remote_vtep_del does not clean up - * the macip route delete. Explicite withdraw of the macip route - * is expected to recieve. This handler removes the remote route. - */ mac = zvni_mac_lookup(zvni, macaddr); if (ipa_len) n = zvni_neigh_lookup(zvni, ipaddr); @@ -6076,11 +7584,14 @@ static void process_remote_macip_del(vni_t vni, * "old" neighbor (as these are two different MACIP routes). * Do the delete only if the MAC matches. 
*/ - if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE) - && (memcmp(n->emac.octet, macaddr->octet, ETH_ALEN) == 0)) { - zvni_neigh_uninstall(zvni, n); - zvni_neigh_del(zvni, n); - zvni_deref_ip2mac(zvni, mac); + if (!memcmp(n->emac.octet, macaddr->octet, ETH_ALEN)) { + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { + zebra_vxlan_sync_neigh_del(n); + } else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { + zvni_neigh_uninstall(zvni, n); + zvni_neigh_del(zvni, n); + zvni_deref_ip2mac(zvni, mac); + } } } else { /* DAD: when MAC is freeze state as remote learn event, @@ -6103,27 +7614,11 @@ static void process_remote_macip_del(vni_t vni, macaddr, vxl->access_vlan); } - if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { - zvni_process_neigh_on_remote_mac_del(zvni, mac); - /* - * the remote sequence number in the auto mac entry - * needs to be reset to 0 as the mac entry may have - * been removed on all VTEPs (including - * the originating one) - */ - mac->rem_seq = 0; - - /* If all remote neighbors referencing a remote MAC - * go away, we need to uninstall the MAC. 
- */ - if (remote_neigh_count(mac) == 0) { - zvni_mac_uninstall(zvni, mac); - UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); - } - if (list_isempty(mac->neigh_list)) - zvni_mac_del(zvni, mac); - else - SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + if (!ipa_len) + zebra_vxlan_sync_mac_del(mac); + } else if (CHECK_FLAG(mac->flags, ZEBRA_NEIGH_REMOTE)) { + zebra_vxlan_rem_mac_del(zvni, mac); } } } @@ -6559,8 +8054,7 @@ void zebra_vxlan_print_neigh_vni(struct vty *vty, struct zebra_vrf *zvrf, vty_out(vty, "Number of ARPs (local and remote) known for this VNI: %u\n", num_neigh); - vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", -wctx.addr_width, - "IP", "Type", "State", "MAC", "Remote VTEP", "Seq #'s"); + zvni_print_neigh_hdr(vty, &wctx); } else json_object_int_add(json, "numArpNd", num_neigh); @@ -6770,9 +8264,9 @@ void zebra_vxlan_print_neigh_vni_dad(struct vty *vty, vty_out(vty, "Number of ARPs (local and remote) known for this VNI: %u\n", num_neigh); - vty_out(vty, "%*s %-6s %-8s %-17s %-21s\n", + vty_out(vty, "%*s %-6s %-8s %-17s %-30s\n", -wctx.addr_width, "IP", "Type", - "State", "MAC", "Remote VTEP"); + "State", "MAC", "Remote ES/VTEP"); } else json_object_int_add(json, "numArpNd", num_neigh); @@ -6825,8 +8319,11 @@ void zebra_vxlan_print_macs_vni(struct vty *vty, struct zebra_vrf *zvrf, vty_out(vty, "Number of MACs (local and remote) known for this VNI: %u\n", num_macs); - vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", "MAC", "Type", - "Intf/Remote VTEP", "VLAN", "Seq #'s"); + vty_out(vty, + "Flags: N=sync-neighs, I=local-inactive, P=peer-active, X=peer-proxy\n"); + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %s\n", "MAC", + "Type", "Flags", "Intf/Remote ES/VTEP", + "VLAN", "Seq #'s"); } else json_object_int_add(json, "numMacs", num_macs); @@ -7018,8 +8515,8 @@ void zebra_vxlan_print_macs_vni_dad(struct vty *vty, vty_out(vty, "Number of MACs (local and remote) known for this VNI: %u\n", num_macs); - vty_out(vty, "%-17s %-6s %-21s 
%-5s\n", "MAC", "Type", - "Intf/Remote VTEP", "VLAN"); + vty_out(vty, "%-17s %-6s %-5s %-30s %-5s\n", "MAC", "Type", + "Flags", "Intf/Remote ES/VTEP", "VLAN"); } else json_object_int_add(json, "numMacs", num_macs); @@ -7076,7 +8573,8 @@ int zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, vni_t vni, if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) ZEBRA_NEIGH_SET_INACTIVE(nbr); else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) - zvni_neigh_install(zvni, nbr); + zvni_rem_neigh_install(zvni, nbr, + false /*was_static*/); } UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); @@ -7102,17 +8600,18 @@ int zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, vni_t vni, if (zvni_mac_send_add_to_client(zvni->vni, &mac->macaddr, mac->flags, - mac->loc_seq)) + mac->loc_seq, mac->es)) return 0; /* Process all neighbors associated with this MAC. */ - zvni_process_neigh_on_local_mac_change(zvni, mac, 0); + zvni_process_neigh_on_local_mac_change(zvni, mac, 0, + 0 /*es_change*/); } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { zvni_process_neigh_on_remote_mac_add(zvni, mac); /* Install the entry. */ - zvni_mac_install(zvni, mac); + zvni_rem_mac_install(zvni, mac, false /* was_static */); } return 0; @@ -7172,10 +8671,10 @@ int zebra_vxlan_clear_dup_detect_vni_ip(struct zebra_vrf *zvrf, vni_t vni, if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) { zvni_neigh_send_add_to_client(zvni->vni, ip, - &nbr->emac, + &nbr->emac, nbr->mac, nbr->flags, nbr->loc_seq); } else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) { - zvni_neigh_install(zvni, nbr); + zvni_rem_neigh_install(zvni, nbr, false /*was_static*/); } return 0; @@ -7222,17 +8721,18 @@ static void zvni_clear_dup_mac_hash(struct hash_bucket *bucket, void *ctxt) /* Inform to BGP */ if (zvni_mac_send_add_to_client(zvni->vni, &mac->macaddr, - mac->flags, mac->loc_seq)) + mac->flags, mac->loc_seq, mac->es)) return; /* Process all neighbors associated with this MAC. 
*/ - zvni_process_neigh_on_local_mac_change(zvni, mac, 0); + zvni_process_neigh_on_local_mac_change(zvni, mac, 0, + 0 /*es_change*/); } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { zvni_process_neigh_on_remote_mac_add(zvni, mac); /* Install the entry. */ - zvni_mac_install(zvni, mac); + zvni_rem_mac_install(zvni, mac, false /* was_static */); } } @@ -7267,10 +8767,10 @@ static void zvni_clear_dup_neigh_hash(struct hash_bucket *bucket, void *ctxt) if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) { zvni_neigh_send_add_to_client(zvni->vni, &nbr->ip, - &nbr->emac, + &nbr->emac, nbr->mac, nbr->flags, nbr->loc_seq); } else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) { - zvni_neigh_install(zvni, nbr); + zvni_rem_neigh_install(zvni, nbr, false /*was_static*/); } } @@ -7666,6 +9166,8 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp, zebra_mac_t *zmac = NULL; zebra_l3vni_t *zl3vni = NULL; struct zebra_vrf *zvrf; + bool old_bgp_ready; + bool new_bgp_ready; /* check if this is a remote neigh entry corresponding to remote * next-hop @@ -7720,7 +9222,36 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp, * deleted it, it needs to be re-installed as Quagga is the owner. 
*/ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { - zvni_neigh_install(zvni, n); + zvni_rem_neigh_install(zvni, n, false /*was_static*/); + return 0; + } + + /* if this is a sync entry it cannot be dropped re-install it in + * the dataplane + */ + old_bgp_ready = + zebra_vxlan_neigh_is_ready_for_bgp(n); + if (zebra_vxlan_neigh_is_static(n)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + zlog_debug("re-add sync neigh vni %u ip %s mac %s 0x%x", + n->zvni->vni, + ipaddr2str(&n->ip, buf, sizeof(buf)), + prefix_mac2str(&n->emac, buf2, + sizeof(buf2)), + n->flags); + + if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) + SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE); + /* inform-bgp about change in local-activity if any */ + new_bgp_ready = + zebra_vxlan_neigh_is_ready_for_bgp(n); + zebra_vxlan_neigh_send_add_del_to_client(n, + old_bgp_ready, new_bgp_ready); + + /* re-install the entry in the kernel */ + zebra_vxlan_sync_neigh_dp_install(n, false /* set_inactive */, + false /* force_clear_static */, __func__); + return 0; } @@ -7740,7 +9271,9 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp, ZEBRA_NEIGH_SET_INACTIVE(n); /* Remove neighbor from BGP. */ - zvni_neigh_send_del_to_client(zvni->vni, &n->ip, &n->emac, 0, n->state); + zvni_neigh_send_del_to_client(zvni->vni, &n->ip, + &n->emac, n->flags, n->state, + false /* force */); /* Delete this neighbor entry. 
*/ zvni_neigh_del(zvni, n); @@ -7765,7 +9298,8 @@ int zebra_vxlan_handle_kernel_neigh_update(struct interface *ifp, struct ethaddr *macaddr, uint16_t state, bool is_ext, - bool is_router) + bool is_router, + bool local_inactive, bool dp_static) { char buf[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; @@ -7786,19 +9320,20 @@ int zebra_vxlan_handle_kernel_neigh_update(struct interface *ifp, if (!zvni) return 0; - if (IS_ZEBRA_DEBUG_VXLAN) + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) zlog_debug( - "Add/Update neighbor %s MAC %s intf %s(%u) state 0x%x %s %s-> L2-VNI %u", + "Add/Update neighbor %s MAC %s intf %s(%u) state 0x%x %s%s%s-> L2-VNI %u", ipaddr2str(ip, buf2, sizeof(buf2)), prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name, ifp->ifindex, state, is_ext ? "ext-learned " : "", is_router ? "router " : "", + local_inactive ? "local_inactive " : "", zvni->vni); /* Is this about a local neighbor or a remote one? */ if (!is_ext) return zvni_local_neigh_update(zvni, ifp, ip, macaddr, - is_router); + is_router, local_inactive, dp_static); return zvni_remote_neigh_update(zvni, ifp, ip, macaddr, state); } @@ -7807,7 +9342,7 @@ static int32_t zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni, struct ethaddr *macaddr, uint16_t *ipa_len, struct ipaddr *ip, struct in_addr *vtep_ip, - uint8_t *flags, uint32_t *seq) + uint8_t *flags, uint32_t *seq, esi_t *esi) { uint16_t l = 0; @@ -7845,6 +9380,8 @@ zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni, STREAM_GETC(s, *flags); STREAM_GETL(s, *seq); l += 5; + STREAM_GET(esi, s, sizeof(esi_t)); + l += sizeof(esi_t); } return l; @@ -7876,7 +9413,7 @@ void zebra_vxlan_remote_macip_del(ZAPI_HANDLER_ARGS) while (l < hdr->length) { int res_length = zebra_vxlan_remote_macip_helper( false, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip, NULL, - NULL); + NULL, NULL); if (res_length == -1) goto stream_failure; @@ -7917,6 +9454,8 @@ void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS) 
uint32_t seq; char buf[ETHER_ADDR_STRLEN]; char buf1[INET6_ADDRSTRLEN]; + esi_t esi; + char esi_buf[ESI_STR_LEN]; memset(&macaddr, 0, sizeof(struct ethaddr)); memset(&ip, 0, sizeof(struct ipaddr)); @@ -7932,25 +9471,32 @@ void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS) while (l < hdr->length) { int res_length = zebra_vxlan_remote_macip_helper( true, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip, - &flags, &seq); + &flags, &seq, &esi); if (res_length == -1) goto stream_failure; l += res_length; - if (IS_ZEBRA_DEBUG_VXLAN) + if (IS_ZEBRA_DEBUG_VXLAN) { + if (memcmp(&esi, zero_esi, sizeof(esi_t))) + esi_to_str(&esi, esi_buf, sizeof(esi_buf)); + else + strlcpy(esi_buf, "-", ESI_STR_LEN); zlog_debug( - "Recv MACIP ADD VNI %u MAC %s%s%s flags 0x%x seq %u VTEP %s from %s", + "Recv %sMACIP ADD VNI %u MAC %s%s%s flags 0x%x seq %u VTEP %s ESI %s from %s", + (flags & ZEBRA_MACIP_TYPE_SYNC_PATH) ? + "sync-" : "", vni, prefix_mac2str(&macaddr, buf, sizeof(buf)), ipa_len ? " IP " : "", ipa_len ? ipaddr2str(&ip, buf1, sizeof(buf1)) : "", - flags, seq, inet_ntoa(vtep_ip), + flags, seq, inet_ntoa(vtep_ip), esi_buf, zebra_route_string(client->proto)); + } process_remote_macip_add(vni, &macaddr, ipa_len, &ip, - flags, seq, vtep_ip); + flags, seq, vtep_ip, &esi); } stream_failure: @@ -8049,7 +9595,8 @@ int zebra_vxlan_check_del_local_mac(struct interface *ifp, ifp->ifindex, vni, mac->flags); /* Remove MAC from BGP. 
*/ - zvni_mac_send_del_to_client(zvni->vni, macaddr); + zvni_mac_send_del_to_client(zvni->vni, macaddr, + mac->flags, false /* force */); /* * If there are no neigh associated with the mac delete the mac @@ -8058,7 +9605,7 @@ int zebra_vxlan_check_del_local_mac(struct interface *ifp, if (!listcount(mac->neigh_list)) { zvni_mac_del(zvni, mac); } else { - UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS); UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY); SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); } @@ -8116,7 +9663,7 @@ int zebra_vxlan_check_readd_remote_mac(struct interface *ifp, prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name, ifp->ifindex, vni); - zvni_mac_install(zvni, mac); + zvni_rem_mac_install(zvni, mac, false /* was_static */); return 0; } @@ -8129,6 +9676,8 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if, zebra_vni_t *zvni; zebra_mac_t *mac; char buf[ETHER_ADDR_STRLEN]; + bool old_bgp_ready; + bool new_bgp_ready; /* We are interested in MACs only on ports or (port, VLAN) that * map to a VNI. @@ -8158,11 +9707,46 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if, ifp->ifindex, vid, zvni->vni, mac->loc_seq, mac->flags, listcount(mac->neigh_list)); + old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + if (zebra_vxlan_mac_is_static(mac)) { + /* this is a synced entry and can only be removed when the + * es-peers stop advertising it. + */ + memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); + + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("re-add sync-mac vni %u mac %s es %s seq %d f 0x%x", + zvni->vni, + prefix_mac2str(macaddr, + buf, sizeof(buf)), + mac->es ? 
mac->es->esi_str : "-", + mac->loc_seq, + mac->flags); + + /* inform-bgp about change in local-activity if any */ + if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) { + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + zebra_vxlan_mac_send_add_del_to_client(mac, + old_bgp_ready, new_bgp_ready); + } + + /* re-install the entry in the kernel */ + zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, + __func__); + + return 0; + } + /* Update all the neigh entries associated with this mac */ zvni_process_neigh_on_local_mac_del(zvni, mac); /* Remove MAC from BGP. */ - zvni_mac_send_del_to_client(zvni->vni, macaddr); + zvni_mac_send_del_to_client(zvni->vni, macaddr, + mac->flags, false /* force */); + + zebra_evpn_es_mac_deref_entry(mac); /* * If there are no neigh associated with the mac delete the mac @@ -8171,7 +9755,7 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if, if (!listcount(mac->neigh_list)) { zvni_mac_del(zvni, mac); } else { - UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); + UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS); UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY); SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); } @@ -8179,13 +9763,36 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if, return 0; } +/* update local fowarding info. 
return true if a dest-ES change + * is detected + */ +static bool zebra_vxlan_local_mac_update_fwd_info(zebra_mac_t *mac, + struct interface *ifp, vlanid_t vid) +{ + struct zebra_if *zif = ifp->info; + bool es_change; + + memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); + + es_change = zebra_evpn_es_mac_ref_entry(mac, zif->es_info.es); + + if (!mac->es) { + /* if es is set fwd_info is not-relevant/taped-out */ + mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.vid = vid; + } + + return es_change; +} + /* * Handle local MAC add (on a port or VLAN corresponding to this VNI). */ int zebra_vxlan_local_mac_add_update(struct interface *ifp, struct interface *br_if, struct ethaddr *macaddr, vlanid_t vid, - bool sticky) + bool sticky, bool local_inactive, + bool dp_static) { zebra_vni_t *zvni; zebra_mac_t *mac; @@ -8196,11 +9803,13 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, bool upd_neigh = false; bool is_dup_detect = false; struct in_addr vtep_ip = {.s_addr = 0}; - ns_id_t local_ns_id = NS_DEFAULT; - - zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id); - if (zvrf && zvrf->zns) - local_ns_id = zvrf->zns->ns_id; + bool es_change = false; + bool new_bgp_ready; + /* assume inactive if not present or if not local */ + bool old_local_inactive = true; + bool old_bgp_ready = false; + bool inform_dataplane = false; + bool new_static = false; /* We are interested in MACs only on ports or (port, VLAN) that * map to a VNI. @@ -8224,22 +9833,24 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, return -1; } - zvrf = zebra_vrf_get_evpn(); + zvrf = vrf_info_lookup(zvni->vxlan_if->vrf_id); if (!zvrf) { if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug(" No Evpn Global Vrf found"); + zlog_debug(" No Vrf found for vrf_id: %d", + zvni->vxlan_if->vrf_id); return -1; } /* Check if we need to create or update or it is a NO-OP. 
*/ mac = zvni_mac_lookup(zvni, macaddr); if (!mac) { - if (IS_ZEBRA_DEBUG_VXLAN) + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) zlog_debug( - "ADD %sMAC %s intf %s(%u) VID %u -> VNI %u", + "ADD %sMAC %s intf %s(%u) VID %u -> VNI %u%s", sticky ? "sticky " : "", prefix_mac2str(macaddr, buf, sizeof(buf)), - ifp->name, ifp->ifindex, vid, zvni->vni); + ifp->name, ifp->ifindex, vid, zvni->vni, + local_inactive ? " local-inactive" : ""); mac = zvni_mac_add(zvni, macaddr); if (!mac) { @@ -8251,23 +9862,33 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, return -1; } SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); - mac->fwd_info.local.ifindex = ifp->ifindex; - mac->fwd_info.local.ns_id = local_ns_id; - mac->fwd_info.local.vid = vid; + es_change = zebra_vxlan_local_mac_update_fwd_info(mac, + ifp, vid); if (sticky) SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); inform_client = true; - } else { - if (IS_ZEBRA_DEBUG_VXLAN) + if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) zlog_debug( - "UPD %sMAC %s intf %s(%u) VID %u -> VNI %u curFlags 0x%x", + "UPD %sMAC %s intf %s(%u) VID %u -> VNI %u %scurFlags 0x%x", sticky ? "sticky " : "", prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name, ifp->ifindex, vid, zvni->vni, + local_inactive ? "local-inactive " : "", mac->flags); if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { + struct interface *old_ifp; + vlanid_t old_vid; + bool old_static; + + zebra_vxlan_mac_get_access_info(mac, + &old_ifp, &old_vid); + old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp( + mac->flags); + old_local_inactive = !!(mac->flags & + ZEBRA_MAC_LOCAL_INACTIVE); + old_static = zebra_vxlan_mac_is_static(mac); if (CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)) mac_sticky = true; @@ -8276,17 +9897,20 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, * BGP, note it. 
*/ if (mac_sticky == sticky - && mac->fwd_info.local.ifindex == ifp->ifindex - && mac->fwd_info.local.ns_id == local_ns_id - && mac->fwd_info.local.vid == vid) { + && old_ifp == ifp + && old_vid == vid + && old_local_inactive == local_inactive + && dp_static == old_static) { if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - " Add/Update %sMAC %s intf %s(%u) VID %u -> VNI %u, entry exists and has not changed ", + " Add/Update %sMAC %s intf %s(%u) VID %u -> VNI %u%s, entry exists and has not changed ", sticky ? "sticky " : "", prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name, ifp->ifindex, vid, - zvni->vni); + zvni->vni, + local_inactive ? + " local_inactive" : ""); return 0; } if (mac_sticky != sticky) { @@ -8299,11 +9923,31 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, inform_client = true; } - memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); - mac->fwd_info.local.ifindex = ifp->ifindex; - mac->fwd_info.local.ns_id = local_ns_id; - mac->fwd_info.local.vid = vid; - + es_change = zebra_vxlan_local_mac_update_fwd_info(mac, + ifp, vid); + /* If an es_change is detected we need to advertise + * the route with a sequence that is one + * greater. 
This is need to indicate a mac-move + * to the ES peers + */ + if (es_change) { + mac->loc_seq = mac->loc_seq + 1; + /* force drop the peer/sync info as it is + * simply no longer relevant + */ + if (CHECK_FLAG(mac->flags, + ZEBRA_MAC_ALL_PEER_FLAGS)) { + zebra_vxlan_mac_clear_sync_info(mac); + new_static = + zebra_vxlan_mac_is_static(mac); + /* if we clear peer-flags we + * also need to notify the dataplane + * to drop the static flag + */ + if (old_static != new_static) + inform_dataplane = true; + } + } } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) || CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) { bool do_dad = false; @@ -8337,10 +9981,8 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE); UNSET_FLAG(mac->flags, ZEBRA_MAC_AUTO); SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); - memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); - mac->fwd_info.local.ifindex = ifp->ifindex; - mac->fwd_info.local.ns_id = local_ns_id; - mac->fwd_info.local.vid = vid; + es_change = zebra_vxlan_local_mac_update_fwd_info(mac, + ifp, vid); if (sticky) SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); else @@ -8363,16 +10005,57 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, } } - /* Inform BGP if required. 
*/ - if (inform_client) { - if (zvni_mac_send_add_to_client(zvni->vni, macaddr, - mac->flags, mac->loc_seq)) - return -1; + /* if the dataplane thinks the entry is sync but it is + * not sync in zebra we need to re-install to fixup + */ + if (dp_static) { + new_static = zebra_vxlan_mac_is_static(mac); + if (!new_static) + inform_dataplane = true; } + if (local_inactive) + SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + else + UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE); + + new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags); + /* if local-activity has changed we need update bgp + * even if bgp already knows about the mac + */ + if ((old_local_inactive != local_inactive) || + (new_bgp_ready != old_bgp_ready)) { + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + zlog_debug("local mac vni %u mac %s es %s seq %d f 0x%x%s", + zvni->vni, + prefix_mac2str(macaddr, + buf, sizeof(buf)), + mac->es ? mac->es->esi_str : "", + mac->loc_seq, + mac->flags, + local_inactive ? + " local-inactive" : ""); + inform_client = true; + } + + if (es_change) { + inform_client = true; + upd_neigh = true; + } + + /* Inform dataplane if required. */ + if (inform_dataplane) + zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */, + false /* force_clear_static */, __func__); + + /* Inform BGP if required. */ + if (inform_client) + zebra_vxlan_mac_send_add_del_to_client(mac, + old_bgp_ready, new_bgp_ready); + /* Process all neighbors associated with this MAC, if required. */ if (upd_neigh) - zvni_process_neigh_on_local_mac_change(zvni, mac, 0); + zvni_process_neigh_on_local_mac_change(zvni, mac, 0, es_change); return 0; } @@ -8892,7 +10575,7 @@ int zebra_vxlan_if_down(struct interface *ifp) assert(zvni->vxlan_if == ifp); /* Delete this VNI from BGP. */ - zvni_send_del_to_client(zvni->vni); + zvni_send_del_to_client(zvni); /* Free up all neighbors and MACs, if any. 
*/ zvni_neigh_del_all(zvni, 1, 0, DEL_ALL_NEIGH); @@ -9033,9 +10716,8 @@ int zebra_vxlan_if_del(struct interface *ifp) zl3vni = zl3vni_from_vrf(zvni->vrf_id); if (zl3vni) listnode_delete(zl3vni->l2vnis, zvni); - /* Delete VNI from BGP. */ - zvni_send_del_to_client(zvni->vni); + zvni_send_del_to_client(zvni); /* Free up all neighbors and MAC, if any. */ zvni_neigh_del_all(zvni, 0, 0, DEL_ALL_NEIGH); @@ -9154,7 +10836,7 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags) && (zif->brslave_info.bridge_ifindex == IFINDEX_INTERNAL)) { /* Delete from client, remove all remote VTEPs */ /* Also, free up all MACs and neighbors. */ - zvni_send_del_to_client(zvni->vni); + zvni_send_del_to_client(zvni); zvni_neigh_del_all(zvni, 1, 0, DEL_ALL_NEIGH); zvni_mac_del_all(zvni, 1, 0, DEL_ALL_MAC); zvni_vtep_del_all(zvni, 1); @@ -9177,9 +10859,12 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags) zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp); zvni->local_vtep_ip = vxl->vtep_ip; zvni->mcast_grp = vxl->mcast_grp; + /* on local vtep-ip check if ES orig-ip + * needs to be updated + */ + zebra_evpn_es_set_base_vni(zvni); } - zvni->vxlan_if = ifp; - + zvni_vxlan_if_set(zvni, ifp, true /* set */); /* Take further actions needed. * Note that if we are here, there is a change of interest. 
*/ @@ -9290,8 +10975,12 @@ int zebra_vxlan_if_add(struct interface *ifp) zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp); zvni->local_vtep_ip = vxl->vtep_ip; zvni->mcast_grp = vxl->mcast_grp; + /* on local vtep-ip check if ES orig-ip + * needs to be updated + */ + zebra_evpn_es_set_base_vni(zvni); } - zvni->vxlan_if = ifp; + zvni_vxlan_if_set(zvni, ifp, true /* set */); vlan_if = zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if); if (vlan_if) { @@ -9811,25 +11500,6 @@ stream_failure: return; } -static int macfdb_read_ns(struct ns *ns, - void *_in_param __attribute__((unused)), - void **out_param __attribute__((unused))) -{ - struct zebra_ns *zns = ns->info; - - macfdb_read(zns); - return NS_WALK_CONTINUE; -} - -static int neigh_read_ns(struct ns *ns, - void *_in_param __attribute__((unused)), - void **out_param __attribute__((unused))) -{ - struct zebra_ns *zns = ns->info; - - neigh_read(zns); - return NS_WALK_CONTINUE; -} /* * Handle message from client to learn (or stop learning) about VNIs and MACs. @@ -9871,6 +11541,9 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS) /* Note BUM handling */ zvrf->vxlan_flood_ctrl = flood_ctrl; + /* Replay all ESs */ + zebra_evpn_es_send_all_to_client(true /* add */); + /* Build VNI hash table and inform BGP. */ zvni_build_hash_table(); @@ -9879,16 +11552,19 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS) NULL); /* Read the MAC FDB */ - ns_walk_func(macfdb_read_ns, NULL, NULL); + macfdb_read(zvrf->zns); /* Read neighbors */ - ns_walk_func(neigh_read_ns, NULL, NULL); + neigh_read(zvrf->zns); } else { /* Cleanup VTEPs for all VNIs - uninstall from * kernel and free entries. 
*/ hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf); + /* Delete all ESs in BGP */ + zebra_evpn_es_send_all_to_client(false /* add */); + /* cleanup all l3vnis */ hash_iterate(zrouter.l3vni_table, zl3vni_cleanup_all, NULL); @@ -9917,10 +11593,15 @@ void zebra_vxlan_init_tables(struct zebra_vrf *zvrf) /* Cleanup VNI info, but don't free the table. */ void zebra_vxlan_cleanup_tables(struct zebra_vrf *zvrf) { + struct zebra_vrf *evpn_zvrf = zebra_vrf_get_evpn(); + if (!zvrf) return; hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf); hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL); + + if (zvrf == evpn_zvrf) + zebra_evpn_es_cleanup(); } /* Close all VNI handling */ @@ -9938,12 +11619,14 @@ void zebra_vxlan_init(void) zrouter.l3vni_table = hash_create(l3vni_hash_keymake, l3vni_hash_cmp, "Zebra VRF L3 VNI table"); zrouter.evpn_vrf = NULL; + zebra_evpn_mh_init(); } /* free l3vni table */ void zebra_vxlan_disable(void) { hash_free(zrouter.l3vni_table); + zebra_evpn_mh_terminate(); } /* get the l3vni svi ifindex */ @@ -10000,9 +11683,9 @@ static int zebra_vxlan_dad_ip_auto_recovery_exp(struct thread *t) /* Send to BGP */ if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) { zvni_neigh_send_add_to_client(zvni->vni, &nbr->ip, &nbr->emac, - nbr->flags, nbr->loc_seq); + nbr->mac, nbr->flags, nbr->loc_seq); } else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) { - zvni_neigh_install(zvni, nbr); + zvni_rem_neigh_install(zvni, nbr, false /*was_static*/); } return 0; @@ -10045,7 +11728,8 @@ static int zebra_vxlan_dad_mac_auto_recovery_exp(struct thread *t) if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) ZEBRA_NEIGH_SET_INACTIVE(nbr); else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) - zvni_neigh_install(zvni, nbr); + zvni_rem_neigh_install(zvni, nbr, + false /*was_static*/); } UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE); @@ -10064,17 +11748,18 @@ static int zebra_vxlan_dad_mac_auto_recovery_exp(struct thread *t) if (CHECK_FLAG(mac->flags, 
ZEBRA_MAC_LOCAL)) { /* Inform to BGP */ if (zvni_mac_send_add_to_client(zvni->vni, &mac->macaddr, - mac->flags, mac->loc_seq)) + mac->flags, mac->loc_seq, mac->es)) return -1; /* Process all neighbors associated with this MAC. */ - zvni_process_neigh_on_local_mac_change(zvni, mac, 0); + zvni_process_neigh_on_local_mac_change(zvni, mac, 0, + 0 /*es_change*/); } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { zvni_process_neigh_on_remote_mac_add(zvni, mac); /* Install the entry. */ - zvni_mac_install(zvni, mac); + zvni_rem_mac_install(zvni, mac, false /* was_static */); } return 0; @@ -10344,7 +12029,7 @@ static void zvni_send_mac_hash_entry_to_client(struct hash_bucket *bucket, if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL)) zvni_mac_send_add_to_client(wctx->zvni->vni, &zmac->macaddr, - zmac->flags, zmac->loc_seq); + zmac->flags, zmac->loc_seq, zmac->es); } /* Iterator to Notify Local MACs of a L2VNI */ @@ -10380,7 +12065,7 @@ static void zvni_send_neigh_hash_entry_to_client(struct hash_bucket *bucket, return; zvni_neigh_send_add_to_client(wctx->zvni->vni, &zn->ip, - &zn->emac, zn->flags, + &zn->emac, zn->mac, zn->flags, zn->loc_seq); } } diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h index 064dda6cd0..9c8af9d1fc 100644 --- a/zebra/zebra_vxlan.h +++ b/zebra/zebra_vxlan.h @@ -165,14 +165,15 @@ extern int zebra_vxlan_svi_down(struct interface *ifp, extern int zebra_vxlan_handle_kernel_neigh_update( struct interface *ifp, struct interface *link_if, struct ipaddr *ip, struct ethaddr *macaddr, uint16_t state, bool is_ext, - bool is_router); + bool is_router, bool local_inactive, bool dp_static); extern int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp, struct interface *link_if, struct ipaddr *ip); extern int zebra_vxlan_local_mac_add_update(struct interface *ifp, struct interface *br_if, struct ethaddr *mac, vlanid_t vid, - bool sticky); + bool sticky, bool local_inactive, + bool dp_static); extern int zebra_vxlan_local_mac_del(struct interface 
*ifp, struct interface *br_if, struct ethaddr *mac, vlanid_t vid); @@ -217,6 +218,7 @@ extern void zebra_vxlan_handle_result(struct zebra_dplane_ctx *ctx); extern void zebra_evpn_init(void); extern void zebra_vxlan_macvlan_up(struct interface *ifp); extern void zebra_vxlan_macvlan_down(struct interface *ifp); +extern int vni_list_cmp(void *p1, void *p2); #ifdef __cplusplus } diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index e4b06054b2..e2eae56873 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -62,6 +62,9 @@ struct zebra_vtep_t_ { struct zebra_vtep_t_ *prev; }; +RB_HEAD(zebra_es_evi_rb_head, zebra_evpn_es_evi); +RB_PROTOTYPE(zebra_es_evi_rb_head, zebra_evpn_es_evi, rb_node, + zebra_es_evi_rb_cmp); /* * VNI hash table @@ -73,6 +76,10 @@ struct zebra_vni_t_ { /* VNI - key */ vni_t vni; + /* ES flags */ + uint32_t flags; +#define ZVNI_READY_FOR_BGP (1 << 0) /* ready to be sent to BGP */ + /* Flag for advertising gw macip */ uint8_t advertise_gw_macip; @@ -102,6 +109,12 @@ struct zebra_vni_t_ { /* List of local or remote neighbors (MAC+IP) */ struct hash *neigh_table; + + /* RB tree of ES-EVIs */ + struct zebra_es_evi_rb_head es_evi_rb_tree; + + /* List of local ESs */ + struct list *local_es_evi_list; }; /* L3 VNI hash table */ @@ -302,6 +315,23 @@ struct zebra_mac_t_ { #define ZEBRA_MAC_REMOTE_DEF_GW 0x40 #define ZEBRA_MAC_DUPLICATE 0x80 #define ZEBRA_MAC_FPM_SENT 0x100 /* whether or not this entry was sent. */ +/* MAC is locally active on an ethernet segment peer */ +#define ZEBRA_MAC_ES_PEER_ACTIVE 0x200 +/* MAC has been proxy-advertised by peers. This means we need to + * keep the entry for forwarding but cannot advertise it + */ +#define ZEBRA_MAC_ES_PEER_PROXY 0x400 +/* We have not been able to independently establish that the host is + * locally connected but one or more ES peers claim it is.
+ * We will maintain the entry for forwarding purposes and continue + * to advertise it as locally attached but with a "proxy" flag + */ +#define ZEBRA_MAC_LOCAL_INACTIVE 0x800 + +#define ZEBRA_MAC_ALL_LOCAL_FLAGS (ZEBRA_MAC_LOCAL |\ + ZEBRA_MAC_LOCAL_INACTIVE) +#define ZEBRA_MAC_ALL_PEER_FLAGS (ZEBRA_MAC_ES_PEER_PROXY |\ + ZEBRA_MAC_ES_PEER_ACTIVE) /* back pointer to zvni */ zebra_vni_t *zvni; @@ -310,13 +340,17 @@ struct zebra_mac_t_ { union { struct { ifindex_t ifindex; - ns_id_t ns_id; vlanid_t vid; } local; struct in_addr r_vtep_ip; } fwd_info; + /* Local or remote ES */ + struct zebra_evpn_es *es; + /* memory used to link the mac to the es */ + struct listnode es_listnode; + /* Mobility sequence numbers associated with this entry. */ uint32_t rem_seq; uint32_t loc_seq; @@ -335,6 +369,14 @@ struct zebra_mac_t_ { struct timeval detect_start_time; time_t dad_dup_detect_time; + + /* used for ageing out the PEER_ACTIVE flag */ + struct thread *hold_timer; + + /* number of neigh entries (using this mac) that have + * ZEBRA_NEIGH_ES_PEER_ACTIVE or ZEBRA_NEIGH_ES_PEER_PROXY + */ + uint32_t sync_neigh_cnt; }; /* @@ -366,6 +408,17 @@ struct rmac_walk_ctx { struct json_object *json; }; +/* temporary datastruct to pass info between the mac-update and + * neigh-update while handling mac-ip routes + */ +struct sync_mac_ip_ctx { + bool ignore_macip; + bool mac_created; + bool mac_inactive; + bool mac_dp_update_deferred; + zebra_mac_t *mac; +}; + #define IS_ZEBRA_NEIGH_ACTIVE(n) (n->state == ZEBRA_NEIGH_ACTIVE) #define IS_ZEBRA_NEIGH_INACTIVE(n) (n->state == ZEBRA_NEIGH_INACTIVE) @@ -392,6 +445,9 @@ struct zebra_neigh_t_ { /* MAC address. */ struct ethaddr emac; + /* Back pointer to MAC. Only applicable to hosts in a L2-VNI. */ + zebra_mac_t *mac; + /* Underlying interface.
*/ ifindex_t ifindex; @@ -405,6 +461,18 @@ struct zebra_neigh_t_ { #define ZEBRA_NEIGH_ROUTER_FLAG 0x10 #define ZEBRA_NEIGH_DUPLICATE 0x20 #define ZEBRA_NEIGH_SVI_IP 0x40 +/* received from an ES peer */ +#define ZEBRA_NEIGH_ES_PEER_ACTIVE 0x80 +/* received from an ES peer as a proxy advertisement */ +#define ZEBRA_NEIGH_ES_PEER_PROXY 0x100 +/* We have not been able to independently establish that the host + * is locally connected + */ +#define ZEBRA_NEIGH_LOCAL_INACTIVE 0x200 +#define ZEBRA_NEIGH_ALL_LOCAL_FLAGS (ZEBRA_NEIGH_LOCAL |\ + ZEBRA_NEIGH_LOCAL_INACTIVE) +#define ZEBRA_NEIGH_ALL_PEER_FLAGS (ZEBRA_NEIGH_ES_PEER_PROXY |\ + ZEBRA_NEIGH_ES_PEER_ACTIVE) enum zebra_neigh_state state; @@ -432,6 +500,9 @@ struct zebra_neigh_t_ { struct timeval detect_start_time; time_t dad_dup_detect_time; + + /* used for ageing out the PEER_ACTIVE flag */ + struct thread *hold_timer; }; /* @@ -508,4 +579,8 @@ typedef struct zebra_vxlan_sg_ { uint32_t ref_cnt; } zebra_vxlan_sg_t; +extern zebra_vni_t *zvni_lookup(vni_t vni); +extern void zebra_vxlan_sync_mac_dp_install(zebra_mac_t *mac, bool set_inactive, + bool force_clear_static, const char *caller); + #endif /* _ZEBRA_VXLAN_PRIVATE_H */ diff --git a/zebra/zserv.c b/zebra/zserv.c index 99a85fd2ce..f1b7dcc848 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -1075,6 +1075,12 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client) vty_out(vty, "L3-VNI delete notifications: %u\n", client->l3vnidel_cnt); vty_out(vty, "MAC-IP add notifications: %u\n", client->macipadd_cnt); vty_out(vty, "MAC-IP delete notifications: %u\n", client->macipdel_cnt); + vty_out(vty, "ES add notifications: %u\n", client->local_es_add_cnt); + vty_out(vty, "ES delete notifications: %u\n", client->local_es_del_cnt); + vty_out(vty, "ES-EVI add notifications: %u\n", + client->local_es_evi_add_cnt); + vty_out(vty, "ES-EVI delete notifications: %u\n", + client->local_es_evi_del_cnt); TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { vty_out(vty,
"VRF : %s\n", vrf_id_to_name(info->vrf_id)); diff --git a/zebra/zserv.h b/zebra/zserv.h index f2a4523818..e904460782 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -194,6 +194,10 @@ struct zserv { uint32_t v6_nh_watch_rem_cnt; uint32_t vxlan_sg_add_cnt; uint32_t vxlan_sg_del_cnt; + uint32_t local_es_add_cnt; + uint32_t local_es_del_cnt; + uint32_t local_es_evi_add_cnt; + uint32_t local_es_evi_del_cnt; uint32_t error_cnt; time_t nh_reg_time; |
