diff options
| author | Anuradha Karuppiah <anuradhak@cumulusnetworks.com> | 2020-03-27 17:14:45 -0700 | 
|---|---|---|
| committer | Anuradha Karuppiah <anuradhak@cumulusnetworks.com> | 2020-08-05 06:46:12 -0700 | 
| commit | ce5160c08141db3002060189d624398409bd6317 (patch) | |
| tree | df19f2619fa3d2db3a186720f1cb341fee9550be /zebra | |
| parent | 506efd379b4ed72454650a32049028a7f5b4c5c8 (diff) | |
zebra: Ethernet segment management and support for MAC-ECMP
1. Local ethernet segments are configured in zebra by attaching a
local-es-id and sys-mac to an access interface -
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
!
interface hostbond1
 evpn mh es-id 1
 evpn mh es-sys-mac 00:00:00:00:01:11
!
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
This info is then sent to BGP and used for the generation of EAD-per-ES
routes.
2. Access VLANs associated with an (ES) access port are translated into
ES-EVI objects and sent to BGP. These are used by BGP for the
generation of EAD-EVI routes.
3. Remote ESs are imported by BGP and sent to zebra. A list of VTEPs
is maintained per-remote ES in zebra. This list is used for the creation
of the L2-NHG that is used for forwarding traffic.
4. MAC entries with a non-zero ESI destination use the L2-NHG associated
with the ESI for forwarding traffic over the VxLAN overlay.
Please see zebra_evpn_mh.h for details of the data structure organization.
Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
Diffstat (limited to 'zebra')
| -rw-r--r-- | zebra/interface.c | 16 | ||||
| -rw-r--r-- | zebra/interface.h | 31 | ||||
| -rw-r--r-- | zebra/subdir.am | 4 | ||||
| -rw-r--r-- | zebra/zapi_msg.c | 3 | ||||
| -rw-r--r-- | zebra/zebra_errors.h | 1 | ||||
| -rw-r--r-- | zebra/zebra_evpn_mh.c | 2070 | ||||
| -rw-r--r-- | zebra/zebra_evpn_mh.h | 228 | ||||
| -rw-r--r-- | zebra/zebra_l2.c | 51 | ||||
| -rw-r--r-- | zebra/zebra_l2.h | 4 | ||||
| -rw-r--r-- | zebra/zebra_memory.c | 2 | ||||
| -rw-r--r-- | zebra/zebra_router.h | 6 | ||||
| -rw-r--r-- | zebra/zebra_vxlan.c | 429 | ||||
| -rw-r--r-- | zebra/zebra_vxlan.h | 1 | ||||
| -rw-r--r-- | zebra/zebra_vxlan_private.h | 21 | ||||
| -rw-r--r-- | zebra/zserv.c | 6 | ||||
| -rw-r--r-- | zebra/zserv.h | 4 | 
16 files changed, 2744 insertions, 133 deletions
diff --git a/zebra/interface.c b/zebra/interface.c index c23e6037c5..b824e313ec 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -51,6 +51,7 @@  #include "zebra/interface.h"  #include "zebra/zebra_vxlan.h"  #include "zebra/zebra_errors.h" +#include "zebra/zebra_evpn_mh.h"  DEFINE_MTYPE_STATIC(ZEBRA, ZINFO, "Zebra Interface Information") @@ -127,6 +128,7 @@ static int if_zebra_new_hook(struct interface *ifp)  	struct zebra_if *zebra_if;  	zebra_if = XCALLOC(MTYPE_ZINFO, sizeof(struct zebra_if)); +	zebra_if->ifp = ifp;  	zebra_if->multicast = IF_ZEBRA_MULTICAST_UNSPEC;  	zebra_if->shutdown = IF_ZEBRA_SHUTDOWN_OFF; @@ -238,6 +240,8 @@ static int if_zebra_delete_hook(struct interface *ifp)  		list_delete(&rtadv->AdvDNSSLList);  #endif /* HAVE_RTADV */ +		zebra_evpn_if_cleanup(zebra_if); +  		if_nhg_dependents_release(ifp);  		zebra_if_nhg_dependents_free(zebra_if); @@ -831,6 +835,7 @@ void if_delete_update(struct interface *ifp)  		memset(&zif->l2info, 0, sizeof(union zebra_l2if_info));  		memset(&zif->brslave_info, 0,  		       sizeof(struct zebra_l2info_brslave)); +		zebra_evpn_if_cleanup(zif);  	}  	if (!ifp->configured) { @@ -1072,6 +1077,8 @@ void if_up(struct interface *ifp)  	} else if (IS_ZEBRA_IF_MACVLAN(ifp))  		zebra_vxlan_macvlan_up(ifp); +	if (zif->es_info.es) +		zebra_evpn_es_if_oper_state_change(zif, true /*up*/);  }  /* Interface goes down.  We have to manage different behavior of based @@ -1106,6 +1113,8 @@ void if_down(struct interface *ifp)  	} else if (IS_ZEBRA_IF_MACVLAN(ifp))  		zebra_vxlan_macvlan_down(ifp); +	if (zif->es_info.es) +		zebra_evpn_es_if_oper_state_change(zif, false /*up*/);  	/* Notify to the protocol daemons. 
*/  	zebra_interface_down_update(ifp); @@ -1527,6 +1536,8 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp)  		}  	} +	zebra_evpn_if_es_print(vty, zebra_if); +  	if (zebra_if->link_ifindex != IFINDEX_INTERNAL) {  		if (zebra_if->link)  			vty_out(vty, "  Parent interface: %s\n", zebra_if->link->name); @@ -3568,7 +3579,7 @@ static int if_config_write(struct vty *vty)  			}  			hook_call(zebra_if_config_wr, vty, ifp); - +			zebra_evpn_mh_if_write(vty, ifp);  			link_params_config_write(vty, ifp);  			vty_endframe(vty, "!\n"); @@ -3644,4 +3655,7 @@ void zebra_if_init(void)  	install_element(LINK_PARAMS_NODE, &link_params_use_bw_cmd);  	install_element(LINK_PARAMS_NODE, &no_link_params_use_bw_cmd);  	install_element(LINK_PARAMS_NODE, &exit_link_params_cmd); + +	/* setup EVPN MH elements */ +	zebra_evpn_interface_init();  } diff --git a/zebra/interface.h b/zebra/interface.h index 2dad0c3bb2..1a8e3caed5 100644 --- a/zebra/interface.h +++ b/zebra/interface.h @@ -25,6 +25,7 @@  #include "redistribute.h"  #include "vrf.h"  #include "hook.h" +#include "bitfield.h"  #include "zebra/zebra_l2.h"  #include "zebra/zebra_nhg_private.h" @@ -42,6 +43,8 @@ extern "C" {  #define IF_ZEBRA_SHUTDOWN_OFF    0  #define IF_ZEBRA_SHUTDOWN_ON     1 +#define IF_VLAN_BITMAP_MAX 4096 +  #if defined(HAVE_RTADV)  /* Router advertisement parameter.  From RFC4861, RFC6275 and RFC4191. */  struct rtadvconf { @@ -272,8 +275,19 @@ typedef enum {  struct irdp_interface; +/* Ethernet segment info used for setting up EVPN multihoming */ +struct zebra_evpn_es; +struct zebra_es_if_info { +	struct ethaddr sysmac; +	uint32_t lid; /* local-id; has to be unique per-ES-sysmac */ +	struct zebra_evpn_es *es; /* local ES */ +}; +  /* `zebra' daemon local interface structure. */  struct zebra_if { +	/* back pointer to the interface */ +	struct interface *ifp; +  	/* Shutdown configuration. 
*/  	uint8_t shutdown; @@ -347,6 +361,12 @@ struct zebra_if {  	struct zebra_l2info_bondslave bondslave_info; +	/* ethernet segment */ +	struct zebra_es_if_info es_info; + +	/* bitmap of vlans associated with this interface */ +	bitfield_t vlan_bitmap; +  	/* Link fields - for sub-interfaces. */  	ifindex_t link_ifindex;  	struct interface *link; @@ -370,17 +390,6 @@ DECLARE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp),  DECLARE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp),  	     (vty, ifp)) -static inline void zebra_if_set_ziftype(struct interface *ifp, -					zebra_iftype_t zif_type, -					zebra_slave_iftype_t zif_slave_type) -{ -	struct zebra_if *zif; - -	zif = (struct zebra_if *)ifp->info; -	zif->zif_type = zif_type; -	zif->zif_slave_type = zif_slave_type; -} -  #define IS_ZEBRA_IF_VRF(ifp)                                                   \  	(((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VRF) diff --git a/zebra/subdir.am b/zebra/subdir.am index 49e60820bc..c552ca513e 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -12,6 +12,7 @@ vtysh_scan += \  	zebra/rtadv.c \  	zebra/zebra_gr.c \  	zebra/zebra_mlag_vty.c \ +	zebra/zebra_evpn_mh.c \  	zebra/zebra_mpls_vty.c \  	zebra/zebra_ptm.c \  	zebra/zebra_pw.c \ @@ -108,6 +109,7 @@ zebra_zebra_SOURCES = \  	zebra/zebra_vrf.c \  	zebra/zebra_vty.c \  	zebra/zebra_vxlan.c \ +	zebra/zebra_evpn_mh.c \  	zebra/zserv.c \  	# end @@ -115,6 +117,7 @@ clippy_scan += \  	zebra/debug.c \  	zebra/interface.c \  	zebra/rtadv.c \ +	zebra/zebra_evpn_mh.c \  	zebra/zebra_mlag_vty.c \  	zebra/zebra_routemap.c \  	zebra/zebra_vty.c \ @@ -167,6 +170,7 @@ noinst_HEADERS += \  	zebra/zebra_vrf.h \  	zebra/zebra_vxlan.h \  	zebra/zebra_vxlan_private.h \ +	zebra/zebra_evpn_mh.h \  	zebra/zserv.h \  	# end diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 2ca3e82fac..0a459b4d0a 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -51,6 +51,7 @@  #include "zebra/zebra_mpls.h" 
 #include "zebra/zebra_mroute.h"  #include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h"  #include "zebra/rt.h"  #include "zebra/zebra_pbr.h"  #include "zebra/table_manager.h" @@ -2892,6 +2893,8 @@ void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = {  	[ZEBRA_ADVERTISE_SVI_MACIP] = zebra_vxlan_advertise_svi_macip,  	[ZEBRA_ADVERTISE_SUBNET] = zebra_vxlan_advertise_subnet,  	[ZEBRA_ADVERTISE_ALL_VNI] = zebra_vxlan_advertise_all_vni, +	[ZEBRA_REMOTE_ES_VTEP_ADD] = zebra_evpn_proc_remote_es, +	[ZEBRA_REMOTE_ES_VTEP_DEL] = zebra_evpn_proc_remote_es,  	[ZEBRA_REMOTE_VTEP_ADD] = zebra_vxlan_remote_vtep_add,  	[ZEBRA_REMOTE_VTEP_DEL] = zebra_vxlan_remote_vtep_del,  	[ZEBRA_REMOTE_MACIP_ADD] = zebra_vxlan_remote_macip_add, diff --git a/zebra/zebra_errors.h b/zebra/zebra_errors.h index 5f2a7a12c6..03953ed17f 100644 --- a/zebra/zebra_errors.h +++ b/zebra/zebra_errors.h @@ -134,6 +134,7 @@ enum zebra_log_refs {  	EC_ZEBRA_BAD_NHG_MESSAGE,  	EC_ZEBRA_DUPLICATE_NHG_MESSAGE,  	EC_ZEBRA_VRF_MISCONFIGURED, +	EC_ZEBRA_ES_CREATE,  };  void zebra_error_init(void); diff --git a/zebra/zebra_evpn_mh.c b/zebra/zebra_evpn_mh.c new file mode 100644 index 0000000000..c55e9d3ec2 --- /dev/null +++ b/zebra/zebra_evpn_mh.c @@ -0,0 +1,2070 @@ +/* + * Zebra EVPN multihoming code + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. 
+ */ + +#include <zebra.h> + +#include "command.h" +#include "hash.h" +#include "if.h" +#include "jhash.h" +#include "linklist.h" +#include "log.h" +#include "memory.h" +#include "prefix.h" +#include "stream.h" +#include "table.h" +#include "vlan.h" +#include "vxlan.h" + +#include "zebra/zebra_router.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/rt_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_l2.h" +#include "zebra/zebra_memory.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/zebra_vxlan.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_evpn_mh.h" +#include "zebra/zebra_nhg.h" + +DEFINE_MTYPE_STATIC(ZEBRA, ZACC_BD, "Access Broadcast Domain"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES, "Ethernet Segment"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES_EVI, "ES info per-EVI"); +DEFINE_MTYPE_STATIC(ZEBRA, ZMH_INFO, "MH global info"); +DEFINE_MTYPE_STATIC(ZEBRA, ZES_VTEP, "VTEP attached to the ES"); + +static void zebra_evpn_es_get_one_base_vni(void); +static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es, +		zebra_vni_t *vni, bool add); +static void zebra_evpn_local_es_del(struct zebra_evpn_es *es); +static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, +		struct ethaddr *sysmac); + +esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; + +/*****************************************************************************/ +/* Ethernet Segment to EVI association - + * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI + * (zebra_vni_t.es_evi_rb_tree). + * 2. Each local ES-EVI entry is sent to BGP which advertises it as an + * EAD-EVI (Type-1 EVPN) route + * 3. Local ES-EVI setup is re-evaluated on the following triggers - + *    a. When an ESI is set or cleared on an access port. + *    b. When an access port associated with an ESI is deleted. + *    c. 
When VLAN member ship changes on an access port. + *    d. When a VXLAN_IF is set or cleared on an access broadcast domain. + *    e. When a L2-VNI is added or deleted for a VxLAN_IF. + * 4. Currently zebra doesn't remote ES-EVIs. Those are managed and maintained + * entirely in BGP which consolidates them into a remote ES. The remote ES + * is then sent to zebra which allocates a NHG for it. + */ + +/* compare ES-IDs for the ES-EVI RB tree maintained per-VNI */ +static int zebra_es_evi_rb_cmp(const struct zebra_evpn_es_evi *es_evi1, +		const struct zebra_evpn_es_evi *es_evi2) +{ +	return memcmp(&es_evi1->es->esi, &es_evi2->es->esi, ESI_BYTES); +} +RB_GENERATE(zebra_es_evi_rb_head, zebra_evpn_es_evi, +		rb_node, zebra_es_evi_rb_cmp); + +/* allocate a new ES-EVI and insert it into the per-L2-VNI and per-ES + * tables. + */ +static struct zebra_evpn_es_evi *zebra_evpn_es_evi_new(struct zebra_evpn_es *es, +		zebra_vni_t *zvni) +{ +	struct zebra_evpn_es_evi *es_evi; + +	es_evi = XCALLOC(MTYPE_ZES_EVI, sizeof(struct zebra_evpn_es_evi)); + +	es_evi->es = es; +	es_evi->zvni = zvni; + +	/* insert into the VNI-ESI rb tree */ +	if (RB_INSERT(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, es_evi)) { +		XFREE(MTYPE_ZES_EVI, es_evi); +		return NULL; +	} + +	/* add to the ES's VNI list */ +	listnode_init(&es_evi->es_listnode, es_evi); +	listnode_add(es->es_evi_list, &es_evi->es_listnode); + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("es %s evi %d new", +				es_evi->es->esi_str, es_evi->zvni->vni); + +	return es_evi; +} + +/* returns TRUE if the VNI is ready to be sent to BGP */ +static inline bool zebra_evpn_vni_send_to_client_ok(zebra_vni_t *zvni) +{ +	return !!(zvni->flags & ZVNI_READY_FOR_BGP); +} + +/* Evaluate if the es_evi is ready to be sent BGP - + * 1. If it is ready an add is sent to BGP + * 2. If it is not ready a del is sent (if the ES had been previously added + *   to BGP). 
+ */ +static void zebra_evpn_es_evi_re_eval_send_to_client( +		struct zebra_evpn_es_evi *es_evi) +{ +	bool old_ready; +	bool new_ready; + +	old_ready = !!(es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP); + +	/* ES and L2-VNI have to be individually ready for BGP */ +	if ((es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) && +			(es_evi->es->flags & ZEBRA_EVPNES_READY_FOR_BGP) && +			zebra_evpn_vni_send_to_client_ok(es_evi->zvni)) +		es_evi->flags |= ZEBRA_EVPNES_EVI_READY_FOR_BGP; +	else +		es_evi->flags &= ~ZEBRA_EVPNES_EVI_READY_FOR_BGP; + +	new_ready = !!(es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP); + +	if (old_ready == new_ready) +		return; + +	if (new_ready) +		zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zvni, +				true /* add */); +	else +		zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zvni, +				false /* add */); +} + +/* remove the ES-EVI from the per-L2-VNI and per-ES tables and free + * up the memory. + */ +static void zebra_evpn_es_evi_free(struct zebra_evpn_es_evi *es_evi) +{ +	struct zebra_evpn_es *es = es_evi->es; +	zebra_vni_t *zvni = es_evi->zvni; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("es %s evi %d free", +				es_evi->es->esi_str, es_evi->zvni->vni); + +	/* remove from the ES's VNI list */ +	list_delete_node(es->es_evi_list, &es_evi->es_listnode); + +	/* remove from the VNI-ESI rb tree */ +	RB_REMOVE(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, es_evi); + +	/* remove from the VNI-ESI rb tree */ +	XFREE(MTYPE_ZES_EVI, es_evi); +} + +/* find the ES-EVI in the per-L2-VNI RB tree */ +static struct zebra_evpn_es_evi *zebra_evpn_es_evi_find( +		struct zebra_evpn_es *es, zebra_vni_t *zvni) +{ +	struct zebra_evpn_es_evi es_evi; + +	es_evi.es = es; + +	return RB_FIND(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, &es_evi); +} + +/* Tell BGP about an ES-EVI deletion and then delete it */ +static void zebra_evpn_local_es_evi_do_del(struct zebra_evpn_es_evi *es_evi) +{ +	if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)) +		return; + +	if 
(IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("local es %s evi %d del", +				es_evi->es->esi_str, es_evi->zvni->vni); + +	if (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP) { +		/* send a del only if add was sent for it earlier */ +		zebra_evpn_es_evi_send_to_client(es_evi->es, +				es_evi->zvni, false /* add */); +	} + +	/* delete it from the VNI's local list */ +	list_delete_node(es_evi->zvni->local_es_evi_list, +			&es_evi->l2vni_listnode); + +	es_evi->flags &= ~ZEBRA_EVPNES_EVI_LOCAL; +	zebra_evpn_es_evi_free(es_evi); +} +static void zebra_evpn_local_es_evi_del(struct zebra_evpn_es *es, +		zebra_vni_t *zvni) +{ +	struct zebra_evpn_es_evi *es_evi; + +	es_evi = zebra_evpn_es_evi_find(es, zvni); +	if (es_evi) +		zebra_evpn_local_es_evi_do_del(es_evi); +} + +/* Create an ES-EVI if it doesn't already exist and tell BGP */ +static void zebra_evpn_local_es_evi_add(struct zebra_evpn_es *es, +		zebra_vni_t *zvni) +{ +	struct zebra_evpn_es_evi *es_evi; + +	es_evi = zebra_evpn_es_evi_find(es, zvni); +	if (!es_evi) { +		es_evi = zebra_evpn_es_evi_new(es, zvni); +		if (!es_evi) +			return; + +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("local es %s evi %d add", +					es_evi->es->esi_str, es_evi->zvni->vni); +		es_evi->flags |= ZEBRA_EVPNES_EVI_LOCAL; +		/* add to the VNI's local list */ +		listnode_init(&es_evi->l2vni_listnode, es_evi); +		listnode_add(zvni->local_es_evi_list, &es_evi->l2vni_listnode); + +		zebra_evpn_es_evi_re_eval_send_to_client(es_evi); +	} +} + +static void zebra_evpn_es_evi_show_entry(struct vty *vty, +		struct zebra_evpn_es_evi *es_evi, json_object *json) +{ +	char type_str[4]; + +	if (json) { +		/* XXX */ +	} else { +		type_str[0] = '\0'; +		if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) +			strcpy(type_str + strlen(type_str), "L"); + +		vty_out(vty, "%-8d %-30s %-4s\n", +				es_evi->zvni->vni, es_evi->es->esi_str, +				type_str); +	} +} + +static void zebra_evpn_es_evi_show_entry_detail(struct vty *vty, +		struct zebra_evpn_es_evi *es_evi, json_object 
*json) +{ +	char type_str[4]; + +	if (json) { +		/* XXX */ +	} else { +		type_str[0] = '\0'; +		if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) +			strcpy(type_str + strlen(type_str), "L"); + +		vty_out(vty, "VNI %d ESI: %s\n", +				es_evi->zvni->vni, es_evi->es->esi_str); +		vty_out(vty, " Type: %s\n", type_str); +		vty_out(vty, " Ready for BGP: %s\n", +				(es_evi->flags & +				 ZEBRA_EVPNES_EVI_READY_FOR_BGP) ? +				"yes" : "no"); +		vty_out(vty, "\n"); +	} +} + +static void zebra_evpn_es_evi_show_one_vni(zebra_vni_t *zvni, +		struct vty *vty, json_object *json, int detail) +{ +	struct zebra_evpn_es_evi *es_evi; + +	RB_FOREACH(es_evi, zebra_es_evi_rb_head, &zvni->es_evi_rb_tree) { +		if (detail) +			zebra_evpn_es_evi_show_entry_detail(vty, es_evi, json); +		else +			zebra_evpn_es_evi_show_entry(vty, es_evi, json); +	} +} + +struct evpn_mh_show_ctx { +	struct vty *vty; +	json_object *json; +	int detail; +}; + +static void zebra_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket, +		void *ctxt) +{ +	zebra_vni_t *zvni = (zebra_vni_t *)bucket->data; +	struct evpn_mh_show_ctx *wctx = (struct evpn_mh_show_ctx *)ctxt; + +	zebra_evpn_es_evi_show_one_vni(zvni, wctx->vty, +			wctx->json, wctx->detail); +} + +void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail) +{ +	json_object *json = NULL; +	struct zebra_vrf *zvrf; +	struct evpn_mh_show_ctx wctx; + +	zvrf = zebra_vrf_get_evpn(); + +	memset(&wctx, 0, sizeof(wctx)); +	wctx.vty = vty; +	wctx.json = json; +	wctx.detail = detail; + +	if (!detail && !json) { +		vty_out(vty, "Type: L local, R remote\n"); +		vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); +	} +	/* Display all L2-VNIs */ +	hash_iterate(zvrf->vni_table, zebra_evpn_es_evi_show_one_vni_hash_cb, +			&wctx); +} + +void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, vni_t vni, int detail) +{ +	json_object *json = NULL; +	zebra_vni_t *zvni; + +	zvni = zvni_lookup(vni); +	if (zvni) { +		if (!detail && !json) { +			vty_out(vty, "Type: L 
local, R remote\n"); +			vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); +		} +	} else { +		if (!uj) +			vty_out(vty, "VNI %d doesn't exist\n", vni); +	} +	zebra_evpn_es_evi_show_one_vni(zvni, vty, json, detail); +} + +/* Initialize the ES tables maintained per-L2_VNI */ +void zebra_evpn_vni_es_init(zebra_vni_t *zvni) +{ +	/* Initialize the ES-EVI RB tree */ +	RB_INIT(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree); + +	/* Initialize the local and remote ES lists maintained for quick +	 * walks by type +	 */ +	zvni->local_es_evi_list = list_new(); +	listset_app_node_mem(zvni->local_es_evi_list); +} + +/* Cleanup the ES info maintained per-L2_VNI */ +void zebra_evpn_vni_es_cleanup(zebra_vni_t *zvni) +{ +	struct zebra_evpn_es_evi *es_evi; +	struct zebra_evpn_es_evi *es_evi_next; + +	RB_FOREACH_SAFE(es_evi, zebra_es_evi_rb_head, +			&zvni->es_evi_rb_tree, es_evi_next) { +		zebra_evpn_local_es_evi_do_del(es_evi); +	} + +	list_delete(&zvni->local_es_evi_list); +	zebra_evpn_es_clear_base_vni(zvni); +} + +/* called when the oper state or bridge membership changes for the + * vxlan device + */ +void zebra_evpn_vni_update_all_es(zebra_vni_t *zvni) +{ +	struct zebra_evpn_es_evi *es_evi; +	struct listnode *node; + +	/* the VNI is now elgible as a base for EVPN-MH */ +	if (zebra_evpn_vni_send_to_client_ok(zvni)) +		zebra_evpn_es_set_base_vni(zvni); +	else +		zebra_evpn_es_clear_base_vni(zvni); + +	for (ALL_LIST_ELEMENTS_RO(zvni->local_es_evi_list, node, es_evi)) +		zebra_evpn_es_evi_re_eval_send_to_client(es_evi); +} + +/*****************************************************************************/ +/* Access broadcast domains (BD) + * 1. These broadcast domains can be VLAN aware (in which case + * the key is VID) or VLAN unaware (in which case the key is + * 2. A VID-BD is created when a VLAN is associated with an access port or + *    when the VLAN is associated with VXLAN_IF + * 3. 
A BD is translated into ES-EVI entries when a VNI is associated + *  with the broadcast domain + */ +/* Hash key for VLAN based broadcast domains */ +static unsigned int zebra_evpn_acc_vl_hash_keymake(const void *p) +{ +	const struct zebra_evpn_access_bd *acc_bd = p; + +	return jhash_1word(acc_bd->vid, 0); +} + +/* Compare two VLAN based broadcast domains */ +static bool zebra_evpn_acc_vl_cmp(const void *p1, const void *p2) +{ +	const struct zebra_evpn_access_bd *acc_bd1 = p1; +	const struct zebra_evpn_access_bd *acc_bd2 = p2; + +	if (acc_bd1 == NULL && acc_bd2 == NULL) +		return true; + +	if (acc_bd1 == NULL || acc_bd2 == NULL) +		return false; + +	return (acc_bd1->vid == acc_bd2->vid); +} + +/* Lookup VLAN based broadcast domain */ +static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_find(vlanid_t vid) +{ +	struct zebra_evpn_access_bd *acc_bd; +	struct zebra_evpn_access_bd tmp; + +	tmp.vid = vid; +	acc_bd = hash_lookup(zmh_info->evpn_vlan_table, &tmp); + +	return acc_bd; +} + +/* A new broadcast domain can be created when a VLAN member or VLAN<=>VxLAN_IF + * mapping is added. + */ +static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_new(vlanid_t vid) +{ +	struct zebra_evpn_access_bd *acc_bd; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d add", vid); + +	acc_bd = XCALLOC(MTYPE_ZACC_BD, sizeof(struct zebra_evpn_access_bd)); + +	acc_bd->vid = vid; + +	/* Initialize the mbr list */ +	acc_bd->mbr_zifs = list_new(); + +	/* Add to hash */ +	if (!hash_get(zmh_info->evpn_vlan_table, acc_bd, hash_alloc_intern)) { +		XFREE(MTYPE_ZACC_BD, acc_bd); +		return NULL; +	} + +	return acc_bd; +} + +/* Free VLAN based broadcast domain - + * This just frees appropriate memory, caller should have taken other + * needed actions. 
+ */ +static void zebra_evpn_acc_vl_free(struct zebra_evpn_access_bd *acc_bd) +{ +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d del", acc_bd->vid); + +	/* cleanup resources maintained against the ES */ +	list_delete(&acc_bd->mbr_zifs); + +	/* remove EVI from various tables */ +	hash_release(zmh_info->evpn_vlan_table, acc_bd); + +	XFREE(MTYPE_ZACC_BD, acc_bd); +} + +static void zebra_evpn_acc_vl_cleanup_all(struct hash_bucket *bucket, void *arg) +{ +	struct zebra_evpn_access_bd *acc_bd = bucket->data; + +	zebra_evpn_acc_vl_free(acc_bd); +} + +/* called when a bd mbr is removed or VxLAN_IF is diassociated from the access + * VLAN + */ +static void zebra_evpn_acc_bd_free_on_deref(struct zebra_evpn_access_bd *acc_bd) +{ +	if (!list_isempty(acc_bd->mbr_zifs) || acc_bd->vxlan_zif) +		return; + +	/* if there are no references free the EVI */ +	zebra_evpn_acc_vl_free(acc_bd); +} + +/* called when a EVPN-L2VNI is set or cleared against a BD */ +static void zebra_evpn_acc_bd_vni_set(struct zebra_evpn_access_bd *acc_bd, +		zebra_vni_t *zvni, zebra_vni_t *old_zvni) +{ +	struct zebra_if *zif; +	struct listnode *node; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d l2-vni %u set", +				acc_bd->vid, zvni ? 
zvni->vni : 0); + +	for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) { +		if (!zif->es_info.es) +			continue; + +		if (zvni) +			zebra_evpn_local_es_evi_add(zif->es_info.es, zvni); +		else if (old_zvni) +			zebra_evpn_local_es_evi_del(zif->es_info.es, old_zvni); +	} +} + +/* handle VLAN->VxLAN_IF association */ +void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif) +{ +	struct zebra_evpn_access_bd *acc_bd; +	struct zebra_if *old_vxlan_zif; +	zebra_vni_t *old_zvni; + +	if (!vid) +		return; + +	acc_bd = zebra_evpn_acc_vl_find(vid); +	if (!acc_bd) +		acc_bd = zebra_evpn_acc_vl_new(vid); + +	old_vxlan_zif = acc_bd->vxlan_zif; +	acc_bd->vxlan_zif = vxlan_zif; +	if (vxlan_zif == old_vxlan_zif) +		return; + +	old_zvni = acc_bd->zvni; +	acc_bd->zvni = zvni_lookup(vxlan_zif->l2info.vxl.vni); +	if (acc_bd->zvni == old_zvni) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d vni %u ref", +				acc_bd->vid, vxlan_zif->l2info.vxl.vni); + +	if (old_zvni) +		zebra_evpn_acc_bd_vni_set(acc_bd, NULL, old_zvni); + +	if (acc_bd->zvni) +		zebra_evpn_acc_bd_vni_set(acc_bd, acc_bd->zvni, NULL); +} + +/* handle VLAN->VxLAN_IF deref */ +void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif) +{ +	struct zebra_evpn_access_bd *acc_bd; + +	if (!vid) +		return; + +	acc_bd = zebra_evpn_acc_vl_find(vid); +	if (!acc_bd) +		return; + +	/* clear vxlan_if only if it matches */ +	if (acc_bd->vxlan_zif != vxlan_zif) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d vni %u deref", +				acc_bd->vid, vxlan_zif->l2info.vxl.vni); + +	if (acc_bd->zvni) +		zebra_evpn_acc_bd_vni_set(acc_bd, NULL, acc_bd->zvni); + +	acc_bd->zvni = NULL; +	acc_bd->vxlan_zif = NULL; + +	/* if there are no other references the access_bd can be freed */ +	zebra_evpn_acc_bd_free_on_deref(acc_bd); +} + +/* handle EVPN L2VNI add/del */ +void zebra_evpn_vxl_vni_set(struct zebra_if *zif, zebra_vni_t *zvni, +		bool set) +{ +	struct zebra_l2info_vxlan 
*vxl; +	struct zebra_evpn_access_bd *acc_bd; + +	if (!zif) +		return; + +	/* locate access_bd associated with the vxlan device */ +	vxl = &zif->l2info.vxl; +	acc_bd = zebra_evpn_acc_vl_find(vxl->access_vlan); +	if (!acc_bd) +		return; + +	if (set) { +		zebra_evpn_es_set_base_vni(zvni); +		if (acc_bd->zvni != zvni) { +			acc_bd->zvni = zvni; +			zebra_evpn_acc_bd_vni_set(acc_bd, zvni, NULL); +		} +	} else { +		if (acc_bd->zvni) { +			zebra_vni_t *old_zvni = acc_bd->zvni; +			acc_bd->zvni = NULL; +			zebra_evpn_acc_bd_vni_set(acc_bd, NULL, old_zvni); +		} +	} +} + +/* handle addition of new VLAN members */ +void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif) +{ +	struct zebra_evpn_access_bd *acc_bd; + +	if (!vid) +		return; + +	acc_bd = zebra_evpn_acc_vl_find(vid); +	if (!acc_bd) +		acc_bd = zebra_evpn_acc_vl_new(vid); + +	if (listnode_lookup(acc_bd->mbr_zifs, zif)) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d mbr %s ref", +				vid, zif->ifp->name); + +	listnode_add(acc_bd->mbr_zifs, zif); +	if (acc_bd->zvni && zif->es_info.es) +		zebra_evpn_local_es_evi_add(zif->es_info.es, acc_bd->zvni); +} + +/* handle deletion of VLAN members */ +void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif) +{ +	struct zebra_evpn_access_bd *acc_bd; +	struct listnode *node; + +	if (!vid) +		return; + +	acc_bd = zebra_evpn_acc_vl_find(vid); +	if (!acc_bd) +		return; + +	node = listnode_lookup(acc_bd->mbr_zifs, zif); +	if (!node) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("access vlan %d mbr %s deref", +				vid, zif->ifp->name); + +	list_delete_node(acc_bd->mbr_zifs, node); + +	if (acc_bd->zvni && zif->es_info.es) +		zebra_evpn_local_es_evi_del(zif->es_info.es, acc_bd->zvni); + +	/* if there are no other references the access_bd can be freed */ +	zebra_evpn_acc_bd_free_on_deref(acc_bd); +} + +static void zebra_evpn_acc_vl_show_entry_detail(struct vty *vty, +		struct zebra_evpn_access_bd *acc_bd, json_object *json) +{ +	
struct zebra_if *zif; +	struct listnode	*node; + +	if (json) { +		/* XXX */ +	} else { +		vty_out(vty, "VLAN: %u\n", acc_bd->vid); +		vty_out(vty, " VxLAN Interface: %s\n", +				acc_bd->vxlan_zif ? +				acc_bd->vxlan_zif->ifp->name : "-"); +		vty_out(vty, " L2-VNI: %d\n", +				acc_bd->zvni ? acc_bd->zvni->vni : 0); +		vty_out(vty, " Member Count: %d\n", +				listcount(acc_bd->mbr_zifs)); +		vty_out(vty, " Members: \n"); +		for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, node, zif)) +			vty_out(vty, "    %s\n", zif->ifp->name); +		vty_out(vty, "\n"); +	} +} + +static void zebra_evpn_acc_vl_show_entry(struct vty *vty, +		struct zebra_evpn_access_bd *acc_bd, json_object *json) +{ +	if (!json) +		vty_out(vty, "%-5u %21s %-8d %u\n", +				acc_bd->vid, +				acc_bd->vxlan_zif ? +				acc_bd->vxlan_zif->ifp->name : "-", +				acc_bd->zvni ? acc_bd->zvni->vni : 0, +				listcount(acc_bd->mbr_zifs)); +} + +static void zebra_evpn_acc_vl_show_hash(struct hash_bucket *bucket, void *ctxt) +{ +	struct evpn_mh_show_ctx *wctx = ctxt; +	struct zebra_evpn_access_bd *acc_bd = bucket->data; + +	if (wctx->detail) +		zebra_evpn_acc_vl_show_entry_detail(wctx->vty, +				acc_bd, wctx->json); +	else +		zebra_evpn_acc_vl_show_entry(wctx->vty, +				acc_bd, wctx->json); +} + +void zebra_evpn_acc_vl_show(struct vty *vty, bool uj) +{ +	json_object *json = NULL; +	struct evpn_mh_show_ctx wctx; + +	memset(&wctx, 0, sizeof(wctx)); +	wctx.vty = vty; +	wctx.json = json; +	wctx.detail = false; + +	if (!json) +		vty_out(vty, "%-5s %21s %-8s %s\n", +				"VLAN", "VxLAN-IF", "L2-VNI", "# Members"); + +	hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash, +			&wctx); +} + +void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj) +{ +	json_object *json = NULL; +	struct evpn_mh_show_ctx wctx; + +	memset(&wctx, 0, sizeof(wctx)); +	wctx.vty = vty; +	wctx.json = json; +	wctx.detail = true; + +	hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash, +			&wctx); +} + +void 
zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid) +{ +	json_object *json = NULL; +	struct zebra_evpn_access_bd *acc_bd; + +	acc_bd = zebra_evpn_acc_vl_find(vid); +	if (!acc_bd) { +		if (!json) { +			vty_out(vty, "VLAN %u not present\n", vid); +			return; +		} +	} +	zebra_evpn_acc_vl_show_entry_detail(vty, acc_bd, json); +} + +/* Initialize VLAN member bitmap on an interface. Although VLAN membership + * is independent of EVPN we only process it if its of interest to EVPN-MH + * i.e. on access ports that can be setup as Ethernet Segments. And that is + * intended as an optimization. + */ +void zebra_evpn_if_init(struct zebra_if *zif) +{ +	if (!zebra_evpn_is_if_es_capable(zif)) +		return; + +	if (!bf_is_inited(zif->vlan_bitmap)) +		bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX); + +	/* if an es_id and sysmac are already present against the interface +	 * activate it +	 */ +	zebra_evpn_local_es_update(zif, zif->es_info.lid, &zif->es_info.sysmac); +} + +/* handle deletion of an access port by removing it from all associated + * broadcast domains. + */ +void zebra_evpn_if_cleanup(struct zebra_if *zif) +{ +	vlanid_t vid; + +	if (!bf_is_inited(zif->vlan_bitmap)) +		return; + +	bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { +		zebra_evpn_vl_mbr_deref(vid, zif); +	} + +	bf_free(zif->vlan_bitmap); + +	/* Delete associated Ethernet Segment */ +	if (zif->es_info.es) +		zebra_evpn_local_es_del(zif->es_info.es); +} + +/***************************************************************************** + * L2 NH/NHG Management + *   A L2 NH entry is programmed in the kernel for every ES-VTEP entry. This + * NH is then added to the L2-ECMP-NHG associated with the ES. + */ +static uint32_t zebra_evpn_nhid_alloc(bool is_nhg) +{ +	uint32_t id; +	int type; + +	bf_assign_index(zmh_info->nh_id_bitmap, id); + +	if (!id) +		return 0; + +	type = is_nhg ? 
EVPN_NHG_ID_TYPE_BIT : EVPN_NH_ID_TYPE_BIT; +	return (id | type); +} + +static void zebra_evpn_nhid_free(uint32_t nh_id) +{ +	uint32_t id = (nh_id & EVPN_NH_ID_VAL_MASK); + +	if (!id) +		return; + +	bf_release_index(zmh_info->nh_id_bitmap, id); +} + +/* The MAC ECMP group is activated on the first VTEP */ +static void zebra_evpn_nhg_update(struct zebra_evpn_es *es) +{ +	uint32_t nh_cnt = 0; +	struct nh_grp nh_ids[ES_VTEP_MAX_CNT]; +	struct zebra_evpn_es_vtep *es_vtep; +	struct listnode	*node; + +	if (!es->nhg_id) +		return; + +	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { +		if (!es_vtep->nh_id) +			continue; + +		if (nh_cnt >= ES_VTEP_MAX_CNT) +			break; + +		memset(&nh_ids[nh_cnt], 0, sizeof(struct nh_grp)); +		nh_ids[nh_cnt].id = es_vtep->nh_id; +		++nh_cnt; +	} + +	if (nh_cnt) { +		if (IS_ZEBRA_DEBUG_EVPN_MH_NH) { +			char nh_str[ES_VTEP_LIST_STR_SZ]; +			uint32_t i; + +			nh_str[0] = '\0'; +			for (i = 0; i < nh_cnt; ++i) +				sprintf(nh_str + strlen(nh_str), +						"0x%x ", nh_ids[i].id); +			zlog_debug("es %s nhg 0x%x add %s", +					es->esi_str, es->nhg_id, nh_str); +		} + +		es->flags |= ZEBRA_EVPNES_NHG_ACTIVE; +		kernel_upd_mac_nhg(es->nhg_id, nh_cnt, nh_ids); +	} else { +		if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { +			if (IS_ZEBRA_DEBUG_EVPN_MH_NH) +				zlog_debug("es %s nhg 0x%x del", +						es->esi_str, es->nhg_id); +			es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE; +			kernel_del_mac_nhg(es->nhg_id); +		} +	} + +	/* XXX - update remote macs associated with the ES */ +} + +static void zebra_evpn_nh_add(struct zebra_evpn_es_vtep *es_vtep) +{ +	if (es_vtep->nh_id) +		return; + +	es_vtep->nh_id = zebra_evpn_nhid_alloc(false); + +	if (!es_vtep->nh_id) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_NH) +		zlog_debug("es %s vtep %s nh 0x%x add", +				es_vtep->es->esi_str, +				inet_ntoa(es_vtep->vtep_ip), es_vtep->nh_id); +	/* install the NH */ +	kernel_upd_mac_nh(es_vtep->nh_id, es_vtep->vtep_ip); +	/* add the NH to the parent NHG */ +	
zebra_evpn_nhg_update(es_vtep->es); +} + +static void zebra_evpn_nh_del(struct zebra_evpn_es_vtep *es_vtep) +{ +	uint32_t nh_id; + +	if (!es_vtep->nh_id) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_NH) +		zlog_debug("es %s vtep %s nh 0x%x del", +				es_vtep->es->esi_str, +				inet_ntoa(es_vtep->vtep_ip), es_vtep->nh_id); + +	nh_id = es_vtep->nh_id; +	es_vtep->nh_id = 0; + +	/* remove the NH from the parent NHG */ +	zebra_evpn_nhg_update(es_vtep->es); +	/* uninstall the NH */ +	kernel_del_mac_nh(nh_id); +	zebra_evpn_nhid_free(nh_id); + +} + +/*****************************************************************************/ +/* Ethernet Segment Management + * 1. Ethernet Segment is a collection of links attached to the same + *    server (MHD) or switch (MHN) + * 2. An Ethernet Segment can span multiple PEs and is identified by the + *    10-byte ES-ID. + * 3. Zebra manages the local ESI configuration. + * 4. It also maintains the aliasing that maps an ESI (local or remote) + *    to one or more PEs/VTEPs. + * 5. remote ESs are added by BGP (on rxing EAD Type-1 routes) + */ +/* A list of remote VTEPs is maintained for each ES. This list includes - + * 1. VTEPs for which we have imported the ESR i.e. ES-peers + * 2. VTEPs that have an "active" ES-EVI VTEP i.e. 
EAD-per-ES and EAD-per-EVI + *    have been imported into one or more VNIs + */ +static int zebra_evpn_es_vtep_cmp(void *p1, void *p2) +{ +	const struct zebra_evpn_es_vtep *es_vtep1 = p1; +	const struct zebra_evpn_es_vtep *es_vtep2 = p2; + +	return es_vtep1->vtep_ip.s_addr - es_vtep2->vtep_ip.s_addr; +} + +static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_new( +		struct zebra_evpn_es *es, struct in_addr vtep_ip) +{ +	struct zebra_evpn_es_vtep *es_vtep; + +	es_vtep = XCALLOC(MTYPE_ZES_VTEP, sizeof(*es_vtep)); + +	es_vtep->es = es; +	es_vtep->vtep_ip.s_addr = vtep_ip.s_addr; +	listnode_init(&es_vtep->es_listnode, es_vtep); +	listnode_add_sort(es->es_vtep_list, &es_vtep->es_listnode); + +	return es_vtep; +} + +static void zebra_evpn_es_vtep_free(struct zebra_evpn_es_vtep *es_vtep) +{ +	struct zebra_evpn_es *es = es_vtep->es; + +	list_delete_node(es->es_vtep_list, &es_vtep->es_listnode); +	/* update the L2-NHG associated with the ES */ +	zebra_evpn_nh_del(es_vtep); +	XFREE(MTYPE_ZES_VTEP, es_vtep); +} + + +/* check if VTEP is already part of the list */ +static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_find( +		struct zebra_evpn_es *es, struct in_addr vtep_ip) +{ +	struct listnode *node = NULL; +	struct zebra_evpn_es_vtep *es_vtep; + +	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { +		if (es_vtep->vtep_ip.s_addr == vtep_ip.s_addr) +			return es_vtep; +	} +	return NULL; +} + +static void zebra_evpn_es_vtep_add(struct zebra_evpn_es *es, +		struct in_addr vtep_ip) +{ +	struct zebra_evpn_es_vtep *es_vtep; + +	es_vtep = zebra_evpn_es_vtep_find(es, vtep_ip); + +	if (!es_vtep) { +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("es %s vtep %s add", +					es->esi_str, inet_ntoa(vtep_ip)); +		es_vtep = zebra_evpn_es_vtep_new(es, vtep_ip); +		/* update the L2-NHG associated with the ES */ +		zebra_evpn_nh_add(es_vtep); +	} +} + +static void zebra_evpn_es_vtep_del(struct zebra_evpn_es *es, +		struct in_addr vtep_ip) +{ +	struct zebra_evpn_es_vtep *es_vtep; + 
+	es_vtep = zebra_evpn_es_vtep_find(es, vtep_ip); + +	if (es_vtep) { +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("es %s vtep %s del", +					es->esi_str, inet_ntoa(vtep_ip)); +		zebra_evpn_es_vtep_free(es_vtep); +	} +} + +/* compare ES-IDs for the global ES RB tree */ +static int zebra_es_rb_cmp(const struct zebra_evpn_es *es1, +		const struct zebra_evpn_es *es2) +{ +	return memcmp(&es1->esi, &es2->esi, ESI_BYTES); +} +RB_GENERATE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); + +/* Lookup ES */ +struct zebra_evpn_es *zebra_evpn_es_find(esi_t *esi) +{ +	struct zebra_evpn_es tmp; + +	memcpy(&tmp.esi, esi, sizeof(esi_t)); +	return RB_FIND(zebra_es_rb_head, &zmh_info->es_rb_tree, &tmp); +} + +/* A new local es is created when a local-es-id and sysmac is configured + * against an interface. + */ +static struct zebra_evpn_es *zebra_evpn_es_new(esi_t *esi) +{ +	struct zebra_evpn_es *es; + +	es = XCALLOC(MTYPE_ZES, sizeof(struct zebra_evpn_es)); + +	/* fill in ESI */ +	memcpy(&es->esi, esi, sizeof(esi_t)); +	esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str)); + +	/* Add to rb_tree */ +	if (RB_INSERT(zebra_es_rb_head, &zmh_info->es_rb_tree, es)) { +		XFREE(MTYPE_ZES, es); +		return NULL; +	} + +	/* Initialise the ES-EVI list */ +	es->es_evi_list = list_new(); +	listset_app_node_mem(es->es_evi_list); + +	/* Initialise the VTEP list */ +	es->es_vtep_list = list_new(); +	listset_app_node_mem(es->es_vtep_list); +	es->es_vtep_list->cmp = zebra_evpn_es_vtep_cmp; + +	/* reserve a NHG  */ +	es->nhg_id = zebra_evpn_nhid_alloc(true); + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("es %s nhg 0x%x new", es->esi_str, es->nhg_id); + +	return es; +} + +/* Free a given ES - + * This just frees appropriate memory, caller should have taken other + * needed actions. + */ +static void zebra_evpn_es_free(struct zebra_evpn_es *es) +{ +	/* If the ES has a local or remote reference it cannot be freed. +	 * Free is also prevented if there are MAC entries referencing +	 * it. 
+	 */ +	if ((es->flags & (ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_REMOTE)) || +			es->mac_cnt) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("es %s free", es->esi_str); + +	/* If the NHG is still installed uninstall it and free the id */ +	if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) { +		es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE; +		kernel_del_mac_nhg(es->nhg_id); +	} +	zebra_evpn_nhid_free(es->nhg_id); + +	/* cleanup resources maintained against the ES */ +	list_delete(&es->es_evi_list); +	list_delete(&es->es_vtep_list); + +	/* remove from the VNI-ESI rb tree */ +	RB_REMOVE(zebra_es_rb_head, &zmh_info->es_rb_tree, es); + +	XFREE(MTYPE_ZES, es); +} + +/* Inform BGP about local ES addition */ +static int zebra_evpn_es_send_add_to_client(struct zebra_evpn_es *es) +{ +	struct zserv *client; +	struct stream *s; +	uint8_t oper_up; + +	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); +	/* BGP may not be running. */ +	if (!client) +		return 0; + +	s = stream_new(ZEBRA_MAX_PACKET_SIZ); + +	zclient_create_header(s, ZEBRA_LOCAL_ES_ADD, zebra_vrf_get_evpn_id()); +	stream_put(s, &es->esi, sizeof(esi_t)); +	stream_put_ipv4(s, zmh_info->es_originator_ip.s_addr); +	oper_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP); +	stream_putc(s, oper_up); + +	/* Write packet size. */ +	stream_putw_at(s, 0, stream_get_endp(s)); + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("send add local es %s %s to %s", +				es->esi_str, +				inet_ntoa(zmh_info->es_originator_ip), +				zebra_route_string(client->proto)); + +	client->local_es_add_cnt++; +	return zserv_send_message(client, s); +} + +/* Inform BGP about local ES deletion */ +static int zebra_evpn_es_send_del_to_client(struct zebra_evpn_es *es) +{ +	struct zserv *client; +	struct stream *s; + +	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); +	/* BGP may not be running. 
*/ +	if (!client) +		return 0; + +	s = stream_new(ZEBRA_MAX_PACKET_SIZ); +	stream_reset(s); + +	zclient_create_header(s, ZEBRA_LOCAL_ES_DEL, zebra_vrf_get_evpn_id()); +	stream_put(s, &es->esi, sizeof(esi_t)); + +	/* Write packet size. */ +	stream_putw_at(s, 0, stream_get_endp(s)); + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("send del local es %s to %s", es->esi_str, +				zebra_route_string(client->proto)); + +	client->local_es_del_cnt++; +	return zserv_send_message(client, s); +} + +/* XXX - call any time ZEBRA_EVPNES_LOCAL gets set or cleared */ +static void zebra_evpn_es_re_eval_send_to_client(struct zebra_evpn_es *es, +		bool es_evi_re_reval) +{ +	bool old_ready; +	bool new_ready; +	struct listnode *node; +	struct zebra_evpn_es_evi *es_evi; + +	old_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP); + +	if ((es->flags & ZEBRA_EVPNES_LOCAL) && +			zmh_info->es_originator_ip.s_addr) +		es->flags |= ZEBRA_EVPNES_READY_FOR_BGP; +	else +		es->flags &= ~ZEBRA_EVPNES_READY_FOR_BGP; + +	new_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP); +	if (old_ready == new_ready) +		return; + +	if (new_ready) +		zebra_evpn_es_send_add_to_client(es); +	else +		zebra_evpn_es_send_del_to_client(es); + +	/* re-eval associated EVIs */ +	if (es_evi_re_reval) { +		for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) { +			if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)) +				continue; +			zebra_evpn_es_evi_re_eval_send_to_client(es_evi); +		} +	} +} + +void zebra_evpn_es_send_all_to_client(bool add) +{ +	struct listnode *es_node; +	struct listnode *evi_node; +	struct zebra_evpn_es *es; +	struct zebra_evpn_es_evi *es_evi; + +	if (!zmh_info) +		return; + +	for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, es_node, es)) { +		if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) { +			if (add) +				zebra_evpn_es_send_add_to_client(es); +			for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, +						evi_node, es_evi)) { +				if (!(es_evi->flags & +					ZEBRA_EVPNES_EVI_READY_FOR_BGP)) +					continue; 
+ +				if (add) +					zebra_evpn_es_evi_send_to_client( +						es, es_evi->zvni, +						true /* add */); +				else +					zebra_evpn_es_evi_send_to_client( +						es, es_evi->zvni, +						false /* add */); +			} +			if (!add) +				zebra_evpn_es_send_del_to_client(es); +		} +	} +} + +/* walk the vlan bitmap associated with the zif and create or delete + * es_evis for all vlans associated with a VNI. + * XXX: This API is really expensive. optimize later if possible. + */ +static void zebra_evpn_es_setup_evis(struct zebra_evpn_es *es) +{ +	struct zebra_if *zif = es->zif; +	uint16_t vid; +	struct zebra_evpn_access_bd *acc_bd; + + +	bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { +		acc_bd = zebra_evpn_acc_vl_find(vid); +		if (acc_bd->zvni) +			zebra_evpn_local_es_evi_add(es, acc_bd->zvni); +	} +} + +static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, +		struct zebra_if *zif) +{ +	if (es->flags & ZEBRA_EVPNES_LOCAL) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("local es %s add; nhg 0x%x if %s", +				es->esi_str, es->nhg_id, zif->ifp->name); + +	es->flags |= ZEBRA_EVPNES_LOCAL; +	listnode_init(&es->local_es_listnode, es); +	listnode_add(zmh_info->local_es_list, &es->local_es_listnode); + +	/* attach es to interface */ +	zif->es_info.es = es; + +	/* attach interface to es */ +	es->zif = zif; +	if (if_is_operative(zif->ifp)) +		es->flags |= ZEBRA_EVPNES_OPER_UP; + +	/* setup base-vni if one doesn't already exist; the ES will get sent +	 * to BGP as a part of that process +	 */ +	if (!zmh_info->es_base_vni) +		zebra_evpn_es_get_one_base_vni(); +	else +		/* send notification to bgp */ +		zebra_evpn_es_re_eval_send_to_client(es, +			false /* es_evi_re_reval */); + +	/* Setup ES-EVIs for all VxLAN stretched VLANs associated with +	 * the zif +	 */ +	zebra_evpn_es_setup_evis(es); +} + +static void zebra_evpn_es_local_info_clear(struct zebra_evpn_es *es) +{ +	struct zebra_if *zif; + +	if (!(es->flags & ZEBRA_EVPNES_LOCAL)) +		return; 
+ +	es->flags &= ~ZEBRA_EVPNES_LOCAL; +	/* clear the es from the parent interface */ +	zif = es->zif; +	zif->es_info.es = NULL; +	es->zif = NULL; + +	/* remove from the ES list */ +	list_delete_node(zmh_info->local_es_list, &es->local_es_listnode); + +	/* free up the ES if there is no remote reference */ +	zebra_evpn_es_free(es); +} + +/* Delete an ethernet segment and inform BGP */ +static void zebra_evpn_local_es_del(struct zebra_evpn_es *es) +{ +	struct zebra_evpn_es_evi *es_evi; +	struct listnode *node = NULL; +	struct listnode *nnode = NULL; +	struct zebra_if *zif; + +	if (!CHECK_FLAG(es->flags, ZEBRA_EVPNES_LOCAL)) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) { +		zif = es->zif; +		zlog_debug("local es %s del; nhg 0x%x if %s", +				es->esi_str, es->nhg_id, +				zif ? zif->ifp->name : "-"); +	} + +	/* remove all ES-EVIs associated with the ES */ +	for (ALL_LIST_ELEMENTS(es->es_evi_list, node, nnode, es_evi)) +		zebra_evpn_local_es_evi_do_del(es_evi); + +	/* send a del if the ES had been sent to BGP earlier */ +	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) +		zebra_evpn_es_send_del_to_client(es); + +	zebra_evpn_es_local_info_clear(es); +} + +/* eval remote info associated with the ES */ +static void zebra_evpn_es_remote_info_re_eval(struct zebra_evpn_es *es) +{ +	/* if there are remote VTEPs the ES-EVI is classified as "remote" */ +	if (listcount(es->es_vtep_list)) { +		if (!(es->flags & ZEBRA_EVPNES_REMOTE)) { +			es->flags |= ZEBRA_EVPNES_REMOTE; +			if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +				zlog_debug("remote es %s add; nhg 0x%x", +						es->esi_str, es->nhg_id); +		} +	} else { +		if (es->flags & ZEBRA_EVPNES_REMOTE) { +			es->flags &= ~ZEBRA_EVPNES_REMOTE; +			if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +				zlog_debug("remote es %s del; nhg 0x%x", +						es->esi_str, es->nhg_id); +			zebra_evpn_es_free(es); +		} +	} +} + +/* A new local es is created when a local-es-id and sysmac is configured + * against an interface. 
+ */ +static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, +		struct ethaddr *sysmac) +{ +	struct zebra_evpn_es *old_es = zif->es_info.es; +	struct zebra_evpn_es *es; +	esi_t esi; +	int offset = 0; +	int field_bytes = 0; + +	/* Complete config of the ES-ID bootstraps the ES */ +	if (!lid || is_zero_mac(sysmac)) { +		/* if in ES is attached to zif delete it */ +		if (old_es) +			zebra_evpn_local_es_del(old_es); +		return 0; +	} + +	/* build 10-byte type-3-ESI - +	 * Type(1-byte), MAC(6-bytes), ES-LID (3-bytes) +	 */ +	field_bytes = 1; +	esi.val[offset] = ESI_TYPE_MAC; +	offset += field_bytes; + +	field_bytes = ETH_ALEN; +	memcpy(&esi.val[offset], (uint8_t *)sysmac, field_bytes); +	offset += field_bytes; + +	esi.val[offset++] = (uint8_t)(lid >> 16); +	esi.val[offset++] = (uint8_t)(lid >> 8); +	esi.val[offset++] = (uint8_t)lid; + +	if (old_es && !memcmp(&old_es->esi, &esi, sizeof(esi_t))) +		/* dup - nothing to be done */ +		return 0; + +	/* release the old_es against the zif */ +	if (old_es) +		zebra_evpn_local_es_del(old_es); + +	es = zebra_evpn_es_find(&esi); +	if (es) { +		/* if it exists against another interface flag an error */ +		if (es->zif && es->zif != zif) +			return -1; +	} else { +		/* create new es */ +		es = zebra_evpn_es_new(&esi); +	} + +	zebra_evpn_es_local_info_set(es, zif); + +	return 0; +} + +static int zebra_evpn_remote_es_del(esi_t *esi, struct in_addr vtep_ip) +{ +	char buf[ESI_STR_LEN]; +	struct zebra_evpn_es *es; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("remote es %s vtep %s del", +				esi_to_str(esi, buf, sizeof(buf)), +				inet_ntoa(vtep_ip)); + +	es = zebra_evpn_es_find(esi); +	if (!es) { +		/* XXX - error log */ +		return -1; +	} + +	zebra_evpn_es_vtep_del(es, vtep_ip); +	zebra_evpn_es_remote_info_re_eval(es); + +	return 0; +} + +/* force delete a remote ES on the way down */ +static void zebra_evpn_remote_es_flush(struct zebra_evpn_es *es) +{ +	struct zebra_evpn_es_vtep *es_vtep; +	struct listnode	*node; +	
struct listnode	*nnode; + +	for (ALL_LIST_ELEMENTS(es->es_vtep_list, node, nnode, es_vtep)) { +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("es %s vtep %s flush", +					es->esi_str, +					inet_ntoa(es_vtep->vtep_ip)); +		zebra_evpn_es_vtep_free(es_vtep); +		zebra_evpn_es_remote_info_re_eval(es); +	} +} + +static int zebra_evpn_remote_es_add(esi_t *esi, struct in_addr vtep_ip) +{ +	char buf[ESI_STR_LEN]; +	struct zebra_evpn_es *es; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("remote es %s vtep %s add", +				esi_to_str(esi, buf, sizeof(buf)), +				inet_ntoa(vtep_ip)); + +	es = zebra_evpn_es_find(esi); +	if (!es) { +		es = zebra_evpn_es_new(esi); +		if (!es) { +			/* XXX - error log */ +			return -1; +		} +	} + +	zebra_evpn_es_vtep_add(es, vtep_ip); +	zebra_evpn_es_remote_info_re_eval(es); + +	return 0; +} + +void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS) +{ +	struct stream *s; +	struct in_addr vtep_ip; +	esi_t esi; + +	if (!is_evpn_enabled()) { +		zlog_debug( +				"%s: EVPN not enabled yet we received a es_add zapi call", +				__PRETTY_FUNCTION__); +		return; +	} + +	memset(&esi, 0, sizeof(esi_t)); +	s = msg; + +	stream_get(&esi, s, sizeof(esi_t)); +	vtep_ip.s_addr = stream_get_ipv4(s); + +	if (hdr->command == ZEBRA_REMOTE_ES_VTEP_ADD) +		zebra_evpn_remote_es_add(&esi, vtep_ip); +	else +		zebra_evpn_remote_es_del(&esi, vtep_ip); +} + +void zebra_evpn_es_mac_deref_entry(zebra_mac_t *mac) +{ +	struct zebra_evpn_es *es = mac->es; + +	mac->es = NULL; +	if (!es || !es->mac_cnt) +		return; + +	--es->mac_cnt; +	if (!es->mac_cnt) +		zebra_evpn_es_free(es); +} + +/* Associate a MAC entry with a local or remote ES. Returns false if there + * was no ES change. 
+ */ +bool zebra_evpn_es_mac_ref_entry(zebra_mac_t *mac, struct zebra_evpn_es *es) +{ +	if (mac->es == es) +		return false; + +	if (mac->es) +		zebra_evpn_es_mac_deref_entry(mac); + +	if (!es) +		return true; + +	mac->es = es; +	++es->mac_cnt; +	return true; +} + +void zebra_evpn_es_mac_ref(zebra_mac_t *mac, esi_t *esi) +{ +	struct zebra_evpn_es *es; + +	es = zebra_evpn_es_find(esi); +	if (!es) { +		es = zebra_evpn_es_new(esi); +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("auto es %s add on mac ref", es->esi_str); +	} + +	zebra_evpn_es_mac_ref_entry(mac, es); +} + +/* Inform BGP about local ES-EVI add or del */ +static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es, +		zebra_vni_t *zvni, bool add) +{ +	struct zserv *client; +	struct stream *s; + +	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); +	/* BGP may not be running. */ +	if (!client) +		return 0; + +	s = stream_new(ZEBRA_MAX_PACKET_SIZ); + +	zclient_create_header(s, +			add ? ZEBRA_LOCAL_ES_EVI_ADD : ZEBRA_LOCAL_ES_EVI_DEL, +			zebra_vrf_get_evpn_id()); +	stream_put(s, &es->esi, sizeof(esi_t)); +	stream_putl(s, zvni->vni); + +	/* Write packet size. */ +	stream_putw_at(s, 0, stream_get_endp(s)); + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("send %s local es %s evi %u to %s", +				add ? 
"add" : "del", +				es->esi_str, zvni->vni, +				zebra_route_string(client->proto)); + +	client->local_es_add_cnt++; +	return zserv_send_message(client, s); +} + +/* sysmac part of a local ESI has changed */ +static int zebra_evpn_es_sys_mac_update(struct zebra_if *zif, +		struct ethaddr *sysmac) +{ +	int rv; + +	rv = zebra_evpn_local_es_update(zif, zif->es_info.lid, sysmac); +	if (!rv) +		memcpy(&zif->es_info.sysmac, sysmac, sizeof(struct ethaddr)); + +	return rv; +} + +/* local-ID part of ESI has changed */ +static int zebra_evpn_es_lid_update(struct zebra_if *zif, uint32_t lid) +{ +	int rv; + +	rv = zebra_evpn_local_es_update(zif, lid, &zif->es_info.sysmac); +	if (!rv) +		zif->es_info.lid = lid; + +	return rv; +} + +void zebra_evpn_es_cleanup(void) +{ +	struct zebra_evpn_es *es; +	struct zebra_evpn_es *es_next; + +	RB_FOREACH_SAFE(es, zebra_es_rb_head, +			&zmh_info->es_rb_tree, es_next) { +		zebra_evpn_local_es_del(es); +		zebra_evpn_remote_es_flush(es); +	} +} + +/* Only certain types of access ports can be setup as an Ethernet Segment */ +bool zebra_evpn_is_if_es_capable(struct zebra_if *zif) +{ +	if (zif->zif_type == ZEBRA_IF_BOND) +		return true; + +	/* XXX: allow swpX i.e. a regular ethernet port to be an ES link too */ +	return false; +} + +void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif) +{ +	char buf[ETHER_ADDR_STRLEN]; + +	if (zif->es_info.lid || !is_zero_mac(&zif->es_info.sysmac)) +		vty_out(vty, "  EVPN MH: ES id %u ES sysmac %s\n", +				zif->es_info.lid, +				prefix_mac2str(&zif->es_info.sysmac, +					buf, sizeof(buf))); +} + +void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up) +{ +	struct zebra_evpn_es *es = zif->es_info.es; +	bool old_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP); + +	if (old_up == up) +		return; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("es %s state changed to %s ", +				es->esi_str, +				up ? 
"up" : "down"); +	if (up) +		es->flags |= ZEBRA_EVPNES_OPER_UP; +	else +		es->flags &= ~ZEBRA_EVPNES_OPER_UP; + +	/* inform BGP of the ES oper state change */ +	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) +		zebra_evpn_es_send_add_to_client(es); +} + +static char *zebra_evpn_es_vtep_str(char *vtep_str, +		struct zebra_evpn_es *es) +{ +	struct zebra_evpn_es_vtep *zvtep; +	struct listnode	*node; +	bool first = true; + +	vtep_str[0] = '\0'; +	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep)) { +		if (first) { +			first = false; +			sprintf(vtep_str + strlen(vtep_str), "%s", +					inet_ntoa(zvtep->vtep_ip)); +		} else { +			sprintf(vtep_str + strlen(vtep_str), ",%s", +					inet_ntoa(zvtep->vtep_ip)); +		} +	} +	return vtep_str; +} + +static void zebra_evpn_es_show_entry(struct vty *vty, +		struct zebra_evpn_es *es, json_object *json) +{ +	char type_str[4]; +	char vtep_str[ES_VTEP_LIST_STR_SZ]; + +	if (json) { +		/* XXX */ +	} else { +		type_str[0] = '\0'; +		if (es->flags & ZEBRA_EVPNES_LOCAL) +			strcpy(type_str + strlen(type_str), "L"); +		if (es->flags & ZEBRA_EVPNES_REMOTE) +			strcpy(type_str + strlen(type_str), "R"); + +		zebra_evpn_es_vtep_str(vtep_str, es); + +		vty_out(vty, "%-30s %-4s %-21s %s\n", +				es->esi_str, type_str, +				es->zif ? es->zif->ifp->name : "-", +				vtep_str); +	} +} + +static void zebra_evpn_es_show_entry_detail(struct vty *vty, +		struct zebra_evpn_es *es, json_object *json) +{ +	char type_str[80]; +	struct zebra_evpn_es_vtep *zvtep; +	struct listnode	*node; + +	if (json) { +		/* XXX */ +	} else { +		type_str[0] = '\0'; +		if (es->flags & ZEBRA_EVPNES_LOCAL) +			strcpy(type_str + strlen(type_str), "Local"); +		if (es->flags & ZEBRA_EVPNES_REMOTE) { +			if (strlen(type_str)) +				strcpy(type_str + strlen(type_str), ","); +			strcpy(type_str + strlen(type_str), "Remote"); +		} + +		vty_out(vty, "ESI: %s\n", es->esi_str); +		vty_out(vty, " Type: %s\n", type_str); +		vty_out(vty, " Interface: %s\n", +				(es->zif) ? 
+				es->zif->ifp->name : "-"); +		vty_out(vty, " State: %s\n", +				(es->flags & ZEBRA_EVPNES_OPER_UP) ? +				"up" : "down"); +		vty_out(vty, " Ready for BGP: %s\n", +				(es->flags & ZEBRA_EVPNES_READY_FOR_BGP) ? +				"yes" : "no"); +		vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list)); +		vty_out(vty, " MAC Count: %d\n", es->mac_cnt); +		vty_out(vty, " Nexthop group: 0x%x\n", es->nhg_id); +		vty_out(vty, " VTEPs:\n"); +		for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep)) +			vty_out(vty, "     %s nh: 0x%x\n", +					inet_ntoa(zvtep->vtep_ip), +					zvtep->nh_id); + +		vty_out(vty, "\n"); +	} +} + +void zebra_evpn_es_show(struct vty *vty, bool uj) +{ +	struct zebra_evpn_es *es; +	json_object *json = NULL; + +	if (uj) { +		/* XXX */ +	} else { +		vty_out(vty, "Type: L local, R remote\n"); +		vty_out(vty, "%-30s %-4s %-21s %s\n", +				"ESI", "Type", "ES-IF", "VTEPs"); +	} + +	RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree) +		zebra_evpn_es_show_entry(vty, es, json); +} + +void zebra_evpn_es_show_detail(struct vty *vty, bool uj) +{ +	struct zebra_evpn_es *es; +	json_object *json = NULL; + +	RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree) +		zebra_evpn_es_show_entry_detail(vty, es, json); +} + +void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi) +{ +	struct zebra_evpn_es *es; +	char esi_str[ESI_STR_LEN]; +	json_object *json = NULL; + +	es = zebra_evpn_es_find(esi); + +	if (!es) { +		esi_to_str(esi, esi_str, sizeof(esi_str)); +		vty_out(vty, "ESI %s does not exist\n", esi_str); +		return; +	} + +	zebra_evpn_es_show_entry_detail(vty, es, json); +} + +int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp) +{ +	struct zebra_if *zif = ifp->info; +	char buf[ETHER_ADDR_STRLEN]; + +	if (zif->es_info.lid) +		vty_out(vty, " evpn mh es-id %u\n", zif->es_info.lid); + +	if (!is_zero_mac(&zif->es_info.sysmac)) +		vty_out(vty, " evpn mh es-sys-mac %s\n", +				prefix_mac2str(&zif->es_info.sysmac, +					buf, sizeof(buf))); +	
return 0; +} + +#ifndef VTYSH_EXTRACT_PL +#include "zebra/zebra_evpn_mh_clippy.c" +#endif +/* CLI for setting up sysmac part of ESI on an access port */ +DEFPY(zebra_evpn_es_sys_mac, +      zebra_evpn_es_sys_mac_cmd, +      "[no$no] evpn mh es-sys-mac [X:X:X:X:X:X$mac]", +      NO_STR +      "EVPN\n" +      EVPN_MH_VTY_STR +      "Ethernet segment system MAC\n" +      MAC_STR +) +{ +	VTY_DECLVAR_CONTEXT(interface, ifp); +	struct zebra_if *zif; +	int ret = 0; + +	zif = ifp->info; + +	if (no) { +		static struct ethaddr zero_mac; + +		ret = zebra_evpn_es_sys_mac_update(zif, &zero_mac); +		if (ret == -1) { +			vty_out(vty, "%%Failed to clear ES sysmac\n"); +			return CMD_WARNING; +		} +	} else { + +		if (!zebra_evpn_is_if_es_capable(zif)) { +			vty_out(vty, +				"%%ESI cannot be associated with this interface type\n"); +			return CMD_WARNING; +		} + +		if  (!mac || is_zero_mac(&mac->eth_addr)) { +			vty_out(vty, "%%ES sysmac value is invalid\n"); +			return CMD_WARNING; +		} + +		ret = zebra_evpn_es_sys_mac_update(zif, &mac->eth_addr); +		if (ret == -1) { +			vty_out(vty, "%%ESI already exists on a different interface\n"); +			return CMD_WARNING; +		} +	} +	return CMD_SUCCESS; +} + +/* CLI for setting up local-ID part of ESI on an access port */ +DEFPY(zebra_evpn_es_id, +      zebra_evpn_es_id_cmd, +      "[no$no] evpn mh es-id [(1-16777215)$es_lid]", +      NO_STR +      "EVPN\n" +      EVPN_MH_VTY_STR +      "Ethernet segment local identifier\n" +      "ID\n" +) +{ +	VTY_DECLVAR_CONTEXT(interface, ifp); +	struct zebra_if *zif; +	int ret; + +	zif = ifp->info; + +	if (no) { +		ret = zebra_evpn_es_lid_update(zif, 0); +		if (ret == -1) { +			vty_out(vty, "%%Failed to clear ES local id\n"); +			return CMD_WARNING; +		} +	} else { +		if (!zebra_evpn_is_if_es_capable(zif)) { +			vty_out(vty, +				"%%ESI cannot be associated with this interface type\n"); +			return CMD_WARNING; +		} + +		if  (!es_lid) { +			vty_out(vty, "%%Specify local ES ID\n"); +			return CMD_WARNING; +		
} +		ret = zebra_evpn_es_lid_update(zif, es_lid); +		if (ret == -1) { +			vty_out(vty, +				"%%ESI already exists on a different interface\n"); +			return CMD_WARNING; +		} +	} +	return CMD_SUCCESS; +} + +/*****************************************************************************/ +/* A base L2-VNI is maintained to derive parameters such as ES originator-IP. + * XXX: once single vxlan device model becomes available this will not be + * necessary + */ +/* called when a new vni is added or becomes oper up or becomes a bridge port */ +void zebra_evpn_es_set_base_vni(zebra_vni_t *zvni) +{ +	struct listnode *node; +	struct zebra_evpn_es *es; + +	if (zmh_info->es_base_vni) { +		if (zmh_info->es_base_vni != zvni) { +			/* unrelated VNI; ignore it */ +			return; +		} +		/* check if the local vtep-ip has changed */ +	} else { +		/* check if the VNI can be used as base VNI */ +		if (!zebra_evpn_vni_send_to_client_ok(zvni)) +			return; + +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("es base vni set to %d", +					zvni->vni); +		zmh_info->es_base_vni = zvni; +	} + +	/* update local VTEP-IP */ +	if (zmh_info->es_originator_ip.s_addr == +			zmh_info->es_base_vni->local_vtep_ip.s_addr) +		return; + +	zmh_info->es_originator_ip.s_addr = +		zmh_info->es_base_vni->local_vtep_ip.s_addr; + +	if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +		zlog_debug("es originator ip set to %s", +			inet_ntoa(zmh_info->es_base_vni->local_vtep_ip)); + +	/* if originator ip changes we need to update bgp */ +	for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) { +		if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) +			zebra_evpn_es_send_add_to_client(es); +		else +			zebra_evpn_es_re_eval_send_to_client(es, +					true /* es_evi_re_reval */); +	} +} + +/* called when a vni is removed or becomes oper down or is removed from a + * bridge + */ +void zebra_evpn_es_clear_base_vni(zebra_vni_t *zvni) +{ +	struct listnode *node; +	struct zebra_evpn_es *es; + +	if (zmh_info->es_base_vni != zvni) +		return; + +	
zmh_info->es_base_vni = NULL; +	/* lost current base VNI; try to find a new one */ +	zebra_evpn_es_get_one_base_vni(); + +	/* couldn't locate an eligible base vni */ +	if (!zmh_info->es_base_vni && zmh_info->es_originator_ip.s_addr) { +		if (IS_ZEBRA_DEBUG_EVPN_MH_ES) +			zlog_debug("es originator ip cleared"); + +		zmh_info->es_originator_ip.s_addr = 0; +		/* lost originator ip */ +		for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) { +			zebra_evpn_es_re_eval_send_to_client(es, +					true /* es_evi_re_reval */); +		} +	} +} + +/* Locate an "eligible" L2-VNI to follow */ +static int zebra_evpn_es_get_one_base_vni_cb(struct hash_bucket *b, void *data) +{ +	zebra_vni_t *zvni = b->data; + +	zebra_evpn_es_set_base_vni(zvni); + +	if (zmh_info->es_base_vni) +		return HASHWALK_ABORT; + +	return HASHWALK_CONTINUE; +} + +/* locate a base_vni to follow for the purposes of common params like + * originator IP + */ +static void zebra_evpn_es_get_one_base_vni(void) +{ +	struct zebra_vrf *zvrf; + +	zvrf = zebra_vrf_get_evpn(); +	hash_walk(zvrf->vni_table, zebra_evpn_es_get_one_base_vni_cb, NULL); +} + +/*****************************************************************************/ +void zebra_evpn_interface_init(void) +{ +	install_element(INTERFACE_NODE, &zebra_evpn_es_id_cmd); +	install_element(INTERFACE_NODE, &zebra_evpn_es_sys_mac_cmd); +} + +void zebra_evpn_mh_init(void) +{ +	zrouter.mh_info = XCALLOC(MTYPE_ZMH_INFO, sizeof(*zrouter.mh_info)); + +	/* setup ES tables */ +	RB_INIT(zebra_es_rb_head, &zmh_info->es_rb_tree); +	zmh_info->local_es_list = list_new(); +	listset_app_node_mem(zmh_info->local_es_list); + +	bf_init(zmh_info->nh_id_bitmap, EVPN_NH_ID_MAX); +	bf_assign_zero_index(zmh_info->nh_id_bitmap); + +	/* setup broadcast domain tables */ +	zmh_info->evpn_vlan_table = hash_create(zebra_evpn_acc_vl_hash_keymake, +			zebra_evpn_acc_vl_cmp, "access VLAN hash table"); +} + +void zebra_evpn_mh_terminate(void) +{ +	list_delete(&zmh_info->local_es_list); + +	
hash_iterate(zmh_info->evpn_vlan_table, +			zebra_evpn_acc_vl_cleanup_all, NULL); +	hash_free(zmh_info->evpn_vlan_table); +} diff --git a/zebra/zebra_evpn_mh.h b/zebra/zebra_evpn_mh.h new file mode 100644 index 0000000000..795053d649 --- /dev/null +++ b/zebra/zebra_evpn_mh.h @@ -0,0 +1,228 @@ +/* + * Zebra EVPN MH Data structures and definitions + * + * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + */ + +#ifndef _ZEBRA_EVPN_MH_H +#define _ZEBRA_EVPN_MH_H + +#include <zebra.h> + +#include "if.h" +#include "linklist.h" +#include "bitfield.h" +#include "zebra_vxlan.h" +#include "zebra_vxlan_private.h" + +#define EVPN_MH_VTY_STR "Multihoming\n" + +/* Ethernet Segment entry - + * - Local and remote ESs are maintained in a global RB tree, + * zmh_info->es_rb_tree using ESI as key + * - Local ESs are added via zebra config (ZEBRA_EVPNES_LOCAL) when an + *   access port is associated with an ES-ID + * - Remote ESs are added by BGP based on received/remote EAD/Type-1 routes + *   (ZEBRA_EVPNES_REMOTE) + * - An ES can be simultaneously LOCAL and REMOTE; in fact all LOCAL ESs are + *   expected to have REMOTE ES peers. 
+ */ +struct zebra_evpn_es { +	esi_t esi; +	char esi_str[ESI_STR_LEN]; + +	/* ES flags */ +	uint32_t flags; +#define ZEBRA_EVPNES_LOCAL         (1 << 0) /* configured in zebra */ +#define ZEBRA_EVPNES_REMOTE        (1 << 1) /* added by bgp */ +#define ZEBRA_EVPNES_OPER_UP       (1 << 2) /* es->zif is oper-up */ +#define ZEBRA_EVPNES_READY_FOR_BGP (1 << 3) /* ready to be sent to BGP */ +#define ZEBRA_EVPNES_NHG_ACTIVE    (1 << 4) /* NHG has been installed */ + +	/* memory used for adding the es to zmh_info->es_rb_tree */ +	RB_ENTRY(zebra_evpn_es) rb_node; + +	/* [EVPNES_LOCAL] memory used for linking the es to +	 * zmh_info->local_es_list +	 */ +	struct listnode local_es_listnode; + +	/* [EVPNES_LOCAL] corresponding interface */ +	struct zebra_if *zif; + +	/* list of ES-EVIs associated with the ES */ +	struct list *es_evi_list; + +	/* [!EVPNES_LOCAL] List of remote VTEPs (zebra_evpn_es_vtep) */ +	struct list *es_vtep_list; + +	/* zebra_mac_t entries using this ES as destination */ +	uint32_t mac_cnt; + +	/* Nexthop group id */ +	uint32_t nhg_id; +}; +RB_HEAD(zebra_es_rb_head, zebra_evpn_es); +RB_PROTOTYPE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); + +/* ES per-EVI info + * - ES-EVIs are maintained per-VNI (vni->es_evi_rb_tree) + * - Local ES-EVIs are linked to per-VNI list for quick access + * - Although some infrastructure is present for remote ES-EVIs, currently + *   BGP does NOT send remote ES-EVIs to zebra. 
This may change in the + *   future (but must be changed thoughtfully and only if needed as ES-EVI + *   can get prolific and come in the way of rapid failovers) + */ +struct zebra_evpn_es_evi { +	struct zebra_evpn_es *es; +	zebra_vni_t *zvni; + +	/* ES-EVI flags */ +	uint32_t flags; +	/* local ES-EVI */ +#define ZEBRA_EVPNES_EVI_LOCAL         (1 << 0) /* created by zebra */ +#define ZEBRA_EVPNES_EVI_READY_FOR_BGP (1 << 1) /* ready to be sent to BGP */ + +	/* memory used for adding the es_evi to +	 * es_evi->zvni->es_evi_rb_tree +	 */ +	RB_ENTRY(zebra_evpn_es_evi) rb_node; +	/* memory used for linking the es_evi to +	 * es_evi->zvni->local_es_evi_list +	 */ +	struct listnode l2vni_listnode; +	/* memory used for linking the es_evi to +	 * es_evi->es->es_evi_list +	 */ +	struct listnode es_listnode; +}; + +/* PE attached to an ES */ +struct zebra_evpn_es_vtep { +	struct zebra_evpn_es *es; /* parent ES */ +	struct in_addr vtep_ip; + +	/* memory used for adding the entry to es->es_vtep_list */ +	struct listnode es_listnode; + +	/* MAC nexthop */ +	uint32_t nh_id; + +	/* XXX - maintain a backpointer to zebra_vtep_t */ +}; + +/* Local/access-side broadcast domain - zebra_evpn_access_bd is added to - + * zmh_info->evpn_vlan_table (for VLAN aware bridges) OR + * zrouter->evpn_bridge_table (for VLAN unaware bridges) + * XXX - support for VLAN unaware bridges is yet to be fleshed out + */ +struct zebra_evpn_access_bd { +	vlanid_t vid; + +	struct zebra_if *vxlan_zif; /* vxlan device */ +	/* list of members associated with the BD i.e. 
(potential) ESs */ +	struct list *mbr_zifs; +	/* presence of zvni activates the EVI on all the ESs in mbr_zifs */ +	zebra_vni_t *zvni; +}; + +/* multihoming information stored in zrouter */ +#define zmh_info (zrouter.mh_info) +struct zebra_evpn_mh_info { +	/* RB tree of Ethernet segments (used for EVPN-MH)  */ +	struct zebra_es_rb_head es_rb_tree; +	/* List of local ESs */ +	struct list *local_es_list; + +	/* EVPN MH broadcast domains indexed by the VID */ +	struct hash *evpn_vlan_table; + +	/* A base L2-VNI is maintained to derive parameters such as +	 * ES originator-IP. +	 * XXX: once single vxlan device model becomes available this will +	 * not be necessary +	 */ +	zebra_vni_t *es_base_vni; +	struct in_addr es_originator_ip; + +	/* L2 NH and NHG ids - +	 * Most significant 8 bits is type. Lower 24 bits is the value +	 * allocated from the nh_id_bitmap. +	 */ +	bitfield_t nh_id_bitmap; +#define EVPN_NH_ID_MAX       (16*1024) +#define EVPN_NH_ID_VAL_MASK  0xffffff +#define EVPN_NH_ID_TYPE_POS  24 +/* The purpose of using different types for NHG and NH is NOT to manage the + * id space separately. It is simply to make debugging easier. 
+ */ +#define EVPN_NH_ID_TYPE_BIT  (1 << EVPN_NH_ID_TYPE_POS) +#define EVPN_NHG_ID_TYPE_BIT (2 << EVPN_NH_ID_TYPE_POS) +}; + +static inline bool zebra_evpn_mac_is_es_local(zebra_mac_t *mac) +{ +	return mac->es && (mac->es->flags & ZEBRA_EVPNES_LOCAL); +} + +/* Returns true if the id is of L2-NHG or L2-NH type */ +static inline bool zebra_evpn_mh_is_fdb_nh(uint32_t id) +{ +	return ((id & EVPN_NHG_ID_TYPE_BIT) || +			(id & EVPN_NH_ID_TYPE_BIT)); +} + +/*****************************************************************************/ +extern esi_t *zero_esi; +extern void zebra_evpn_mh_init(void); +extern void zebra_evpn_mh_terminate(void); +extern bool zebra_evpn_is_if_es_capable(struct zebra_if *zif); +extern void zebra_evpn_if_init(struct zebra_if *zif); +extern void zebra_evpn_if_cleanup(struct zebra_if *zif); +extern void zebra_evpn_vni_es_init(zebra_vni_t *zvni); +extern void zebra_evpn_vni_es_cleanup(zebra_vni_t *zvni); +extern void zebra_evpn_vxl_vni_set(struct zebra_if *zif, zebra_vni_t *zvni, +		bool set); +extern void zebra_evpn_es_set_base_vni(zebra_vni_t *zvni); +extern void zebra_evpn_es_clear_base_vni(zebra_vni_t *zvni); +extern void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif); +extern void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif); +extern void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif); +extern void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif); +extern void zebra_evpn_es_send_all_to_client(bool add); +extern void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up); +extern void zebra_evpn_es_show(struct vty *vty, bool uj); +extern void zebra_evpn_es_show_detail(struct vty *vty, bool uj); +extern void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi); +extern void zebra_evpn_vni_update_all_es(zebra_vni_t *zvni); +extern void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS); +extern void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail); +extern void 
zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, +		vni_t vni, int detail); +extern void zebra_evpn_es_mac_deref_entry(zebra_mac_t *mac); +extern bool zebra_evpn_es_mac_ref_entry(zebra_mac_t *mac, +		struct zebra_evpn_es *es); +extern void zebra_evpn_es_mac_ref(zebra_mac_t *mac, esi_t *esi); +extern struct zebra_evpn_es *zebra_evpn_es_find(esi_t *esi); +extern void zebra_evpn_interface_init(void); +extern int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp); +extern void zebra_evpn_acc_vl_show(struct vty *vty, bool uj); +extern void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj); +extern void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid); +extern void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif); +extern void zebra_evpn_es_cleanup(void); + +#endif /* _ZEBRA_EVPN_MH_H */ diff --git a/zebra/zebra_l2.c b/zebra/zebra_l2.c index e549d80a5c..1758c8f96a 100644 --- a/zebra/zebra_l2.c +++ b/zebra/zebra_l2.c @@ -43,6 +43,7 @@  #include "zebra/rt_netlink.h"  #include "zebra/zebra_l2.h"  #include "zebra/zebra_vxlan.h" +#include "zebra/zebra_evpn_mh.h"  /* definitions */ @@ -182,6 +183,7 @@ void zebra_l2_vxlanif_add_update(struct interface *ifp,  	if (add) {  		memcpy(&zif->l2info.vxl, vxlan_info, sizeof(*vxlan_info)); +		zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif);  		zebra_vxlan_if_add(ifp);  		return;  	} @@ -220,6 +222,9 @@ void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,  		return;  	zif->l2info.vxl.access_vlan = access_vlan; + +	zebra_evpn_vl_vxl_deref(old_access_vlan, zif); +	zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif);  	zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_VLAN_CHANGE);  } @@ -228,6 +233,12 @@ void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,   */  void zebra_l2_vxlanif_del(struct interface *ifp)  { +	struct zebra_if *zif; + +	zif = ifp->info; +	assert(zif); + +	zebra_evpn_vl_vxl_deref(zif->l2info.vxl.access_vlan, zif);  	zebra_vxlan_if_del(ifp);  } 
@@ -289,3 +300,43 @@ void zebra_l2if_update_bond_slave(struct interface *ifp, ifindex_t bond_ifindex)  	else if (old_bond_ifindex != IFINDEX_INTERNAL)  		zebra_l2_unmap_slave_from_bond(&zif->bondslave_info);  } + +void zebra_vlan_bitmap_compute(struct interface *ifp, +		uint32_t vid_start, uint16_t vid_end) +{ +	uint32_t vid; +	struct zebra_if *zif; + +	zif = (struct zebra_if *)ifp->info; +	assert(zif); + +	for (vid = vid_start; vid <= vid_end; ++vid) +		bf_set_bit(zif->vlan_bitmap, vid); +} + +void zebra_vlan_mbr_re_eval(struct interface *ifp, bitfield_t old_vlan_bitmap) +{ +	uint32_t vid; +	struct zebra_if *zif; + +	zif = (struct zebra_if *)ifp->info; +	assert(zif); + +	if (!bf_cmp(zif->vlan_bitmap, old_vlan_bitmap)) +		/* no change */ +		return; + +	bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { +		/* if not already set create new reference */ +		if (!bf_test_index(old_vlan_bitmap, vid)) +			zebra_evpn_vl_mbr_ref(vid, zif); + +		/* also clear from the old vlan bitmap */ +		bf_release_index(old_vlan_bitmap, vid); +	} + +	/* any bits remaining in the old vlan bitmap are stale references */ +	bf_for_each_set_bit(old_vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { +		zebra_evpn_vl_mbr_deref(vid, zif); +	} +} diff --git a/zebra/zebra_l2.h b/zebra/zebra_l2.h index 23875331f7..2735d915ec 100644 --- a/zebra/zebra_l2.h +++ b/zebra/zebra_l2.h @@ -105,6 +105,10 @@ extern void zebra_l2if_update_bridge_slave(struct interface *ifp,  extern void zebra_l2if_update_bond_slave(struct interface *ifp,  					 ifindex_t bond_ifindex); +extern void zebra_vlan_bitmap_compute(struct interface *ifp, +		uint32_t vid_start, uint16_t vid_end); +extern void zebra_vlan_mbr_re_eval(struct interface *ifp, +		bitfield_t vlan_bitmap);  #ifdef __cplusplus  } diff --git a/zebra/zebra_memory.c b/zebra/zebra_memory.c index a9c2c5fe58..da8121774e 100644 --- a/zebra/zebra_memory.c +++ b/zebra/zebra_memory.c @@ -28,3 +28,5 @@  DEFINE_MGROUP(ZEBRA, "zebra")  DEFINE_MTYPE(ZEBRA, RE, "Route 
Entry")  DEFINE_MTYPE(ZEBRA, RIB_DEST, "RIB destination") +DEFINE_MTYPE(ZEBRA, ZVLAN, "VLAN") +DEFINE_MTYPE(ZEBRA, ZVLAN_BITMAP, "VLAN bitmap") diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h index 863c5fa71c..f73a8f2d59 100644 --- a/zebra/zebra_router.h +++ b/zebra/zebra_router.h @@ -125,6 +125,12 @@ struct zebra_router {  	/* L3-VNI hash table (for EVPN). Only in default instance */  	struct hash *l3vni_table; +	/* Tables and other global info maintained for EVPN multihoming */ +	struct zebra_evpn_mh_info *mh_info; + +	/* EVPN MH broadcast domains indexed by the VID */ +	struct hash *evpn_vlan_table; +  	struct hash *rules_hash;  	struct hash *ipset_hash; diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index daff5e7b3a..dbfa49a6ef 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -50,6 +50,7 @@  #include "zebra/zebra_vrf.h"  #include "zebra/zebra_vxlan.h"  #include "zebra/zebra_vxlan_private.h" +#include "zebra/zebra_evpn_mh.h"  #include "zebra/zebra_router.h"  DEFINE_MTYPE_STATIC(ZEBRA, HOST_PREFIX, "host prefix"); @@ -74,7 +75,6 @@ static const struct message zvtep_flood_str[] = {  	{0}  }; -  /* static function declarations */  static int ip_prefix_send_to_client(vrf_id_t vrf_id, struct prefix *p,  				    uint16_t cmd); @@ -95,20 +95,22 @@ static void zvni_print_hash(struct hash_bucket *bucket, void *ctxt[]);  static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,  					 struct ipaddr *ip, uint8_t flags, -					 uint32_t seq, int state, uint16_t cmd); +					 uint32_t seq, int state, +					 struct zebra_evpn_es *es, +					 uint16_t cmd);  static unsigned int neigh_hash_keymake(const void *p);  static void *zvni_neigh_alloc(void *p);  static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip, -				     struct ethaddr *mac); +				     struct ethaddr *mac, zebra_mac_t *zmac);  static int zvni_neigh_del(zebra_vni_t *zvni, zebra_neigh_t *n);  static void zvni_neigh_del_all(zebra_vni_t *zvni, int 
uninstall, int upd_client,  			       uint32_t flags);  static zebra_neigh_t *zvni_neigh_lookup(zebra_vni_t *zvni, struct ipaddr *ip);  static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip, -					 struct ethaddr *macaddr, +					 struct ethaddr *mac, zebra_mac_t *zmac,  					 uint8_t flags, uint32_t seq);  static int zvni_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip, -					 struct ethaddr *macaddr, +					 struct ethaddr *mac,  					 uint8_t flags, int state);  static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n);  static int zvni_neigh_uninstall(zebra_vni_t *zvni, zebra_neigh_t *n); @@ -157,7 +159,7 @@ static void zvni_mac_del_all(zebra_vni_t *zvni, int uninstall, int upd_client,  			     uint32_t flags);  static zebra_mac_t *zvni_mac_lookup(zebra_vni_t *zvni, struct ethaddr *macaddr);  static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr, -				       uint8_t flags, uint32_t seq); +		uint8_t flags, uint32_t seq, struct zebra_evpn_es *es);  static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr);  static zebra_vni_t *zvni_map_vlan(struct interface *ifp,  				  struct interface *br_if, vlanid_t vid); @@ -167,11 +169,10 @@ static void zvni_install_mac_hash(struct hash_bucket *bucket, void *ctxt);  static unsigned int vni_hash_keymake(const void *p);  static void *zvni_alloc(void *p); -static zebra_vni_t *zvni_lookup(vni_t vni);  static zebra_vni_t *zvni_add(vni_t vni);  static int zvni_del(zebra_vni_t *zvni);  static int zvni_send_add_to_client(zebra_vni_t *zvni); -static int zvni_send_del_to_client(vni_t vni); +static int zvni_send_del_to_client(zebra_vni_t *zvni);  static void zvni_build_hash_table(void);  static int zvni_vtep_match(struct in_addr *vtep_ip, zebra_vtep_t *zvtep);  static zebra_vtep_t *zvni_vtep_find(zebra_vni_t *zvni, struct in_addr *vtep_ip); @@ -755,12 +756,21 @@ static void zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json)  		json_object_string_add(json, 
"mac", buf1);  	}  	if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) { -		if (json == NULL) { -			vty_out(vty, " Remote VTEP: %s\n", -				inet_ntoa(n->r_vtep_ip)); -		} else -			json_object_string_add(json, "remoteVtep", -					       inet_ntoa(n->r_vtep_ip)); +		if (n->mac->es) { +			if (json) +				json_object_string_add(json, "remoteEs", +						n->mac->es->esi_str); +			else +				vty_out(vty, " Remote ES: %s\n", +						n->mac->es->esi_str); +		} else { +			if (json) +				json_object_string_add(json, "remoteVtep", +						inet_ntoa(n->r_vtep_ip)); +			else +				vty_out(vty, " Remote VTEP: %s\n", +						inet_ntoa(n->r_vtep_ip)); +		}  	}  	if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) {  		if (!json) { @@ -871,19 +881,26 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt)  		if (json_vni == NULL) {  			if ((wctx->flags & SHOW_REMOTE_NEIGH_FROM_VTEP) &&  			    (wctx->count == 0)) -				vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", +				vty_out(vty, "%*s %-6s %-8s %-17s %-30s %s\n",  					-wctx->addr_width, "Neighbor", "Type", -					"State", "MAC", "Remote VTEP", +					"State", "MAC", "Remote ES/VTEP",  					"Seq #'s"); -			vty_out(vty, "%*s %-6s %-8s %-17s %-21s %u/%u\n", +			vty_out(vty, "%*s %-6s %-8s %-17s %-30s %u/%u\n",  				-wctx->addr_width, buf2, "remote", state_str, -				buf1, inet_ntoa(n->r_vtep_ip), n->loc_seq, n->rem_seq); +				buf1, +				n->mac->es ? 
n->mac->es->esi_str : +				inet_ntoa(n->r_vtep_ip), +				n->loc_seq, n->rem_seq);  		} else {  			json_object_string_add(json_row, "type", "remote");  			json_object_string_add(json_row, "state", state_str);  			json_object_string_add(json_row, "mac", buf1); -			json_object_string_add(json_row, "remoteVtep", -					       inet_ntoa(n->r_vtep_ip)); +			if (n->mac->es) +				json_object_string_add(json_row, "remoteEs", +						n->mac->es->esi_str); +			else +				json_object_string_add(json_row, "remoteVtep", +						inet_ntoa(n->r_vtep_ip));  			if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW))  				json_object_boolean_true_add(json_row,  							     "defaultGateway"); @@ -987,9 +1004,9 @@ static void zvni_print_neigh_hash_all_vni(struct hash_bucket *bucket,  	hash_iterate(zvni->neigh_table, zvni_find_neigh_addr_width, &wctx);  	if (json == NULL) { -		vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", +		vty_out(vty, "%*s %-6s %-8s %-17s %-30s %s\n",  			-wctx.addr_width, "IP", "Type", -			"State", "MAC", "Remote VTEP", "Seq #'s"); +			"State", "MAC", "Remote ES/VTEP", "Seq #'s");  	}  	if (print_dup)  		hash_iterate(zvni->neigh_table, zvni_print_dad_neigh_hash, @@ -1284,8 +1301,12 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)  				vty_out(vty, " VLAN: %u",  					mac->fwd_info.local.vid);  		} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) { -			vty_out(vty, " Remote VTEP: %s", -				inet_ntoa(mac->fwd_info.r_vtep_ip)); +			if (mac->es) +				vty_out(vty, " Remote ES: %s", +						mac->es->esi_str); +			else +				vty_out(vty, " Remote VTEP: %s", +					inet_ntoa(mac->fwd_info.r_vtep_ip));  		} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) {  			vty_out(vty, " Auto Mac ");  		} @@ -1374,7 +1395,7 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt)  			return;  		vid = mac->fwd_info.local.vid;  		if (json_mac_hdr == NULL) -			vty_out(vty, "%-17s %-6s %-21s", buf1, "local", +			vty_out(vty, "%-17s %-6s %-30s", buf1, 
"local",  				ifp->name);  		else {  			json_object_string_add(json_mac, "type", "local"); @@ -1420,12 +1441,14 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt)  			if ((wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) &&  			    (wctx->count == 0)) {  				vty_out(vty, "\nVNI %u\n\n", wctx->zvni->vni); -				vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", -					"MAC", "Type", "Intf/Remote VTEP", +				vty_out(vty, "%-17s %-6s %-30s %-5s %s\n", +					"MAC", "Type", "Intf/Remote ES/VTEP",  					"VLAN", "Seq #'s");  			} -			vty_out(vty, "%-17s %-6s %-21s %-5s %u/%u\n", buf1, -				"remote", inet_ntoa(mac->fwd_info.r_vtep_ip), +			vty_out(vty, "%-17s %-6s %-30s %-5s %u/%u\n", buf1, +				"remote", +				mac->es ?  mac->es->esi_str : +				inet_ntoa(mac->fwd_info.r_vtep_ip),  				"", mac->loc_seq, mac->rem_seq);  		} else {  			json_object_string_add(json_mac, "type", "remote"); @@ -1540,8 +1563,8 @@ static void zvni_print_mac_hash_all_vni(struct hash_bucket *bucket, void *ctxt)  		if (json == NULL) {  			vty_out(vty, "\nVNI %u #MACs (local and remote) %u\n\n",  				zvni->vni, num_macs); -			vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", "MAC", -				"Type", "Intf/Remote VTEP", "VLAN", "Seq #'s"); +			vty_out(vty, "%-17s %-6s %-30s %-5s %s\n", "MAC", +				"Type", "Intf/Remote ES/VTEP", "VLAN", "Seq #'s");  		} else  			json_object_int_add(json_vni, "numMacs", num_macs);  	} @@ -2106,13 +2129,16 @@ static void zvni_print_hash_detail(struct hash_bucket *bucket, void *data)   */  static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,  					 struct ipaddr *ip, uint8_t flags, -					 uint32_t seq, int state, uint16_t cmd) +					 uint32_t seq, int state, +					 struct zebra_evpn_es *es, +					 uint16_t cmd)  {  	char buf[ETHER_ADDR_STRLEN];  	char buf2[INET6_ADDRSTRLEN];  	int ipa_len;  	struct zserv *client = NULL;  	struct stream *s = NULL; +	esi_t *esi = es ? 
&es->esi : zero_esi;  	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);  	/* BGP may not be running. */ @@ -2140,6 +2166,7 @@ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,  	if (cmd == ZEBRA_MACIP_ADD) {  		stream_putc(s, flags); /* sticky mac/gateway mac */  		stream_putl(s, seq); /* sequence number */ +		stream_put(s, esi, sizeof(esi_t));  	} else {  		stream_putl(s, state); /* state - active/inactive */  	} @@ -2150,10 +2177,11 @@ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,  	if (IS_ZEBRA_DEBUG_VXLAN)  		zlog_debug( -			"Send MACIP %s flags 0x%x MAC %s IP %s seq %u L2-VNI %u to %s", +			"Send MACIP %s f 0x%x MAC %s IP %s seq %u L2-VNI %u ESI %s to %s",  			(cmd == ZEBRA_MACIP_ADD) ? "Add" : "Del", flags,  			prefix_mac2str(macaddr, buf, sizeof(buf)),  			ipaddr2str(ip, buf2, sizeof(buf2)), seq, vni, +			es ? es->esi_str : "-",  			zebra_route_string(client->proto));  	if (cmd == ZEBRA_MACIP_ADD) @@ -2222,11 +2250,10 @@ static void *zvni_neigh_alloc(void *p)   * Add neighbor entry.   
*/  static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip, -				     struct ethaddr *mac) +		struct ethaddr *mac, zebra_mac_t *zmac)  {  	zebra_neigh_t tmp_n;  	zebra_neigh_t *n = NULL; -	zebra_mac_t *zmac = NULL;  	memset(&tmp_n, 0, sizeof(zebra_neigh_t));  	memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr)); @@ -2239,9 +2266,10 @@ static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip,  	n->dad_ip_auto_recovery_timer = NULL;  	/* Associate the neigh to mac */ -	zmac = zvni_mac_lookup(zvni, mac); -	if (zmac) +	if (zmac) { +		n->mac = zmac;  		listnode_add_sort(zmac->neigh_list, n); +	}  	return n;  } @@ -2252,11 +2280,9 @@ static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip,  static int zvni_neigh_del(zebra_vni_t *zvni, zebra_neigh_t *n)  {  	zebra_neigh_t *tmp_n; -	zebra_mac_t *zmac = NULL; -	zmac = zvni_mac_lookup(zvni, &n->emac); -	if (zmac) -		listnode_delete(zmac->neigh_list, n); +	if (n->mac) +		listnode_delete(n->mac->neigh_list, n);  	/* Cancel auto recovery */  	THREAD_OFF(n->dad_ip_auto_recovery_timer); @@ -2335,8 +2361,7 @@ static zebra_neigh_t *zvni_neigh_lookup(zebra_vni_t *zvni, struct ipaddr *ip)   * locally or undergoing any other change (such as sequence number).   
*/  static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni, -						   zebra_mac_t *zmac, -						   bool seq_change) +		zebra_mac_t *zmac, bool seq_change, bool es_change)  {  	zebra_neigh_t *n = NULL;  	struct listnode *node = NULL; @@ -2358,7 +2383,8 @@ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni,  	 */  	for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) {  		if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { -			if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change) { +			if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change || +					es_change) {  				ZEBRA_NEIGH_SET_ACTIVE(n);  				n->loc_seq = zmac->loc_seq;  				if (!(zvrf->dup_addr_detect && @@ -2366,7 +2392,7 @@ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni,  						ZEBRA_NEIGH_DUPLICATE)))  					zvni_neigh_send_add_to_client(  						zvni->vni, &n->ip, &n->emac, -						n->flags, n->loc_seq); +						n->mac, n->flags, n->loc_seq);  			}  		}  	} @@ -2464,6 +2490,7 @@ static void zvni_probe_neigh_on_mac_add(zebra_vni_t *zvni, zebra_mac_t *zmac)   */  static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip,  					 struct ethaddr *macaddr, +					 zebra_mac_t *zmac,  					 uint8_t neigh_flags,  					 uint32_t seq)  { @@ -2478,7 +2505,9 @@ static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip,  		SET_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP);  	return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags, -			     seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD); +			seq, ZEBRA_NEIGH_ACTIVE, +			zmac ? 
zmac->es : NULL, +			ZEBRA_MACIP_ADD);  }  /* @@ -2489,7 +2518,7 @@ static int zvni_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip,  					 int state)  {  	return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags, -					     0, state, ZEBRA_MACIP_DEL); +			0, state, NULL, ZEBRA_MACIP_DEL);  }  /* @@ -2758,7 +2787,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,  	n = zvni_neigh_lookup(zvni, ip);  	if (!n) { -		n = zvni_neigh_add(zvni, ip, macaddr); +		n = zvni_neigh_add(zvni, ip, macaddr, mac);  		if (!n) {  			flog_err(  				EC_ZEBRA_MAC_ADD_FAILED, @@ -2792,7 +2821,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,  			prefix_mac2str(macaddr, buf, sizeof(buf)),  			ipaddr2str(ip, buf2, sizeof(buf2)), n->flags); -		zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr, +		zvni_neigh_send_add_to_client(zvni->vni, ip, &n->emac, n->mac,  					      n->flags, n->loc_seq);  	} else if (advertise_svi_macip_enabled(zvni)) { @@ -2804,7 +2833,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,  			prefix_mac2str(macaddr, buf, sizeof(buf)),  			ipaddr2str(ip, buf2, sizeof(buf2)), n->flags); -		zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr, +		zvni_neigh_send_add_to_client(zvni->vni, ip, &n->emac, n->mac,  					      n->flags, n->loc_seq);  	} @@ -3066,7 +3095,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  	n = zvni_neigh_lookup(zvni, ip);  	if (!n) {  		/* New neighbor - create */ -		n = zvni_neigh_add(zvni, ip, macaddr); +		n = zvni_neigh_add(zvni, ip, macaddr, zmac);  		if (!n) {  			flog_err(  				EC_ZEBRA_MAC_ADD_FAILED, @@ -3119,8 +3148,9 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  				if (IS_ZEBRA_NEIGH_ACTIVE(n) &&  				    !is_neigh_freezed)  					return zvni_neigh_send_add_to_client( -							zvni->vni, ip, macaddr, -							n->flags, n->loc_seq); +							zvni->vni, ip, &n->emac, +							n->mac, n->flags, +							n->loc_seq);  				else {  					if 
(IS_ZEBRA_DEBUG_VXLAN)  						zlog_debug( @@ -3138,13 +3168,14 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  			if (IS_ZEBRA_NEIGH_ACTIVE(n))  				zvni_neigh_send_del_to_client(zvni->vni, &n->ip,  					      &n->emac, 0, n->state); -			old_zmac = zvni_mac_lookup(zvni, &n->emac); +			old_zmac = n->mac;  			if (old_zmac) {  				old_mac_seq = CHECK_FLAG(old_zmac->flags,  							 ZEBRA_MAC_REMOTE) ?  					old_zmac->rem_seq : old_zmac->loc_seq;  				neigh_mac_change = upd_mac_seq = true;  				listnode_delete(old_zmac->neigh_list, n); +				n->mac = NULL;  				zvni_deref_ip2mac(zvni, old_zmac);  			} @@ -3153,6 +3184,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  			memcpy(&n->emac, macaddr, ETH_ALEN);  			/* Link to new MAC */ +			n->mac = zmac;  			listnode_add_sort(zmac->neigh_list, n);  		} else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) {  			/* @@ -3161,7 +3193,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  			 */  			if (memcmp(n->emac.octet, macaddr->octet,  				   ETH_ALEN) != 0) { -				old_zmac = zvni_mac_lookup(zvni, &n->emac); +				old_zmac = n->mac;  				if (old_zmac) {  					old_mac_seq = CHECK_FLAG(  							old_zmac->flags, @@ -3171,11 +3203,13 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  					neigh_mac_change = upd_mac_seq = true;  					listnode_delete(old_zmac->neigh_list,  							n); +					n->mac = NULL;  					zvni_deref_ip2mac(zvni, old_zmac);  				}  				/* Link to new MAC */  				memcpy(&n->emac, macaddr, ETH_ALEN); +				n->mac = zmac;  				listnode_add_sort(zmac->neigh_list, n);  			}  			/* Based on Mobility event Scenario-B from the @@ -3254,9 +3288,10 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  				   zvni->vni, zmac->loc_seq, mac_new_seq);  		zmac->loc_seq = mac_new_seq;  		if (zvni_mac_send_add_to_client(zvni->vni, macaddr, -						zmac->flags, zmac->loc_seq)) +					zmac->flags, zmac->loc_seq, zmac->es))  			return -1; -		zvni_process_neigh_on_local_mac_change(zvni, zmac, 1); +		
zvni_process_neigh_on_local_mac_change(zvni, zmac, 1, +				0 /*es_change*/);  		return 0;  	} @@ -3265,8 +3300,8 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,  	if (!neigh_on_hold) {  		ZEBRA_NEIGH_SET_ACTIVE(n); -		return zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr, -					     n->flags, n->loc_seq); +		return zvni_neigh_send_add_to_client(zvni->vni, ip, +				&n->emac, n->mac, n->flags, n->loc_seq);  	} else {  		if (IS_ZEBRA_DEBUG_VXLAN)  			zlog_debug("        Neighbor on hold not sending"); @@ -3393,6 +3428,9 @@ static int zvni_mac_del(zebra_vni_t *zvni, zebra_mac_t *mac)  {  	zebra_mac_t *tmp_mac; +	/* force de-ref any ES entry linked to the MAC */ +	zebra_evpn_es_mac_deref_entry(mac); +  	/* Cancel auto recovery */  	THREAD_OFF(mac->dad_mac_auto_recovery_timer); @@ -3498,7 +3536,7 @@ static zebra_mac_t *zvni_mac_lookup(zebra_vni_t *zvni, struct ethaddr *mac)   * Inform BGP about local MAC addition.   */  static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr, -				       uint8_t mac_flags, uint32_t seq) +		uint8_t mac_flags, uint32_t seq, struct zebra_evpn_es *es)  {  	uint8_t flags = 0; @@ -3508,7 +3546,8 @@ static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr,  		SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW);  	return zvni_macip_send_msg_to_client(vni, macaddr, NULL, flags, -			     seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD); +			seq, ZEBRA_NEIGH_ACTIVE, es, +			ZEBRA_MACIP_ADD);  }  /* @@ -3517,7 +3556,8 @@ static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr,  static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr)  {  	return zvni_macip_send_msg_to_client(vni, macaddr, NULL, 0 /* flags */, -			     0 /* seq */, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_DEL); +			0 /* seq */, ZEBRA_NEIGH_ACTIVE, NULL, +			ZEBRA_MACIP_DEL);  }  /* @@ -3757,6 +3797,7 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)  	enum zebra_dplane_result res;  	const struct interface 
*br_ifp;  	vlanid_t vid; +	uint32_t nhg_id;  	if (!(mac->flags & ZEBRA_MAC_REMOTE))  		return 0; @@ -3774,6 +3815,17 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)  	sticky = !!CHECK_FLAG(mac->flags,  			 (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)); +	/* If nexthop group for the FDB entry is inactive (not programmed in +	 * the dataplane) the MAC entry cannot be installed +	 */ +	if (mac->es) { +		if (!(mac->es->flags & ZEBRA_EVPNES_NHG_ACTIVE)) +			return -1; +		nhg_id = mac->es->nhg_id; +	} else { +		nhg_id = 0; +	} +  	br_zif = (const struct zebra_if *)(br_ifp->info);  	if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif)) @@ -3782,7 +3834,8 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)  		vid = 0;  	res = dplane_mac_add(zvni->vxlan_if, br_ifp, vid, -			     &mac->macaddr, mac->fwd_info.r_vtep_ip, sticky); +			     &mac->macaddr, mac->fwd_info.r_vtep_ip, sticky, +				 nhg_id);  	if (res != ZEBRA_DPLANE_REQUEST_FAILURE)  		return 0;  	else @@ -3884,6 +3937,7 @@ static void zvni_deref_ip2mac(zebra_vni_t *zvni, zebra_mac_t *mac)  	if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) &&  	    remote_neigh_count(mac) == 0) {  		zvni_mac_uninstall(zvni, mac); +		zebra_evpn_es_mac_deref_entry(mac);  		UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);  	} @@ -3950,7 +4004,7 @@ static bool vni_hash_cmp(const void *p1, const void *p2)  	return (zvni1->vni == zvni2->vni);  } -static int vni_list_cmp(void *p1, void *p2) +int vni_list_cmp(void *p1, void *p2)  {  	const zebra_vni_t *zvni1 = p1;  	const zebra_vni_t *zvni2 = p2; @@ -3976,7 +4030,7 @@ static void *zvni_alloc(void *p)  /*   * Look up VNI hash entry.   
*/ -static zebra_vni_t *zvni_lookup(vni_t vni) +zebra_vni_t *zvni_lookup(vni_t vni)  {  	struct zebra_vrf *zvrf;  	zebra_vni_t tmp_vni; @@ -4007,6 +4061,8 @@ static zebra_vni_t *zvni_add(vni_t vni)  	zvni = hash_get(zvrf->vni_table, &tmp_zvni, zvni_alloc);  	assert(zvni); +	zebra_evpn_vni_es_init(zvni); +  	/* Create hash table for MAC */  	zvni->mac_table =  		hash_create(mac_hash_keymake, mac_cmp, "Zebra VNI MAC Table"); @@ -4018,6 +4074,30 @@ static zebra_vni_t *zvni_add(vni_t vni)  	return zvni;  } +/* vni<=>vxlan_zif association */ +static void zvni_vxlan_if_set(zebra_vni_t *zvni, struct interface *ifp, +		bool set) +{ +	struct zebra_if *zif; + +	if (set) { +		if (zvni->vxlan_if == ifp) +			return; +		zvni->vxlan_if = ifp; +	} else { +		if (!zvni->vxlan_if) +			return; +		zvni->vxlan_if = NULL; +	} + +	if (ifp) +		zif = ifp->info; +	else +		zif = NULL; + +	zebra_evpn_vxl_vni_set(zif, zvni, set); +} +  /*   * Delete VNI hash entry.   */ @@ -4029,7 +4109,7 @@ static int zvni_del(zebra_vni_t *zvni)  	zvrf = zebra_vrf_get_evpn();  	assert(zvrf); -	zvni->vxlan_if = NULL; +	zvni_vxlan_if_set(zvni, zvni->vxlan_if, false /* set */);  	/* Remove references to the BUM mcast grp */  	zebra_vxlan_sg_deref(zvni->local_vtep_ip, zvni->mcast_grp); @@ -4042,6 +4122,8 @@ static int zvni_del(zebra_vni_t *zvni)  	hash_free(zvni->mac_table);  	zvni->mac_table = NULL; +	zebra_evpn_vni_es_cleanup(zvni); +  	/* Free the VNI hash entry and allocated memory. */  	tmp_zvni = hash_release(zvrf->vni_table, zvni);  	XFREE(MTYPE_ZVNI, tmp_zvni); @@ -4056,6 +4138,7 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni)  {  	struct zserv *client;  	struct stream *s; +	int rc;  	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);  	/* BGP may not be running. 
*/ @@ -4080,13 +4163,22 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni)  			   zebra_route_string(client->proto));  	client->vniadd_cnt++; -	return zserv_send_message(client, s); +	rc = zserv_send_message(client, s); + +	if (!(zvni->flags & ZVNI_READY_FOR_BGP)) { +		zvni->flags |= ZVNI_READY_FOR_BGP; +		/* once the VNI is sent the ES-EVIs can also be replayed +		 * to BGP +		 */ +		zebra_evpn_vni_update_all_es(zvni); +	} +	return rc;  }  /*   * Inform BGP about local VNI deletion.   */ -static int zvni_send_del_to_client(vni_t vni) +static int zvni_send_del_to_client(zebra_vni_t *zvni)  {  	struct zserv *client;  	struct stream *s; @@ -4096,17 +4188,23 @@ static int zvni_send_del_to_client(vni_t vni)  	if (!client)  		return 0; +	if (zvni->flags & ZVNI_READY_FOR_BGP) { +		zvni->flags &= ~ZVNI_READY_FOR_BGP; +		/* the ES-EVIs must be removed from BGP before the VNI is */ +		zebra_evpn_vni_update_all_es(zvni); +	} +  	s = stream_new(ZEBRA_MAX_PACKET_SIZ);  	stream_reset(s);  	zclient_create_header(s, ZEBRA_VNI_DEL, zebra_vrf_get_evpn_id()); -	stream_putl(s, vni); +	stream_putl(s, zvni->vni);  	/* Write packet size. 
*/  	stream_putw_at(s, 0, stream_get_endp(s));  	if (IS_ZEBRA_DEBUG_VXLAN) -		zlog_debug("Send VNI_DEL %u to %s", vni, +		zlog_debug("Send VNI_DEL %u to %s", zvni->vni,  			   zebra_route_string(client->proto));  	client->vnidel_cnt++; @@ -4224,8 +4322,12 @@ static void zvni_build_hash_table(void)  						vxl->mcast_grp);  					zvni->local_vtep_ip = vxl->vtep_ip;  					zvni->mcast_grp = vxl->mcast_grp; +					/* on local vtep-ip check if ES +					 * orig-ip needs to be updated +					 */ +					zebra_evpn_es_set_base_vni(zvni);  				} -				zvni->vxlan_if = ifp; +				zvni_vxlan_if_set(zvni, ifp, true /* set */);  				vlan_if = zvni_map_to_svi(vxl->access_vlan,  						zif->brslave_info.br_if);  				if (vlan_if) { @@ -4573,7 +4675,7 @@ static int zl3vni_rmac_install(zebra_l3vni_t *zl3vni, zebra_mac_t *zrmac)  		vid = 0;  	res = dplane_mac_add(zl3vni->vxlan_if, br_ifp, vid, -			     &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0); +			     &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0, 0);  	if (res != ZEBRA_DPLANE_REQUEST_FAILURE)  		return 0;  	else @@ -5344,7 +5446,7 @@ static int zebra_vxlan_handle_vni_transition(struct zebra_vrf *zvrf, vni_t vni,  			zlog_debug("Del L2-VNI %u - transition to L3-VNI", vni);  		/* Delete VNI from BGP. */ -		zvni_send_del_to_client(zvni->vni); +		zvni_send_del_to_client(zvni);  		/* Free up all neighbors and MAC, if any. */  		zvni_neigh_del_all(zvni, 0, 0, DEL_ALL_NEIGH); @@ -5452,6 +5554,17 @@ static int zebra_vxlan_readd_remote_rmac(zebra_l3vni_t *zl3vni,  	return 0;  } +static void process_sync_macip_add(zebra_vni_t *zvni, +				     struct ethaddr *macaddr, +				     uint16_t ipa_len, +				     struct ipaddr *ipaddr, +				     uint8_t flags, +				     uint32_t seq, +				     struct zebra_evpn_es *es) +{ +	/* XXX - sync route */ +} +  /* Process a remote MACIP add from BGP. 
*/  static void process_remote_macip_add(vni_t vni,  				     struct ethaddr *macaddr, @@ -5459,7 +5572,8 @@ static void process_remote_macip_add(vni_t vni,  				     struct ipaddr *ipaddr,  				     uint8_t flags,  				     uint32_t seq, -				     struct in_addr vtep_ip) +				     struct in_addr vtep_ip, +				     esi_t *esi)  {  	zebra_vni_t *zvni;  	zebra_vtep_t *zvtep; @@ -5477,6 +5591,8 @@ static void process_remote_macip_add(vni_t vni,  	bool is_router;  	bool do_dad = false;  	bool is_dup_detect = false; +	struct zebra_evpn_es *es; +	esi_t *old_esi;  	/* Locate VNI hash entry - expected to exist. */  	zvni = zvni_lookup(vni); @@ -5497,22 +5613,37 @@ static void process_remote_macip_add(vni_t vni,  		return;  	} +	/* Type-2 routes from another PE can be interpreted as remote or +	 * SYNC based on the destination ES - +	 * SYNC - if ES is local +	 * REMOTE - if ES is not local +	 */ +	es = zebra_evpn_es_find(esi); +	if (es && (es->flags & ZEBRA_EVPNES_LOCAL)) { +		process_sync_macip_add(zvni, macaddr, ipa_len, ipaddr, flags, +				seq, es); +		return; +	} +  	/* The remote VTEP specified should normally exist, but it is  	 * possible that when peering comes up, peer may advertise MACIP  	 * routes before advertising type-3 routes.  	 
*/ -	zvtep = zvni_vtep_find(zvni, &vtep_ip); -	if (!zvtep) { -		zvtep = zvni_vtep_add(zvni, &vtep_ip, VXLAN_FLOOD_DISABLED); +	if (vtep_ip.s_addr) { +		zvtep = zvni_vtep_find(zvni, &vtep_ip);  		if (!zvtep) { -			flog_err( -				EC_ZEBRA_VTEP_ADD_FAILED, -				"Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD", -				vni, zvni); -			return; -		} +			zvtep = zvni_vtep_add(zvni, &vtep_ip, +					VXLAN_FLOOD_DISABLED); +			if (!zvtep) { +				flog_err( +					EC_ZEBRA_VTEP_ADD_FAILED, +					"Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD", +					vni, zvni); +				return; +			} -		zvni_vtep_install(zvni, zvtep); +			zvni_vtep_install(zvni, zvtep); +		}  	}  	sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); @@ -5539,6 +5670,8 @@ static void process_remote_macip_add(vni_t vni,  	if (!zvrf)  		return; +	old_esi = (mac && mac->es) ? &mac->es->esi : zero_esi; +  	/* check if the remote MAC is unknown or has a change.  	 * If so, that needs to be updated first. Note that client could  	 * install MAC and MACIP separately or just install the latter. @@ -5548,6 +5681,7 @@ static void process_remote_macip_add(vni_t vni,  	    || sticky != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)  	    || remote_gw != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW)  	    || !IPV4_ADDR_SAME(&mac->fwd_info.r_vtep_ip, &vtep_ip) +	    || memcmp(old_esi, esi, sizeof(esi_t))  	    || seq != mac->rem_seq)  		update_mac = 1; @@ -5563,10 +5697,14 @@ static void process_remote_macip_add(vni_t vni,  				return;  			} +			zebra_evpn_es_mac_ref(mac, esi); +  			/* Is this MAC created for a MACIP? 
*/  			if (ipa_len)  				SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);  		} else { +			zebra_evpn_es_mac_ref(mac, esi); +  			/* When host moves but changes its (MAC,IP)  			 * binding, BGP may install a MACIP entry that  			 * corresponds to "older" location of the host @@ -5670,7 +5808,7 @@ static void process_remote_macip_add(vni_t vni,  	if (update_neigh) {  		if (!n) { -			n = zvni_neigh_add(zvni, ipaddr, macaddr); +			n = zvni_neigh_add(zvni, ipaddr, macaddr, mac);  			if (!n) {  				zlog_warn(  					"Failed to add Neigh %s MAC %s VNI %u Remote VTEP %s", @@ -5726,8 +5864,10 @@ static void process_remote_macip_add(vni_t vni,  				old_mac = zvni_mac_lookup(zvni, &n->emac);  				if (old_mac) {  					listnode_delete(old_mac->neigh_list, n); +					n->mac = NULL;  					zvni_deref_ip2mac(zvni, old_mac);  				} +				n->mac = mac;  				listnode_add_sort(mac->neigh_list, n);  				memcpy(&n->emac, macaddr, ETH_ALEN); @@ -5938,6 +6078,7 @@ static void process_remote_macip_del(vni_t vni,  			 */  			if (remote_neigh_count(mac) == 0) {  				zvni_mac_uninstall(zvni, mac); +				zebra_evpn_es_mac_deref_entry(mac);  				UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);  			}  			if (list_isempty(mac->neigh_list)) @@ -6379,8 +6520,8 @@ void zebra_vxlan_print_neigh_vni(struct vty *vty, struct zebra_vrf *zvrf,  		vty_out(vty,  			"Number of ARPs (local and remote) known for this VNI: %u\n",  			num_neigh); -		vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", -wctx.addr_width, -			"IP", "Type", "State", "MAC", "Remote VTEP", "Seq #'s"); +		vty_out(vty, "%*s %-6s %-8s %-17s %-30s %s\n", -wctx.addr_width, +			"IP", "Type", "State", "MAC", "Remote ES/VTEP", "Seq #'s");  	} else  		json_object_int_add(json, "numArpNd", num_neigh); @@ -6590,9 +6731,9 @@ void zebra_vxlan_print_neigh_vni_dad(struct vty *vty,  		vty_out(vty,  			"Number of ARPs (local and remote) known for this VNI: %u\n",  			num_neigh); -		vty_out(vty, "%*s %-6s %-8s %-17s %-21s\n", +		vty_out(vty, "%*s %-6s %-8s %-17s %-30s\n",  			
-wctx.addr_width, "IP", "Type", -			"State", "MAC", "Remote VTEP"); +			"State", "MAC", "Remote ES/VTEP");  	} else  		json_object_int_add(json, "numArpNd", num_neigh); @@ -6645,8 +6786,8 @@ void zebra_vxlan_print_macs_vni(struct vty *vty, struct zebra_vrf *zvrf,  		vty_out(vty,  			"Number of MACs (local and remote) known for this VNI: %u\n",  			num_macs); -		vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", "MAC", "Type", -			"Intf/Remote VTEP", "VLAN", "Seq #'s"); +		vty_out(vty, "%-17s %-6s %-30s %-5s %s\n", "MAC", "Type", +			"Intf/Remote ES/VTEP", "VLAN", "Seq #'s");  	} else  		json_object_int_add(json, "numMacs", num_macs); @@ -6838,8 +6979,8 @@ void zebra_vxlan_print_macs_vni_dad(struct vty *vty,  		vty_out(vty,  		"Number of MACs (local and remote) known for this VNI: %u\n",  			num_macs); -		vty_out(vty, "%-17s %-6s %-21s %-5s\n", "MAC", "Type", -			"Intf/Remote VTEP", "VLAN"); +		vty_out(vty, "%-17s %-6s %-30s %-5s\n", "MAC", "Type", +			"Intf/Remote ES/VTEP", "VLAN");  	} else  		json_object_int_add(json, "numMacs", num_macs); @@ -6922,11 +7063,12 @@ int zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, vni_t vni,  		if (zvni_mac_send_add_to_client(zvni->vni,  					&mac->macaddr,  					mac->flags, -					mac->loc_seq)) +					mac->loc_seq, mac->es))  			return 0;  		/* Process all neighbors associated with this MAC. 
*/ -		zvni_process_neigh_on_local_mac_change(zvni, mac, 0); +		zvni_process_neigh_on_local_mac_change(zvni, mac, 0, +				0 /*es_change*/);  	} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {  		zvni_process_neigh_on_remote_mac_add(zvni, mac); @@ -6992,7 +7134,7 @@ int zebra_vxlan_clear_dup_detect_vni_ip(struct zebra_vrf *zvrf, vni_t vni,  	if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) {  		zvni_neigh_send_add_to_client(zvni->vni, ip, -					      &nbr->emac, +					      &nbr->emac, nbr->mac,  					      nbr->flags, nbr->loc_seq);  	} else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) {  		zvni_neigh_install(zvni, nbr); @@ -7042,11 +7184,12 @@ static void zvni_clear_dup_mac_hash(struct hash_bucket *bucket, void *ctxt)  		/* Inform to BGP */  		if (zvni_mac_send_add_to_client(zvni->vni,  					&mac->macaddr, -					mac->flags, mac->loc_seq)) +					mac->flags, mac->loc_seq, mac->es))  			return;  		/* Process all neighbors associated with this MAC. */ -		zvni_process_neigh_on_local_mac_change(zvni, mac, 0); +		zvni_process_neigh_on_local_mac_change(zvni, mac, 0, +				0 /*es_change*/);  	} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {  		zvni_process_neigh_on_remote_mac_add(zvni, mac); @@ -7087,7 +7230,7 @@ static void zvni_clear_dup_neigh_hash(struct hash_bucket *bucket, void *ctxt)  	if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) {  		zvni_neigh_send_add_to_client(zvni->vni, &nbr->ip, -					      &nbr->emac, +					      &nbr->emac, nbr->mac,  					      nbr->flags, nbr->loc_seq);  	} else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) {  		zvni_neigh_install(zvni, nbr); @@ -7560,7 +7703,8 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp,  	    ZEBRA_NEIGH_SET_INACTIVE(n);  	/* Remove neighbor from BGP. */ -	zvni_neigh_send_del_to_client(zvni->vni, &n->ip, &n->emac, 0, n->state); +	zvni_neigh_send_del_to_client(zvni->vni, &n->ip, +			&n->emac, 0, n->state);  	/* Delete this neighbor entry. 
*/  	zvni_neigh_del(zvni, n); @@ -7627,7 +7771,7 @@ static int32_t  zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni,  				struct ethaddr *macaddr, uint16_t *ipa_len,  				struct ipaddr *ip, struct in_addr *vtep_ip, -				uint8_t *flags, uint32_t *seq) +				uint8_t *flags, uint32_t *seq, esi_t *esi)  {  	uint16_t l = 0; @@ -7665,6 +7809,8 @@ zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni,  		STREAM_GETC(s, *flags);  		STREAM_GETL(s, *seq);  		l += 5; +		STREAM_GET(esi, s, sizeof(esi_t)); +		l += sizeof(esi_t);  	}  	return l; @@ -7696,7 +7842,7 @@ void zebra_vxlan_remote_macip_del(ZAPI_HANDLER_ARGS)  	while (l < hdr->length) {  		int res_length = zebra_vxlan_remote_macip_helper(  			false, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip, NULL, -			NULL); +			NULL, NULL);  		if (res_length == -1)  			goto stream_failure; @@ -7737,6 +7883,8 @@ void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS)  	uint32_t seq;  	char buf[ETHER_ADDR_STRLEN];  	char buf1[INET6_ADDRSTRLEN]; +	esi_t esi; +	char esi_buf[ESI_STR_LEN];  	memset(&macaddr, 0, sizeof(struct ethaddr));  	memset(&ip, 0, sizeof(struct ipaddr)); @@ -7752,25 +7900,30 @@ void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS)  	while (l < hdr->length) {  		int res_length = zebra_vxlan_remote_macip_helper(  			true, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip, -			&flags, &seq); +			&flags, &seq, &esi);  		if (res_length == -1)  			goto stream_failure;  		l += res_length; -		if (IS_ZEBRA_DEBUG_VXLAN) +		if (IS_ZEBRA_DEBUG_VXLAN) { +			if (memcmp(&esi, zero_esi, sizeof(esi_t))) +				esi_to_str(&esi, esi_buf, sizeof(esi_buf)); +			else +				strcpy(esi_buf, "-");  			zlog_debug( -				"Recv MACIP ADD VNI %u MAC %s%s%s flags 0x%x seq %u VTEP %s from %s", +				"Recv MACIP ADD VNI %u MAC %s%s%s flags 0x%x seq %u VTEP %s ESI %s from %s",  				vni,  				prefix_mac2str(&macaddr, buf, sizeof(buf)),  				ipa_len ? " IP " : "",  				ipa_len ?  				
ipaddr2str(&ip, buf1, sizeof(buf1)) : "", -				flags, seq, inet_ntoa(vtep_ip), +				flags, seq, inet_ntoa(vtep_ip), esi_buf,  				zebra_route_string(client->proto)); +		}  		process_remote_macip_add(vni, &macaddr, ipa_len, &ip, -					 flags, seq, vtep_ip); +					 flags, seq, vtep_ip, &esi);  	}  stream_failure: @@ -7984,6 +8137,8 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if,  	/* Remove MAC from BGP. */  	zvni_mac_send_del_to_client(zvni->vni, macaddr); +	zebra_evpn_es_mac_deref_entry(mac); +  	/*  	 * If there are no neigh associated with the mac delete the mac  	 * else mark it as AUTO for forward reference @@ -8016,6 +8171,8 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,  	bool upd_neigh = false;  	bool is_dup_detect = false;  	struct in_addr vtep_ip = {.s_addr = 0}; +	struct zebra_if *zif = ifp->info; +	bool es_change;  	/* We are interested in MACs only on ports or (port, VLAN) that  	 * map to a VNI. @@ -8175,16 +8332,26 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,  		}  	} +	if (zebra_evpn_es_mac_ref_entry(mac, zif->es_info.es)) { +		/* if ES associated with the MAC changed the neigh must be +		 * updated as well +		 */ +		upd_neigh = true; +		es_change = true; +	} else { +		es_change = false; +	} +  	/* Inform BGP if required. */  	if (inform_client) {  		if (zvni_mac_send_add_to_client(zvni->vni, macaddr, -						mac->flags, mac->loc_seq)) +					mac->flags, mac->loc_seq, mac->es))  			return -1;  	}  	/* Process all neighbors associated with this MAC, if required. */  	if (upd_neigh) -		zvni_process_neigh_on_local_mac_change(zvni, mac, 0); +		zvni_process_neigh_on_local_mac_change(zvni, mac, 0, es_change);  	return 0;  } @@ -8704,7 +8871,7 @@ int zebra_vxlan_if_down(struct interface *ifp)  		assert(zvni->vxlan_if == ifp);  		/* Delete this VNI from BGP. */ -		zvni_send_del_to_client(zvni->vni); +		zvni_send_del_to_client(zvni);  		/* Free up all neighbors and MACs, if any. 
*/  		zvni_neigh_del_all(zvni, 1, 0, DEL_ALL_NEIGH); @@ -8845,9 +9012,8 @@ int zebra_vxlan_if_del(struct interface *ifp)  		zl3vni = zl3vni_from_vrf(zvni->vrf_id);  		if (zl3vni)  			listnode_delete(zl3vni->l2vnis, zvni); -  		/* Delete VNI from BGP. */ -		zvni_send_del_to_client(zvni->vni); +		zvni_send_del_to_client(zvni);  		/* Free up all neighbors and MAC, if any. */  		zvni_neigh_del_all(zvni, 0, 0, DEL_ALL_NEIGH); @@ -8966,7 +9132,7 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags)  		    && (zif->brslave_info.bridge_ifindex == IFINDEX_INTERNAL)) {  			/* Delete from client, remove all remote VTEPs */  			/* Also, free up all MACs and neighbors. */ -			zvni_send_del_to_client(zvni->vni); +			zvni_send_del_to_client(zvni);  			zvni_neigh_del_all(zvni, 1, 0, DEL_ALL_NEIGH);  			zvni_mac_del_all(zvni, 1, 0, DEL_ALL_MAC);  			zvni_vtep_del_all(zvni, 1); @@ -8989,9 +9155,12 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags)  			zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp);  			zvni->local_vtep_ip = vxl->vtep_ip;  			zvni->mcast_grp = vxl->mcast_grp; +			/* on local vtep-ip check if ES orig-ip +			 * needs to be updated +			 */ +			zebra_evpn_es_set_base_vni(zvni);  		} -		zvni->vxlan_if = ifp; - +		zvni_vxlan_if_set(zvni, ifp, true /* set */);  		/* Take further actions needed.  		 * Note that if we are here, there is a change of interest.  		 
*/ @@ -9102,8 +9271,12 @@ int zebra_vxlan_if_add(struct interface *ifp)  			zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp);  			zvni->local_vtep_ip = vxl->vtep_ip;  			zvni->mcast_grp = vxl->mcast_grp; +			/* on local vtep-ip check if ES orig-ip +			 * needs to be updated +			 */ +			zebra_evpn_es_set_base_vni(zvni);  		} -		zvni->vxlan_if = ifp; +		zvni_vxlan_if_set(zvni, ifp, true /* set */);  		vlan_if = zvni_map_to_svi(vxl->access_vlan,  					  zif->brslave_info.br_if);  		if (vlan_if) { @@ -9664,6 +9837,9 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS)  		/* Note BUM handling */  		zvrf->vxlan_flood_ctrl = flood_ctrl; +		/* Replay all ESs */ +		zebra_evpn_es_send_all_to_client(true /* add */); +  		/* Build VNI hash table and inform BGP. */  		zvni_build_hash_table(); @@ -9682,6 +9858,9 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS)  		 */  		hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf); +		/* Delete all ESs in BGP */ +		zebra_evpn_es_send_all_to_client(false /* add */); +  		/* cleanup all l3vnis */  		hash_iterate(zrouter.l3vni_table, zl3vni_cleanup_all, NULL); @@ -9710,10 +9889,15 @@ void zebra_vxlan_init_tables(struct zebra_vrf *zvrf)  /* Cleanup VNI info, but don't free the table. 
*/  void zebra_vxlan_cleanup_tables(struct zebra_vrf *zvrf)  { +	struct zebra_vrf *evpn_zvrf = zebra_vrf_get_evpn(); +  	if (!zvrf)  		return;  	hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf);  	hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL); + +	if (zvrf == evpn_zvrf) +		zebra_evpn_es_cleanup();  }  /* Close all VNI handling */ @@ -9731,12 +9915,14 @@ void zebra_vxlan_init(void)  	zrouter.l3vni_table = hash_create(l3vni_hash_keymake, l3vni_hash_cmp,  					  "Zebra VRF L3 VNI table");  	zrouter.evpn_vrf = NULL; +	zebra_evpn_mh_init();  }  /* free l3vni table */  void zebra_vxlan_disable(void)  {  	hash_free(zrouter.l3vni_table); +	zebra_evpn_mh_terminate();  }  /* get the l3vni svi ifindex */ @@ -9793,7 +9979,7 @@ static int zebra_vxlan_dad_ip_auto_recovery_exp(struct thread *t)  	/* Send to BGP */  	if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) {  		zvni_neigh_send_add_to_client(zvni->vni, &nbr->ip, &nbr->emac, -					      nbr->flags, nbr->loc_seq); +				nbr->mac, nbr->flags, nbr->loc_seq);  	} else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) {  		zvni_neigh_install(zvni, nbr);  	} @@ -9857,11 +10043,12 @@ static int zebra_vxlan_dad_mac_auto_recovery_exp(struct thread *t)  	if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {  		/* Inform to BGP */  		if (zvni_mac_send_add_to_client(zvni->vni, &mac->macaddr, -					mac->flags, mac->loc_seq)) +					mac->flags, mac->loc_seq, mac->es))  			return -1;  		/* Process all neighbors associated with this MAC. 
*/ -		zvni_process_neigh_on_local_mac_change(zvni, mac, 0); +		zvni_process_neigh_on_local_mac_change(zvni, mac, 0, +			0 /*es_change*/);  	} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {  		zvni_process_neigh_on_remote_mac_add(zvni, mac); @@ -10137,7 +10324,7 @@ static void zvni_send_mac_hash_entry_to_client(struct hash_bucket *bucket,  	if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL))  		zvni_mac_send_add_to_client(wctx->zvni->vni, &zmac->macaddr, -						zmac->flags, zmac->loc_seq); +				zmac->flags, zmac->loc_seq, zmac->es);  }  /* Iterator to Notify Local MACs of a L2VNI */ @@ -10173,7 +10360,7 @@ static void zvni_send_neigh_hash_entry_to_client(struct hash_bucket *bucket,  			return;  		zvni_neigh_send_add_to_client(wctx->zvni->vni, &zn->ip, -						&zn->emac, zn->flags, +						&zn->emac, zn->mac, zn->flags,  						zn->loc_seq);  	}  } diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h index 064dda6cd0..0183b73664 100644 --- a/zebra/zebra_vxlan.h +++ b/zebra/zebra_vxlan.h @@ -217,6 +217,7 @@ extern void zebra_vxlan_handle_result(struct zebra_dplane_ctx *ctx);  extern void zebra_evpn_init(void);  extern void zebra_vxlan_macvlan_up(struct interface *ifp);  extern void zebra_vxlan_macvlan_down(struct interface *ifp); +extern int vni_list_cmp(void *p1, void *p2);  #ifdef __cplusplus  } diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index 806611c50e..a9f62fde10 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -62,6 +62,9 @@ struct zebra_vtep_t_ {  	struct zebra_vtep_t_ *prev;  }; +RB_HEAD(zebra_es_evi_rb_head, zebra_evpn_es_evi); +RB_PROTOTYPE(zebra_es_evi_rb_head, zebra_evpn_es_evi, rb_node, +		zebra_es_evi_rb_cmp);  /*   * VNI hash table @@ -73,6 +76,10 @@ struct zebra_vni_t_ {  	/* VNI - key */  	vni_t vni; +	/* ES flags */ +	uint32_t flags; +#define ZVNI_READY_FOR_BGP (1 << 0) /* ready to be sent to BGP */ +  	/* Flag for advertising gw macip */  	uint8_t advertise_gw_macip; @@ -102,6 +109,12 @@ struct 
zebra_vni_t_ {  	/* List of local or remote neighbors (MAC+IP) */  	struct hash *neigh_table; + +	/* RB tree of ES-EVIs */ +	struct zebra_es_evi_rb_head es_evi_rb_tree; + +	/* List of local ESs */ +	struct list *local_es_evi_list;  };  /* L3 VNI hash table */ @@ -316,6 +329,9 @@ struct zebra_mac_t_ {  		struct in_addr r_vtep_ip;  	} fwd_info; +	/* Local or remote ES */ +	struct zebra_evpn_es *es; +  	/* Mobility sequence numbers associated with this entry. */  	uint32_t rem_seq;  	uint32_t loc_seq; @@ -391,6 +407,9 @@ struct zebra_neigh_t_ {  	/* MAC address. */  	struct ethaddr emac; +	/* Back pointer to MAC. Only applicable to hosts in a L2-VNI. */ +	zebra_mac_t *mac; +  	/* Underlying interface. */  	ifindex_t ifindex; @@ -507,4 +526,6 @@ typedef struct zebra_vxlan_sg_ {  	uint32_t ref_cnt;  } zebra_vxlan_sg_t; +extern zebra_vni_t *zvni_lookup(vni_t vni); +  #endif /* _ZEBRA_VXLAN_PRIVATE_H */ diff --git a/zebra/zserv.c b/zebra/zserv.c index 99a85fd2ce..f1b7dcc848 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -1075,6 +1075,12 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client)  	vty_out(vty, "L3-VNI delete notifications: %u\n", client->l3vnidel_cnt);  	vty_out(vty, "MAC-IP add notifications: %u\n", client->macipadd_cnt);  	vty_out(vty, "MAC-IP delete notifications: %u\n", client->macipdel_cnt); +	vty_out(vty, "ES add notifications: %u\n", client->local_es_add_cnt); +	vty_out(vty, "ES delete notifications: %u\n", client->local_es_del_cnt); +	vty_out(vty, "ES-EVI add notifications: %u\n", +			client->local_es_evi_add_cnt); +	vty_out(vty, "ES-EVI delete notifications: %u\n", +			client->local_es_evi_del_cnt);  	TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {  		vty_out(vty, "VRF : %s\n", vrf_id_to_name(info->vrf_id)); diff --git a/zebra/zserv.h b/zebra/zserv.h index f2a4523818..e904460782 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -194,6 +194,10 @@ struct zserv {  	uint32_t v6_nh_watch_rem_cnt;  	uint32_t 
vxlan_sg_add_cnt;  	uint32_t vxlan_sg_del_cnt; +	uint32_t local_es_add_cnt; +	uint32_t local_es_del_cnt; +	uint32_t local_es_evi_add_cnt; +	uint32_t local_es_evi_del_cnt;  	uint32_t error_cnt;  	time_t nh_reg_time;  | 
