summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bgpd/bgp_attr.c 8
-rw-r--r-- bgpd/bgp_attr.h 34
-rw-r--r-- bgpd/bgp_attr_evpn.c 52
-rw-r--r-- bgpd/bgp_attr_evpn.h 24
-rw-r--r-- bgpd/bgp_debug.c 72
-rw-r--r-- bgpd/bgp_debug.h 4
-rw-r--r-- bgpd/bgp_ecommunity.c 44
-rw-r--r-- bgpd/bgp_ecommunity.h 8
-rw-r--r-- bgpd/bgp_evpn.c 1821
-rw-r--r-- bgpd/bgp_evpn.h 6
-rw-r--r-- bgpd/bgp_evpn_mh.c 2905
-rw-r--r-- bgpd/bgp_evpn_mh.h 299
-rw-r--r-- bgpd/bgp_evpn_private.h 147
-rw-r--r-- bgpd/bgp_evpn_vty.c 305
-rw-r--r-- bgpd/bgp_memory.c 5
-rw-r--r-- bgpd/bgp_memory.h 3
-rw-r--r-- bgpd/bgp_rd.h 1
-rw-r--r-- bgpd/bgp_route.c 231
-rw-r--r-- bgpd/bgp_route.h 5
-rw-r--r-- bgpd/bgp_table.h 2
-rw-r--r-- bgpd/bgp_vty.c 4
-rw-r--r-- bgpd/bgp_zebra.c 92
-rw-r--r-- bgpd/bgpd.c 7
-rw-r--r-- bgpd/bgpd.h 6
-rw-r--r-- bgpd/subdir.am 3
-rw-r--r-- doc/developer/building-frr-for-openwrt.rst 32
-rw-r--r-- doc/user/bgp.rst 20
-rw-r--r-- doc/user/sharp.rst 10
-rw-r--r-- include/linux/if_bridge.h 11
-rw-r--r-- include/linux/neighbour.h 2
-rw-r--r-- include/linux/net_namespace.h 1
-rw-r--r-- include/linux/nexthop.h 1
-rw-r--r-- lib/bitfield.h 43
-rw-r--r-- lib/if.c 13
-rw-r--r-- lib/if.h 7
-rw-r--r-- lib/linklist.c 58
-rw-r--r-- lib/linklist.h 20
-rw-r--r-- lib/log.c 4
-rw-r--r-- lib/netns_linux.c 38
-rw-r--r-- lib/ns.h 16
-rw-r--r-- lib/prefix.h 60
-rw-r--r-- lib/thread.c 30
-rw-r--r-- lib/thread.h 4
-rw-r--r-- lib/vrf.c 8
-rw-r--r-- lib/vrf.h 2
-rw-r--r-- lib/zclient.c 10
-rw-r--r-- lib/zclient.h 11
-rw-r--r-- sharpd/sharp_vty.c 34
-rw-r--r-- sharpd/sharp_zebra.c 24
-rw-r--r-- sharpd/sharp_zebra.h 5
-rw-r--r-- tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf bin 0 -> 90963 bytes
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd11/evpn.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd11/pim.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd11/zebra.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd12/evpn.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd12/pim.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd12/zebra.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd21/evpn.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd21/pim.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd21/zebra.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd22/evpn.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd22/pim.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/hostd22/zebra.conf 0
-rw-r--r-- tests/topotests/bgp-evpn-mh/spine1/evpn.conf 17
-rw-r--r-- tests/topotests/bgp-evpn-mh/spine1/pim.conf 18
-rw-r--r-- tests/topotests/bgp-evpn-mh/spine1/zebra.conf 15
-rw-r--r-- tests/topotests/bgp-evpn-mh/spine2/evpn.conf 17
-rw-r--r-- tests/topotests/bgp-evpn-mh/spine2/pim.conf 18
-rw-r--r-- tests/topotests/bgp-evpn-mh/spine2/zebra.conf 15
-rwxr-xr-x tests/topotests/bgp-evpn-mh/test_evpn_mh.py 651
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm11/evpn.conf 21
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm11/pim.conf 13
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm11/zebra.conf 23
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm12/evpn.conf 21
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm12/pim.conf 13
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm12/zebra.conf 23
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm21/evpn.conf 21
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm21/pim.conf 13
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm21/zebra.conf 23
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm22/evpn.conf 21
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm22/pim.conf 13
-rw-r--r-- tests/topotests/bgp-evpn-mh/torm22/zebra.conf 23
-rw-r--r-- tests/topotests/lib/topogen.py 4
-rw-r--r-- zebra/debug.c 79
-rw-r--r-- zebra/debug.h 15
-rw-r--r-- zebra/dplane_fpm_nl.c 3
-rw-r--r-- zebra/if_netlink.c 148
-rw-r--r-- zebra/interface.c 36
-rw-r--r-- zebra/interface.h 31
-rw-r--r-- zebra/main.c 4
-rw-r--r-- zebra/rt.h 5
-rw-r--r-- zebra/rt_netlink.c 317
-rw-r--r-- zebra/rt_socket.c 21
-rw-r--r-- zebra/subdir.am 4
-rw-r--r-- zebra/zapi_msg.c 3
-rw-r--r-- zebra/zebra_dplane.c 163
-rw-r--r-- zebra/zebra_dplane.h 53
-rw-r--r-- zebra/zebra_errors.h 1
-rw-r--r-- zebra/zebra_evpn_mh.c 2145
-rw-r--r-- zebra/zebra_evpn_mh.h 239
-rw-r--r-- zebra/zebra_l2.c 85
-rw-r--r-- zebra/zebra_l2.h 11
-rw-r--r-- zebra/zebra_memory.c 2
-rw-r--r-- zebra/zebra_netns_id.c 43
-rw-r--r-- zebra/zebra_netns_id.h 2
-rw-r--r-- zebra/zebra_netns_notify.c 19
-rw-r--r-- zebra/zebra_ns.c 17
-rw-r--r-- zebra/zebra_ns.h 9
-rw-r--r-- zebra/zebra_router.h 6
-rw-r--r-- zebra/zebra_vty.c 79
-rw-r--r-- zebra/zebra_vxlan.c 2943
-rw-r--r-- zebra/zebra_vxlan.h 6
-rw-r--r-- zebra/zebra_vxlan_private.h 77
-rw-r--r-- zebra/zserv.c 6
-rw-r--r-- zebra/zserv.h 4
115 files changed, 11478 insertions, 2633 deletions
diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c
index 08e50fc4f2..948055e375 100644
--- a/bgpd/bgp_attr.c
+++ b/bgpd/bgp_attr.c
@@ -718,6 +718,9 @@ bool attrhash_cmp(const void *p1, const void *p2)
&& IPV4_ADDR_SAME(&attr1->originator_id,
&attr2->originator_id)
&& overlay_index_same(attr1, attr2)
+ && !memcmp(&attr1->esi, &attr2->esi, sizeof(esi_t))
+ && attr1->es_flags == attr2->es_flags
+ && attr1->mm_sync_seqnum == attr2->mm_sync_seqnum
&& attr1->nh_ifindex == attr2->nh_ifindex
&& attr1->nh_lla_ifindex == attr2->nh_lla_ifindex
&& attr1->distance == attr2->distance
@@ -2186,6 +2189,7 @@ bgp_attr_ext_communities(struct bgp_attr_parser_args *args)
struct attr *const attr = args->attr;
const bgp_size_t length = args->length;
uint8_t sticky = 0;
+ bool proxy = false;
if (length == 0) {
attr->ecommunity = NULL;
@@ -2223,7 +2227,9 @@ bgp_attr_ext_communities(struct bgp_attr_parser_args *args)
attr->router_flag = 1;
/* Check EVPN Neighbor advertisement flags, R-bit */
- bgp_attr_evpn_na_flag(attr, &attr->router_flag);
+ bgp_attr_evpn_na_flag(attr, &attr->router_flag, &proxy);
+ if (proxy)
+ attr->es_flags |= ATTR_ES_PROXY_ADVERT;
/* Extract the Rmac, if any */
if (bgp_attr_rmac(attr, &attr->rmac)) {
diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h
index 94531313ae..1b2c75fbef 100644
--- a/bgpd/bgp_attr.h
+++ b/bgpd/bgp_attr.h
@@ -215,6 +215,30 @@ struct attr {
/* NA router flag (R-bit) support in EVPN */
uint8_t router_flag;
+ /* ES info */
+ uint8_t es_flags;
+ /* Path is not "locally-active" on the advertising VTEP. This is
+ * translated into an ARP-ND ECOM.
+ */
+#define ATTR_ES_PROXY_ADVERT (1 << 0)
+ /* Destination ES is present locally. This flag is set on local
+ * paths and sync paths
+ */
+#define ATTR_ES_IS_LOCAL (1 << 1)
+ /* There are one or more non-best paths from ES peers. Note that
+ * this flag is only set on the local MAC-IP paths in the VNI
+ * route table (not set in the global routing table). And only
+ * non-proxy advertisements from an ES peer can result in this
+ * flag being set.
+ */
+#define ATTR_ES_PEER_ACTIVE (1 << 2)
+ /* There are one or more non-best proxy paths from ES peers */
+#define ATTR_ES_PEER_PROXY (1 << 3)
+ /* An ES peer has router bit set - only applicable if
+ * ATTR_ES_PEER_ACTIVE is set
+ */
+#define ATTR_ES_PEER_ROUTER (1 << 4)
+
/* route tag */
route_tag_t tag;
@@ -241,6 +265,13 @@ struct attr {
/* EVPN MAC Mobility sequence number, if any. */
uint32_t mm_seqnum;
+ /* highest MM sequence number received in a MAC-IP route from an
+ * ES peer (this includes both proxy and non-proxy MAC-IP
+ * advertisements from ES peers).
+ * This is only applicable to local paths in the VNI routing
+ * table and derived from other imported/non-best paths.
+ */
+ uint32_t mm_sync_seqnum;
/* EVPN local router-mac */
struct ethaddr rmac;
@@ -253,6 +284,9 @@ struct attr {
/* Link bandwidth value, if any. */
uint32_t link_bw;
+
+ /* EVPN ES */
+ esi_t esi;
};
/* rmap_change_flags definition */
diff --git a/bgpd/bgp_attr_evpn.c b/bgpd/bgp_attr_evpn.c
index 65072088ae..aa0c59f3a7 100644
--- a/bgpd/bgp_attr_evpn.c
+++ b/bgpd/bgp_attr_evpn.c
@@ -54,47 +54,27 @@ void bgp_add_routermac_ecom(struct attr *attr, struct ethaddr *routermac)
* format accepted: AA:BB:CC:DD:EE:FF:GG:HH:II:JJ
* if id is null, check only is done
*/
-bool str2esi(const char *str, struct eth_segment_id *id)
+bool str2esi(const char *str, esi_t *id)
{
- unsigned int a[ESI_LEN];
+ unsigned int a[ESI_BYTES];
int i;
if (!str)
return false;
if (sscanf(str, "%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x", a + 0, a + 1,
a + 2, a + 3, a + 4, a + 5, a + 6, a + 7, a + 8, a + 9)
- != ESI_LEN) {
+ != ESI_BYTES) {
/* error in incoming str length */
return false;
}
/* valid mac address */
if (!id)
return true;
- for (i = 0; i < ESI_LEN; ++i)
+ for (i = 0; i < ESI_BYTES; ++i)
id->val[i] = a[i] & 0xff;
return true;
}
-char *esi2str(struct eth_segment_id *id)
-{
- char *ptr;
- uint8_t *val;
-
- if (!id)
- return NULL;
-
- val = id->val;
- ptr = XMALLOC(MTYPE_TMP,
- (ESI_LEN * 2 + ESI_LEN - 1 + 1) * sizeof(char));
-
- snprintf(ptr, (ESI_LEN * 2 + ESI_LEN - 1 + 1),
- "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", val[0],
- val[1], val[2], val[3], val[4], val[5], val[6], val[7], val[8],
- val[9]);
-
- return ptr;
-}
-
char *ecom_mac2str(char *ecom_mac)
{
char *en;
@@ -215,7 +195,8 @@ uint32_t bgp_attr_mac_mobility_seqnum(struct attr *attr, uint8_t *sticky)
/*
* return true if attr contains router flag extended community
*/
-void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag)
+void bgp_attr_evpn_na_flag(struct attr *attr,
+ uint8_t *router_flag, bool *proxy)
{
struct ecommunity *ecom;
int i;
@@ -237,10 +218,14 @@ void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag)
if (type == ECOMMUNITY_ENCODE_EVPN &&
sub_type == ECOMMUNITY_EVPN_SUBTYPE_ND) {
val = *pnt++;
- if (val & ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG) {
+
+ if (val & ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG)
*router_flag = 1;
- break;
- }
+
+ if (val & ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG)
+ *proxy = true;
+
+ break;
}
}
}
@@ -292,14 +277,3 @@ extern bool is_zero_gw_ip(const union gw_addr *gw_ip, const afi_t afi)
return false;
}
-
-extern bool is_zero_esi(const struct eth_segment_id *esi)
-{
- int i;
-
- for (i = 0; i < ESI_LEN; i++)
- if (esi->val[i])
- return false;
-
- return true;
-}
diff --git a/bgpd/bgp_attr_evpn.h b/bgpd/bgp_attr_evpn.h
index c1bfd83765..19c028a826 100644
--- a/bgpd/bgp_attr_evpn.h
+++ b/bgpd/bgp_attr_evpn.h
@@ -21,38 +21,20 @@
#ifndef _QUAGGA_BGP_ATTR_EVPN_H
#define _QUAGGA_BGP_ATTR_EVPN_H
-/* value of first byte of ESI */
-#define ESI_TYPE_ARBITRARY 0 /* */
-#define ESI_TYPE_LACP 1 /* <> */
-#define ESI_TYPE_BRIDGE 2 /* <Root bridge Mac-6B>:<Root Br Priority-2B>:00 */
-#define ESI_TYPE_MAC 3 /* <Syst Mac Add-6B>:<Local Discriminator Value-3B> */
-#define ESI_TYPE_ROUTER 4 /* <RouterId-4B>:<Local Discriminator Value-4B> */
-#define ESI_TYPE_AS 5 /* <AS-4B>:<Local Discriminator Value-4B> */
-
-#define MAX_ESI {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}
-#define ESI_LEN 10
-
#define MAX_ET 0xffffffff
struct attr;
-/* EVPN ESI */
-struct eth_segment_id {
- uint8_t val[ESI_LEN];
-};
-
union gw_addr {
struct in_addr ipv4;
struct in6_addr ipv6;
};
struct bgp_route_evpn {
- struct eth_segment_id eth_s_id;
union gw_addr gw_ip;
};
-extern bool str2esi(const char *str, struct eth_segment_id *id);
-extern char *esi2str(struct eth_segment_id *id);
+extern bool str2esi(const char *str, esi_t *id);
extern char *ecom_mac2str(char *ecom_mac);
extern void bgp_add_routermac_ecom(struct attr *attr,
@@ -64,9 +46,9 @@ extern uint32_t bgp_attr_mac_mobility_seqnum(struct attr *attr,
uint8_t *sticky);
extern uint8_t bgp_attr_default_gw(struct attr *attr);
-extern void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag);
+extern void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag,
+ bool *proxy);
extern bool is_zero_gw_ip(const union gw_addr *gw_ip, afi_t afi);
-extern bool is_zero_esi(const struct eth_segment_id *esi);
#endif /* _QUAGGA_BGP_ATTR_EVPN_H */
diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c
index 9f32d450b9..255a7f238b 100644
--- a/bgpd/bgp_debug.c
+++ b/bgpd/bgp_debug.c
@@ -64,6 +64,7 @@ unsigned long conf_bgp_debug_flowspec;
unsigned long conf_bgp_debug_labelpool;
unsigned long conf_bgp_debug_pbr;
unsigned long conf_bgp_debug_graceful_restart;
+unsigned long conf_bgp_debug_evpn_mh;
unsigned long term_bgp_debug_as4;
unsigned long term_bgp_debug_neighbor_events;
@@ -82,6 +83,7 @@ unsigned long term_bgp_debug_flowspec;
unsigned long term_bgp_debug_labelpool;
unsigned long term_bgp_debug_pbr;
unsigned long term_bgp_debug_graceful_restart;
+unsigned long term_bgp_debug_evpn_mh;
struct list *bgp_debug_neighbor_events_peers = NULL;
struct list *bgp_debug_keepalive_peers = NULL;
@@ -2006,6 +2008,57 @@ DEFUN (no_debug_bgp_pbr,
return CMD_SUCCESS;
}
+DEFPY (debug_bgp_evpn_mh,
+ debug_bgp_evpn_mh_cmd,
+ "[no$no] debug bgp evpn mh <es$es|route$rt>",
+ NO_STR
+ DEBUG_STR
+ BGP_STR
+ "EVPN\n"
+ "Multihoming\n"
+ "Ethernet Segment debugging\n"
+ "Route debugging\n")
+{
+ if (es) {
+ if (vty->node == CONFIG_NODE) {
+ if (no)
+ DEBUG_OFF(evpn_mh, EVPN_MH_ES);
+ else
+ DEBUG_ON(evpn_mh, EVPN_MH_ES);
+ } else {
+ if (no) {
+ TERM_DEBUG_OFF(evpn_mh, EVPN_MH_ES);
+ vty_out(vty,
+ "BGP EVPN-MH ES debugging is off\n");
+ } else {
+ TERM_DEBUG_ON(evpn_mh, EVPN_MH_ES);
+ vty_out(vty,
+ "BGP EVPN-MH ES debugging is on\n");
+ }
+ }
+ }
+ if (rt) {
+ if (vty->node == CONFIG_NODE) {
+ if (no)
+ DEBUG_OFF(evpn_mh, EVPN_MH_RT);
+ else
+ DEBUG_ON(evpn_mh, EVPN_MH_RT);
+ } else {
+ if (no) {
+ TERM_DEBUG_OFF(evpn_mh, EVPN_MH_RT);
+ vty_out(vty,
+ "BGP EVPN-MH route debugging is off\n");
+ } else {
+ TERM_DEBUG_ON(evpn_mh, EVPN_MH_RT);
+ vty_out(vty,
+ "BGP EVPN-MH route debugging is on\n");
+ }
+ }
+ }
+
+ return CMD_SUCCESS;
+}
+
DEFUN (debug_bgp_labelpool,
debug_bgp_labelpool_cmd,
"debug bgp labelpool",
@@ -2085,6 +2138,8 @@ DEFUN (no_debug_bgp,
TERM_DEBUG_OFF(pbr, PBR);
TERM_DEBUG_OFF(pbr, PBR_ERROR);
TERM_DEBUG_OFF(graceful_restart, GRACEFUL_RESTART);
+ TERM_DEBUG_OFF(evpn_mh, EVPN_MH_ES);
+ TERM_DEBUG_OFF(evpn_mh, EVPN_MH_RT);
vty_out(vty, "All possible debugging has been turned off\n");
@@ -2169,6 +2224,11 @@ DEFUN_NOSH (show_debugging_bgp,
if (BGP_DEBUG(pbr, PBR_ERROR))
vty_out(vty, " BGP policy based routing error debugging is on\n");
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+ vty_out(vty, " BGP EVPN-MH ES debugging is on\n");
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ vty_out(vty, " BGP EVPN-MH route debugging is on\n");
+
vty_out(vty, "\n");
return CMD_SUCCESS;
}
@@ -2284,6 +2344,16 @@ static int bgp_config_write_debug(struct vty *vty)
vty_out(vty, "debug bgp graceful-restart\n");
write++;
}
+
+ if (CONF_BGP_DEBUG(evpn_mh, EVPN_MH_ES)) {
+ vty_out(vty, "debug bgp evpn mh es\n");
+ write++;
+ }
+ if (CONF_BGP_DEBUG(evpn_mh, EVPN_MH_RT)) {
+ vty_out(vty, "debug bgp evpn mh route\n");
+ write++;
+ }
+
return write;
}
@@ -2410,6 +2480,8 @@ void bgp_debug_init(void)
install_element(ENABLE_NODE, &no_debug_bgp_pbr_cmd);
install_element(CONFIG_NODE, &no_debug_bgp_pbr_cmd);
+ install_element(ENABLE_NODE, &debug_bgp_evpn_mh_cmd);
+ install_element(CONFIG_NODE, &debug_bgp_evpn_mh_cmd);
}
/* Return true if this prefix is on the per_prefix_list of prefixes to debug
diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h
index 69f25566a9..f16cfee4f2 100644
--- a/bgpd/bgp_debug.h
+++ b/bgpd/bgp_debug.h
@@ -77,6 +77,7 @@ extern unsigned long conf_bgp_debug_flowspec;
extern unsigned long conf_bgp_debug_labelpool;
extern unsigned long conf_bgp_debug_pbr;
extern unsigned long conf_bgp_debug_graceful_restart;
+extern unsigned long conf_bgp_debug_evpn_mh;
extern unsigned long term_bgp_debug_as4;
extern unsigned long term_bgp_debug_neighbor_events;
@@ -93,6 +94,7 @@ extern unsigned long term_bgp_debug_flowspec;
extern unsigned long term_bgp_debug_labelpool;
extern unsigned long term_bgp_debug_pbr;
extern unsigned long term_bgp_debug_graceful_restart;
+extern unsigned long term_bgp_debug_evpn_mh;
extern struct list *bgp_debug_neighbor_events_peers;
extern struct list *bgp_debug_keepalive_peers;
@@ -129,6 +131,8 @@ struct bgp_debug_filter {
#define BGP_DEBUG_LABELPOOL 0x01
#define BGP_DEBUG_PBR 0x01
#define BGP_DEBUG_PBR_ERROR 0x02
+#define BGP_DEBUG_EVPN_MH_ES 0x01
+#define BGP_DEBUG_EVPN_MH_RT 0x02
#define BGP_DEBUG_PACKET_SEND 0x01
#define BGP_DEBUG_PACKET_SEND_DETAIL 0x02
diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c
index d6c311bfa0..f2aac3646c 100644
--- a/bgpd/bgp_ecommunity.c
+++ b/bgpd/bgp_ecommunity.c
@@ -810,6 +810,35 @@ char *ecommunity_ecom2str(struct ecommunity *ecom, int format, int filter)
ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG))
strlcpy(encbuf, "ND:Router Flag",
sizeof(encbuf));
+ if (CHECK_FLAG(
+ flags,
+ ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG))
+ strlcpy(encbuf, "ND:Proxy",
+ sizeof(encbuf));
+ } else if (*pnt
+ == ECOMMUNITY_EVPN_SUBTYPE_ES_IMPORT_RT) {
+ struct ethaddr mac;
+
+ pnt++;
+ memcpy(&mac, pnt, ETH_ALEN);
+ snprintf(encbuf,
+ sizeof(encbuf),
+ "ES-Import-Rt:%02x:%02x:%02x:%02x:%02x:%02x",
+ (uint8_t)mac.octet[0],
+ (uint8_t)mac.octet[1],
+ (uint8_t)mac.octet[2],
+ (uint8_t)mac.octet[3],
+ (uint8_t)mac.octet[4],
+ (uint8_t)mac.octet[5]);
+ } else if (*pnt
+ == ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL) {
+ uint8_t flags = *++pnt;
+
+ snprintf(encbuf,
+ sizeof(encbuf), "ESI-label-Rt:%s",
+ (flags &
+ ECOMMUNITY_EVPN_SUBTYPE_ESI_SA_FLAG) ?
+ "SA":"AA");
} else
unk_ecom = 1;
} else if (type == ECOMMUNITY_ENCODE_REDIRECT_IP_NH) {
@@ -865,21 +894,6 @@ char *ecommunity_ecom2str(struct ecommunity *ecom, int format, int filter)
} else if (sub_type == ECOMMUNITY_TRAFFIC_MARKING) {
snprintf(encbuf, sizeof(encbuf),
"FS:marking %u", *(pnt + 5));
- } else if (*pnt
- == ECOMMUNITY_EVPN_SUBTYPE_ES_IMPORT_RT) {
- struct ethaddr mac;
-
- memcpy(&mac, pnt, ETH_ALEN);
-
- snprintf(
- encbuf, sizeof(encbuf),
- "ES-Import-Rt:%02x:%02x:%02x:%02x:%02x:%02x",
- (uint8_t)mac.octet[0],
- (uint8_t)mac.octet[1],
- (uint8_t)mac.octet[2],
- (uint8_t)mac.octet[3],
- (uint8_t)mac.octet[4],
- (uint8_t)mac.octet[5]);
} else
unk_ecom = 1;
} else if (type == ECOMMUNITY_ENCODE_AS_NON_TRANS) {
diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h
index 7deae8e746..812bcc46e7 100644
--- a/bgpd/bgp_ecommunity.h
+++ b/bgpd/bgp_ecommunity.h
@@ -73,8 +73,12 @@
#define ECOMMUNITY_EVPN_SUBTYPE_ND 0x08
#define ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY_FLAG_STICKY 0x01
-#define ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG 0x01
-#define ECOMMUNITY_EVPN_SUBTYPE_ND_OVERRIDE_FLAG 0x02
+
+#define ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG 0x01
+#define ECOMMUNITY_EVPN_SUBTYPE_ND_OVERRIDE_FLAG 0x02
+#define ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG 0x04
+
+#define ECOMMUNITY_EVPN_SUBTYPE_ESI_SA_FLAG (1 << 0) /* single-active */
/* Low-order octet of the Extended Communities type field for OPAQUE types */
#define ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP 0x0c
diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c
index 8c5d6421f1..4a5d5c3b6e 100644
--- a/bgpd/bgp_evpn.c
+++ b/bgpd/bgp_evpn.c
@@ -40,6 +40,7 @@
#include "bgpd/bgp_label.h"
#include "bgpd/bgp_evpn.h"
#include "bgpd/bgp_evpn_private.h"
+#include "bgpd/bgp_evpn_mh.h"
#include "bgpd/bgp_ecommunity.h"
#include "bgpd/bgp_encap_types.h"
#include "bgpd/bgp_debug.h"
@@ -54,61 +55,24 @@
/*
* Definitions and external declarations.
*/
-extern struct zclient *zclient;
-
DEFINE_QOBJ_TYPE(bgpevpn)
-DEFINE_QOBJ_TYPE(evpnes)
+DEFINE_QOBJ_TYPE(bgp_evpn_es)
/*
* Static function declarations
*/
-static void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi,
- struct bgp_dest *dest,
- struct bgp_path_info **pi);
static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn);
+static void bgp_evpn_update_type2_route_entry(struct bgp *bgp,
+ struct bgpevpn *vpn,
+ struct bgp_node *rn, struct bgp_path_info *local_pi,
+ const char *caller);
+static struct in_addr zero_vtep_ip;
/*
* Private functions.
*/
-/* compare two IPV4 VTEP IPs */
-static int evpn_vtep_ip_cmp(void *p1, void *p2)
-{
- const struct in_addr *ip1 = p1;
- const struct in_addr *ip2 = p2;
-
- return ip1->s_addr - ip2->s_addr;
-}
-
-/*
- * Make hash key for ESI.
- */
-static unsigned int esi_hash_keymake(const void *p)
-{
- const struct evpnes *pes = p;
- const void *pnt = (void *)pes->esi.val;
-
- return jhash(pnt, ESI_BYTES, 0xa5a5a55a);
-}
-
-/*
- * Compare two ESIs.
- */
-static bool esi_cmp(const void *p1, const void *p2)
-{
- const struct evpnes *pes1 = p1;
- const struct evpnes *pes2 = p2;
-
- if (pes1 == NULL && pes2 == NULL)
- return true;
-
- if (pes1 == NULL || pes2 == NULL)
- return false;
-
- return (memcmp(pes1->esi.val, pes2->esi.val, ESI_BYTES) == 0);
-}
-
/*
* Make vni hash key.
*/
@@ -133,7 +97,7 @@ static bool vni_hash_cmp(const void *p1, const void *p2)
return (vpn1->vni == vpn2->vni);
}
-static int vni_list_cmp(void *p1, void *p2)
+int vni_list_cmp(void *p1, void *p2)
{
const struct bgpevpn *vpn1 = p1;
const struct bgpevpn *vpn2 = p2;
@@ -579,19 +543,54 @@ static void evpn_convert_nexthop_to_ipv6(struct attr *attr)
attr->mp_nexthop_len = IPV6_MAX_BYTELEN;
}
+struct bgp_node *bgp_global_evpn_node_get(
+ struct bgp_table *table, afi_t afi,
+ safi_t safi, const struct prefix_evpn *evp,
+ struct prefix_rd *prd)
+{
+ struct prefix_evpn global_p;
+
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+ /* prefix in the global table doesn't include the VTEP-IP so
+ * we need to create a different copy of the prefix
+ */
+ evpn_type1_prefix_global_copy(&global_p, evp);
+ evp = &global_p;
+ }
+ return bgp_afi_node_get(table, afi, safi, (struct prefix *)evp, prd);
+}
+
+struct bgp_node *bgp_global_evpn_node_lookup(
+ struct bgp_table *table, afi_t afi,
+ safi_t safi, const struct prefix_evpn *evp,
+ struct prefix_rd *prd)
+{
+ struct prefix_evpn global_p;
+
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+ /* prefix in the global table doesn't include the VTEP-IP so
+ * we need to create a different copy of the prefix
+ */
+ evpn_type1_prefix_global_copy(&global_p, evp);
+ evp = &global_p;
+ }
+ return bgp_afi_node_lookup(table, afi, safi, (struct prefix *)evp, prd);
+}
+
/*
* Add (update) or delete MACIP from zebra.
*/
static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn,
const struct prefix_evpn *p,
struct in_addr remote_vtep_ip, int add,
- uint8_t flags, uint32_t seq)
+ uint8_t flags, uint32_t seq, esi_t *esi)
{
struct stream *s;
int ipa_len;
char buf1[ETHER_ADDR_STRLEN];
char buf2[INET6_ADDRSTRLEN];
char buf3[INET6_ADDRSTRLEN];
+ static struct in_addr zero_remote_vtep_ip;
/* Check socket. */
if (!zclient || zclient->sock < 0)
@@ -605,6 +604,9 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn,
__func__);
return 0;
}
+
+ if (!esi)
+ esi = zero_esi;
s = zclient->obuf;
stream_reset(s);
@@ -622,13 +624,20 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn,
stream_putl(s, ipa_len);
stream_put(s, &p->prefix.macip_addr.ip.ip.addr, ipa_len);
}
- stream_put_in_addr(s, &remote_vtep_ip);
+ /* If the ESI is valid that becomes the nexthop; zero out the
+ * VTEP-IP for that case
+ */
+ if (bgp_evpn_is_esi_valid(esi))
+ stream_put_in_addr(s, &zero_remote_vtep_ip);
+ else
+ stream_put_in_addr(s, &remote_vtep_ip);
/* TX flags - MAC sticky status and/or gateway mac */
/* Also TX the sequence number of the best route. */
if (add) {
stream_putc(s, flags);
stream_putl(s, seq);
+ stream_put(s, esi, sizeof(esi_t));
}
stream_putw_at(s, 0, stream_get_endp(s));
@@ -698,40 +707,6 @@ static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn,
}
/*
- * Build extended community for EVPN ES (type-4) route
- */
-static void build_evpn_type4_route_extcomm(struct evpnes *es,
- struct attr *attr)
-{
- struct ecommunity ecom_encap;
- struct ecommunity ecom_es_rt;
- struct ecommunity_val eval;
- struct ecommunity_val eval_es_rt;
- bgp_encap_types tnl_type;
- struct ethaddr mac;
-
- /* Encap */
- tnl_type = BGP_ENCAP_TYPE_VXLAN;
- memset(&ecom_encap, 0, sizeof(ecom_encap));
- encode_encap_extcomm(tnl_type, &eval);
- ecom_encap.size = 1;
- ecom_encap.val = (uint8_t *)eval.val;
- attr->ecommunity = ecommunity_dup(&ecom_encap);
-
- /* ES import RT */
- memset(&mac, 0, sizeof(struct ethaddr));
- memset(&ecom_es_rt, 0, sizeof(ecom_es_rt));
- es_get_system_mac(&es->esi, &mac);
- encode_es_rt_extcomm(&eval_es_rt, &mac);
- ecom_es_rt.size = 1;
- ecom_es_rt.val = (uint8_t *)eval_es_rt.val;
- attr->ecommunity =
- ecommunity_merge(attr->ecommunity, &ecom_es_rt);
-
- attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
-}
-
-/*
* Build extended communities for EVPN prefix route.
*/
static void build_evpn_type5_route_extcomm(struct bgp *bgp_vrf,
@@ -800,6 +775,7 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr,
struct ecommunity_val eval_default_gw;
struct ecommunity_val eval_rmac;
struct ecommunity_val eval_na;
+ bool proxy;
bgp_encap_types tnl_type;
struct listnode *node, *nnode;
@@ -861,9 +837,10 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr,
ecommunity_merge(attr->ecommunity, &ecom_default_gw);
}
- if (attr->router_flag) {
+ proxy = !!(attr->es_flags & ATTR_ES_PROXY_ADVERT);
+ if (attr->router_flag || proxy) {
memset(&ecom_na, 0, sizeof(ecom_na));
- encode_na_flag_extcomm(&eval_na, attr->router_flag);
+ encode_na_flag_extcomm(&eval_na, attr->router_flag, proxy);
ecom_na.size = 1;
ecom_na.val = (uint8_t *)eval_na.val;
attr->ecommunity = ecommunity_merge(attr->ecommunity,
@@ -934,19 +911,60 @@ static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn,
int ret;
uint8_t flags;
int flood_control;
+ uint32_t seq;
if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) {
flags = 0;
- if (pi->attr->sticky)
- SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY);
- if (pi->attr->default_gw)
- SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW);
- if (is_evpn_prefix_ipaddr_v6(p) &&
- pi->attr->router_flag)
- SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG);
+
+ if (pi->sub_type == BGP_ROUTE_IMPORTED) {
+ if (pi->attr->sticky)
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY);
+ if (pi->attr->default_gw)
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW);
+ if (is_evpn_prefix_ipaddr_v6(p) &&
+ pi->attr->router_flag)
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG);
+
+ seq = mac_mobility_seqnum(pi->attr);
+ /* if local ES notify zebra that this is a sync path */
+ if (bgp_evpn_attr_is_local_es(pi->attr)) {
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_SYNC_PATH);
+ if (bgp_evpn_attr_is_proxy(pi->attr))
+ SET_FLAG(flags,
+ ZEBRA_MACIP_TYPE_PROXY_ADVERT);
+ }
+ } else {
+ if (!bgp_evpn_attr_is_sync(pi->attr))
+ return 0;
+
+ /* if a local path is being turned around and sent
+ * to zebra it is because it is a sync path on
+ * a local ES
+ */
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_SYNC_PATH);
+ /* supply the highest peer seq number to zebra
+ * for MM seq syncing
+ */
+ seq = bgp_evpn_attr_get_sync_seq(pi->attr);
+ /* if any of the paths from the peer have the ROUTER
+ * flag set install the local entry as a router entry
+ */
+ if (is_evpn_prefix_ipaddr_v6(p) &&
+ (pi->attr->es_flags &
+ ATTR_ES_PEER_ROUTER))
+ SET_FLAG(flags,
+ ZEBRA_MACIP_TYPE_ROUTER_FLAG);
+
+ if (!(pi->attr->es_flags & ATTR_ES_PEER_ACTIVE))
+ SET_FLAG(flags,
+ ZEBRA_MACIP_TYPE_PROXY_ADVERT);
+ }
+
ret = bgp_zebra_send_remote_macip(
- bgp, vpn, p, pi->attr->nexthop, 1, flags,
- mac_mobility_seqnum(pi->attr));
+ bgp, vpn, p, pi->attr->nexthop, 1, flags,
+ seq, bgp_evpn_attr_get_esi(pi->attr));
+ } else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+ ret = bgp_evpn_remote_es_evi_add(bgp, vpn, p);
} else {
switch (pi->attr->pmsi_tnl_type) {
case PMSI_TNLTYPE_INGR_REPL:
@@ -976,7 +994,9 @@ static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn,
if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
ret = bgp_zebra_send_remote_macip(bgp, vpn, p, remote_vtep_ip,
- 0, 0, 0);
+ 0, 0, 0, NULL);
+ else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ ret = bgp_evpn_remote_es_evi_del(bgp, vpn, p);
else
ret = bgp_zebra_send_remote_vtep(bgp, vpn, p,
VXLAN_FLOOD_DISABLED, 0);
@@ -991,19 +1011,36 @@ static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn,
*/
static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn,
struct bgp_dest *dest,
- struct bgp_path_info *old_local)
+ struct bgp_path_info *old_local,
+ struct bgp_path_info *new_select)
{
struct bgp_dest *global_dest;
struct bgp_path_info *pi;
afi_t afi = AFI_L2VPN;
safi_t safi = SAFI_EVPN;
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) {
+ char prefix_buf[PREFIX_STRLEN];
+ char esi_buf[ESI_STR_LEN];
+ char esi_buf2[ESI_STR_LEN];
+ struct prefix_evpn *evp = (struct prefix_evpn *)&dest->p;
+
+ zlog_debug("local path deleted %s es %s; new-path-es %s",
+ prefix2str(evp,
+ prefix_buf, sizeof(prefix_buf)),
+ esi_to_str(&old_local->attr->esi,
+ esi_buf, sizeof(esi_buf)),
+ new_select ? esi_to_str(&new_select->attr->esi,
+ esi_buf2, sizeof(esi_buf2)) : "");
+ }
+
/* Locate route node in the global EVPN routing table. Note that
* this table is a 2-level tree (RD-level + Prefix-level) similar to
* L3VPN routes.
*/
- global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
- bgp_dest_get_prefix(dest), &vpn->prd);
+ global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)bgp_dest_get_prefix(dest),
+ &vpn->prd);
if (global_dest) {
/* Delete route entry in the global EVPN table. */
delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi);
@@ -1020,172 +1057,12 @@ static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn,
bgp_path_info_delete(dest, old_local);
}
-static struct in_addr *es_vtep_new(struct in_addr vtep)
-{
- struct in_addr *ip;
-
- ip = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(struct in_addr));
-
- ip->s_addr = vtep.s_addr;
- return ip;
-}
-
-static void es_vtep_free(struct in_addr *ip)
-{
- XFREE(MTYPE_BGP_EVPN_ES_VTEP, ip);
-}
-
-/* check if VTEP is already part of the list */
-static int is_vtep_present_in_list(struct list *list,
- struct in_addr vtep)
-{
- struct listnode *node = NULL;
- struct in_addr *tmp;
-
- for (ALL_LIST_ELEMENTS_RO(list, node, tmp)) {
- if (tmp->s_addr == vtep.s_addr)
- return 1;
- }
- return 0;
-}
-
-/*
- * Best path for ES route was changed,
- * update the list of VTEPs for this ES
- */
-static int evpn_es_install_vtep(struct bgp *bgp, struct evpnes *es,
- const struct prefix_evpn *p,
- struct in_addr rvtep)
-{
- struct in_addr *vtep_ip;
-
- if (is_vtep_present_in_list(es->vtep_list, rvtep))
- return 0;
-
-
- vtep_ip = es_vtep_new(rvtep);
- if (vtep_ip)
- listnode_add_sort(es->vtep_list, vtep_ip);
- return 0;
-}
-
-/*
- * Best path for ES route was changed,
- * update the list of VTEPs for this ES
- */
-static int evpn_es_uninstall_vtep(struct bgp *bgp,
- struct evpnes *es,
- struct prefix_evpn *p,
- struct in_addr rvtep)
-{
- struct listnode *node, *nnode, *node_to_del = NULL;
- struct in_addr *tmp;
-
- for (ALL_LIST_ELEMENTS(es->vtep_list, node, nnode, tmp)) {
- if (tmp->s_addr == rvtep.s_addr) {
- es_vtep_free(tmp);
- node_to_del = node;
- }
- }
-
- if (node_to_del)
- list_delete_node(es->vtep_list, node_to_del);
-
- return 0;
-}
-
-/*
- * Calculate the best path for a ES(type-4) route.
- */
-static int evpn_es_route_select_install(struct bgp *bgp, struct evpnes *es,
- struct bgp_dest *dest)
-{
- int ret = 0;
- afi_t afi = AFI_L2VPN;
- safi_t safi = SAFI_EVPN;
- struct bgp_path_info *old_select; /* old best */
- struct bgp_path_info *new_select; /* new best */
- struct bgp_path_info_pair old_and_new;
-
- /* Compute the best path. */
- bgp_best_selection(bgp, dest, &bgp->maxpaths[afi][safi], &old_and_new,
- afi, safi);
- old_select = old_and_new.old;
- new_select = old_and_new.new;
-
- /*
- * If the best path hasn't changed - see if something needs to be
- * updated
- */
- if (old_select && old_select == new_select
- && old_select->type == ZEBRA_ROUTE_BGP
- && old_select->sub_type == BGP_ROUTE_IMPORTED
- && !CHECK_FLAG(dest->flags, BGP_NODE_USER_CLEAR)
- && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
- && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) {
- if (bgp_zebra_has_route_changed(old_select)) {
- ret = evpn_es_install_vtep(
- bgp, es,
- (const struct prefix_evpn *)bgp_dest_get_prefix(
- dest),
- old_select->attr->nexthop);
- }
- UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG);
- UNSET_FLAG(old_select->flags, BGP_PATH_LINK_BW_CHG);
- bgp_zebra_clear_route_change_flags(dest);
- return ret;
- }
-
- /* If the user did a "clear" this flag will be set */
- UNSET_FLAG(dest->flags, BGP_NODE_USER_CLEAR);
-
- /*
- * bestpath has changed; update relevant fields and install or uninstall
- * into the zebra RIB.
- */
- if (old_select || new_select)
- bgp_bump_version(dest);
-
- if (old_select)
- bgp_path_info_unset_flag(dest, old_select, BGP_PATH_SELECTED);
- if (new_select) {
- bgp_path_info_set_flag(dest, new_select, BGP_PATH_SELECTED);
- bgp_path_info_unset_flag(dest, new_select,
- BGP_PATH_ATTR_CHANGED);
- UNSET_FLAG(new_select->flags, BGP_PATH_MULTIPATH_CHG);
- UNSET_FLAG(new_select->flags, BGP_PATH_LINK_BW_CHG);
- }
-
- if (new_select && new_select->type == ZEBRA_ROUTE_BGP
- && new_select->sub_type == BGP_ROUTE_IMPORTED) {
- ret = evpn_es_install_vtep(
- bgp, es,
- (const struct prefix_evpn *)bgp_dest_get_prefix(dest),
- new_select->attr->nexthop);
- } else {
- if (old_select && old_select->type == ZEBRA_ROUTE_BGP
- && old_select->sub_type == BGP_ROUTE_IMPORTED)
- ret = evpn_es_uninstall_vtep(
- bgp, es,
- (struct prefix_evpn *)bgp_dest_get_prefix(dest),
- old_select->attr->nexthop);
- }
-
- /* Clear any route change flags. */
- bgp_zebra_clear_route_change_flags(dest);
-
- /* Reap old select bgp_path_info, if it has been removed */
- if (old_select && CHECK_FLAG(old_select->flags, BGP_PATH_REMOVED))
- bgp_path_info_reap(dest, old_select);
-
- return ret;
-}
-
/*
* Calculate the best path for an EVPN route. Install/update best path in zebra,
* if appropriate.
+ * Note: vpn is NULL for local EAD-ES routes.
*/
-static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
+int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
struct bgp_dest *dest)
{
struct bgp_path_info *old_select, *new_select;
@@ -1201,12 +1078,15 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
new_select = old_and_new.new;
/* If the best path hasn't changed - see if there is still something to
- * update
- * to zebra RIB.
+ * update to zebra RIB.
+ * Remote routes and SYNC routes (i.e. local routes with
+ * SYNCED_FROM_PEER flag) need to be updated to zebra on any attr
+ * change.
*/
if (old_select && old_select == new_select
&& old_select->type == ZEBRA_ROUTE_BGP
- && old_select->sub_type == BGP_ROUTE_IMPORTED
+ && (old_select->sub_type == BGP_ROUTE_IMPORTED ||
+ bgp_evpn_attr_is_sync(old_select->attr))
&& !CHECK_FLAG(dest->flags, BGP_NODE_USER_CLEAR)
&& !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
&& !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) {
@@ -1241,8 +1121,12 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
UNSET_FLAG(new_select->flags, BGP_PATH_LINK_BW_CHG);
}
+ /* a local entry with the SYNC flag also results in a MAC-IP update
+ * to zebra
+ */
if (new_select && new_select->type == ZEBRA_ROUTE_BGP
- && new_select->sub_type == BGP_ROUTE_IMPORTED) {
+ && (new_select->sub_type == BGP_ROUTE_IMPORTED ||
+ bgp_evpn_attr_is_sync(new_select->attr))) {
ret = evpn_zebra_install(
bgp, vpn,
(struct prefix_evpn *)bgp_dest_get_prefix(dest),
@@ -1255,10 +1139,13 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
* need to do an implicit delete and withdraw that route from
* peers.
*/
- if (old_select && old_select->peer == bgp->peer_self
- && old_select->type == ZEBRA_ROUTE_BGP
- && old_select->sub_type == BGP_ROUTE_STATIC)
- evpn_delete_old_local_route(bgp, vpn, dest, old_select);
+ if (new_select->sub_type == BGP_ROUTE_IMPORTED &&
+ old_select && old_select->peer == bgp->peer_self
+ && old_select->type == ZEBRA_ROUTE_BGP
+ && old_select->sub_type == BGP_ROUTE_STATIC
+ && vpn)
+ evpn_delete_old_local_route(bgp, vpn, dest,
+ old_select, new_select);
} else {
if (old_select && old_select->type == ZEBRA_ROUTE_BGP
&& old_select->sub_type == BGP_ROUTE_IMPORTED)
@@ -1279,222 +1166,21 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
return ret;
}
-/*
- * Return true if the local ri for this rn is of type gateway mac
- */
-static int evpn_route_is_def_gw(struct bgp *bgp, struct bgp_dest *dest)
-{
- struct bgp_path_info *tmp_pi = NULL;
- struct bgp_path_info *local_pi = NULL;
-
- local_pi = NULL;
- for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi;
- tmp_pi = tmp_pi->next) {
- if (tmp_pi->peer == bgp->peer_self
- && tmp_pi->type == ZEBRA_ROUTE_BGP
- && tmp_pi->sub_type == BGP_ROUTE_STATIC)
- local_pi = tmp_pi;
- }
-
- if (!local_pi)
- return 0;
-
- return local_pi->attr->default_gw;
-}
-
-
-/*
- * Return true if the local ri for this rn has sticky set
- */
-static int evpn_route_is_sticky(struct bgp *bgp, struct bgp_dest *dest)
+static struct bgp_path_info *bgp_evpn_route_get_local_path(
+ struct bgp *bgp, struct bgp_dest *dest)
{
struct bgp_path_info *tmp_pi;
- struct bgp_path_info *local_pi;
-
- local_pi = NULL;
- for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi;
- tmp_pi = tmp_pi->next) {
- if (tmp_pi->peer == bgp->peer_self
- && tmp_pi->type == ZEBRA_ROUTE_BGP
- && tmp_pi->sub_type == BGP_ROUTE_STATIC)
- local_pi = tmp_pi;
- }
-
- if (!local_pi)
- return 0;
-
- return local_pi->attr->sticky;
-}
-
-/*
- * create or update EVPN type4 route entry.
- * This could be in the ES table or the global table.
- * TODO: handle remote ES (type4) routes as well
- */
-static int update_evpn_type4_route_entry(struct bgp *bgp, struct evpnes *es,
- afi_t afi, safi_t safi,
- struct bgp_dest *dest,
- struct attr *attr, int add,
- struct bgp_path_info **ri,
- int *route_changed)
-{
- char buf[ESI_STR_LEN];
- char buf1[INET6_ADDRSTRLEN];
- struct bgp_path_info *tmp_pi = NULL;
- struct bgp_path_info *local_pi = NULL; /* local route entry if any */
- struct bgp_path_info *remote_pi = NULL; /* remote route entry if any */
- struct attr *attr_new = NULL;
- const struct prefix_evpn *evp = NULL;
-
- *ri = NULL;
- *route_changed = 1;
- evp = (const struct prefix_evpn *)bgp_dest_get_prefix(dest);
+ struct bgp_path_info *local_pi = NULL;
- /* locate the local and remote entries if any */
for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi;
- tmp_pi = tmp_pi->next) {
- if (tmp_pi->peer == bgp->peer_self
- && tmp_pi->type == ZEBRA_ROUTE_BGP
- && tmp_pi->sub_type == BGP_ROUTE_STATIC)
+ tmp_pi = tmp_pi->next) {
+ if (bgp_evpn_is_path_local(bgp, tmp_pi)) {
local_pi = tmp_pi;
- if (tmp_pi->type == ZEBRA_ROUTE_BGP
- && tmp_pi->sub_type == BGP_ROUTE_IMPORTED
- && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID))
- remote_pi = tmp_pi;
- }
-
- /* we don't expect to see a remote_ri at this point.
- * An ES route has esi + vtep_ip as the key,
- * We shouldn't see the same route from any other vtep.
- */
- if (remote_pi) {
- flog_err(
- EC_BGP_ES_INVALID,
- "%u ERROR: local es route for ESI: %s Vtep %s also learnt from remote",
- bgp->vrf_id,
- esi_to_str(&evp->prefix.es_addr.esi, buf, sizeof(buf)),
- ipaddr2str(&es->originator_ip, buf1, sizeof(buf1)));
- return -1;
- }
-
- if (!local_pi && !add)
- return 0;
-
- /* create or update the entry */
- if (!local_pi) {
-
- /* Add or update attribute to hash */
- attr_new = bgp_attr_intern(attr);
-
- /* Create new route with its attribute. */
- tmp_pi = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0,
- bgp->peer_self, attr_new, dest);
- SET_FLAG(tmp_pi->flags, BGP_PATH_VALID);
-
- /* add the newly created path to the route-node */
- bgp_path_info_add(dest, tmp_pi);
- } else {
- tmp_pi = local_pi;
- if (attrhash_cmp(tmp_pi->attr, attr)
- && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
- *route_changed = 0;
- else {
- /* The attribute has changed.
- * Add (or update) attribute to hash. */
- attr_new = bgp_attr_intern(attr);
- bgp_path_info_set_flag(dest, tmp_pi,
- BGP_PATH_ATTR_CHANGED);
-
- /* Restore route, if needed. */
- if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
- bgp_path_info_restore(dest, tmp_pi);
-
- /* Unintern existing, set to new. */
- bgp_attr_unintern(&tmp_pi->attr);
- tmp_pi->attr = attr_new;
- tmp_pi->uptime = bgp_clock();
+ break;
}
}
- /* Return back the route entry. */
- *ri = tmp_pi;
- return 0;
-}
-
-/* update evpn es (type-4) route */
-static int update_evpn_type4_route(struct bgp *bgp,
- struct evpnes *es,
- struct prefix_evpn *p)
-{
- int ret = 0;
- int route_changed = 0;
- char buf[ESI_STR_LEN];
- char buf1[INET6_ADDRSTRLEN];
- afi_t afi = AFI_L2VPN;
- safi_t safi = SAFI_EVPN;
- struct attr attr;
- struct attr *attr_new = NULL;
- struct bgp_dest *dest = NULL;
- struct bgp_path_info *pi = NULL;
-
- memset(&attr, 0, sizeof(struct attr));
-
- /* Build path-attribute for this route. */
- bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
- attr.nexthop = es->originator_ip.ipaddr_v4;
- attr.mp_nexthop_global_in = es->originator_ip.ipaddr_v4;
- attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
-
- /* Set up extended community. */
- build_evpn_type4_route_extcomm(es, &attr);
-
- /* First, create (or fetch) route node within the ESI. */
- /* NOTE: There is no RD here. */
- dest = bgp_node_get(es->route_table, (struct prefix *)p);
-
- /* Create or update route entry. */
- ret = update_evpn_type4_route_entry(bgp, es, afi, safi, dest, &attr, 1,
- &pi, &route_changed);
- if (ret != 0) {
- flog_err(EC_BGP_ES_INVALID,
- "%u ERROR: Failed to updated ES route ESI: %s VTEP %s",
- bgp->vrf_id,
- esi_to_str(&p->prefix.es_addr.esi, buf, sizeof(buf)),
- ipaddr2str(&es->originator_ip, buf1, sizeof(buf1)));
- }
-
- assert(pi);
- attr_new = pi->attr;
-
- /* Perform route selection;
- * this is just to set the flags correctly
- * as local route in the ES always wins.
- */
- evpn_es_route_select_install(bgp, es, dest);
- bgp_dest_unlock_node(dest);
-
- /* If this is a new route or some attribute has changed, export the
- * route to the global table. The route will be advertised to peers
- * from there. Note that this table is a 2-level tree (RD-level +
- * Prefix-level) similar to L3VPN routes.
- */
- if (route_changed) {
- struct bgp_path_info *global_pi;
-
- dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)p, &es->prd);
- update_evpn_type4_route_entry(bgp, es, afi, safi, dest,
- attr_new, 1, &global_pi,
- &route_changed);
-
- /* Schedule for processing and unlock node. */
- bgp_process(bgp, dest, afi, safi);
- bgp_dest_unlock_node(dest);
- }
-
- /* Unintern temporary. */
- aspath_unintern(&attr.aspath);
- return 0;
+ return local_pi;
}
static int update_evpn_type5_route_entry(struct bgp *bgp_evpn,
@@ -1640,8 +1326,9 @@ static int update_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp,
build_evpn_type5_route_extcomm(bgp_vrf, &attr);
/* get the route node in global table */
- dest = bgp_afi_node_get(bgp_evpn->rib[afi][safi], afi, safi,
- (struct prefix *)evp, &bgp_vrf->vrf_prd);
+ dest = bgp_global_evpn_node_get(bgp_evpn->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)evp,
+ &bgp_vrf->vrf_prd);
assert(dest);
/* create or update the route entry within the route node */
@@ -1660,15 +1347,137 @@ static int update_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp,
return 0;
}
+/*
+ * Collect sync information for a local MAC-IP path from the best imported
+ * path found on the same route node.
+ *
+ * Among the paths on rn that are BGP_ROUTE_IMPORTED and BGP_PATH_VALID, the
+ * one preferred by bgp_evpn_path_info_cmp() is examined. It contributes only
+ * if its ESI is byte-identical to esi and its MAC mobility sequence number
+ * is not lower than loc_seq.
+ *
+ * bgp             - passed through to the path comparison
+ * esi             - ESI the imported path must match
+ * rn              - route node whose path list is walked
+ * loc_seq         - local sequence number; older peer paths are ignored
+ * max_sync_seq    - raised to the peer sequence number if that is larger
+ * active_on_peer  - set to true if the peer path is not a proxy advert
+ * peer_router     - set to true if the peer path has router_flag set
+ * proxy_from_peer - set to true if the peer path is a proxy advert
+ *
+ * The output arguments are only ever set/raised here, never cleared, so the
+ * caller has to initialize them.
+ */
+static void bgp_evpn_get_sync_info(struct bgp *bgp, esi_t *esi,
+		struct bgp_node *rn, uint32_t loc_seq, uint32_t *max_sync_seq,
+		bool *active_on_peer, bool *peer_router,
+		bool *proxy_from_peer)
+{
+	struct bgp_path_info *pi;
+	struct bgp_path_info *best_imported = NULL;
+	struct attr *peer_attr;
+	uint32_t peer_seq;
+	int paths_eq;
+
+	/* Pick the most preferred valid imported path. A local path can
+	 * only be present as best path, so it is never a candidate here.
+	 */
+	for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) {
+		if (pi->sub_type == BGP_ROUTE_IMPORTED
+		    && CHECK_FLAG(pi->flags, BGP_PATH_VALID)
+		    && bgp_evpn_path_info_cmp(bgp, pi, best_imported,
+					      &paths_eq))
+			best_imported = pi;
+	}
+
+	if (!best_imported)
+		return;
+
+	peer_attr = best_imported->attr;
+
+	/* Only a path with the same ES destination as the local path is a
+	 * sync path.
+	 */
+	if (memcmp(esi, bgp_evpn_attr_get_esi(peer_attr), sizeof(esi_t)) != 0)
+		return;
+
+	/* A sync path older than the local path carries no information. */
+	peer_seq = mac_mobility_seqnum(peer_attr);
+	if (peer_seq < loc_seq)
+		return;
+
+	/* Tell a proxy advertisement apart from an active (non-proxy) one. */
+	if (peer_attr->es_flags & ATTR_ES_PROXY_ADVERT)
+		*proxy_from_peer = true;
+	else
+		*active_on_peer = true;
+
+	if (peer_attr->router_flag)
+		*peer_router = true;
+
+	/* Both proxy and non-proxy imports are used to determine the max
+	 * sync sequence.
+	 */
+	if (peer_seq > *max_sync_seq)
+		*max_sync_seq = peer_seq;
+}
+
+/* Bubble up sync-info from all paths (non-best) to the local-path.
+ * This is needed for MM sequence number syncing and proxy advertisement.
+ * Note: The local path can only exist as a best path in the
+ * VPN route table. It will take precedence over all sync paths.
+ *
+ * Only type-2 (MAC-IP) prefixes are processed; for any other route type
+ * attr is left untouched.
+ *
+ * bgp        - passed through to bgp_evpn_get_sync_info()
+ * rn         - route node of the prefix; its path list supplies the sync info
+ * attr       - attribute of the local path; mm_sync_seqnum and the
+ *              ATTR_ES_PEER_* bits of es_flags are rewritten in place
+ * loc_seq    - local sequence number handed to bgp_evpn_get_sync_info()
+ * setup_sync - when false and attr carries a valid ESI, attr is not modified
+ */
+static void update_evpn_route_entry_sync_info(struct bgp *bgp,
+		struct bgp_node *rn, struct attr *attr, uint32_t loc_seq,
+		bool setup_sync)
+{
+	esi_t *esi;
+	struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
+
+	if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
+		return;
+
+	esi = bgp_evpn_attr_get_esi(attr);
+	if (bgp_evpn_is_esi_valid(esi)) {
+		if (setup_sync) {
+			uint32_t max_sync_seq = 0;
+			bool active_on_peer = false;
+			bool peer_router = false;
+			bool proxy_from_peer = false;
+
+			bgp_evpn_get_sync_info(bgp, esi, rn, loc_seq,
+					&max_sync_seq, &active_on_peer,
+					&peer_router, &proxy_from_peer);
+
+			/* Every PEER_* bit is recomputed from scratch so a
+			 * stale value in attr never survives.
+			 */
+			attr->mm_sync_seqnum = max_sync_seq;
+			if (active_on_peer)
+				attr->es_flags |= ATTR_ES_PEER_ACTIVE;
+			else
+				attr->es_flags &= ~ATTR_ES_PEER_ACTIVE;
+			if (proxy_from_peer)
+				attr->es_flags |= ATTR_ES_PEER_PROXY;
+			else
+				attr->es_flags &= ~ATTR_ES_PEER_PROXY;
+			if (peer_router)
+				attr->es_flags |= ATTR_ES_PEER_ROUTER;
+			else
+				attr->es_flags &= ~ATTR_ES_PEER_ROUTER;
+
+			if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) {
+				char prefix_buf[PREFIX_STRLEN];
+				char esi_buf[ESI_STR_LEN];
+
+				/* max_sync_seq is unsigned: print with %u so
+				 * large sequence numbers are not shown as
+				 * negative values.
+				 */
+				zlog_debug("setup sync info for %s es %s max_seq %u %s%s%s",
+					prefix2str(evp, prefix_buf,
+						sizeof(prefix_buf)),
+					esi_to_str(esi, esi_buf,
+						sizeof(esi_buf)),
+					max_sync_seq,
+					(attr->es_flags & ATTR_ES_PEER_ACTIVE) ?
+					"peer-active " : "",
+					(attr->es_flags & ATTR_ES_PEER_PROXY) ?
+					"peer-proxy " : "",
+					(attr->es_flags & ATTR_ES_PEER_ROUTER) ?
+					"peer-router " : "");
+			}
+		}
+	} else {
+		/* No valid ESI: the path cannot have sync state.
+		 * NOTE(review): ATTR_ES_PEER_ROUTER is not cleared here,
+		 * unlike in the setup_sync branch above - confirm whether
+		 * that is intentional.
+		 */
+		attr->mm_sync_seqnum = 0;
+		attr->es_flags &= ~ATTR_ES_PEER_ACTIVE;
+		attr->es_flags &= ~ATTR_ES_PEER_PROXY;
+	}
+}
+
/*
* Create or update EVPN route entry. This could be in the VNI route table
* or the global route table.
*/
static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
- afi_t afi, safi_t safi,
- struct bgp_dest *dest, struct attr *attr,
- int add, struct bgp_path_info **pi,
- uint8_t flags, uint32_t seq)
+ afi_t afi, safi_t safi, struct bgp_dest *dest,
+ struct attr *attr, int add,
+ struct bgp_path_info **pi, uint8_t flags,
+ uint32_t seq, bool setup_sync,
+ bool *old_is_sync)
{
struct bgp_path_info *tmp_pi;
struct bgp_path_info *local_pi;
@@ -1684,14 +1493,7 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
memset(&label, 0, sizeof(label));
/* See if this is an update of an existing route, or a new add. */
- local_pi = NULL;
- for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi;
- tmp_pi = tmp_pi->next) {
- if (tmp_pi->peer == bgp->peer_self
- && tmp_pi->type == ZEBRA_ROUTE_BGP
- && tmp_pi->sub_type == BGP_ROUTE_STATIC)
- local_pi = tmp_pi;
- }
+ local_pi = bgp_evpn_route_get_local_path(bgp, dest);
/* If route doesn't exist already, create a new one, if told to.
* Otherwise act based on whether the attributes of the route have
@@ -1700,6 +1502,14 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
if (!local_pi && !add)
return 0;
+ if (old_is_sync && local_pi)
+ *old_is_sync = bgp_evpn_attr_is_sync(local_pi->attr);
+
+ /* if a local path is being added with a non-zero esi look
+ * for SYNC paths from ES peers and bubble up the sync-info
+ */
+ update_evpn_route_entry_sync_info(bgp, dest, attr, seq, setup_sync);
+
/* For non-GW MACs, update MAC mobility seq number, if needed. */
if (seq && !CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW))
add_mac_mobility_to_attr(seq, attr);
@@ -1811,11 +1621,11 @@ static void evpn_zebra_reinstall_best_route(struct bgp *bgp,
}
if (curr_select && curr_select->type == ZEBRA_ROUTE_BGP
- && curr_select->sub_type == BGP_ROUTE_IMPORTED)
- evpn_zebra_install(
- bgp, vpn,
- (const struct prefix_evpn *)bgp_dest_get_prefix(dest),
- curr_select);
+ && (curr_select->sub_type == BGP_ROUTE_IMPORTED ||
+ bgp_evpn_attr_is_sync(curr_select->attr)))
+ evpn_zebra_install(bgp, vpn,
+ (const struct prefix_evpn *)bgp_dest_get_prefix(dest),
+ curr_select);
}
/*
@@ -1842,7 +1652,7 @@ static void evpn_cleanup_local_non_best_route(struct bgp *bgp,
zlog_debug("evicting local evpn prefix %pRN as remote won",
dest);
- evpn_delete_old_local_route(bgp, vpn, dest, local_pi);
+ evpn_delete_old_local_route(bgp, vpn, dest, local_pi, NULL);
bgp_path_info_reap(dest, local_pi);
/* tell zebra to re-add the best remote path */
@@ -1855,7 +1665,7 @@ static void evpn_cleanup_local_non_best_route(struct bgp *bgp,
*/
static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
struct prefix_evpn *p, uint8_t flags,
- uint32_t seq)
+ uint32_t seq, esi_t *esi)
{
struct bgp_dest *dest;
struct attr attr;
@@ -1865,6 +1675,7 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
afi_t afi = AFI_L2VPN;
safi_t safi = SAFI_EVPN;
int route_change;
+ bool old_is_sync = false;
memset(&attr, 0, sizeof(struct attr));
@@ -1877,6 +1688,13 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
attr.default_gw = CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW) ? 1 : 0;
attr.router_flag = CHECK_FLAG(flags,
ZEBRA_MACIP_TYPE_ROUTER_FLAG) ? 1 : 0;
+ if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT))
+ attr.es_flags |= ATTR_ES_PROXY_ADVERT;
+
+ if (esi && bgp_evpn_is_esi_valid(esi)) {
+ memcpy(&attr.esi, esi, sizeof(esi_t));
+ attr.es_flags |= ATTR_ES_IS_LOCAL;
+ }
/* PMSI is only needed for type-3 routes */
if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) {
@@ -1884,6 +1702,21 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
attr.pmsi_tnl_type = PMSI_TNLTYPE_INGR_REPL;
}
+ if (bgp_debug_zebra(NULL)) {
+ char buf[ETHER_ADDR_STRLEN];
+ char buf1[PREFIX_STRLEN];
+ char buf3[ESI_STR_LEN];
+
+ zlog_debug("VRF %s vni %u type-2 route evp %s RMAC %s nexthop %s esi %s",
+ vpn->bgp_vrf ?
+ vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ",
+ vpn->vni,
+ prefix2str(p, buf1, sizeof(buf1)),
+ prefix_mac2str(&attr.rmac, buf,
+ sizeof(buf)),
+ inet_ntoa(attr.mp_nexthop_global_in),
+ esi_to_str(esi, buf3, sizeof(buf3)));
+ }
/* router mac is only needed for type-2 routes here. */
if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) {
uint8_t af_flags = 0;
@@ -1892,20 +1725,6 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
SET_FLAG(af_flags, BGP_EVPN_MACIP_TYPE_SVI_IP);
bgp_evpn_get_rmac_nexthop(vpn, p, &attr, af_flags);
-
- if (bgp_debug_zebra(NULL)) {
- char buf[ETHER_ADDR_STRLEN];
- char buf1[PREFIX_STRLEN];
-
- zlog_debug("VRF %s vni %u type-2 route evp %s RMAC %s nexthop %s",
- vpn->bgp_vrf ?
- vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ",
- vpn->vni,
- prefix2str(p, buf1, sizeof(buf1)),
- prefix_mac2str(&attr.rmac, buf,
- sizeof(buf)),
- inet_ntoa(attr.mp_nexthop_global_in));
- }
}
vni2label(vpn->vni, &(attr.label));
@@ -1930,7 +1749,8 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
/* Create or update route entry. */
route_change = update_evpn_route_entry(bgp, vpn, afi, safi, dest, &attr,
- 1, &pi, flags, seq);
+ 1, &pi, flags, seq,
+ true /* setup_sync */, &old_is_sync);
assert(pi);
attr_new = pi->attr;
@@ -1951,9 +1771,25 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
* to re-add the best remote dest. BGP doesn't retain non-best local
* routes.
*/
- if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
+ if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
route_change = 0;
- evpn_cleanup_local_non_best_route(bgp, vpn, dest, pi);
+ } else {
+ if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
+ route_change = 0;
+ evpn_cleanup_local_non_best_route(bgp, vpn, dest, pi);
+ } else {
+ bool new_is_sync;
+
+ /* If the local path already existed and is still the
+ * best path we need to also check if it transitioned
+ * from being a sync path to a non-sync path. If it
+ * did, we need to notify zebra that the sync-path
+ * has been removed.
+ */
+ new_is_sync = bgp_evpn_attr_is_sync(pi->attr);
+ if (!new_is_sync && old_is_sync)
+ evpn_zebra_uninstall(bgp, vpn, p, zero_vtep_ip);
+ }
}
bgp_path_info_unlock(pi);
@@ -1967,10 +1803,12 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
if (route_change) {
struct bgp_path_info *global_pi;
- dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)p, &vpn->prd);
+ dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)p,
+ &vpn->prd);
update_evpn_route_entry(bgp, vpn, afi, safi, dest, attr_new, 1,
- &global_pi, flags, seq);
+ &global_pi, flags, seq,
+ false /* setup_sync */, NULL /* old_is_sync */);
/* Schedule for processing and unlock node. */
bgp_process(bgp, dest, afi, safi);
@@ -1987,7 +1825,7 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
* Delete EVPN route entry.
* The entry can be in ESI/VNI table or the global table.
*/
-static void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi,
+void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi,
struct bgp_dest *dest,
struct bgp_path_info **pi)
{
@@ -2010,56 +1848,6 @@ static void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi,
bgp_path_info_delete(dest, tmp_pi);
}
-
-
-/* Delete EVPN ES (type-4) route */
-static int delete_evpn_type4_route(struct bgp *bgp,
- struct evpnes *es,
- struct prefix_evpn *p)
-{
- afi_t afi = AFI_L2VPN;
- safi_t safi = SAFI_EVPN;
- struct bgp_path_info *pi;
- struct bgp_dest *dest = NULL; /* dest in esi table */
- struct bgp_dest *global_dest = NULL; /* dest in global table */
-
- /* First, locate the route node within the ESI.
- * If it doesn't exist, ther is nothing to do.
- * Note: there is no RD here.
- */
- dest = bgp_node_lookup(es->route_table, (struct prefix *)p);
- if (!dest)
- return 0;
-
- /* Next, locate route node in the global EVPN routing table.
- * Note that this table is a 2-level tree (RD-level + Prefix-level)
- */
- global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)p, &es->prd);
- if (global_dest) {
-
- /* Delete route entry in the global EVPN table. */
- delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi);
-
- /* Schedule for processing - withdraws to peers happen from
- * this table.
- */
- if (pi)
- bgp_process(bgp, global_dest, afi, safi);
- bgp_dest_unlock_node(global_dest);
- }
-
- /*
- * Delete route entry in the ESI route table.
- * This can just be removed.
- */
- delete_evpn_route_entry(bgp, afi, safi, dest, &pi);
- if (pi)
- bgp_path_info_reap(dest, pi);
- bgp_dest_unlock_node(dest);
- return 0;
-}
-
/* Delete EVPN type5 route */
static int delete_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp)
{
@@ -2074,8 +1862,8 @@ static int delete_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp)
return 0;
/* locate the global route entry for this type-5 prefix */
- dest = bgp_afi_node_lookup(bgp_evpn->rib[afi][safi], afi, safi,
- (struct prefix *)evp, &bgp_vrf->vrf_prd);
+ dest = bgp_global_evpn_node_lookup(bgp_evpn->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)evp, &bgp_vrf->vrf_prd);
if (!dest)
return 0;
@@ -2111,8 +1899,8 @@ static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
* this table is a 2-level tree (RD-level + Prefix-level) similar to
* L3VPN routes.
*/
- global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)p, &vpn->prd);
+ global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)p, &vpn->prd);
if (global_dest) {
/* Delete route entry in the global EVPN table. */
delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi);
@@ -2137,139 +1925,177 @@ static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
return 0;
}
+/*
+ * Rebuild the attributes of one existing local type-2 (MAC-IP) route,
+ * re-run route selection on its route node and, if the route changed and
+ * is still the selected local path, update the copy in the global EVPN
+ * table and schedule it for processing.
+ *
+ * bgp      - BGP instance owning the global EVPN table (bgp->rib)
+ * vpn      - VNI the route belongs to; supplies originator IP, VNI label,
+ *            RD and flags
+ * rn       - route node of the prefix (update_all_type2_routes() passes a
+ *            node from vpn->route_table)
+ * local_pi - current local path on rn; sticky, router, default-gw, ES flags,
+ *            ESI and MM sequence number are carried over from its attribute
+ * caller   - name of the calling function, only used in the debug log
+ *
+ * Nothing is done if local_pi is already flagged BGP_PATH_REMOVED.
+ */
+static void bgp_evpn_update_type2_route_entry(struct bgp *bgp,
+ struct bgpevpn *vpn, struct bgp_node *rn,
+ struct bgp_path_info *local_pi, const char *caller)
+{
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp_path_info *pi;
+ struct attr attr;
+ struct attr *attr_new;
+ uint32_t seq;
+ int add_l3_ecomm = 0;
+ struct bgp_node *global_rn;
+ struct bgp_path_info *global_pi;
+ struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
+ int route_change;
+ bool old_is_sync = false;
+
+ if (CHECK_FLAG(local_pi->flags, BGP_PATH_REMOVED))
+ return;
+
+ /*
+ * Build attribute per local route as the MAC mobility and
+ * some other values could differ for different routes. The
+ * attributes will be shared in the hash table.
+ */
+ /* NOTE(review): attr is not zeroed explicitly here; presumably
+ * bgp_attr_default_set() initializes the whole structure - confirm.
+ */
+ bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
+ attr.nexthop = vpn->originator_ip;
+ attr.mp_nexthop_global_in = vpn->originator_ip;
+ attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+ /* carry over the per-route state of the existing local path */
+ attr.sticky = (local_pi->attr->sticky) ? 1 : 0;
+ attr.router_flag = (local_pi->attr->router_flag) ? 1 : 0;
+ attr.es_flags = local_pi->attr->es_flags;
+ if (local_pi->attr->default_gw) {
+ attr.default_gw = 1;
+ /* a default-gw route with an IPv6 address also gets router_flag */
+ if (is_evpn_prefix_ipaddr_v6(evp))
+ attr.router_flag = 1;
+ }
+ memcpy(&attr.esi, &local_pi->attr->esi, sizeof(esi_t));
+ bgp_evpn_get_rmac_nexthop(vpn, evp, &attr,
+ local_pi->extra->af_flags);
+ vni2label(vpn->vni, &(attr.label));
+ /* Add L3 VNI RTs and RMAC for non IPv6 link-local if
+ * using L3 VNI for type-2 routes also.
+ */
+ if ((is_evpn_prefix_ipaddr_v4(evp) ||
+ !IN6_IS_ADDR_LINKLOCAL(
+ &evp->prefix.macip_addr.ip.ipaddr_v6)) &&
+ CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) &&
+ bgpevpn_get_l3vni(vpn))
+ add_l3_ecomm = 1;
+
+ /* Set up extended community. */
+ build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm);
+ /* keep the MM sequence number of the existing local path */
+ seq = mac_mobility_seqnum(local_pi->attr);
+
+ if (bgp_debug_zebra(NULL)) {
+ char buf[ETHER_ADDR_STRLEN];
+ char buf1[PREFIX_STRLEN];
+ char buf3[ESI_STR_LEN];
+
+ zlog_debug("VRF %s vni %u evp %s RMAC %s nexthop %s esi %s esf 0x%x from %s",
+ vpn->bgp_vrf ?
+ vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ",
+ vpn->vni,
+ prefix2str(evp, buf1, sizeof(buf1)),
+ prefix_mac2str(&attr.rmac, buf, sizeof(buf)),
+ inet_ntoa(attr.mp_nexthop_global_in),
+ esi_to_str(&attr.esi, buf3, sizeof(buf3)),
+ attr.es_flags, caller);
+ }
+
+ /* Update the route entry. */
+ /* add=0: only an existing local path is updated; flags=0.
+ * old_is_sync receives whether the path was a sync path before
+ * the update.
+ */
+ route_change = update_evpn_route_entry(bgp, vpn, afi, safi,
+ rn, &attr, 0, &pi, 0, seq,
+ true /* setup_sync */, &old_is_sync);
+
+ assert(pi);
+ attr_new = pi->attr;
+ /* lock pi to prevent freeing in evpn_route_select_install */
+ bgp_path_info_lock(pi);
+
+ /* Perform route selection. Normally, the local route in the
+ * VNI is expected to win and be the best route. However,
+ * under peculiar situations (e.g., tunnel (next hop) IP change
+ * that causes best selection to be based on next hop), a
+ * remote route could win. If the local route is the best,
+ * ensure it is updated in the global EVPN route table and
+ * advertised to peers; otherwise, ensure it is evicted and
+ * (re)install the remote route into zebra.
+ */
+ evpn_route_select_install(bgp, vpn, rn);
+
+ /* a removed or non-selected local path is not propagated to the
+ * global table
+ */
+ if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
+ route_change = 0;
+ } else {
+ if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
+ route_change = 0;
+ evpn_cleanup_local_non_best_route(bgp, vpn, rn, pi);
+ } else {
+ bool new_is_sync;
+
+ /* If the local path already existed and is still the
+ * best path we need to also check if it transitioned
+ * from being a sync path to a non-sync path. If it
+ * did, we need to notify zebra that the sync-path
+ * has been removed.
+ */
+ new_is_sync = bgp_evpn_attr_is_sync(pi->attr);
+ if (!new_is_sync && old_is_sync)
+ evpn_zebra_uninstall(bgp, vpn,
+ evp, zero_vtep_ip);
+ }
+ }
+
+
+ /* unlock pi */
+ bgp_path_info_unlock(pi);
+
+ if (route_change) {
+ /* Update route in global routing table. */
+ global_rn = bgp_global_evpn_node_get(bgp->rib[afi][safi],
+ afi, safi, evp, &vpn->prd);
+ assert(global_rn);
+ /* reuse the attribute taken from the VNI-table path (attr_new);
+ * sync info is not recomputed for the global table entry
+ */
+ update_evpn_route_entry(bgp, vpn, afi, safi, global_rn,
+ attr_new, 0, &global_pi, 0,
+ mac_mobility_seqnum(attr_new),
+ false /* setup_sync */, NULL /* old_is_sync */);
+
+ /* Schedule for processing and unlock node. */
+ bgp_process(bgp, global_rn, afi, safi);
+ bgp_dest_unlock_node(global_rn);
+ }
+
+ /* Unintern temporary. */
+ aspath_unintern(&attr.aspath);
+}
+
/*
* Update all type-2 (MACIP) local routes for this VNI - these should also
* be scheduled for advertise to peers.
*/
static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn)
{
- afi_t afi;
- safi_t safi;
struct bgp_dest *dest;
- struct bgp_path_info *pi, *tmp_pi;
- struct attr attr;
- struct attr *attr_new;
- uint32_t seq;
- int add_l3_ecomm = 0;
-
- afi = AFI_L2VPN;
- safi = SAFI_EVPN;
+ struct bgp_path_info *tmp_pi;
/* Walk this VNI's route table and update local type-2 routes. For any
* routes updated, update corresponding entry in the global table too.
*/
for (dest = bgp_table_top(vpn->route_table); dest;
- dest = bgp_route_next(dest)) {
+ dest = bgp_route_next(dest)) {
const struct prefix_evpn *evp =
(const struct prefix_evpn *)bgp_dest_get_prefix(dest);
- struct bgp_dest *rd_dest;
- struct bgp_path_info *global_pi;
if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
continue;
/* Identify local route. */
for (tmp_pi = bgp_dest_get_bgp_path_info(dest); tmp_pi;
- tmp_pi = tmp_pi->next) {
+ tmp_pi = tmp_pi->next) {
if (tmp_pi->peer == bgp->peer_self
- && tmp_pi->type == ZEBRA_ROUTE_BGP
- && tmp_pi->sub_type == BGP_ROUTE_STATIC)
+ && tmp_pi->type == ZEBRA_ROUTE_BGP
+ && tmp_pi->sub_type == BGP_ROUTE_STATIC)
break;
}
if (!tmp_pi)
continue;
- /*
- * Build attribute per local route as the MAC mobility and
- * some other values could differ for different routes. The
- * attributes will be shared in the hash table.
- */
- bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
- attr.nexthop = vpn->originator_ip;
- attr.mp_nexthop_global_in = vpn->originator_ip;
- attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
- bgp_evpn_get_rmac_nexthop(vpn, evp, &attr,
- tmp_pi->extra->af_flags);
-
- if (evpn_route_is_sticky(bgp, dest))
- attr.sticky = 1;
- else if (evpn_route_is_def_gw(bgp, dest)) {
- attr.default_gw = 1;
- if (is_evpn_prefix_ipaddr_v6(evp))
- attr.router_flag = 1;
- }
-
- if (bgp_debug_zebra(NULL)) {
- char buf[ETHER_ADDR_STRLEN];
- char buf1[PREFIX_STRLEN];
-
- zlog_debug("VRF %s vni %u evp %s RMAC %s nexthop %s",
- vpn->bgp_vrf ?
- vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ",
- vpn->vni,
- prefix2str(evp, buf1, sizeof(buf1)),
- prefix_mac2str(&attr.rmac, buf, sizeof(buf)),
- inet_ntoa(attr.mp_nexthop_global_in));
- }
-
- /* Add L3 VNI RTs and RMAC for non IPv6 link-local if
- * using L3 VNI for type-2 routes also.
- */
- if ((is_evpn_prefix_ipaddr_v4(evp) ||
- !IN6_IS_ADDR_LINKLOCAL(
- &evp->prefix.macip_addr.ip.ipaddr_v6)) &&
- CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) &&
- bgpevpn_get_l3vni(vpn))
- add_l3_ecomm = 1;
-
- /* Set up extended community. */
- build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm);
-
- seq = mac_mobility_seqnum(tmp_pi->attr);
-
- /* Update the route entry. */
- update_evpn_route_entry(bgp, vpn, afi, safi, dest, &attr, 0,
- &pi, 0, seq);
-
- /* lock ri to prevent freeing in evpn_route_select_install */
- bgp_path_info_lock(pi);
-
- /* Perform route selection. Normally, the local route in the
- * VNI is expected to win and be the best route. However,
- * under peculiar situations (e.g., tunnel (next hop) IP change
- * that causes best selection to be based on next hop), a
- * remote route could win. If the local route is the best,
- * ensure it is updated in the global EVPN route table and
- * advertised to peers; otherwise, ensure it is evicted and
- * (re)install the remote route into zebra.
- */
- evpn_route_select_install(bgp, vpn, dest);
- if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
- evpn_cleanup_local_non_best_route(bgp, vpn, dest, pi);
- /* unlock pi */
- bgp_path_info_unlock(pi);
- } else {
- attr_new = pi->attr;
- /* unlock pi */
- bgp_path_info_unlock(pi);
-
- /* Update route in global routing table. */
- rd_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi,
- safi, (struct prefix *)evp,
- &vpn->prd);
- assert(rd_dest);
- update_evpn_route_entry(bgp, vpn, afi, safi, rd_dest,
- attr_new, 0, &global_pi, 0,
- mac_mobility_seqnum(attr_new));
-
- /* Schedule for processing and unlock node. */
- bgp_process(bgp, rd_dest, afi, safi);
- bgp_dest_unlock_node(rd_dest);
- }
-
- /* Unintern temporary. */
- aspath_unintern(&attr.aspath);
+ bgp_evpn_update_type2_route_entry(bgp, vpn, dest, tmp_pi,
+ __func__);
}
return 0;
@@ -2356,27 +2182,6 @@ static int delete_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn)
}
/*
- * Delete all routes in per ES route-table
- */
-static int delete_all_es_routes(struct bgp *bgp, struct evpnes *es)
-{
- struct bgp_dest *dest;
- struct bgp_path_info *pi, *nextpi;
-
- /* Walk this ES's route table and delete all routes. */
- for (dest = bgp_table_top(es->route_table); dest;
- dest = bgp_route_next(dest)) {
- for (pi = bgp_dest_get_bgp_path_info(dest);
- (pi != NULL) && (nextpi = pi->next, 1); pi = nextpi) {
- bgp_path_info_delete(dest, pi);
- bgp_path_info_reap(dest, pi);
- }
- }
-
- return 0;
-}
-
-/*
* Delete all routes in the per-VNI route table.
*/
static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
@@ -2434,7 +2239,7 @@ int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
if (bgp_evpn_vni_flood_mode_get(bgp, vpn)
== VXLAN_FLOOD_HEAD_END_REPL) {
build_evpn_type3_prefix(&p, vpn->originator_ip);
- ret = update_evpn_route(bgp, vpn, &p, 0, 0);
+ ret = update_evpn_route(bgp, vpn, &p, 0, 0, NULL);
if (ret)
return ret;
}
@@ -2442,29 +2247,6 @@ int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
return update_all_type2_routes(bgp, vpn);
}
-/* Delete (and withdraw) local routes for specified ES from global and ES table.
- * Also remove all other routes from the per ES table.
- * Invoked when ES is deleted.
- */
-static int delete_routes_for_es(struct bgp *bgp, struct evpnes *es)
-{
- int ret;
- char buf[ESI_STR_LEN];
- struct prefix_evpn p;
-
- /* Delete and withdraw locally learnt ES route */
- build_evpn_type4_prefix(&p, &es->esi, es->originator_ip.ipaddr_v4);
- ret = delete_evpn_type4_route(bgp, es, &p);
- if (ret) {
- flog_err(EC_BGP_EVPN_ROUTE_DELETE,
- "%u failed to delete type-4 route for ESI %s",
- bgp->vrf_id, esi_to_str(&es->esi, buf, sizeof(buf)));
- }
-
- /* Delete all routes from per ES table */
- return delete_all_es_routes(bgp, es);
-}
-
/*
* Delete (and withdraw) local routes for specified VNI from the global
* table and per-VNI table. After this, remove all other routes from
@@ -2574,68 +2356,6 @@ bgp_create_evpn_bgp_path_info(struct bgp_path_info *parent_pi,
return pi;
}
-/* Install EVPN route entry in ES */
-static int install_evpn_route_entry_in_es(struct bgp *bgp, struct evpnes *es,
- const struct prefix_evpn *p,
- struct bgp_path_info *parent_pi)
-{
- int ret = 0;
- struct bgp_dest *dest = NULL;
- struct bgp_path_info *pi = NULL;
- struct attr *attr_new = NULL;
-
- /* Create (or fetch) route within the VNI.
- * NOTE: There is no RD here.
- */
- dest = bgp_node_get(es->route_table, (struct prefix *)p);
-
- /* Check if route entry is already present. */
- for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
- if (pi->extra
- && (struct bgp_path_info *)pi->extra->parent == parent_pi)
- break;
-
- if (!pi) {
- /* Add (or update) attribute to hash. */
- attr_new = bgp_attr_intern(parent_pi->attr);
-
- /* Create new route with its attribute. */
- pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0,
- parent_pi->peer, attr_new, dest);
- SET_FLAG(pi->flags, BGP_PATH_VALID);
- bgp_path_info_extra_get(pi);
- pi->extra->parent = bgp_path_info_lock(parent_pi);
- bgp_dest_lock_node((struct bgp_dest *)parent_pi->net);
- bgp_path_info_add(dest, pi);
- } else {
- if (attrhash_cmp(pi->attr, parent_pi->attr)
- && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
- bgp_dest_unlock_node(dest);
- return 0;
- }
- /* The attribute has changed. */
- /* Add (or update) attribute to hash. */
- attr_new = bgp_attr_intern(parent_pi->attr);
-
- /* Restore route, if needed. */
- if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED))
- bgp_path_info_restore(dest, pi);
-
- /* Mark if nexthop has changed. */
- if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop))
- SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED);
-
- /* Unintern existing, set to new. */
- bgp_attr_unintern(&pi->attr);
- pi->attr = attr_new;
- pi->uptime = bgp_clock();
- }
-
- /* Perform route selection and update zebra, if required. */
- ret = evpn_es_route_select_install(bgp, es, dest);
- return ret;
-}
-
/*
* Install route entry into the VRF routing table and invoke route selection.
*/
@@ -2761,8 +2481,17 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
{
struct bgp_dest *dest;
struct bgp_path_info *pi;
+ struct bgp_path_info *local_pi;
struct attr *attr_new;
int ret;
+ struct prefix_evpn ad_evp;
+
+ /* EAD prefix in the global table doesn't include the VTEP-IP so
+ * we need to create a different copy for the VNI
+ */
+ if (p->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ p = evpn_type1_prefix_vni_copy(&ad_evp, p,
+ parent_pi->attr->nexthop);
/* Create (or fetch) route within the VNI. */
/* NOTE: There is no RD here. */
@@ -2805,46 +2534,16 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
/* Perform route selection and update zebra, if required. */
ret = evpn_route_select_install(bgp, vpn, dest);
- bgp_dest_unlock_node(dest);
-
- return ret;
-}
-
-/* Uninstall EVPN route entry from ES route table */
-static int uninstall_evpn_route_entry_in_es(struct bgp *bgp, struct evpnes *es,
- const struct prefix_evpn *p,
- struct bgp_path_info *parent_pi)
-{
- int ret;
- struct bgp_dest *dest;
- struct bgp_path_info *pi;
-
- if (!es->route_table)
- return 0;
-
- /* Locate route within the ESI.
- * NOTE: There is no RD here.
+ /* if the best path is a local path with a non-zero ES
+ * sync info against the local path may need to be updated
+ * when a remote path is added/updated (including changes
+ * from sync-path to remote-path)
*/
- dest = bgp_node_lookup(es->route_table, (struct prefix *)p);
- if (!dest)
- return 0;
-
- /* Find matching route entry. */
- for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
- if (pi->extra
- && (struct bgp_path_info *)pi->extra->parent == parent_pi)
- break;
-
- if (!pi)
- return 0;
-
- /* Mark entry for deletion */
- bgp_path_info_delete(dest, pi);
+ local_pi = bgp_evpn_route_get_local_path(bgp, dest);
+ if (local_pi && bgp_evpn_attr_is_local_es(local_pi->attr))
+ bgp_evpn_update_type2_route_entry(bgp, vpn, dest, local_pi,
+ __func__);
- /* Perform route selection and update zebra, if required. */
- ret = evpn_es_route_select_install(bgp, es, dest);
-
- /* Unlock route node. */
bgp_dest_unlock_node(dest);
return ret;
@@ -2936,7 +2635,16 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
{
struct bgp_dest *dest;
struct bgp_path_info *pi;
+ struct bgp_path_info *local_pi;
int ret;
+ struct prefix_evpn ad_evp;
+
+ /* EAD prefix in the global table doesn't include the VTEP-IP so
+ * we need to create a different copy for the VNI
+ */
+ if (p->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ p = evpn_type1_prefix_vni_copy(&ad_evp, p,
+ parent_pi->attr->nexthop);
/* Locate route within the VNI. */
/* NOTE: There is no RD here. */
@@ -2959,6 +2667,15 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
/* Perform route selection and update zebra, if required. */
ret = evpn_route_select_install(bgp, vpn, dest);
+ /* if the best path is a local path with a non-zero ES
+ * sync info against the local path may need to be updated
+ * when a remote path is deleted
+ */
+ local_pi = bgp_evpn_route_get_local_path(bgp, dest);
+ if (local_pi && bgp_evpn_attr_is_local_es(local_pi->attr))
+ bgp_evpn_update_type2_route_entry(bgp, vpn, dest, local_pi,
+ __func__);
+
/* Unlock route node. */
bgp_dest_unlock_node(dest);
@@ -2966,22 +2683,6 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
}
/*
- * Given a prefix, see if it belongs to ES.
- */
-static int is_prefix_matching_for_es(const struct prefix_evpn *p,
- struct evpnes *es)
-{
- /* if not an ES route return false */
- if (p->prefix.route_type != BGP_EVPN_ES_ROUTE)
- return 0;
-
- if (memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t)) == 0)
- return 1;
-
- return 0;
-}
-
-/*
* Given a route entry and a VRF, see if this route entry should be
* imported into the VRF i.e., RTs match.
*/
@@ -3115,78 +2816,6 @@ static int is_route_matching_for_vni(struct bgp *bgp, struct bgpevpn *vpn,
return 0;
}
-static int install_uninstall_routes_for_es(struct bgp *bgp,
- struct evpnes *es,
- int install)
-{
- int ret;
- afi_t afi;
- safi_t safi;
- char buf[PREFIX_STRLEN];
- char buf1[ESI_STR_LEN];
- struct bgp_dest *rd_dest, *dest;
- struct bgp_table *table;
- struct bgp_path_info *pi;
-
- afi = AFI_L2VPN;
- safi = SAFI_EVPN;
-
- /*
- * Walk entire global routing table and evaluate routes which could be
- * imported into this VRF. Note that we need to loop through all global
- * routes to determine which route matches the import rt on vrf
- */
- for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest;
- rd_dest = bgp_route_next(rd_dest)) {
- table = bgp_dest_get_bgp_table_info(rd_dest);
- if (!table)
- continue;
-
- for (dest = bgp_table_top(table); dest;
- dest = bgp_route_next(dest)) {
- const struct prefix_evpn *evp =
- (const struct prefix_evpn *)bgp_dest_get_prefix(
- dest);
-
- for (pi = bgp_dest_get_bgp_path_info(dest); pi;
- pi = pi->next) {
- /*
- * Consider "valid" remote routes applicable for
- * this ES.
- */
- if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID)
- && pi->type == ZEBRA_ROUTE_BGP
- && pi->sub_type == BGP_ROUTE_NORMAL))
- continue;
-
- if (!is_prefix_matching_for_es(evp, es))
- continue;
-
- if (install)
- ret = install_evpn_route_entry_in_es(
- bgp, es, evp, pi);
- else
- ret = uninstall_evpn_route_entry_in_es(
- bgp, es, evp, pi);
-
- if (ret) {
- flog_err(
- EC_BGP_EVPN_FAIL,
- "Failed to %s EVPN %s route in ESI %s",
- install ? "install"
- : "uninstall",
- prefix2str(evp, buf,
- sizeof(buf)),
- esi_to_str(&es->esi, buf1,
- sizeof(buf1)));
- return ret;
- }
- }
- }
- }
- return 0;
-}
-
/* This API will scan evpn routes for checking attribute's rmac
* macthes with bgp instance router mac. It avoid installing
* route into bgp vrf table and remote rmac in bridge table.
@@ -3390,15 +3019,6 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp,
return 0;
}
-/* Install any existing remote ES routes applicable for this ES into its routing
- * table. This is invoked when ES comes up.
- */
-static int install_routes_for_es(struct bgp *bgp, struct evpnes *es)
-{
- return install_uninstall_routes_for_es(bgp, es, 1);
-}
-
-
/* Install any existing remote routes applicable for this VRF into VRF RIB. This
* is invoked upon l3vni-add or l3vni import rt change
*/
@@ -3425,6 +3045,11 @@ static int install_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
if (ret)
return ret;
+ ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE,
+ 1);
+ if (ret)
+ return ret;
+
return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE,
1);
}
@@ -3453,33 +3078,14 @@ static int uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
if (ret)
return ret;
- return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE,
- 0);
-}
+ ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE,
+ 0);
+ if (ret)
+ return ret;
-/* Install or unistall route in ES */
-static int install_uninstall_route_in_es(struct bgp *bgp, struct evpnes *es,
- afi_t afi, safi_t safi,
- struct prefix_evpn *evp,
- struct bgp_path_info *pi, int install)
-{
- int ret = 0;
- char buf[ESI_STR_LEN];
- if (install)
- ret = install_evpn_route_entry_in_es(bgp, es, evp, pi);
- else
- ret = uninstall_evpn_route_entry_in_es(bgp, es, evp, pi);
-
- if (ret) {
- flog_err(
- EC_BGP_EVPN_FAIL,
- "%u: Failed to %s EVPN %s route in ESI %s", bgp->vrf_id,
- install ? "install" : "uninstall", "ES",
- esi_to_str(&evp->prefix.es_addr.esi, buf, sizeof(buf)));
- return ret;
- }
- return 0;
+ return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE,
+ 0);
}
/*
@@ -3576,6 +3182,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
struct attr *attr = pi->attr;
struct ecommunity *ecom;
int i;
+ struct prefix_evpn ad_evp;
assert(attr);
@@ -3583,6 +3190,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE
|| evp->prefix.route_type == BGP_EVPN_IMET_ROUTE
|| evp->prefix.route_type == BGP_EVPN_ES_ROUTE
+ || evp->prefix.route_type == BGP_EVPN_AD_ROUTE
|| evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE))
return 0;
@@ -3590,6 +3198,12 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
if (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)))
return 0;
+ /* EAD prefix in the global table doesn't include the VTEP-IP so
+ * we need to create a different copy for the VNI
+ */
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ evp = evpn_type1_prefix_vni_copy(&ad_evp, evp, attr->nexthop);
+
ecom = attr->ecommunity;
if (!ecom || !ecom->size)
return -1;
@@ -3603,7 +3217,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
struct ecommunity_val eval_tmp;
struct irt_node *irt; /* import rt for l2vni */
struct vrf_irt_node *vrf_irt; /* import rt for l3vni */
- struct evpnes *es;
+ struct bgp_evpn_es *es;
/* Only deal with RTs */
pnt = (ecom->val + (i * ECOMMUNITY_SIZE));
@@ -3621,6 +3235,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
*/
if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE ||
evp->prefix.route_type == BGP_EVPN_IMET_ROUTE ||
+ evp->prefix.route_type == BGP_EVPN_AD_ROUTE ||
evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) {
irt = lookup_import_rt(bgp, eval);
@@ -3668,9 +3283,9 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
/* we will match based on the entire esi to avoid
* imoort of an es route for esi2 into esi1
*/
- es = bgp_evpn_lookup_es(bgp, &evp->prefix.es_addr.esi);
- if (es && is_es_local(es))
- install_uninstall_route_in_es(
+ es = bgp_evpn_es_find(&evp->prefix.es_addr.esi);
+ if (es && bgp_evpn_is_es_local(es))
+ bgp_evpn_es_route_install_uninstall(
bgp, es, afi, safi, evp, pi, import);
}
}
@@ -3804,10 +3419,11 @@ static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
return 0;
attr = pi->attr;
- global_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)&p, &vpn->prd);
+ global_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi],
+ afi, safi, &p, &vpn->prd);
update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr,
- 1, &pi, 0, mac_mobility_seqnum(attr));
+ 1, &pi, 0, mac_mobility_seqnum(attr),
+ false /* setup_sync */, NULL /* old_is_sync */);
/* Schedule for processing and unlock node. */
bgp_process(bgp, global_dest, afi, safi);
@@ -3838,12 +3454,13 @@ static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
* attribute.
*/
attr = pi->attr;
- global_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)evp, &vpn->prd);
+ global_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
+ evp, &vpn->prd);
assert(global_dest);
- update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr,
- 1, &global_pi, 0,
- mac_mobility_seqnum(attr));
+ update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr, 1,
+ &global_pi, 0,
+ mac_mobility_seqnum(attr),
+ false /* setup_sync */, NULL /* old_is_sync */);
/* Schedule for processing and unlock node. */
bgp_process(bgp, global_dest, afi, safi);
@@ -3875,8 +3492,8 @@ static int delete_withdraw_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
/* Remove type-3 route for this VNI from global table. */
build_evpn_type3_prefix(&p, vpn->originator_ip);
- global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)&p, &vpn->prd);
+ global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)&p, &vpn->prd);
if (global_dest) {
/* Delete route entry in the global EVPN table. */
delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi);
@@ -3943,7 +3560,7 @@ static void create_advertise_type3(struct hash_bucket *bucket, void *data)
return;
build_evpn_type3_prefix(&p, vpn->originator_ip);
- if (update_evpn_route(bgp, vpn, &p, 0, 0))
+ if (update_evpn_route(bgp, vpn, &p, 0, 0, NULL))
flog_err(EC_BGP_EVPN_ROUTE_CREATE,
"Type3 route creation failure for VNI %u", vpn->vni);
}
@@ -4011,8 +3628,14 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi,
p.prefix.route_type = BGP_EVPN_MAC_IP_ROUTE;
/* Copy Ethernet Seg Identifier */
- memcpy(&evpn.eth_s_id.val, pfx, ESI_LEN);
- pfx += ESI_LEN;
+ if (attr) {
+ memcpy(&attr->esi, pfx, sizeof(esi_t));
+ if (bgp_evpn_is_esi_local(&attr->esi))
+ attr->es_flags |= ATTR_ES_IS_LOCAL;
+ else
+ attr->es_flags &= ~ATTR_ES_IS_LOCAL;
+ }
+ pfx += sizeof(esi_t);
/* Copy Ethernet Tag */
memcpy(&eth_tag, pfx, 4);
@@ -4165,68 +3788,6 @@ static int process_type3_route(struct peer *peer, afi_t afi, safi_t safi,
}
/*
- * Process received EVPN type-4 route (advertise or withdraw).
- */
-static int process_type4_route(struct peer *peer, afi_t afi, safi_t safi,
- struct attr *attr, uint8_t *pfx, int psize,
- uint32_t addpath_id)
-{
- int ret;
- esi_t esi;
- uint8_t ipaddr_len;
- struct in_addr vtep_ip;
- struct prefix_rd prd;
- struct prefix_evpn p;
-
- /* Type-4 route should be either 23 or 35 bytes
- * RD (8), ESI (10), ip-len (1), ip (4 or 16)
- */
- if (psize != 23 && psize != 35) {
- flog_err(EC_BGP_EVPN_ROUTE_INVALID,
- "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d",
- peer->bgp->vrf_id, peer->host, psize);
- return -1;
- }
-
- /* Make prefix_rd */
- prd.family = AF_UNSPEC;
- prd.prefixlen = 64;
- memcpy(&prd.val, pfx, 8);
- pfx += 8;
-
- /* get the ESI */
- memcpy(&esi, pfx, ESI_BYTES);
- pfx += ESI_BYTES;
-
-
- /* Get the IP. */
- ipaddr_len = *pfx++;
- if (ipaddr_len == IPV4_MAX_BITLEN) {
- memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN);
- } else {
- flog_err(
- EC_BGP_EVPN_ROUTE_INVALID,
- "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d",
- peer->bgp->vrf_id, peer->host, ipaddr_len);
- return -1;
- }
-
- build_evpn_type4_prefix(&p, &esi, vtep_ip);
- /* Process the route. */
- if (attr) {
- ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
- afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
- &prd, NULL, 0, 0, NULL);
- } else {
- ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
- afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
- &prd, NULL, 0, NULL);
- }
- return ret;
-}
-
-
-/*
* Process received EVPN type-5 route (advertise or withdraw).
*/
static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi,
@@ -4271,8 +3832,9 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi,
memset(&evpn, 0, sizeof(evpn));
/* Fetch ESI */
- memcpy(&evpn.eth_s_id.val, pfx, 10);
- pfx += 10;
+ if (attr)
+ memcpy(&attr->esi, pfx, sizeof(esi_t));
+ pfx += ESI_BYTES;
/* Fetch Ethernet Tag. */
memcpy(&eth_tag, pfx, 4);
@@ -4322,7 +3884,7 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi,
if (attr) {
is_valid_update = true;
- if (is_zero_mac(&attr->rmac) && is_zero_esi(&evpn.eth_s_id) &&
+ if (is_zero_mac(&attr->rmac) &&
is_zero_gw_ip(&evpn.gw_ip, gw_afi))
is_valid_update = false;
@@ -4368,9 +3930,9 @@ static void evpn_mpattr_encode_type5(struct stream *s, const struct prefix *p,
stream_putc(s, 8 + 10 + 4 + 1 + len + 3);
stream_put(s, prd->val, 8);
if (attr)
- stream_put(s, &(attr->evpn_overlay.eth_s_id), 10);
+ stream_put(s, &attr->esi, sizeof(esi_t));
else
- stream_put(s, &temp, 10);
+ stream_put(s, 0, sizeof(esi_t));
stream_putl(s, p_evpn_p->prefix_addr.eth_tag);
stream_putc(s, p_evpn_p->prefix_addr.ip_prefix_length);
if (IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip))
@@ -5073,6 +4635,15 @@ char *bgp_evpn_route2str(const struct prefix_evpn *p, char *buf, int len)
is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN
: IPV6_MAX_BITLEN,
inet_ntoa(p->prefix.es_addr.ip.ipaddr_v4));
+ } else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+ snprintf(buf, len, "[%d]:[%u]:[%s]:[%d]:[%s]",
+ p->prefix.route_type,
+ p->prefix.ead_addr.eth_tag,
+ esi_to_str(&p->prefix.ead_addr.esi,
+ buf3, sizeof(buf3)),
+ is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN
+ : IPV6_MAX_BITLEN,
+ inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4));
} else {
/* For EVPN route types not supported yet. */
snprintf(buf, len, "(unsupported route type %d)",
@@ -5112,7 +4683,7 @@ void bgp_evpn_encode_prefix(struct stream *s, const struct prefix *p,
stream_putc(s, len);
stream_put(s, prd->val, 8); /* RD */
if (attr)
- stream_put(s, &attr->evpn_overlay.eth_s_id, ESI_LEN);
+ stream_put(s, &attr->esi, ESI_BYTES);
else
stream_put(s, 0, 10);
stream_putl(s, evp->prefix.macip_addr.eth_tag); /* Ethernet Tag ID */
@@ -5147,6 +4718,16 @@ void bgp_evpn_encode_prefix(struct stream *s, const struct prefix *p,
stream_put_in_addr(s, &evp->prefix.es_addr.ip.ipaddr_v4);
break;
+ case BGP_EVPN_AD_ROUTE:
+ /* RD, ESI, EthTag, 1 VNI */
+ len = RD_BYTES + ESI_BYTES + EVPN_ETH_TAG_BYTES + BGP_LABEL_BYTES;
+ stream_putc(s, len);
+ stream_put(s, prd->val, RD_BYTES); /* RD */
+ stream_put(s, evp->prefix.ead_addr.esi.val, ESI_BYTES); /* ESI */
+ stream_putl(s, evp->prefix.ead_addr.eth_tag); /* Ethernet Tag */
+ stream_put(s, label, BGP_LABEL_BYTES);
+ break;
+
case BGP_EVPN_IP_PREFIX_ROUTE:
/* TODO: AddPath support. */
evpn_mpattr_encode_type5(s, p, prd, label, num_labels, attr);
@@ -5234,7 +4815,7 @@ int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr,
break;
case BGP_EVPN_ES_ROUTE:
- if (process_type4_route(peer, afi, safi,
+ if (bgp_evpn_type4_route_process(peer, afi, safi,
withdraw ? NULL : attr, pnt,
psize, addpath_id)) {
flog_err(
@@ -5245,6 +4826,18 @@ int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr,
}
break;
+ case BGP_EVPN_AD_ROUTE:
+ if (bgp_evpn_type1_route_process(peer, afi, safi,
+ withdraw ? NULL : attr, pnt,
+ psize, addpath_id)) {
+ flog_err(
+ EC_BGP_PKT_PROCESS,
+ "%u:%s - Error in processing EVPN type-1 NLRI size %d",
+ peer->bgp->vrf_id, peer->host, psize);
+ return BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE;
+ }
+ break;
+
case BGP_EVPN_IP_PREFIX_ROUTE:
if (process_type5_route(peer, afi, safi,
withdraw ? NULL : attr, pnt,
@@ -5423,7 +5016,7 @@ void bgp_evpn_derive_auto_rd_for_vrf(struct bgp *bgp)
*/
void bgp_evpn_derive_auto_rd(struct bgp *bgp, struct bgpevpn *vpn)
{
- char buf[100];
+ char buf[BGP_EVPN_PREFIX_RD_LEN];
vpn->prd.family = AF_UNSPEC;
vpn->prd.prefixlen = 64;
@@ -5507,6 +5100,8 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni,
/* add to l2vni list on corresponding vrf */
bgpevpn_link_to_l3vni(vpn);
+ bgp_evpn_vni_es_init(vpn);
+
QOBJ_REG(vpn, bgpevpn);
return vpn;
}
@@ -5519,6 +5114,7 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni,
*/
void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn)
{
+ bgp_evpn_vni_es_cleanup(vpn);
bgpevpn_unlink_from_l3vni(vpn);
bgp_table_unlock(vpn->route_table);
bgp_evpn_unmap_vni_from_its_rts(bgp, vpn);
@@ -5531,79 +5127,6 @@ void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn)
}
/*
- * Lookup local ES.
- */
-struct evpnes *bgp_evpn_lookup_es(struct bgp *bgp, esi_t *esi)
-{
- struct evpnes *es;
- struct evpnes tmp;
-
- memset(&tmp, 0, sizeof(struct evpnes));
- memcpy(&tmp.esi, esi, sizeof(esi_t));
- es = hash_lookup(bgp->esihash, &tmp);
- return es;
-}
-
-/*
- * Create a new local es - invoked upon zebra notification.
- */
-struct evpnes *bgp_evpn_es_new(struct bgp *bgp,
- esi_t *esi,
- struct ipaddr *originator_ip)
-{
- char buf[100];
- struct evpnes *es;
-
- if (!bgp)
- return NULL;
-
- es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct evpnes));
-
- /* set the ESI and originator_ip */
- memcpy(&es->esi, esi, sizeof(esi_t));
- memcpy(&es->originator_ip, originator_ip, sizeof(struct ipaddr));
-
- /* Initialise the VTEP list */
- es->vtep_list = list_new();
- es->vtep_list->cmp = evpn_vtep_ip_cmp;
-
- /* auto derive RD for this es */
- bf_assign_index(bm->rd_idspace, es->rd_id);
- es->prd.family = AF_UNSPEC;
- es->prd.prefixlen = 64;
- snprintf(buf, sizeof(buf), "%s:%hu", inet_ntoa(bgp->router_id),
- es->rd_id);
- (void)str2prefix_rd(buf, &es->prd);
-
- /* Initialize the ES route table */
- es->route_table = bgp_table_init(bgp, AFI_L2VPN, SAFI_EVPN);
-
- /* Add to hash */
- if (!hash_get(bgp->esihash, es, hash_alloc_intern)) {
- XFREE(MTYPE_BGP_EVPN_ES, es);
- return NULL;
- }
-
- QOBJ_REG(es, evpnes);
- return es;
-}
-
-/*
- * Free a given ES -
- * This just frees appropriate memory, caller should have taken other
- * needed actions.
- */
-void bgp_evpn_es_free(struct bgp *bgp, struct evpnes *es)
-{
- list_delete(&es->vtep_list);
- bgp_table_unlock(es->route_table);
- bf_release_index(bm->rd_idspace, es->rd_id);
- hash_release(bgp->esihash, es);
- QOBJ_UNREG(es);
- XFREE(MTYPE_BGP_EVPN_ES, es);
-}
-
-/*
* Import evpn route from global table to VNI/VRF/ESI.
*/
int bgp_evpn_import_route(struct bgp *bgp, afi_t afi, safi_t safi,
@@ -5724,7 +5247,7 @@ int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni, struct ethaddr *mac,
* Handle add of a local MACIP.
*/
int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac,
- struct ipaddr *ip, uint8_t flags, uint32_t seq)
+ struct ipaddr *ip, uint8_t flags, uint32_t seq, esi_t *esi)
{
struct bgpevpn *vpn;
struct prefix_evpn p;
@@ -5740,7 +5263,7 @@ int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac,
/* Create EVPN type-2 route and schedule for processing. */
build_evpn_type2_prefix(&p, mac, ip);
- if (update_evpn_route(bgp, vpn, &p, flags, seq)) {
+ if (update_evpn_route(bgp, vpn, &p, flags, seq, esi)) {
char buf[ETHER_ADDR_STRLEN];
char buf2[INET6_ADDRSTRLEN];
@@ -6112,7 +5635,7 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
if (bgp_evpn_vni_flood_mode_get(bgp, vpn)
== VXLAN_FLOOD_HEAD_END_REPL) {
build_evpn_type3_prefix(&p, vpn->originator_ip);
- if (update_evpn_route(bgp, vpn, &p, 0, 0)) {
+ if (update_evpn_route(bgp, vpn, &p, 0, 0, NULL)) {
flog_err(EC_BGP_EVPN_ROUTE_CREATE,
"%u: Type3 route creation failure for VNI %u",
bgp->vrf_id, vni);
@@ -6137,88 +5660,6 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
}
/*
- * bgp_evpn_local_es_del
- */
-int bgp_evpn_local_es_del(struct bgp *bgp,
- esi_t *esi,
- struct ipaddr *originator_ip)
-{
- char buf[ESI_STR_LEN];
- struct evpnes *es = NULL;
-
- if (!bgp->esihash) {
- flog_err(EC_BGP_ES_CREATE, "%u: ESI hash not yet created",
- bgp->vrf_id);
- return -1;
- }
-
- /* Lookup ESI hash - should exist. */
- es = bgp_evpn_lookup_es(bgp, esi);
- if (!es) {
- flog_warn(EC_BGP_EVPN_ESI,
- "%u: ESI hash entry for ESI %s at Local ES DEL",
- bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf)));
- return -1;
- }
-
- /* Delete all local EVPN ES routes from ESI table
- * and schedule for processing (to withdraw from peers))
- */
- delete_routes_for_es(bgp, es);
-
- /* free the hash entry */
- bgp_evpn_es_free(bgp, es);
-
- return 0;
-}
-
-/*
- * bgp_evpn_local_es_add
- */
-int bgp_evpn_local_es_add(struct bgp *bgp,
- esi_t *esi,
- struct ipaddr *originator_ip)
-{
- char buf[ESI_STR_LEN];
- struct evpnes *es = NULL;
- struct prefix_evpn p;
-
- if (!bgp->esihash) {
- flog_err(EC_BGP_ES_CREATE, "%u: ESI hash not yet created",
- bgp->vrf_id);
- return -1;
- }
-
- /* create the new es */
- es = bgp_evpn_lookup_es(bgp, esi);
- if (!es) {
- es = bgp_evpn_es_new(bgp, esi, originator_ip);
- if (!es) {
- flog_err(
- EC_BGP_ES_CREATE,
- "%u: Failed to allocate ES entry for ESI %s - at Local ES Add",
- bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf)));
- return -1;
- }
- }
- UNSET_FLAG(es->flags, EVPNES_REMOTE);
- SET_FLAG(es->flags, EVPNES_LOCAL);
-
- build_evpn_type4_prefix(&p, esi, originator_ip->ipaddr_v4);
- if (update_evpn_type4_route(bgp, es, &p)) {
- flog_err(EC_BGP_EVPN_ROUTE_CREATE,
- "%u: Type4 route creation failure for ESI %s",
- bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf)));
- return -1;
- }
-
- /* import all remote ES routes in th ES table */
- install_routes_for_es(bgp, es);
-
- return 0;
-}
-
-/*
* Handle change in setting for BUM handling. The supported values
* are head-end replication and dropping all BUM packets. Any change
* should be registered with zebra. Also, if doing head-end replication,
@@ -6267,9 +5708,6 @@ void bgp_evpn_cleanup(struct bgp *bgp)
hash_free(bgp->vnihash);
bgp->vnihash = NULL;
- if (bgp->esihash)
- hash_free(bgp->esihash);
- bgp->esihash = NULL;
list_delete(&bgp->vrf_import_rtl);
list_delete(&bgp->vrf_export_rtl);
@@ -6286,9 +5724,6 @@ void bgp_evpn_init(struct bgp *bgp)
{
bgp->vnihash =
hash_create(vni_hash_key_make, vni_hash_cmp, "BGP VNI Hash");
- bgp->esihash =
- hash_create(esi_hash_keymake, esi_cmp,
- "BGP EVPN Local ESI Hash");
bgp->import_rt_hash =
hash_create(import_rt_hash_key_make, import_rt_hash_cmp,
"BGP Import RT Hash");
diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h
index 267c87ee54..8535f1fa31 100644
--- a/bgpd/bgp_evpn.h
+++ b/bgpd/bgp_evpn.h
@@ -176,7 +176,7 @@ extern int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni,
int state);
extern int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni,
struct ethaddr *mac, struct ipaddr *ip,
- uint8_t flags, uint32_t seq);
+ uint8_t flags, uint32_t seq, esi_t *esi);
extern int bgp_evpn_local_l3vni_add(vni_t vni, vrf_id_t vrf_id,
struct ethaddr *rmac,
struct ethaddr *vrr_rmac,
@@ -188,10 +188,6 @@ extern int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
struct in_addr originator_ip,
vrf_id_t tenant_vrf_id,
struct in_addr mcast_grp);
-extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
- struct ipaddr *originator_ip);
-extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi,
- struct ipaddr *originator_ip);
extern void bgp_evpn_flood_control_change(struct bgp *bgp);
extern void bgp_evpn_cleanup_on_disable(struct bgp *bgp);
extern void bgp_evpn_cleanup(struct bgp *bgp);
diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c
new file mode 100644
index 0000000000..eb65c43bb9
--- /dev/null
+++ b/bgpd/bgp_evpn_mh.c
@@ -0,0 +1,2905 @@
+/* EVPN Multihoming procedures
+ *
+ * Copyright (C) 2019 Cumulus Networks, Inc.
+ * Anuradha Karuppiah
+ *
+ * This file is part of FRR.
+ *
+ * FRRouting is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRRouting is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "filter.h"
+#include "prefix.h"
+#include "log.h"
+#include "memory.h"
+#include "stream.h"
+#include "hash.h"
+#include "jhash.h"
+#include "zclient.h"
+
+#include "bgpd/bgp_attr_evpn.h"
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_mplsvpn.h"
+#include "bgpd/bgp_evpn.h"
+#include "bgpd/bgp_evpn_private.h"
+#include "bgpd/bgp_evpn_mh.h"
+#include "bgpd/bgp_ecommunity.h"
+#include "bgpd/bgp_encap_types.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_errors.h"
+#include "bgpd/bgp_aspath.h"
+#include "bgpd/bgp_zebra.h"
+#include "bgpd/bgp_addpath.h"
+#include "bgpd/bgp_label.h"
+
+static void bgp_evpn_local_es_down(struct bgp *bgp,
+ struct bgp_evpn_es *es);
+static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp,
+ struct bgp_evpn_es *es);
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr);
+static void bgp_evpn_es_vtep_del(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr);
+static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es);
+static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es);
+static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi);
+
+esi_t zero_esi_buf, *zero_esi = &zero_esi_buf;
+
+/******************************************************************************
+ * per-ES (Ethernet Segment) routing table
+ *
+ * Following routes are added to the ES's routing table -
+ * 1. Local and remote ESR (Type-4)
+ * 2. Local EAD-per-ES (Type-1).
+ *
+ * Key for these routes is {ESI, VTEP-IP} so the path selection is practically
+ * a no-op i.e. all paths lead to same VTEP-IP (i.e. result in the same VTEP
+ * being added to same ES).
+ *
+ * Note the following routes go into the VNI routing table (instead of the
+ * ES routing table) -
+ * 1. Remote EAD-per-ES
+ * 2. Local and remote EAD-per-EVI
+ */
+
+/* Calculate the best path for a multi-homing (Type-1 or Type-4) route
+ * installed in the ES's routing table.
+ *
+ * Runs bgp_best_selection() on rn and, depending on the outcome, adds or
+ * deletes the nexthop of the (new or old) selected imported path as a VTEP
+ * of the ES via bgp_evpn_es_vtep_add()/bgp_evpn_es_vtep_del().
+ *
+ * bgp - BGP instance; its maxpaths and tx_addpath settings are consulted
+ * es - ethernet segment whose VTEPs are added/deleted
+ * rn - route node to run the selection on
+ *
+ * Returns ret, which is initialised to 0 and never changed in this function.
+ */
+static int bgp_evpn_es_route_select_install(struct bgp *bgp,
+ struct bgp_evpn_es *es,
+ struct bgp_node *rn)
+{
+ int ret = 0;
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp_path_info *old_select; /* old best */
+ struct bgp_path_info *new_select; /* new best */
+ struct bgp_path_info_pair old_and_new;
+
+ /* Compute the best path. */
+ bgp_best_selection(bgp, rn, &bgp->maxpaths[afi][safi],
+ &old_and_new, afi, safi);
+ old_select = old_and_new.old;
+ new_select = old_and_new.new;
+
+ /*
+ * If the best path hasn't changed - see if something needs to be
+ * updated. Only imported paths are considered here, and only when
+ * no user clear, attribute change or addpath is in play.
+ */
+ if (old_select && old_select == new_select
+ && old_select->type == ZEBRA_ROUTE_BGP
+ && old_select->sub_type == BGP_ROUTE_IMPORTED
+ && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR)
+ && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
+ && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) {
+ if (bgp_zebra_has_route_changed(old_select)) {
+ bgp_evpn_es_vtep_add(bgp, es,
+ old_select->attr->nexthop,
+ true /*esr*/);
+ }
+ UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG);
+ bgp_zebra_clear_route_change_flags(rn);
+ return ret;
+ }
+
+ /* If the user did a "clear" this flag will be set */
+ UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR);
+
+ /* bestpath has changed (or a re-evaluation was forced); update the
+ * relevant fields and add or delete the corresponding ES VTEP.
+ */
+ if (old_select || new_select)
+ bgp_bump_version(rn);
+
+ if (old_select)
+ bgp_path_info_unset_flag(rn, old_select, BGP_PATH_SELECTED);
+ if (new_select) {
+ bgp_path_info_set_flag(rn, new_select, BGP_PATH_SELECTED);
+ bgp_path_info_unset_flag(rn, new_select, BGP_PATH_ATTR_CHANGED);
+ UNSET_FLAG(new_select->flags, BGP_PATH_MULTIPATH_CHG);
+ }
+
+ if (new_select && new_select->type == ZEBRA_ROUTE_BGP
+ && new_select->sub_type == BGP_ROUTE_IMPORTED) {
+ bgp_evpn_es_vtep_add(bgp, es,
+ new_select->attr->nexthop, true /*esr */);
+ } else {
+ if (old_select && old_select->type == ZEBRA_ROUTE_BGP
+ && old_select->sub_type == BGP_ROUTE_IMPORTED)
+ bgp_evpn_es_vtep_del(
+ bgp, es, old_select->attr->nexthop,
+ true /*esr*/);
+ }
+
+ /* Clear any route change flags. */
+ bgp_zebra_clear_route_change_flags(rn);
+
+ /* Reap old select bgp_path_info, if it has been removed */
+ if (old_select && CHECK_FLAG(old_select->flags, BGP_PATH_REMOVED))
+ bgp_path_info_reap(rn, old_select);
+
+ return ret;
+}
+
+/* Install Type-1/Type-4 route entry in the per-ES routing table.
+ *
+ * Looks on the ES route node for a path whose extra->parent is parent_pi.
+ * If there is none, a new BGP_ROUTE_IMPORTED path is created from parent_pi;
+ * otherwise the existing path's attribute is refreshed, or nothing is done
+ * when the attribute is unchanged and the path is not marked removed.
+ * Route selection is then re-run on the node.
+ *
+ * Returns 0 when nothing changed, otherwise the result of
+ * bgp_evpn_es_route_select_install().
+ */
+static int bgp_evpn_es_route_install(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct prefix_evpn *p,
+ struct bgp_path_info *parent_pi)
+{
+ int ret = 0;
+ struct bgp_node *rn = NULL;
+ struct bgp_path_info *pi = NULL;
+ struct attr *attr_new = NULL;
+
+ /* Create (or fetch) route within the ES routing table.
+ * NOTE: There is no RD here.
+ */
+ rn = bgp_node_get(es->route_table, (struct prefix *)p);
+
+ /* Check if route entry is already present. */
+ for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next)
+ if (pi->extra
+ && (struct bgp_path_info *)pi->extra->parent ==
+ parent_pi)
+ break;
+
+ if (!pi) {
+ /* Add (or update) attribute to hash. */
+ attr_new = bgp_attr_intern(parent_pi->attr);
+
+ /* Create new route with its attribute. */
+ pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0,
+ parent_pi->peer, attr_new, rn);
+ SET_FLAG(pi->flags, BGP_PATH_VALID);
+ bgp_path_info_extra_get(pi);
+ pi->extra->parent = bgp_path_info_lock(parent_pi);
+ bgp_dest_lock_node((struct bgp_node *)parent_pi->net);
+ bgp_path_info_add(rn, pi);
+ } else {
+ if (attrhash_cmp(pi->attr, parent_pi->attr)
+ && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
+ bgp_dest_unlock_node(rn);
+ return 0;
+ }
+ /* The attribute has changed. */
+ /* Add (or update) attribute to hash. */
+ attr_new = bgp_attr_intern(parent_pi->attr);
+
+ /* Restore route, if needed. */
+ if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED))
+ bgp_path_info_restore(rn, pi);
+
+ /* Mark if nexthop has changed. */
+ if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop))
+ SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED);
+
+ /* Unintern existing, set to new. */
+ bgp_attr_unintern(&pi->attr);
+ pi->attr = attr_new;
+ pi->uptime = bgp_clock();
+ }
+
+ /* Perform route selection and update the ES VTEPs, if required. */
+ ret = bgp_evpn_es_route_select_install(bgp, es, rn);
+
+ bgp_dest_unlock_node(rn);
+
+ return ret;
+}
+
+/* Uninstall Type-1/Type-4 route entry from the ES routing table.
+ *
+ * Finds the path on the ES route node that was imported from parent_pi
+ * (extra->parent == parent_pi), marks it for deletion and re-runs route
+ * selection on the node.
+ *
+ * bgp       - BGP instance
+ * es        - ethernet segment whose routing table is searched
+ * p         - prefix of the route (there is no RD in the ES table)
+ * parent_pi - path in the global table the ES entry was imported from
+ *
+ * Returns 0 if the ES has no routing table or no matching node/path exists,
+ * otherwise the result of bgp_evpn_es_route_select_install().
+ */
+static int bgp_evpn_es_route_uninstall(struct bgp *bgp, struct bgp_evpn_es *es,
+		struct prefix_evpn *p, struct bgp_path_info *parent_pi)
+{
+	int ret;
+	struct bgp_node *rn;
+	struct bgp_path_info *pi;
+
+	if (!es->route_table)
+		return 0;
+
+	/* Locate route within the ESI.
+	 * NOTE: There is no RD here.
+	 */
+	rn = bgp_node_lookup(es->route_table, (struct prefix *)p);
+	if (!rn)
+		return 0;
+
+	/* Find matching route entry. */
+	for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next)
+		if (pi->extra
+		    && (struct bgp_path_info *)pi->extra->parent == parent_pi)
+			break;
+
+	if (!pi) {
+		/* Nothing was imported from parent_pi. The node found by the
+		 * lookup is released on the normal path below, so it must be
+		 * released here as well or its reference is leaked.
+		 */
+		bgp_dest_unlock_node(rn);
+		return 0;
+	}
+
+	/* Mark entry for deletion */
+	bgp_path_info_delete(rn, pi);
+
+	/* Perform route selection and update the ES VTEPs, if required. */
+	ret = bgp_evpn_es_route_select_install(bgp, es, rn);
+
+	/* Unlock route node. */
+	bgp_dest_unlock_node(rn);
+
+	return ret;
+}
+
+/* Install or uninstall a Type-4 route in the per-ES routing table.
+ *
+ * Dispatches to bgp_evpn_es_route_install() when install is non-zero and to
+ * bgp_evpn_es_route_uninstall() otherwise; a non-zero result is logged and
+ * returned to the caller. afi and safi are not referenced in this function.
+ */
+int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, struct bgp_evpn_es *es,
+ afi_t afi, safi_t safi, struct prefix_evpn *evp,
+ struct bgp_path_info *pi, int install)
+{
+ int ret = 0;
+
+ if (install)
+ ret = bgp_evpn_es_route_install(bgp, es, evp, pi);
+ else
+ ret = bgp_evpn_es_route_uninstall(bgp, es, evp, pi);
+
+ if (ret) {
+ flog_err(
+ EC_BGP_EVPN_FAIL,
+ "%u: Failed to %s EVPN %s route in ESI %s",
+ bgp->vrf_id,
+ install ? "install" : "uninstall",
+ "ES", es->esi_str);
+ return ret;
+ }
+ return 0;
+}
+
+/* Delete (and withdraw) local routes for specified ES from global and ES table.
+ * Also remove all remote routes from the per ES table. Invoked when ES
+ * is deleted.
+ *
+ * The ES is first brought down (bgp_evpn_local_es_down) and its EAD-EVI
+ * routes are withdrawn (bgp_evpn_local_type1_evi_route_del); every path
+ * left in es->route_table is then deleted and reaped.
+ */
+static void bgp_evpn_es_route_del_all(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+ struct bgp_node *rn;
+ struct bgp_path_info *pi, *nextpi;
+
+ /* de-activate the ES */
+ bgp_evpn_local_es_down(bgp, es);
+ bgp_evpn_local_type1_evi_route_del(bgp, es);
+
+ /* Walk this ES's routing table and delete all routes.
+ * nextpi is saved before each path is reaped so the walk does not
+ * read pi->next after the reap.
+ */
+ for (rn = bgp_table_top(es->route_table); rn;
+ rn = bgp_route_next(rn)) {
+ for (pi = bgp_dest_get_bgp_path_info(rn);
+ (pi != NULL) && (nextpi = pi->next, 1);
+ pi = nextpi) {
+ bgp_path_info_delete(rn, pi);
+ bgp_path_info_reap(rn, pi);
+ }
+ }
+}
+
+/*****************************************************************************
+ * Base APIs for creating MH routes (Type-1 or Type-4) on local ethernet
+ * segment updates.
+ */
+
+/* create or update local EVPN type1/type4 route entry.
+ *
+ * This could be in -
+ * the ES table if ESR/EAD-ES (or)
+ * the VNI table if EAD-EVI (or)
+ * the global table if ESR/EAD-ES/EAD-EVI
+ *
+ * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and
+ * ESR).
+ *
+ * rn - route node (in whichever of the above tables) to update
+ * attr - attribute to attach to the local path
+ * add - when zero and no local path exists, nothing is created
+ * ri - out: the local path, or NULL on error / when nothing was done
+ * route_changed - out: 1 if a path was created or its attribute replaced,
+ * 0 if an existing path already carried an equal attribute
+ * afi and safi are not referenced in this function.
+ *
+ * Returns 0 on success and -1 if a valid imported (remote) path is found
+ * on the node.
+ */
+static int bgp_evpn_mh_route_update(struct bgp *bgp,
+ struct bgp_evpn_es *es, struct bgpevpn *vpn, afi_t afi,
+ safi_t safi, struct bgp_node *rn, struct attr *attr,
+ int add, struct bgp_path_info **ri, int *route_changed)
+{
+ struct bgp_path_info *tmp_pi = NULL;
+ struct bgp_path_info *local_pi = NULL; /* local route entry if any */
+ struct bgp_path_info *remote_pi = NULL; /* remote route entry if any */
+ struct attr *attr_new = NULL;
+ struct prefix_evpn *evp;
+
+ *ri = NULL;
+ evp = (struct prefix_evpn *)&rn->p;
+ *route_changed = 1;
+
+ /* locate the local and remote entries if any */
+ for (tmp_pi = bgp_dest_get_bgp_path_info(rn); tmp_pi;
+ tmp_pi = tmp_pi->next) {
+ if (tmp_pi->peer == bgp->peer_self
+ && tmp_pi->type == ZEBRA_ROUTE_BGP
+ && tmp_pi->sub_type == BGP_ROUTE_STATIC)
+ local_pi = tmp_pi;
+ if (tmp_pi->type == ZEBRA_ROUTE_BGP
+ && tmp_pi->sub_type == BGP_ROUTE_IMPORTED
+ && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID))
+ remote_pi = tmp_pi;
+ }
+
+ /* we don't expect to see a remote path (remote_pi) at this point, as
+ * an ES route has {esi, vtep_ip} as its key; if one is found the
+ * update is rejected and *ri is left NULL.
+ */
+ if (remote_pi) {
+ flog_err(
+ EC_BGP_ES_INVALID,
+ "%u ERROR: local es route for ESI: %s Vtep %s also learnt from remote",
+ bgp->vrf_id, es->esi_str,
+ inet_ntoa(es->originator_ip));
+ return -1;
+ }
+
+ if (!local_pi && !add)
+ return 0;
+
+ /* create or update the entry */
+ if (!local_pi) {
+
+ /* Add or update attribute to hash */
+ attr_new = bgp_attr_intern(attr);
+
+ /* Create new route with its attribute. */
+ tmp_pi = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0,
+ bgp->peer_self, attr_new, rn);
+ SET_FLAG(tmp_pi->flags, BGP_PATH_VALID);
+
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) {
+ bgp_path_info_extra_get(tmp_pi);
+ tmp_pi->extra->num_labels = 1;
+ if (vpn)
+ vni2label(vpn->vni, &tmp_pi->extra->label[0]);
+ else
+ tmp_pi->extra->label[0] = 0;
+ }
+
+ /* add the newly created path to the route-node */
+ bgp_path_info_add(rn, tmp_pi);
+ } else {
+ tmp_pi = local_pi;
+ if (attrhash_cmp(tmp_pi->attr, attr)
+ && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
+ *route_changed = 0;
+ else {
+ /* The attribute has changed.
+ * Add (or update) attribute to hash.
+ */
+ attr_new = bgp_attr_intern(attr);
+ bgp_path_info_set_flag(rn, tmp_pi,
+ BGP_PATH_ATTR_CHANGED);
+
+ /* Restore route, if needed. */
+ if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED))
+ bgp_path_info_restore(rn, tmp_pi);
+
+ /* Unintern existing, set to new. */
+ bgp_attr_unintern(&tmp_pi->attr);
+ tmp_pi->attr = attr_new;
+ tmp_pi->uptime = bgp_clock();
+ }
+ }
+
+ if (*route_changed) {
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("local ES %s vni %u route-type %s nexthop %s updated",
+ es->esi_str,
+ vpn ? vpn->vni : 0,
+ evp->prefix.route_type ==
+ BGP_EVPN_ES_ROUTE ? "esr" :
+ (vpn ? "ead-evi" : "ead-es"),
+ inet_ntoa(attr->mp_nexthop_global_in));
+ }
+
+ /* Return back the route entry. */
+ *ri = tmp_pi;
+ return 0;
+}
+
+/* Delete local EVPN ESR (type-4) and EAD (type-1) route
+ *
+ * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and
+ * ESR).
+ *
+ * The route is looked up in the VNI table (vpn set) or the ES table
+ * (vpn NULL); the matching entry in the global table is deleted and
+ * scheduled for processing, then the ES/VNI table entry is deleted and
+ * reaped. Returns 0 in all cases.
+ */
+static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es,
+ struct bgpevpn *vpn, struct prefix_evpn *p)
+{
+ afi_t afi = AFI_L2VPN;
+ safi_t safi = SAFI_EVPN;
+ struct bgp_path_info *pi;
+ struct bgp_node *rn = NULL; /* rn in esi table */
+ struct bgp_node *global_rn = NULL; /* rn in global table */
+ struct bgp_table *rt_table;
+ struct prefix_rd *prd;
+
+ if (vpn) {
+ rt_table = vpn->route_table;
+ prd = &vpn->prd;
+ } else {
+ rt_table = es->route_table;
+ prd = &es->prd;
+ }
+
+ /* First, locate the route node within the ESI or VNI.
+ * If it doesn't exist, there is nothing to do.
+ * Note: there is no RD here.
+ */
+ rn = bgp_node_lookup(rt_table, (struct prefix *)p);
+ if (!rn)
+ return 0;
+
+ if (BGP_DEBUG(evpn_mh, EVPN_MH_RT))
+ zlog_debug("local ES %s vni %u route-type %s nexthop %s delete",
+ es->esi_str,
+ vpn ? vpn->vni : 0,
+ p->prefix.route_type == BGP_EVPN_ES_ROUTE ?
+ "esr" : (vpn ? "ead-evi" : "ead-es"),
+ inet_ntoa(es->originator_ip));
+
+ /* Next, locate route node in the global EVPN routing table.
+ * Note that this table is a 2-level tree (RD-level + Prefix-level)
+ */
+ global_rn = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi,
+ (const struct prefix_evpn *)p, prd);
+ if (global_rn) {
+
+ /* Delete route entry in the global EVPN table. */
+ delete_evpn_route_entry(bgp, afi, safi, global_rn, &pi);
+
+ /* Schedule for processing - withdraws to peers happen from
+ * this table.
+ */
+ if (pi)
+ bgp_process(bgp, global_rn, afi, safi);
+ bgp_dest_unlock_node(global_rn);
+ }
+
+ /*
+ * Delete route entry in the ESI or VNI routing table.
+ * This can just be removed.
+ */
+ delete_evpn_route_entry(bgp, afi, safi, rn, &pi);
+ if (pi)
+ bgp_path_info_reap(rn, pi);
+ bgp_dest_unlock_node(rn);
+ return 0;
+}
+
+/*****************************************************************************
+ * Ethernet Segment (Type-4) Routes
+ * ESRs are used for BUM handling. XXX - BUM support is planned for phase-2 i.e.
+ * this code is just a placeholder for now
+ */
+/* Build extended community for EVPN ES (type-4) route.
+ *
+ * Sets attr->ecommunity to a VXLAN encapsulation community merged with an
+ * ES-import RT encoded from the MAC that es_get_system_mac() extracts from
+ * the ESI, and flags the attribute as carrying extended communities.
+ */
+static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es,
+ struct attr *attr)
+{
+ struct ecommunity ecom_encap;
+ struct ecommunity ecom_es_rt;
+ struct ecommunity_val eval;
+ struct ecommunity_val eval_es_rt;
+ bgp_encap_types tnl_type;
+ struct ethaddr mac;
+
+ /* Encap */
+ tnl_type = BGP_ENCAP_TYPE_VXLAN;
+ memset(&ecom_encap, 0, sizeof(ecom_encap));
+ encode_encap_extcomm(tnl_type, &eval);
+ ecom_encap.size = 1;
+ ecom_encap.val = (uint8_t *)eval.val;
+ attr->ecommunity = ecommunity_dup(&ecom_encap);
+
+ /* ES import RT */
+ memset(&mac, 0, sizeof(struct ethaddr));
+ memset(&ecom_es_rt, 0, sizeof(ecom_es_rt));
+ es_get_system_mac(&es->esi, &mac);
+ encode_es_rt_extcomm(&eval_es_rt, &mac);
+ ecom_es_rt.size = 1;
+ ecom_es_rt.val = (uint8_t *)eval_es_rt.val;
+ attr->ecommunity =
+ ecommunity_merge(attr->ecommunity, &ecom_es_rt);
+
+ attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
+}
+
+/* Create or update local type-4 (ESR) route.
+ *
+ * Builds the path attribute (nexthop = ES originator IP, encap + ES-import
+ * RT extended communities), creates or refreshes the local path in the ES
+ * routing table and, if anything changed, mirrors it into the global EVPN
+ * table from where it is advertised to peers.
+ *
+ * bgp - BGP instance
+ * es  - local ethernet segment
+ * p   - type-4 prefix for this ES
+ *
+ * Returns 0 on success, or the non-zero result of
+ * bgp_evpn_mh_route_update() if the ES-table entry could not be updated.
+ */
+static int bgp_evpn_type4_route_update(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct prefix_evpn *p)
+{
+	int ret = 0;
+	int route_changed = 0;
+	afi_t afi = AFI_L2VPN;
+	safi_t safi = SAFI_EVPN;
+	struct attr attr;
+	struct attr *attr_new = NULL;
+	struct bgp_node *rn = NULL;
+	struct bgp_path_info *pi = NULL;
+
+	memset(&attr, 0, sizeof(struct attr));
+
+	/* Build path-attribute for this route. */
+	bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
+	attr.nexthop = es->originator_ip;
+	attr.mp_nexthop_global_in = es->originator_ip;
+	attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+
+	/* Set up extended community. */
+	bgp_evpn_type4_route_extcomm_build(es, &attr);
+
+	/* First, create (or fetch) route node within the ESI. */
+	/* NOTE: There is no RD here. */
+	rn = bgp_node_get(es->route_table, (struct prefix *)p);
+
+	/* Create or update route entry. */
+	ret = bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi,
+			rn, &attr, 1, &pi, &route_changed);
+	if (ret != 0) {
+		flog_err(EC_BGP_ES_INVALID,
+			 "%u ERROR: Failed to updated ES route ESI: %s VTEP %s",
+			 bgp->vrf_id, es->esi_str,
+			 inet_ntoa(es->originator_ip));
+
+		/* On failure no path is handed back (pi stays NULL), so
+		 * bail out instead of running into the assert below.
+		 * NOTE(review): attr.ecommunity was not interned on this
+		 * path; confirm whether it needs an explicit free here.
+		 */
+		bgp_dest_unlock_node(rn);
+		aspath_unintern(&attr.aspath);
+		return ret;
+	}
+
+	assert(pi);
+	attr_new = pi->attr;
+
+	/* Perform route selection;
+	 * this is just to set the flags correctly
+	 * as local route in the ES always wins.
+	 */
+	bgp_evpn_es_route_select_install(bgp, es, rn);
+	bgp_dest_unlock_node(rn);
+
+	/* If this is a new route or some attribute has changed, export the
+	 * route to the global table. The route will be advertised to peers
+	 * from there. Note that this table is a 2-level tree (RD-level +
+	 * Prefix-level) similar to L3VPN routes.
+	 */
+	if (route_changed) {
+		struct bgp_path_info *global_pi;
+
+		rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
+				p, &es->prd);
+		bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi,
+				rn, attr_new, 1, &global_pi, &route_changed);
+
+		/* Schedule for processing and unlock node. */
+		bgp_process(bgp, rn, afi, safi);
+		bgp_dest_unlock_node(rn);
+	}
+
+	/* Unintern temporary. */
+	aspath_unintern(&attr.aspath);
+	return 0;
+}
+
+/* Withdraw the local type-4 (ESR) route of an ES.
+ * No L2-VNI is involved for an ESR, so the common delete helper is called
+ * without a vpn. Returns whatever bgp_evpn_mh_route_delete() returns.
+ */
+static int bgp_evpn_type4_route_delete(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct prefix_evpn *p)
+{
+	struct bgpevpn *no_vpn = NULL; /* l2vni */
+
+	return bgp_evpn_mh_route_delete(bgp, es, no_vpn, p);
+}
+
+/* Process remote/received EVPN type-4 route (advertise or withdraw).
+ *
+ * Parses RD, ESI and originator IP out of the NLRI bytes at pfx, builds the
+ * type-4 prefix and hands it to bgp_update() (attr set) or bgp_withdraw()
+ * (attr NULL).
+ *
+ * Returns -1 if psize is not one of the two accepted sizes or the encoded
+ * IP length is not IPV4_MAX_BITLEN; otherwise the bgp_update()/
+ * bgp_withdraw() result.
+ */
+int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi,
+ struct attr *attr, uint8_t *pfx, int psize,
+ uint32_t addpath_id)
+{
+ int ret;
+ esi_t esi;
+ uint8_t ipaddr_len;
+ struct in_addr vtep_ip;
+ struct prefix_rd prd;
+ struct prefix_evpn p;
+
+ /* Type-4 route should be either 23 or 35 bytes
+ * RD (8), ESI (10), ip-len (1), ip (4 or 16)
+ */
+ if (psize != BGP_EVPN_TYPE4_V4_PSIZE &&
+ psize != BGP_EVPN_TYPE4_V6_PSIZE) {
+ flog_err(EC_BGP_EVPN_ROUTE_INVALID,
+ "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d",
+ peer->bgp->vrf_id, peer->host, psize);
+ return -1;
+ }
+
+ /* Make prefix_rd */
+ prd.family = AF_UNSPEC;
+ prd.prefixlen = 64;
+ memcpy(&prd.val, pfx, RD_BYTES);
+ pfx += RD_BYTES;
+
+ /* get the ESI */
+ memcpy(&esi, pfx, ESI_BYTES);
+ pfx += ESI_BYTES;
+
+
+ /* Get the IP. Only an IPv4 originator is accepted below, even though
+ * the IPv6 NLRI size passes the length check above.
+ */
+ ipaddr_len = *pfx++;
+ if (ipaddr_len == IPV4_MAX_BITLEN) {
+ memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN);
+ } else {
+ flog_err(
+ EC_BGP_EVPN_ROUTE_INVALID,
+ "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d",
+ peer->bgp->vrf_id, peer->host, ipaddr_len);
+ return -1;
+ }
+
+ build_evpn_type4_prefix(&p, &esi, vtep_ip);
+ /* Process the route. */
+ if (attr) {
+ ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, 0, NULL);
+ } else {
+ ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, NULL);
+ }
+ return ret;
+}
+
+/* Tell whether prefix p is a type-4 (ES) route carrying the ESI of es.
+ * Returns true only when both the route type and the full ESI match.
+ */
+static bool bgp_evpn_type4_prefix_match(struct prefix_evpn *p,
+		struct bgp_evpn_es *es)
+{
+	if (p->prefix.route_type != BGP_EVPN_ES_ROUTE)
+		return false;
+
+	if (memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t)))
+		return false;
+
+	return true;
+}
+
+/* Import (or un-import) remote ESRs when a local ethernet segment is added
+ * or removed.
+ *
+ * Walks the global EVPN table (RD level, then prefix level) and, for every
+ * valid BGP_ROUTE_NORMAL path whose prefix is a type-4 route for this ES,
+ * installs it into or uninstalls it from the ES routing table.
+ *
+ * bgp     - BGP instance whose global EVPN table is walked
+ * es      - local ethernet segment
+ * install - true to install matching routes, false to uninstall them
+ *
+ * Returns 0 on success; on the first install/uninstall failure the error is
+ * logged and returned.
+ */
+static int bgp_evpn_type4_remote_routes_import(struct bgp *bgp,
+		struct bgp_evpn_es *es, bool install)
+{
+	int ret;
+	afi_t afi;
+	safi_t safi;
+	char buf[PREFIX_STRLEN];
+	struct bgp_node *rd_rn, *rn;
+	struct bgp_table *table;
+	struct bgp_path_info *pi;
+
+	afi = AFI_L2VPN;
+	safi = SAFI_EVPN;
+
+	/* Walk entire global routing table and evaluate routes which could be
+	 * imported into this Ethernet Segment.
+	 */
+	for (rd_rn = bgp_table_top(bgp->rib[afi][safi]); rd_rn;
+	     rd_rn = bgp_route_next(rd_rn)) {
+		table = bgp_dest_get_bgp_table_info(rd_rn);
+		if (!table)
+			continue;
+
+		for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
+			struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
+
+			for (pi = bgp_dest_get_bgp_path_info(rn); pi;
+			     pi = pi->next) {
+				/*
+				 * Consider "valid" remote routes applicable for
+				 * this ES.
+				 */
+				if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID)
+				      && pi->type == ZEBRA_ROUTE_BGP
+				      && pi->sub_type == BGP_ROUTE_NORMAL))
+					continue;
+
+				if (!bgp_evpn_type4_prefix_match(evp, es))
+					continue;
+
+				if (install)
+					ret = bgp_evpn_es_route_install(
+						bgp, es, evp, pi);
+				else
+					ret = bgp_evpn_es_route_uninstall(
+						bgp, es, evp, pi);
+
+				if (ret) {
+					flog_err(
+						EC_BGP_EVPN_FAIL,
+						"Failed to %s EVPN %s route in ESI %s",
+						install ? "install"
+							: "uninstall",
+						prefix2str(evp, buf,
+							   sizeof(buf)),
+						es->esi_str);
+
+					/* The table walk holds a reference on
+					 * the node it currently stands on at
+					 * each level; release both before
+					 * abandoning the walk.
+					 */
+					bgp_dest_unlock_node(rn);
+					bgp_dest_unlock_node(rd_rn);
+					return ret;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+/*****************************************************************************
+ * Ethernet Auto Discovery (EAD/Type-1) route handling
+ * There are two types of EAD routes -
+ * 1. EAD-per-ES - Key: {ESI, ET=0xffffffff}
+ * 2. EAD-per-EVI - Key: {ESI, ET=0}
+ */
+
+/* Extended communities associated with EAD-per-ES.
+ *
+ * Sets attr->ecommunity to: VXLAN encapsulation, the ESI-label community
+ * (encoded with single_active = false) and the export RTs of every L2-VNI
+ * that has a local ES-EVI on this ES; then flags the attribute as carrying
+ * extended communities.
+ */
+static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es,
+		struct attr *attr)
+{
+	struct ecommunity ecom_encap;
+	struct ecommunity ecom_esi_label;
+	struct ecommunity_val eval;
+	struct ecommunity_val eval_esi_label;
+	bgp_encap_types tnl_type;
+	struct listnode *evi_node, *rt_node;
+	struct ecommunity *ecom;
+	struct bgp_evpn_es_evi *es_evi;
+
+	/* Encap */
+	tnl_type = BGP_ENCAP_TYPE_VXLAN;
+	memset(&ecom_encap, 0, sizeof(ecom_encap));
+	encode_encap_extcomm(tnl_type, &eval);
+	ecom_encap.size = 1;
+	ecom_encap.val = (uint8_t *)eval.val;
+	attr->ecommunity = ecommunity_dup(&ecom_encap);
+
+	/* ESI label.
+	 * Zero the temporary first, as is done for every other on-stack
+	 * ecommunity in this file, so no field is passed on uninitialised.
+	 */
+	memset(&ecom_esi_label, 0, sizeof(ecom_esi_label));
+	encode_esi_label_extcomm(&eval_esi_label,
+			false /*single_active*/);
+	ecom_esi_label.size = 1;
+	ecom_esi_label.val = (uint8_t *)eval_esi_label.val;
+	attr->ecommunity =
+		ecommunity_merge(attr->ecommunity, &ecom_esi_label);
+
+	/* Add export RTs for all L2-VNIs associated with this ES */
+	/* XXX - suppress EAD-ES advertisement if there are no EVIs associated
+	 * with it.
+	 */
+	for (ALL_LIST_ELEMENTS_RO(es->es_evi_list,
+				evi_node, es_evi)) {
+		if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+			continue;
+		for (ALL_LIST_ELEMENTS_RO(es_evi->vpn->export_rtl,
+					rt_node, ecom))
+			attr->ecommunity = ecommunity_merge(attr->ecommunity,
+					ecom);
+	}
+
+	attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
+}
+
+/* Extended communities associated with EAD-per-EVI.
+ *
+ * Sets attr->ecommunity to the VXLAN encapsulation community merged with
+ * the export RTs of the L2-VNI (vpn->export_rtl) and flags the attribute
+ * as carrying extended communities. es is not referenced in this function.
+ */
+static void bgp_evpn_type1_evi_route_extcomm_build(struct bgp_evpn_es *es,
+ struct bgpevpn *vpn, struct attr *attr)
+{
+ struct ecommunity ecom_encap;
+ struct ecommunity_val eval;
+ bgp_encap_types tnl_type;
+ struct listnode *rt_node;
+ struct ecommunity *ecom;
+
+ /* Encap */
+ tnl_type = BGP_ENCAP_TYPE_VXLAN;
+ memset(&ecom_encap, 0, sizeof(ecom_encap));
+ encode_encap_extcomm(tnl_type, &eval);
+ ecom_encap.size = 1;
+ ecom_encap.val = (uint8_t *)eval.val;
+ attr->ecommunity = ecommunity_dup(&ecom_encap);
+
+ /* Add export RTs for the L2-VNI */
+ for (ALL_LIST_ELEMENTS_RO(vpn->export_rtl, rt_node, ecom))
+ attr->ecommunity = ecommunity_merge(attr->ecommunity, ecom);
+
+ attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
+}
+
+/* Update EVPN EAD (type-1) route -
+ * vpn - valid for EAD-EVI routes and NULL for EAD-ES routes
+ *
+ * Builds the path attribute (nexthop = ES originator IP plus the
+ * route-specific extended communities), creates or refreshes the local path
+ * in the VNI table (EAD-EVI) or the ES table (EAD-ES) and, if anything
+ * changed, mirrors it into the global EVPN table under the VNI's or ES's RD.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * bgp_evpn_mh_route_update() if the local table entry could not be updated.
+ */
+static int bgp_evpn_type1_route_update(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct bgpevpn *vpn,
+		struct prefix_evpn *p)
+{
+	int ret = 0;
+	afi_t afi = AFI_L2VPN;
+	safi_t safi = SAFI_EVPN;
+	struct attr attr;
+	struct attr *attr_new = NULL;
+	struct bgp_node *rn = NULL;
+	struct bgp_path_info *pi = NULL;
+	int route_changed = 0;
+	struct prefix_rd *global_rd;
+
+	memset(&attr, 0, sizeof(struct attr));
+
+	/* Build path-attribute for this route. */
+	bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
+	attr.nexthop = es->originator_ip;
+	attr.mp_nexthop_global_in = es->originator_ip;
+	attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+
+	if (vpn) {
+		/* EAD-EVI route update */
+		/* MPLS label */
+		vni2label(vpn->vni, &(attr.label));
+
+		/* Set up extended community */
+		bgp_evpn_type1_evi_route_extcomm_build(es, vpn, &attr);
+
+		/* First, create (or fetch) route node within the VNI. */
+		rn = bgp_node_get(vpn->route_table, (struct prefix *)p);
+		global_rd = &vpn->prd;
+	} else {
+		/* EAD-ES route update */
+		/* MPLS label is 0 for EAD-ES route */
+
+		/* Set up extended community */
+		bgp_evpn_type1_es_route_extcomm_build(es, &attr);
+
+		/* First, create (or fetch) route node within the ES. */
+		/* NOTE: There is no RD here. */
+		/* XXX: fragment ID must be included as a part of the prefix. */
+		rn = bgp_node_get(es->route_table, (struct prefix *)p);
+		global_rd = &es->prd;
+	}
+
+	/* Create or update route entry. */
+	ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi,
+			rn, &attr, 1, &pi, &route_changed);
+	if (ret != 0) {
+		if (vpn)
+			flog_err(EC_BGP_ES_INVALID,
+				 "%u Failed to update EAD-EVI route ESI: %s VNI %u VTEP %s",
+				 bgp->vrf_id, es->esi_str, vpn->vni,
+				 inet_ntoa(es->originator_ip));
+		else
+			flog_err(EC_BGP_ES_INVALID,
+				 "%u ERROR: Failed to update EAD-ES route ESI: %s VTEP %s",
+				 bgp->vrf_id, es->esi_str,
+				 inet_ntoa(es->originator_ip));
+
+		/* On failure no path is handed back (pi stays NULL), so
+		 * bail out instead of running into the assert below.
+		 * NOTE(review): attr.ecommunity was not interned on this
+		 * path; confirm whether it needs an explicit free here.
+		 */
+		bgp_dest_unlock_node(rn);
+		aspath_unintern(&attr.aspath);
+		return ret;
+	}
+
+	assert(pi);
+	attr_new = pi->attr;
+
+	/* Perform route selection;
+	 * this is just to set the flags correctly as local route in
+	 * the ES always wins.
+	 * NOTE(review): vpn is NULL here for EAD-ES routes - confirm
+	 * evpn_route_select_install() accepts that.
+	 */
+	evpn_route_select_install(bgp, vpn, rn);
+	bgp_dest_unlock_node(rn);
+
+	/* If this is a new route or some attribute has changed, export the
+	 * route to the global table. The route will be advertised to peers
+	 * from there. Note that this table is a 2-level tree (RD-level +
+	 * Prefix-level) similar to L3VPN routes.
+	 */
+	if (route_changed) {
+		struct bgp_path_info *global_pi;
+
+		rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi,
+				p, global_rd);
+		bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi,
+				rn, attr_new, 1, &global_pi, &route_changed);
+
+		/* Schedule for processing and unlock node. */
+		bgp_process(bgp, rn, afi, safi);
+		bgp_dest_unlock_node(rn);
+	}
+
+	/* Unintern temporary. */
+	aspath_unintern(&attr.aspath);
+	return 0;
+}
+
+/* Withdraw the local EAD-per-ES (type-1) route of an ES.
+ * An EAD-ES route has no L2-VNI, so the common delete helper is called
+ * without a vpn. Returns whatever bgp_evpn_mh_route_delete() returns.
+ */
+static int bgp_evpn_type1_es_route_delete(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct prefix_evpn *p)
+{
+	struct bgpevpn *no_vpn = NULL; /* l2vni */
+
+	return bgp_evpn_mh_route_delete(bgp, es, no_vpn, p);
+}
+
+/* Withdraw the local EAD-per-EVI (type-1) route of an ES for one L2-VNI.
+ * Thin wrapper: forwards to the common MH route delete helper with the vpn.
+ */
+static int bgp_evpn_type1_evi_route_delete(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct bgpevpn *vpn,
+		struct prefix_evpn *p)
+{
+	int rc = bgp_evpn_mh_route_delete(bgp, es, vpn, p);
+
+	return rc;
+}
+
+/* Generate EAD-EVI for all VNIs.
+ *
+ * Marks the ES as advertising EAD-EVI routes (BGP_EVPNES_ADV_EVI) and
+ * creates/updates the EAD-EVI route for every local ES-EVI on the ES.
+ * Does nothing if the flag is already set. Failures are logged per VNI and
+ * do not stop the walk.
+ */
+static void bgp_evpn_local_type1_evi_route_add(struct bgp *bgp,
+		struct bgp_evpn_es *es)
+{
+	struct listnode *evi_node;
+	struct prefix_evpn p;
+	struct bgp_evpn_es_evi *es_evi;
+
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI))
+		/* EAD-EVI route add for this ES is already done */
+		return;
+
+	SET_FLAG(es->flags, BGP_EVPNES_ADV_EVI);
+	build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
+			&es->esi, es->originator_ip);
+
+	for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
+		if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+			continue;
+		/* This is a type-1 (EAD-EVI) route, not a type-4 one */
+		if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn, &p))
+			flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+				 "%u: EAD-EVI route creation failure for ESI %s",
+				 bgp->vrf_id, es->esi_str);
+	}
+}
+
+/*
+ * Withdraw EAD-EVI for all VNIs
+ *
+ * Clears BGP_EVPNES_ADV_EVI on the ES and deletes the local EAD-EVI route
+ * for every local ES-EVI on the ES. Does nothing if the flag is not set.
+ * Failures are logged per VNI and do not stop the walk.
+ */
+static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp,
+		struct bgp_evpn_es *es)
+{
+	struct listnode *evi_node;
+	struct prefix_evpn p;
+	struct bgp_evpn_es_evi *es_evi;
+
+	/* Delete and withdraw locally learnt EAD-EVI route */
+	if (!CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI))
+		/* EAD-EVI route has not been advertised for this ES */
+		return;
+
+	UNSET_FLAG(es->flags, BGP_EVPNES_ADV_EVI);
+	build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG,
+			&es->esi, es->originator_ip);
+	for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) {
+		if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+			continue;
+		/* This is a delete of a type-1 (EAD-EVI) route; report it as
+		 * such rather than as a type-4 creation failure.
+		 */
+		if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, &p))
+			flog_err(EC_BGP_EVPN_ROUTE_DELETE,
+				 "%u: EAD-EVI route deletion failure for ESI %s",
+				 bgp->vrf_id, es->esi_str);
+	}
+}
+
+/*
+ * Process received EVPN type-1 route (advertise or withdraw).
+ *
+ * Parses RD, ESI and ethernet tag out of the NLRI bytes at pfx, builds the
+ * type-1 prefix with a zero VTEP IP and hands it to bgp_update() (attr set)
+ * or bgp_withdraw() (attr NULL).
+ *
+ * Returns -1 if psize is not BGP_EVPN_TYPE1_PSIZE, otherwise the
+ * bgp_update()/bgp_withdraw() result.
+ *
+ * NOTE(review): the label bytes are copied into a local but that local is
+ * not used afterwards in this function.
+ */
+int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi,
+ struct attr *attr, uint8_t *pfx, int psize,
+ uint32_t addpath_id)
+{
+ int ret;
+ struct prefix_rd prd;
+ esi_t esi;
+ uint32_t eth_tag;
+ mpls_label_t label;
+ struct in_addr vtep_ip;
+ struct prefix_evpn p;
+
+ if (psize != BGP_EVPN_TYPE1_PSIZE) {
+ flog_err(EC_BGP_EVPN_ROUTE_INVALID,
+ "%u:%s - Rx EVPN Type-1 NLRI with invalid length %d",
+ peer->bgp->vrf_id, peer->host, psize);
+ return -1;
+ }
+
+ /* Make prefix_rd */
+ prd.family = AF_UNSPEC;
+ prd.prefixlen = 64;
+ memcpy(&prd.val, pfx, RD_BYTES);
+ pfx += RD_BYTES;
+
+ /* get the ESI */
+ memcpy(&esi, pfx, ESI_BYTES);
+ pfx += ESI_BYTES;
+
+ /* Copy Ethernet Tag (converted to host byte order) */
+ memcpy(&eth_tag, pfx, EVPN_ETH_TAG_BYTES);
+ eth_tag = ntohl(eth_tag);
+ pfx += EVPN_ETH_TAG_BYTES;
+
+ memcpy(&label, pfx, BGP_LABEL_BYTES);
+
+ /* EAD route prefix doesn't include the nexthop in the global
+ * table
+ */
+ vtep_ip.s_addr = 0;
+ build_evpn_type1_prefix(&p, eth_tag, &esi, vtep_ip);
+ /* Process the route. */
+ if (attr) {
+ ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, 0, NULL);
+ } else {
+ ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
+ afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
+ &prd, NULL, 0, NULL);
+ }
+ return ret;
+}
+
+/*****************************************************************************/
+/* Ethernet Segment Management
+ * 1. Ethernet Segment is a collection of links attached to the same
+ * server (MHD) or switch (MHN)
+ * 2. An Ethernet Segment can span multiple PEs and is identified by the
+ * 10-byte ES-ID.
+ * 3. Local ESs are configured in zebra and sent to BGP
+ * 4. Remote ESs are created by BGP when one or more ES-EVIs reference it i.e.
+ * created on first reference and release on last de-reference
+ * 5. An ES can be both local and remote. In fact, most local ESs are expected
+ * to have an ES peer.
+ */
+
+/* A list of remote VTEPs is maintained for each ES. This list includes -
+ * 1. VTEPs for which we have imported the ESR i.e. ES-peers
+ * 2. VTEPs that have an "active" ES-EVI VTEP i.e. EAD-per-ES and EAD-per-EVI
+ * have been imported into one or more VNIs
+ *
+ * Comparison callback for that list: orders two bgp_evpn_es_vtep entries by
+ * the numeric value of vtep_ip.s_addr. Returns <0, 0 or >0.
+ */
+static int bgp_evpn_es_vtep_cmp(void *p1, void *p2)
+{
+	const struct bgp_evpn_es_vtep *es_vtep1 = p1;
+	const struct bgp_evpn_es_vtep *es_vtep2 = p2;
+	uint32_t ip1 = es_vtep1->vtep_ip.s_addr;
+	uint32_t ip2 = es_vtep2->vtep_ip.s_addr;
+
+	/* Compare explicitly instead of subtracting: the difference of two
+	 * unsigned 32-bit values does not fit in an int, so a subtraction
+	 * can come out with the wrong sign.
+	 */
+	return (ip1 > ip2) - (ip1 < ip2);
+}
+
+/* Allocate a VTEP entry for vtep_ip, tie it to es and link it into
+ * es->es_vtep_list via listnode_add_sort(). Returns the new entry.
+ */
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_new(struct bgp_evpn_es *es,
+		struct in_addr vtep_ip)
+{
+	struct bgp_evpn_es_vtep *new_vtep;
+
+	new_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(*new_vtep));
+	new_vtep->vtep_ip.s_addr = vtep_ip.s_addr;
+	new_vtep->es = es;
+
+	/* the list node is embedded in the entry itself */
+	listnode_init(&new_vtep->es_listnode, new_vtep);
+	listnode_add_sort(es->es_vtep_list, &new_vtep->es_listnode);
+
+	return new_vtep;
+}
+
+/* Unlink the VTEP entry from its ES and free it, unless it is still
+ * referenced by an ESR or by at least one EVI.
+ */
+static void bgp_evpn_es_vtep_free(struct bgp_evpn_es_vtep *es_vtep)
+{
+	struct bgp_evpn_es *parent = es_vtep->es;
+
+	/* as long as there is some reference we can't free it */
+	if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR))
+		return;
+	if (es_vtep->evi_cnt)
+		return;
+
+	list_delete_node(parent->es_vtep_list, &es_vtep->es_listnode);
+	XFREE(MTYPE_BGP_EVPN_ES_VTEP, es_vtep);
+}
+
+/* Look up a VTEP by IP in the ES's VTEP list; returns the matching entry
+ * or NULL when the VTEP is not part of the list.
+ */
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_find(struct bgp_evpn_es *es,
+		struct in_addr vtep_ip)
+{
+	struct bgp_evpn_es_vtep *cur;
+	struct listnode *ln = NULL;
+
+	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, ln, cur)) {
+		if (cur->vtep_ip.s_addr != vtep_ip.s_addr)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* Send the remote ES to zebra for NHG programming.
+ *
+ * Encodes the ES's ESI and the VTEP IP into the zclient output buffer as a
+ * ZEBRA_REMOTE_ES_VTEP_ADD or ZEBRA_REMOTE_ES_VTEP_DEL message (selected by
+ * "add") and passes it to zclient_send_message(). Returns 0 without sending
+ * when there is no usable zclient socket or zebra does not know the
+ * instance; otherwise returns zclient_send_message()'s result.
+ */
+static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp,
+		struct bgp_evpn_es_vtep *es_vtep, bool add)
+{
+	struct bgp_evpn_es *es = es_vtep->es;
+	struct stream *obuf;
+
+	/* Check socket. */
+	if (!zclient || zclient->sock < 0)
+		return 0;
+
+	/* Don't try to register if Zebra doesn't know of this instance. */
+	if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) {
+		if (BGP_DEBUG(zebra, ZEBRA))
+			zlog_debug("No zebra instance, not installing remote es %s",
+				   es->esi_str);
+		return 0;
+	}
+
+	obuf = zclient->obuf;
+	stream_reset(obuf);
+
+	if (add)
+		zclient_create_header(obuf, ZEBRA_REMOTE_ES_VTEP_ADD,
+				      bgp->vrf_id);
+	else
+		zclient_create_header(obuf, ZEBRA_REMOTE_ES_VTEP_DEL,
+				      bgp->vrf_id);
+
+	/* payload: the ESI followed by the VTEP's IPv4 address */
+	stream_put(obuf, &es->esi, sizeof(esi_t));
+	stream_put_ipv4(obuf, es_vtep->vtep_ip.s_addr);
+
+	/* store the stream's end position at offset 0 */
+	stream_putw_at(obuf, 0, stream_get_endp(obuf));
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("Tx %s Remote ESI %s VTEP %s",
+			   add ? "ADD" : "DEL", es->esi_str,
+			   inet_ntoa(es_vtep->vtep_ip));
+
+	return zclient_send_message(zclient);
+}
+
+/* Recompute the ACTIVE flag of an ES VTEP from its EVI reference count.
+ * When the state changes the VTEP is added to/removed from zebra and the
+ * ES is queued for consistency checks.
+ */
+static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp,
+		struct bgp_evpn_es_vtep *es_vtep)
+{
+	bool was_active = !!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+	/* currently we need an active EVI reference to use the VTEP as
+	 * a nexthop. this may change...
+	 */
+	bool now_active = !!es_vtep->evi_cnt;
+
+	if (now_active)
+		SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+	else
+		UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
+
+	/* no transition, nothing to announce */
+	if (was_active == now_active)
+		return;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("es %s vtep %s %s",
+			   es_vtep->es->esi_str,
+			   inet_ntoa(es_vtep->vtep_ip),
+			   now_active ? "active" : "inactive");
+
+	/* send remote ES to zebra */
+	bgp_zebra_send_remote_es_vtep(bgp, es_vtep, now_active);
+
+	/* queue up the es for background consistency checks */
+	bgp_evpn_es_cons_checks_pend_add(es_vtep->es);
+}
+
+/* Add a reference to a VTEP on the ES, creating the VTEP entry if needed.
+ * An "esr" reference sets the ESR flag; any other reference bumps the EVI
+ * count. The active state is re-evaluated afterwards. Returns the entry.
+ */
+static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr)
+{
+	struct bgp_evpn_es_vtep *entry = bgp_evpn_es_vtep_find(es, vtep_ip);
+
+	if (entry == NULL)
+		entry = bgp_evpn_es_vtep_new(es, vtep_ip);
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("es %s vtep %s add %s",
+			   entry->es->esi_str,
+			   inet_ntoa(entry->vtep_ip),
+			   esr ? "esr" : "ead");
+
+	if (!esr)
+		++entry->evi_cnt;
+	else
+		SET_FLAG(entry->flags, BGP_EVPNES_VTEP_ESR);
+
+	bgp_evpn_es_vtep_re_eval_active(bgp, entry);
+
+	return entry;
+}
+
+/* Drop one reference (ESR flag or one EVI count) from the VTEP entry,
+ * re-evaluate its active state and try to free it.
+ */
+static void bgp_evpn_es_vtep_do_del(struct bgp *bgp,
+		struct bgp_evpn_es_vtep *es_vtep, bool esr)
+{
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("es %s vtep %s del %s",
+			   es_vtep->es->esi_str,
+			   inet_ntoa(es_vtep->vtep_ip),
+			   esr ? "esr" : "ead");
+
+	if (esr)
+		UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR);
+	else if (es_vtep->evi_cnt)
+		/* never let the counter go below zero */
+		--es_vtep->evi_cnt;
+
+	bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep);
+	bgp_evpn_es_vtep_free(es_vtep);
+}
+
+/* Find the VTEP on the ES by IP and, if present, drop one reference to it */
+static void bgp_evpn_es_vtep_del(struct bgp *bgp,
+		struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr)
+{
+	struct bgp_evpn_es_vtep *entry = bgp_evpn_es_vtep_find(es, vtep_ip);
+
+	if (!entry)
+		return;
+
+	bgp_evpn_es_vtep_do_del(bgp, entry, esr);
+}
+
+/* Ordering for the global ES RB tree: byte-wise comparison of the first
+ * ESI_BYTES bytes of the two ESIs.
+ */
+static int bgp_es_rb_cmp(const struct bgp_evpn_es *es1,
+		const struct bgp_evpn_es *es2)
+{
+	const void *lhs = &es1->esi;
+	const void *rhs = &es2->esi;
+
+	return memcmp(lhs, rhs, ESI_BYTES);
+}
+RB_GENERATE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp);
+
+/* Look up an ES by ESI in the global ES RB tree; the result is whatever
+ * RB_FIND yields for a key carrying only the ESI.
+ */
+struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi)
+{
+	struct bgp_evpn_es key;
+
+	/* only the ESI of the key is filled in; the tree compares ESIs */
+	memcpy(&key.esi, esi, sizeof(esi_t));
+
+	return RB_FIND(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, &key);
+}
+
+/* Allocate a new ES for the given ESI and insert it into the global ES
+ * RB tree.
+ *
+ * Returns NULL when bgp is NULL or when RB_INSERT reports a failure.
+ * The tree insertion is done before any per-ES resource (VTEP list, route
+ * table, ES-EVI list) is created so that the failure path only has the ES
+ * structure itself to release and nothing is leaked.
+ */
+static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi)
+{
+	struct bgp_evpn_es *es;
+
+	if (!bgp)
+		return NULL;
+
+	es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct bgp_evpn_es));
+
+	/* set the ESI */
+	memcpy(&es->esi, esi, sizeof(esi_t));
+
+	/* Add to rb_tree; the tree is keyed on the ESI only */
+	if (RB_INSERT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es)) {
+		XFREE(MTYPE_BGP_EVPN_ES, es);
+		return NULL;
+	}
+
+	/* Initialise the VTEP list */
+	es->es_vtep_list = list_new();
+	listset_app_node_mem(es->es_vtep_list);
+	es->es_vtep_list->cmp = bgp_evpn_es_vtep_cmp;
+
+	esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str));
+
+	/* Initialize the ES routing table */
+	es->route_table = bgp_table_init(bgp, AFI_L2VPN, SAFI_EVPN);
+
+	/* Initialise the ES-EVI list */
+	es->es_evi_list = list_new();
+	listset_app_node_mem(es->es_evi_list);
+
+	QOBJ_REG(es, bgp_evpn_es);
+
+	return es;
+}
+
+/* Free a given ES -
+ * Only releases the memory and bookkeeping held against the ES; any other
+ * action is the caller's job. Does nothing while the ES is still flagged
+ * local or remote.
+ */
+static void bgp_evpn_es_free(struct bgp_evpn_es *es)
+{
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)
+	    || CHECK_FLAG(es->flags, BGP_EVPNES_REMOTE))
+		return;
+
+	/* lists and routing table owned by the ES */
+	list_delete(&es->es_evi_list);
+	list_delete(&es->es_vtep_list);
+	bgp_table_unlock(es->route_table);
+
+	/* take the ES out of the RB tree and the pending-checks queue */
+	RB_REMOVE(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es);
+	bgp_evpn_es_cons_checks_pend_del(es);
+
+	QOBJ_UNREG(es);
+	XFREE(MTYPE_BGP_EVPN_ES, es);
+}
+
+/* Init local info associated with the ES: flag it local, link it on the
+ * local ES list and auto-derive an RD from the router-id and a newly
+ * assigned rd_id. No-op if the ES is already flagged local.
+ */
+static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+	char rd_buf[BGP_EVPN_PREFIX_RD_LEN];
+
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL))
+		return;
+
+	SET_FLAG(es->flags, BGP_EVPNES_LOCAL);
+
+	listnode_init(&es->es_listnode, es);
+	listnode_add(bgp_mh_info->local_es_list, &es->es_listnode);
+
+	/* auto derive RD for this es as "<router-id>:<rd_id>" */
+	bf_assign_index(bm->rd_idspace, es->rd_id);
+	es->prd.family = AF_UNSPEC;
+	es->prd.prefixlen = 64;
+	snprintf(rd_buf, sizeof(rd_buf), "%s:%hu", inet_ntoa(bgp->router_id),
+		 es->rd_id);
+	(void)str2prefix_rd(rd_buf, &es->prd);
+}
+
+/* Clear any local info associated with the ES: drop the local flag, unlink
+ * it from the local ES list, release its rd_id and attempt to free the ES.
+ * No-op if the ES is not flagged local.
+ */
+static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es)
+{
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) {
+		UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL);
+
+		/* remove from the ES local list */
+		list_delete_node(bgp_mh_info->local_es_list,
+				 &es->es_listnode);
+
+		bf_release_index(bm->rd_idspace, es->rd_id);
+
+		bgp_evpn_es_free(es);
+	}
+}
+
+/* Re-evaluate the "remote" flag of the ES from its remote ES-EVI count.
+ * When the count has dropped to zero on a remote ES the flag is cleared
+ * and an attempt is made to free the ES.
+ */
+static void bgp_evpn_es_remote_info_re_eval(struct bgp_evpn_es *es)
+{
+	if (es->remote_es_evi_cnt) {
+		SET_FLAG(es->flags, BGP_EVPNES_REMOTE);
+		return;
+	}
+
+	/* no remote references left */
+	if (!CHECK_FLAG(es->flags, BGP_EVPNES_REMOTE))
+		return;
+
+	UNSET_FLAG(es->flags, BGP_EVPNES_REMOTE);
+	bgp_evpn_es_free(es);
+}
+
+/* Process ES link oper-down by withdrawing ES-EAD and ESR.
+ * No-op if the ES is not flagged oper-up.
+ */
+static void bgp_evpn_local_es_down(struct bgp *bgp,
+		struct bgp_evpn_es *es)
+{
+	struct prefix_evpn pfx;
+
+	if (!CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP))
+		return;
+
+	UNSET_FLAG(es->flags, BGP_EVPNES_OPER_UP);
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("local es %s down", es->esi_str);
+
+	/* withdraw ESR: delete and withdraw locally learnt ES route */
+	build_evpn_type4_prefix(&pfx, &es->esi, es->originator_ip);
+	if (bgp_evpn_type4_route_delete(bgp, es, &pfx))
+		flog_err(EC_BGP_EVPN_ROUTE_DELETE,
+			 "%u failed to delete type-4 route for ESI %s",
+			 bgp->vrf_id, es->esi_str);
+
+	/* withdraw EAD-EVI unless it is advertised for down links too */
+	if (!bgp_mh_info->ead_evi_adv_for_down_links)
+		bgp_evpn_local_type1_evi_route_del(bgp, es);
+
+	/* withdraw EAD-ES */
+	build_evpn_type1_prefix(&pfx, BGP_EVPN_AD_ES_ETH_TAG,
+				&es->esi, es->originator_ip);
+	if (bgp_evpn_type1_es_route_delete(bgp, es, &pfx))
+		flog_err(EC_BGP_EVPN_ROUTE_DELETE,
+			 "%u failed to delete type-1 route for ESI %s",
+			 bgp->vrf_id, es->esi_str);
+}
+
+/* Process ES link oper-up by generating ES-EAD and ESR.
+ * No-op if the ES is already flagged oper-up.
+ */
+static void bgp_evpn_local_es_up(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+	struct prefix_evpn pfx;
+
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP))
+		return;
+
+	SET_FLAG(es->flags, BGP_EVPNES_OPER_UP);
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("local es %s up", es->esi_str);
+
+	/* generate ESR */
+	build_evpn_type4_prefix(&pfx, &es->esi, es->originator_ip);
+	if (bgp_evpn_type4_route_update(bgp, es, &pfx) != 0)
+		flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+			 "%u: Type4 route creation failure for ESI %s",
+			 bgp->vrf_id, es->esi_str);
+
+	/* generate EAD-EVI */
+	bgp_evpn_local_type1_evi_route_add(bgp, es);
+
+	/* generate EAD-ES; the result of the update is not checked here */
+	build_evpn_type1_prefix(&pfx, BGP_EVPN_AD_ES_ETH_TAG,
+				&es->esi, es->originator_ip);
+	bgp_evpn_type1_route_update(bgp, es, NULL, &pfx);
+}
+
+/* Tear down a local ES: delete its routes, release every ES-EVI on its
+ * list and clear the local info associated with it.
+ */
+static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es)
+{
+	struct listnode *ln, *next_ln;
+	struct bgp_evpn_es_evi *cur;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("del local es %s", es->esi_str);
+
+	/* Delete all local EVPN ES routes from ESI table
+	 * and schedule for processing (to withdraw from peers)
+	 */
+	bgp_evpn_es_route_del_all(bgp, es);
+
+	/* release all local ES EVIs associated with the ES; the iterator
+	 * keeps the next node so entries may be unlinked while walking
+	 */
+	for (ALL_LIST_ELEMENTS(es->es_evi_list, ln, next_ln, cur))
+		bgp_evpn_local_es_evi_do_del(cur);
+
+	/* Clear local info associated with the ES and free it up if there is
+	 * no remote reference
+	 */
+	bgp_evpn_es_local_info_clear(es);
+}
+
+/* Returns true when an ES exists for the ESI and is flagged local */
+bool bgp_evpn_is_esi_local(esi_t *esi)
+{
+	struct bgp_evpn_es *found = bgp_evpn_es_find(esi);
+
+	if (!found)
+		return false;
+
+	return !!(found->flags & BGP_EVPNES_LOCAL);
+}
+
+/* Delete a local ES identified by its ESI.
+ *
+ * Returns 0 on success and -1 (after logging a warning) when no ES exists
+ * for the ESI. The warning formats the ESI from the caller-supplied value:
+ * on this path the ES pointer is NULL and must not be dereferenced.
+ */
+int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi)
+{
+	char buf[ESI_STR_LEN];
+	struct bgp_evpn_es *es = NULL;
+
+	/* Lookup ESI hash - should exist. */
+	es = bgp_evpn_es_find(esi);
+	if (!es) {
+		flog_warn(EC_BGP_EVPN_ESI,
+			  "%u: ES %s missing at local ES DEL",
+			  bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf)));
+		return -1;
+	}
+
+	bgp_evpn_local_es_do_del(bgp, es);
+	return 0;
+}
+
+/* Handle device to ES id association. Results in the creation of a local
+ * ES (or the promotion of an existing ES to local).
+ *
+ * Returns 0 on success and -1 if a new ES entry could not be created.
+ */
+int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
+		struct in_addr originator_ip, bool oper_up)
+{
+	char esi_buf[ESI_STR_LEN];
+	struct bgp_evpn_es *es = bgp_evpn_es_find(esi);
+	bool newly_local = true;
+
+	if (!es) {
+		/* create the new es */
+		es = bgp_evpn_es_new(bgp, esi);
+		if (!es) {
+			flog_err(EC_BGP_ES_CREATE,
+				 "%u: Failed to allocate ES entry for ESI %s - at Local ES Add",
+				 bgp->vrf_id,
+				 esi_to_str(esi, esi_buf, sizeof(esi_buf)));
+			return -1;
+		}
+	} else if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) {
+		/* ES was already local */
+		newly_local = false;
+	}
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("add local es %s orig-ip %s",
+			   es->esi_str,
+			   inet_ntoa(originator_ip));
+
+	es->originator_ip = originator_ip;
+	bgp_evpn_es_local_info_set(bgp, es);
+
+	/* import all remote Type-4 routes in the ES table */
+	if (newly_local)
+		bgp_evpn_type4_remote_routes_import(bgp, es,
+						    true /* install */);
+
+	/* create and advertise EAD-EVI routes for the ES -
+	 * XXX - till an ES-EVI reference is created there is really nothing to
+	 * advertise
+	 */
+	if (bgp_mh_info->ead_evi_adv_for_down_links)
+		bgp_evpn_local_type1_evi_route_add(bgp, es);
+
+	/* If the ES link is operationally up generate EAD-ES. EAD-EVI
+	 * can be generated even if the link is inactive.
+	 */
+	if (!oper_up)
+		bgp_evpn_local_es_down(bgp, es);
+	else
+		bgp_evpn_local_es_up(bgp, es);
+
+	return 0;
+}
+
+/* Build a comma-separated summary of the ES's VTEPs in vtep_str.
+ *
+ * Each VTEP is rendered as "<ip>(<flags>)" where the flags are "E" when
+ * BGP_EVPNES_VTEP_ESR is set, "A" when BGP_EVPNES_VTEP_ACTIVE is set and
+ * "-" when neither is. vtep_str_size is the capacity of vtep_str; anything
+ * that does not fit is truncated by strlcat(). Returns vtep_str.
+ *
+ * The size is a size_t so that a caller passing sizeof() of a buffer larger
+ * than 255 bytes does not have the value silently narrowed. A zero-sized
+ * buffer is left untouched.
+ */
+static char *bgp_evpn_es_vteps_str(char *vtep_str, struct bgp_evpn_es *es,
+		size_t vtep_str_size)
+{
+	char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ];
+	struct listnode *node;
+	struct bgp_evpn_es_vtep *es_vtep;
+	bool first = true;
+
+	/* no room even for the terminator */
+	if (!vtep_str_size)
+		return vtep_str;
+
+	vtep_str[0] = '\0';
+	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+		vtep_flag_str[0] = '\0';
+		if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+			strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str));
+		if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE)
+			strlcat(vtep_flag_str, "A", sizeof(vtep_flag_str));
+
+		/* placeholder when no flag is set */
+		if (vtep_flag_str[0] == '\0')
+			strlcat(vtep_flag_str, "-", sizeof(vtep_flag_str));
+
+		/* separator between entries, not before the first one */
+		if (first)
+			first = false;
+		else
+			strlcat(vtep_str, ",", vtep_str_size);
+
+		strlcat(vtep_str, inet_ntoa(es_vtep->vtep_ip), vtep_str_size);
+		strlcat(vtep_str, "(", vtep_str_size);
+		strlcat(vtep_str, vtep_flag_str, vtep_str_size);
+		strlcat(vtep_str, ")", vtep_str_size);
+	}
+
+	return vtep_str;
+}
+
+/* Append str to the json array as a new json string object */
+static inline void json_array_string_add(json_object *json, const char *str)
+{
+	json_object *json_str = json_object_new_string(str);
+
+	json_object_array_add(json, json_str);
+}
+
+/* Append one VTEP to the json_vteps array as an object carrying "vtep_ip"
+ * and, when the ESR or ACTIVE flag is set, a "flags" array naming them.
+ */
+static void bgp_evpn_es_json_vtep_fill(json_object *json_vteps,
+		struct bgp_evpn_es_vtep *es_vtep)
+{
+	json_object *entry = json_object_new_object();
+	bool has_esr = !!(es_vtep->flags & BGP_EVPNES_VTEP_ESR);
+	bool is_active = !!(es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE);
+
+	json_object_string_add(entry, "vtep_ip",
+			       inet_ntoa(es_vtep->vtep_ip));
+
+	/* the "flags" key is omitted entirely when no flag is set */
+	if (has_esr || is_active) {
+		json_object *flag_arr = json_object_new_array();
+
+		if (has_esr)
+			json_array_string_add(flag_arr, "esr");
+		if (is_active)
+			json_array_string_add(flag_arr, "active");
+		json_object_object_add(entry, "flags", flag_arr);
+	}
+
+	json_object_array_add(json_vteps, entry);
+}
+
+/* Display one ES in brief form.
+ *
+ * With a json object the ESI, RD, type, VTEPs and VNI count are added to
+ * it; otherwise a single formatted row is written to the vty.
+ */
+static void bgp_evpn_es_show_entry(struct vty *vty,
+		struct bgp_evpn_es *es, json_object *json)
+{
+	char rd_str[RD_ADDRSTRLEN];
+	char flag_str[4];
+	char vteps[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+	struct listnode *ln;
+	struct bgp_evpn_es_vtep *cur;
+
+	if (json) {
+		json_object *json_vteps;
+		json_object *json_types;
+
+		json_object_string_add(json, "esi", es->esi_str);
+		json_object_string_add(json, "rd",
+				       prefix_rd2str(&es->prd, rd_str,
+						     sizeof(rd_str)));
+
+		/* "type" is only present for a local and/or remote ES */
+		if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) {
+			json_types = json_object_new_array();
+			if (es->flags & BGP_EVPNES_LOCAL)
+				json_array_string_add(json_types, "local");
+			if (es->flags & BGP_EVPNES_REMOTE)
+				json_array_string_add(json_types, "remote");
+			json_object_object_add(json, "type", json_types);
+		}
+
+		/* "vteps" is only present when the list is non-empty */
+		if (listcount(es->es_vtep_list)) {
+			json_vteps = json_object_new_array();
+			for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, ln, cur))
+				bgp_evpn_es_json_vtep_fill(json_vteps, cur);
+			json_object_object_add(json, "vteps", json_vteps);
+		}
+
+		json_object_int_add(json, "vniCount",
+				    listcount(es->es_evi_list));
+		return;
+	}
+
+	/* text row: L/R/I flag letters, RD (or "-"), VNI count, VTEPs */
+	flag_str[0] = '\0';
+	if (es->flags & BGP_EVPNES_LOCAL)
+		strlcat(flag_str, "L", sizeof(flag_str));
+	if (es->flags & BGP_EVPNES_REMOTE)
+		strlcat(flag_str, "R", sizeof(flag_str));
+	if (es->inconsistencies)
+		strlcat(flag_str, "I", sizeof(flag_str));
+
+	bgp_evpn_es_vteps_str(vteps, es, sizeof(vteps));
+
+	if (es->flags & BGP_EVPNES_LOCAL)
+		prefix_rd2str(&es->prd, rd_str, sizeof(rd_str));
+	else
+		strlcpy(rd_str, "-", sizeof(rd_str));
+
+	vty_out(vty, "%-30s %-5s %-21s %-8d %s\n",
+		es->esi_str, flag_str, rd_str,
+		listcount(es->es_evi_list), vteps);
+}
+
+/* Display one ES in detailed form.
+ *
+ * With a json object the brief fields are added first (via
+ * bgp_evpn_es_show_entry) followed by the flags, originator IP, remote VNI
+ * count, inconsistent VNI-VTEP count and the inconsistencies. Without one
+ * a multi-line text record is written to the vty.
+ *
+ * In the text form an empty VTEP list is shown as "-", like the other
+ * empty fields; the placeholder is written into vtep_str (it used to be
+ * written into the RD buffer, which is overwritten right afterwards, so
+ * it never reached the output).
+ */
+static void bgp_evpn_es_show_entry_detail(struct vty *vty,
+		struct bgp_evpn_es *es, json_object *json)
+{
+	if (json) {
+		json_object *json_flags;
+		json_object *json_incons;
+
+		/* Add the "brief" info first */
+		bgp_evpn_es_show_entry(vty, es, json);
+		if (es->flags & (BGP_EVPNES_OPER_UP | BGP_EVPNES_ADV_EVI)) {
+			json_flags = json_object_new_array();
+			if (es->flags & BGP_EVPNES_OPER_UP)
+				json_array_string_add(json_flags, "up");
+			if (es->flags & BGP_EVPNES_ADV_EVI)
+				json_array_string_add(json_flags,
+						      "advertiseEVI");
+			json_object_object_add(json, "flags", json_flags);
+		}
+		json_object_string_add(json, "originator_ip",
+				       inet_ntoa(es->originator_ip));
+		json_object_int_add(json, "remoteVniCount",
+				    es->remote_es_evi_cnt);
+		json_object_int_add(json, "inconsistentVniVtepCount",
+				    es->incons_evi_vtep_cnt);
+		if (es->inconsistencies) {
+			json_incons = json_object_new_array();
+			if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST)
+				json_array_string_add(json_incons,
+						      "vni-vtep-mismatch");
+			json_object_object_add(json, "inconsistencies",
+					       json_incons);
+		}
+	} else {
+		char incons_str[BGP_EVPNES_INCONS_STR_SZ];
+		char type_str[4];
+		char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+		char buf1[RD_ADDRSTRLEN];
+
+		type_str[0] = '\0';
+		if (es->flags & BGP_EVPNES_LOCAL)
+			strlcat(type_str, "L", sizeof(type_str));
+		if (es->flags & BGP_EVPNES_REMOTE)
+			strlcat(type_str, "R", sizeof(type_str));
+
+		bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str));
+		/* placeholder for an empty VTEP list */
+		if (!strlen(vtep_str))
+			strlcpy(vtep_str, "-", sizeof(vtep_str));
+
+		/* the RD is only shown for a local ES */
+		if (es->flags & BGP_EVPNES_LOCAL)
+			prefix_rd2str(&es->prd, buf1, sizeof(buf1));
+		else
+			strlcpy(buf1, "-", sizeof(buf1));
+
+		vty_out(vty, "ESI: %s\n", es->esi_str);
+		vty_out(vty, " Type: %s\n", type_str);
+		vty_out(vty, " RD: %s\n", buf1);
+		vty_out(vty, " Originator-IP: %s\n",
+			inet_ntoa(es->originator_ip));
+		vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list));
+		vty_out(vty, " Remote VNI Count: %d\n",
+			es->remote_es_evi_cnt);
+		vty_out(vty, " Inconsistent VNI VTEP Count: %d\n",
+			es->incons_evi_vtep_cnt);
+		if (es->inconsistencies) {
+			incons_str[0] = '\0';
+			if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST)
+				strlcat(incons_str, "vni-vtep-mismatch",
+					sizeof(incons_str));
+		} else {
+			strlcpy(incons_str, "-", sizeof(incons_str));
+		}
+		vty_out(vty, " Inconsistencies: %s\n",
+			incons_str);
+		vty_out(vty, " VTEPs: %s\n", vtep_str);
+		vty_out(vty, "\n");
+	}
+}
+
+/* Display all ESs in the global ES RB tree.
+ *
+ * uj selects json output (an array with one object per ES); detail selects
+ * the detailed per-ES form. The table legend and column header are only
+ * written for brief text output.
+ */
+void bgp_evpn_es_show(struct vty *vty, bool uj, bool detail)
+{
+	struct bgp_evpn_es *es;
+	json_object *es_array = NULL;
+	json_object *es_obj = NULL;
+
+	if (uj) {
+		/* create an array of ESs */
+		es_array = json_object_new_array();
+	} else if (!detail) {
+		vty_out(vty,
+			"ES Flags: L local, R remote, I inconsistent\n");
+		vty_out(vty,
+			"VTEP Flags: E ESR/Type-4, A active nexthop\n");
+		vty_out(vty,
+			"%-30s %-5s %-21s %-8s %s\n",
+			"ESI", "Flags", "RD", "#VNIs", "VTEPs");
+	}
+
+	RB_FOREACH(es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) {
+		/* create a separate json object for each ES */
+		if (uj)
+			es_obj = json_object_new_object();
+
+		if (!detail)
+			bgp_evpn_es_show_entry(vty, es, es_obj);
+		else
+			bgp_evpn_es_show_entry_detail(vty, es, es_obj);
+
+		/* add ES to the json array */
+		if (uj)
+			json_object_array_add(es_array, es_obj);
+	}
+
+	if (!uj)
+		return;
+
+	/* print the array of json-ESs */
+	vty_out(vty, "%s\n", json_object_to_json_string_ext(
+				     es_array, JSON_C_TO_STRING_PRETTY));
+	json_object_free(es_array);
+}
+
+/* Display the ES with the given ESI in detailed form.
+ *
+ * In text mode "ESI not found" is written when there is no such ES; in
+ * json mode the object is printed as is (left unfilled when the ES does
+ * not exist).
+ */
+void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj)
+{
+	json_object *es_obj = NULL;
+	struct bgp_evpn_es *es;
+
+	if (uj)
+		es_obj = json_object_new_object();
+
+	es = bgp_evpn_es_find(esi);
+	if (es)
+		bgp_evpn_es_show_entry_detail(vty, es, es_obj);
+	else if (!uj)
+		vty_out(vty, "ESI not found\n");
+
+	if (!uj)
+		return;
+
+	vty_out(vty, "%s\n", json_object_to_json_string_ext(
+				     es_obj, JSON_C_TO_STRING_PRETTY));
+	json_object_free(es_obj);
+}
+
+/*****************************************************************************/
+/* Ethernet Segment to EVI association -
+ * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI
+ * (bgpevpn->es_evi_rb_tree).
+ * 2. Each local ES-EVI entry is rxed from zebra and then used by BGP to
+ * advertise an EAD-EVI (Type-1 EVPN) route
+ * 3. The remote ES-EVI is created when a bgp_evpn_es_evi_vtep references
+ * it.
+ */
+
+/* A list of remote VTEPs is maintained for each ES-EVI. This list includes -
+ * 1. VTEPs for which we have imported the EAD-per-ES Type1 route
+ * 2. VTEPs for which we have imported the EAD-per-EVI Type1 route
+ * VTEPs for which both routes have been rxed are activated. Activation
+ * creates a NHG in the parent ES.
+ */
+/* Ordering callback for an ES-EVI's VTEP list.
+ *
+ * Compares the raw s_addr values of the two entries and returns -1, 0 or 1.
+ * A plain "a - b" on the unsigned s_addr values, narrowed to int, does not
+ * give a consistent ordering once two addresses differ by more than INT_MAX,
+ * so the result is derived from explicit comparisons instead.
+ */
+static int bgp_evpn_es_evi_vtep_cmp(void *p1, void *p2)
+{
+	const struct bgp_evpn_es_evi_vtep *evi_vtep1 = p1;
+	const struct bgp_evpn_es_evi_vtep *evi_vtep2 = p2;
+	in_addr_t ip1 = evi_vtep1->vtep_ip.s_addr;
+	in_addr_t ip2 = evi_vtep2->vtep_ip.s_addr;
+
+	return (ip1 > ip2) - (ip1 < ip2);
+}
+
+/* Allocate a VTEP entry for the given ES-EVI, record the VTEP IP and
+ * insert the entry into the ES-EVI's sorted VTEP list.
+ */
+static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_new(
+		struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip)
+{
+	struct bgp_evpn_es_evi_vtep *entry =
+		XCALLOC(MTYPE_BGP_EVPN_ES_EVI_VTEP, sizeof(*entry));
+
+	entry->es_evi = es_evi;
+	entry->vtep_ip.s_addr = vtep_ip.s_addr;
+
+	/* the list node is a member of the entry itself */
+	listnode_init(&entry->es_evi_listnode, entry);
+	listnode_add_sort(es_evi->es_evi_vtep_list, &entry->es_evi_listnode);
+
+	return entry;
+}
+
+/* Unlink the VTEP entry from its ES-EVI and free it, unless one of the
+ * BGP_EVPN_EVI_VTEP_EAD flag bits is still set on it.
+ */
+static void bgp_evpn_es_evi_vtep_free(struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+	struct bgp_evpn_es_evi *parent = evi_vtep->es_evi;
+
+	/* as long as there is some reference we can't free it */
+	if ((evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD)) != 0)
+		return;
+
+	list_delete_node(parent->es_evi_vtep_list,
+			 &evi_vtep->es_evi_listnode);
+	XFREE(MTYPE_BGP_EVPN_ES_EVI_VTEP, evi_vtep);
+}
+
+/* Look up a VTEP by IP in the ES-EVI's VTEP list; returns the matching
+ * entry or NULL when the VTEP is not part of the list.
+ */
+static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_find(
+		struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip)
+{
+	struct bgp_evpn_es_evi_vtep *cur;
+	struct listnode *ln = NULL;
+
+	for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, ln, cur)) {
+		if (cur->vtep_ip.s_addr != vtep_ip.s_addr)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* Recompute the ACTIVE flag of an ES-EVI VTEP.
+ *
+ * The VTEP is active only when every bit of BGP_EVPN_EVI_VTEP_EAD is set,
+ * i.e. both the EAD-per-ES and the EAD-per-EVI route have been rxed from
+ * it. On a transition the VTEP reference on the parent ES is added or
+ * dropped and the parent ES is queued for consistency checks.
+ */
+static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp,
+		struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+	bool was_active =
+		!!CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+	/* Both EAD-per-ES and EAD-per-EVI routes must be rxed from a PE
+	 * before it can be activated.
+	 */
+	bool now_active = (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD)
+			  == BGP_EVPN_EVI_VTEP_EAD;
+
+	if (now_active)
+		SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+	else
+		UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE);
+
+	if (was_active == now_active)
+		return;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("es %s evi %u vtep %s %s",
+			   evi_vtep->es_evi->es->esi_str,
+			   evi_vtep->es_evi->vpn->vni,
+			   inet_ntoa(evi_vtep->vtep_ip),
+			   now_active ? "active" : "inactive");
+
+	if (now_active) {
+		/* add VTEP to parent es and remember the entry */
+		evi_vtep->es_vtep = bgp_evpn_es_vtep_add(
+			bgp, evi_vtep->es_evi->es, evi_vtep->vtep_ip,
+			false /*esr*/);
+	} else if (evi_vtep->es_vtep) {
+		/* drop the reference taken at activation */
+		bgp_evpn_es_vtep_do_del(bgp, evi_vtep->es_vtep,
+					false /*esr*/);
+		evi_vtep->es_vtep = NULL;
+	}
+
+	/* queue up the parent es for background consistency checks */
+	bgp_evpn_es_cons_checks_pend_add(evi_vtep->es_evi->es);
+}
+
+/* Record that an EAD-per-ES (ead_es) or EAD-per-EVI route has been rxed
+ * from the VTEP for this ES-EVI, creating the VTEP entry if needed, and
+ * re-evaluate whether the VTEP is active.
+ */
+static void bgp_evpn_es_evi_vtep_add(struct bgp *bgp,
+		struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip,
+		bool ead_es)
+{
+	struct bgp_evpn_es_evi_vtep *entry =
+		bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip);
+
+	if (entry == NULL)
+		entry = bgp_evpn_es_evi_vtep_new(es_evi, vtep_ip);
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("add es %s evi %u vtep %s %s",
+			   entry->es_evi->es->esi_str,
+			   entry->es_evi->vpn->vni,
+			   inet_ntoa(entry->vtep_ip),
+			   ead_es ? "ead_es" : "ead_evi");
+
+	if (!ead_es)
+		SET_FLAG(entry->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI);
+	else
+		SET_FLAG(entry->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES);
+
+	bgp_evpn_es_evi_vtep_re_eval_active(bgp, entry);
+}
+
+/* Clear the EAD-per-ES (ead_es) or EAD-per-EVI flag of the VTEP on this
+ * ES-EVI, re-evaluate whether it is active and try to free the entry.
+ * No-op when the VTEP is not on the ES-EVI's list.
+ */
+static void bgp_evpn_es_evi_vtep_del(struct bgp *bgp,
+		struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip,
+		bool ead_es)
+{
+	struct bgp_evpn_es_evi_vtep *entry =
+		bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip);
+
+	if (entry == NULL)
+		return;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("del es %s evi %u vtep %s %s",
+			   entry->es_evi->es->esi_str,
+			   entry->es_evi->vpn->vni,
+			   inet_ntoa(entry->vtep_ip),
+			   ead_es ? "ead_es" : "ead_evi");
+
+	if (!ead_es)
+		UNSET_FLAG(entry->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI);
+	else
+		UNSET_FLAG(entry->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES);
+
+	bgp_evpn_es_evi_vtep_re_eval_active(bgp, entry);
+	bgp_evpn_es_evi_vtep_free(entry);
+}
+
+/* Ordering for the per-VNI ES-EVI RB tree: byte-wise comparison of the
+ * first ESI_BYTES bytes of the ESIs of the two entries' parent ESs.
+ */
+static int bgp_es_evi_rb_cmp(const struct bgp_evpn_es_evi *es_evi1,
+		const struct bgp_evpn_es_evi *es_evi2)
+{
+	const void *lhs = &es_evi1->es->esi;
+	const void *rhs = &es_evi2->es->esi;
+
+	return memcmp(lhs, rhs, ESI_BYTES);
+}
+RB_GENERATE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp);
+
+/* Find the ES-EVI for the given ES in the VNI's ES-EVI RB tree; the
+ * result is whatever RB_FIND yields for a key pointing at that ES.
+ */
+static struct bgp_evpn_es_evi *bgp_evpn_es_evi_find(struct bgp_evpn_es *es,
+		struct bgpevpn *vpn)
+{
+	struct bgp_evpn_es_evi key;
+
+	/* only the ES pointer of the key is set; the tree compares ESIs */
+	key.es = es;
+
+	return RB_FIND(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, &key);
+}
+
+/* Allocate a new ES-EVI and insert it into the per-L2-VNI RB tree and the
+ * per-ES list.
+ *
+ * Returns NULL when RB_INSERT reports a failure. The tree insertion is
+ * done before the VTEP list is created so that the failure path only has
+ * the ES-EVI structure itself to release and the list is not leaked.
+ */
+static struct bgp_evpn_es_evi *bgp_evpn_es_evi_new(struct bgp_evpn_es *es,
+		struct bgpevpn *vpn)
+{
+	struct bgp_evpn_es_evi *es_evi;
+
+	es_evi = XCALLOC(MTYPE_BGP_EVPN_ES_EVI, sizeof(*es_evi));
+
+	es_evi->es = es;
+	es_evi->vpn = vpn;
+
+	/* insert into the VNI-ESI rb tree; the tree is keyed on the ES's ESI */
+	if (RB_INSERT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi)) {
+		XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi);
+		return NULL;
+	}
+
+	/* Initialise the VTEP list */
+	es_evi->es_evi_vtep_list = list_new();
+	listset_app_node_mem(es_evi->es_evi_vtep_list);
+	es_evi->es_evi_vtep_list->cmp = bgp_evpn_es_evi_vtep_cmp;
+
+	/* add to the ES's VNI list */
+	listnode_init(&es_evi->es_listnode, es_evi);
+	listnode_add(es->es_evi_list, &es_evi->es_listnode);
+
+	return es_evi;
+}
+
+/* Remove the ES-EVI from the per-ES list and the per-L2-VNI RB tree and
+ * free it together with its VTEP list. Does nothing while the ES-EVI is
+ * still flagged local or remote.
+ */
+static void bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi)
+{
+	struct bgpevpn *vpn = es_evi->vpn;
+	struct bgp_evpn_es *es = es_evi->es;
+
+	/* cannot free the element as long as there is a local or remote
+	 * reference
+	 */
+	if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)
+	    || CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE))
+		return;
+
+	/* unlink from the ES's VNI list */
+	list_delete_node(es->es_evi_list, &es_evi->es_listnode);
+
+	/* unlink from the VNI-ESI rb tree */
+	RB_REMOVE(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi);
+
+	/* release the VTEP list */
+	list_delete(&es_evi->es_evi_vtep_list);
+
+	/* finally release the ES-EVI itself */
+	XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi);
+}
+
+/* Init local info associated with the ES-EVI: flag it local and link it on
+ * the VNI's local ES-EVI list. No-op if it is already flagged local.
+ */
+static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi)
+{
+	if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+		return;
+
+	SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL);
+
+	listnode_init(&es_evi->l2vni_listnode, es_evi);
+	listnode_add(es_evi->vpn->local_es_evi_list,
+		     &es_evi->l2vni_listnode);
+}
+
+/* Clear any local info associated with the ES-EVI: drop the local flag,
+ * unlink it from the VNI's local ES-EVI list and attempt to free it.
+ * No-op if it is not flagged local.
+ */
+static void bgp_evpn_es_evi_local_info_clear(struct bgp_evpn_es_evi *es_evi)
+{
+	struct bgpevpn *l2vni = es_evi->vpn;
+
+	if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) {
+		UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL);
+		list_delete_node(l2vni->local_es_evi_list,
+				 &es_evi->l2vni_listnode);
+
+		bgp_evpn_es_evi_free(es_evi);
+	}
+}
+
+/* Re-evaluate the "remote" flag of the ES-EVI from its VTEP list and keep
+ * the parent ES's remote ES-EVI count and remote state in step with it.
+ */
+static void bgp_evpn_es_evi_remote_info_re_eval(struct bgp_evpn_es_evi *es_evi)
+{
+	/* taken up front: es_evi may be freed below */
+	struct bgp_evpn_es *es = es_evi->es;
+	/* if there are remote VTEPs the ES-EVI is classified as "remote" */
+	bool has_vteps = listcount(es_evi->es_evi_vtep_list) != 0;
+	bool is_remote = !!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE);
+
+	/* flag already matches the list; nothing to do */
+	if (has_vteps == is_remote)
+		return;
+
+	if (has_vteps) {
+		SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE);
+		++es->remote_es_evi_cnt;
+		/* set remote on the parent es */
+		bgp_evpn_es_remote_info_re_eval(es);
+		return;
+	}
+
+	UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE);
+	if (es->remote_es_evi_cnt)
+		--es->remote_es_evi_cnt;
+	bgp_evpn_es_evi_free(es_evi);
+	/* check if "remote" can be cleared from the
+	 * parent es.
+	 */
+	bgp_evpn_es_remote_info_re_eval(es);
+}
+
+/* Remove the local reference from an ES-EVI.
+ *
+ * When a BGP EVPN instance is available the EAD-ES route is refreshed (if
+ * the ES is oper-up) and the EAD-EVI route is deleted (if the ES has the
+ * ADV_EVI flag); the local info of the ES-EVI is cleared in any case.
+ * No-op if the ES-EVI is not flagged local.
+ */
+static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi)
+{
+	struct bgp_evpn_es *es = es_evi->es;
+	struct prefix_evpn pfx;
+	struct bgp *bgp;
+
+	if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL))
+		return;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("del local es %s evi %u",
+			   es_evi->es->esi_str,
+			   es_evi->vpn->vni);
+
+	bgp = bgp_get_evpn();
+
+	/* update EAD-ES with new list of VNIs */
+	if (bgp && CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) {
+		build_evpn_type1_prefix(&pfx, BGP_EVPN_AD_ES_ETH_TAG,
+					&es->esi, es->originator_ip);
+		if (bgp_evpn_type1_route_update(bgp, es, NULL, &pfx))
+			flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+				 "%u: EAD-ES route update failure for ESI %s VNI %u",
+				 bgp->vrf_id, es->esi_str,
+				 es_evi->vpn->vni);
+	}
+
+	/* withdraw and delete EAD-EVI */
+	if (bgp && CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) {
+		build_evpn_type1_prefix(&pfx, BGP_EVPN_AD_EVI_ETH_TAG,
+					&es->esi, es->originator_ip);
+		if (bgp_evpn_type1_evi_route_delete(bgp, es, es_evi->vpn,
+						    &pfx))
+			flog_err(EC_BGP_EVPN_ROUTE_DELETE,
+				 "%u: EAD-EVI route deletion failure for ESI %s VNI %u",
+				 bgp->vrf_id, es->esi_str,
+				 es_evi->vpn->vni);
+	}
+
+	bgp_evpn_es_evi_local_info_clear(es_evi);
+}
+
+/* Remove the local association between an ES (by ESI) and a VNI.
+ *
+ * Returns 0 on success and -1, after logging an error, when the ES, the
+ * VNI or the ES-EVI itself cannot be found.
+ */
+int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni)
+{
+	char esi_buf[ESI_STR_LEN];
+	struct bgp_evpn_es *es;
+	struct bgpevpn *l2vni;
+	struct bgp_evpn_es_evi *entry;
+
+	es = bgp_evpn_es_find(esi);
+	if (es == NULL) {
+		flog_err(
+			EC_BGP_ES_CREATE,
+			"%u: Failed to deref VNI %d from ESI %s; ES not present",
+			bgp->vrf_id, vni,
+			esi_to_str(esi, esi_buf, sizeof(esi_buf)));
+		return -1;
+	}
+
+	l2vni = bgp_evpn_lookup_vni(bgp, vni);
+	if (l2vni == NULL) {
+		flog_err(
+			EC_BGP_ES_CREATE,
+			"%u: Failed to deref VNI %d from ESI %s; VNI not present",
+			bgp->vrf_id, vni, es->esi_str);
+		return -1;
+	}
+
+	entry = bgp_evpn_es_evi_find(es, l2vni);
+	if (entry == NULL) {
+		flog_err(
+			EC_BGP_ES_CREATE,
+			"%u: Failed to deref VNI %d from ESI %s; ES-VNI not present",
+			bgp->vrf_id, vni, es->esi_str);
+		return -1;
+	}
+
+	bgp_evpn_local_es_evi_do_del(entry);
+	return 0;
+}
+
+/* Create a local ES-EVI and advertise the corresponding EAD routes.
+ *
+ * Returns 0 on success (including when the ES-EVI is already local) and -1
+ * when the ES or the VNI is not present or the ES-EVI cannot be created.
+ * A failure to create or update a route is logged but does not change the
+ * return value.
+ */
+int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni)
+{
+	char esi_buf[ESI_STR_LEN];
+	struct prefix_evpn pfx;
+	struct bgp_evpn_es *es;
+	struct bgpevpn *l2vni;
+	struct bgp_evpn_es_evi *entry;
+
+	es = bgp_evpn_es_find(esi);
+	if (es == NULL) {
+		flog_err(
+			EC_BGP_ES_CREATE,
+			"%u: Failed to associate VNI %d with ESI %s; ES not present",
+			bgp->vrf_id, vni,
+			esi_to_str(esi, esi_buf, sizeof(esi_buf)));
+		return -1;
+	}
+
+	l2vni = bgp_evpn_lookup_vni(bgp, vni);
+	if (l2vni == NULL) {
+		flog_err(
+			EC_BGP_ES_CREATE,
+			"%u: Failed to associate VNI %d with ESI %s; VNI not present",
+			bgp->vrf_id, vni, es->esi_str);
+		return -1;
+	}
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("add local es %s evi %u",
+			   es->esi_str, vni);
+
+	entry = bgp_evpn_es_evi_find(es, l2vni);
+	if (entry == NULL) {
+		entry = bgp_evpn_es_evi_new(es, l2vni);
+		if (entry == NULL)
+			return -1;
+	} else if (CHECK_FLAG(entry->flags, BGP_EVPNES_EVI_LOCAL)) {
+		/* dup */
+		return 0;
+	}
+
+	bgp_evpn_es_evi_local_info_set(entry);
+
+	/* generate an EAD-EVI for this new VNI */
+	build_evpn_type1_prefix(&pfx, BGP_EVPN_AD_EVI_ETH_TAG,
+				&es->esi, es->originator_ip);
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)
+	    && bgp_evpn_type1_route_update(bgp, es, l2vni, &pfx))
+		flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+			 "%u: EAD-EVI route creation failure for ESI %s VNI %u",
+			 bgp->vrf_id, es->esi_str, vni);
+
+	/* update EAD-ES */
+	build_evpn_type1_prefix(&pfx, BGP_EVPN_AD_ES_ETH_TAG,
+				&es->esi, es->originator_ip);
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)
+	    && bgp_evpn_type1_route_update(bgp, es, NULL, &pfx))
+		flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+			 "%u: EAD-ES route creation failure for ESI %s VNI %u",
+			 bgp->vrf_id, es->esi_str, vni);
+
+	return 0;
+}
+
+/* Add a remote ES-EVI entry. In practice this is a remote VTEP add: the ES
+ * and the ES-EVI are implicitly created on the first VTEP's reference.
+ *
+ * Returns 0 on success (or when there is no VNI to attach to) and -1 if the
+ * ES or the ES-EVI could not be created.
+ */
+int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn,
+		const struct prefix_evpn *p)
+{
+	const esi_t *esi = &p->prefix.ead_addr.esi;
+	struct bgp_evpn_es_evi *es_evi;
+	struct bgp_evpn_es *es;
+	char esi_buf[ESI_STR_LEN];
+
+	/* local EAD-ES need not be sent back to zebra */
+	if (vpn == NULL)
+		return 0;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) {
+		const char *rt_name =
+			p->prefix.ead_addr.eth_tag ? "ead-es" : "ead-evi";
+
+		zlog_debug("add remote %s es %s evi %u vtep %s",
+				rt_name,
+				esi_to_str(esi, esi_buf, sizeof(esi_buf)),
+				vpn->vni,
+				inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4));
+	}
+
+	/* find the ES, creating it by reference if this is the first use */
+	es = bgp_evpn_es_find(esi);
+	if (es == NULL)
+		es = bgp_evpn_es_new(bgp, esi);
+	if (es == NULL) {
+		flog_err(EC_BGP_ES_CREATE,
+				"%u: Failed to allocate ES entry for ESI %s - at remote ES Add",
+				bgp->vrf_id,
+				esi_to_str(esi, esi_buf, sizeof(esi_buf)));
+		return -1;
+	}
+
+	/* same for the ES-EVI */
+	es_evi = bgp_evpn_es_evi_find(es, vpn);
+	if (es_evi == NULL)
+		es_evi = bgp_evpn_es_evi_new(es, vpn);
+	if (es_evi == NULL) {
+		bgp_evpn_es_free(es);
+		return -1;
+	}
+
+	/* a non-zero ethernet tag identifies the EAD-per-ES route */
+	bgp_evpn_es_evi_vtep_add(bgp, es_evi,
+			p->prefix.ead_addr.ip.ipaddr_v4,
+			p->prefix.ead_addr.eth_tag != 0);
+
+	bgp_evpn_es_evi_remote_info_re_eval(es_evi);
+	return 0;
+}
+
+/* A remote VTEP has withdrawn. The es-evi-vtep is deleted here; the parent
+ * es-evi is freed up implicitly on the last VTEP's deref.
+ *
+ * Always returns 0; an unknown ES or ES-EVI is silently ignored.
+ */
+int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn,
+		const struct prefix_evpn *p)
+{
+	const esi_t *esi = &p->prefix.ead_addr.esi;
+	struct bgp_evpn_es_evi *es_evi;
+	struct bgp_evpn_es *es;
+	char esi_buf[ESI_STR_LEN];
+
+	/* local EAD-ES need not be sent back to zebra */
+	if (vpn == NULL)
+		return 0;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) {
+		const char *rt_name =
+			p->prefix.ead_addr.eth_tag ? "ead-es" : "ead-evi";
+
+		zlog_debug("del remote %s es %s evi %u vtep %s",
+				rt_name,
+				esi_to_str(esi, esi_buf, sizeof(esi_buf)),
+				vpn->vni,
+				inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4));
+	}
+
+	es = bgp_evpn_es_find(esi);
+	es_evi = es ? bgp_evpn_es_evi_find(es, vpn) : NULL;
+	if (es_evi == NULL)
+		/* XXX - error logs */
+		return 0;
+
+	/* a non-zero ethernet tag identifies the EAD-per-ES route */
+	bgp_evpn_es_evi_vtep_del(bgp, es_evi,
+			p->prefix.ead_addr.ip.ipaddr_v4,
+			p->prefix.ead_addr.eth_tag != 0);
+	bgp_evpn_es_evi_remote_info_re_eval(es_evi);
+	return 0;
+}
+
+/* Set up the ES state kept per L2-VNI: an empty ES-EVI RB tree and the
+ * list of local ES-EVIs.
+ */
+void bgp_evpn_vni_es_init(struct bgpevpn *vpn)
+{
+	struct list *local_list = list_new();
+
+	/* local list maintained for quick walks by type */
+	listset_app_node_mem(local_list);
+	vpn->local_es_evi_list = local_list;
+
+	/* RB tree holding all ES-EVIs of this VNI */
+	RB_INIT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree);
+}
+
+/* Tear down the ES info kept per L2-VNI: drop the local state of every
+ * ES-EVI in the VNI's RB tree, then release the local ES-EVI list.
+ */
+void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn)
+{
+	struct bgp_evpn_es_evi *cur;
+	struct bgp_evpn_es_evi *following;
+
+	/* _SAFE variant: the successor is fetched before cur is handled */
+	RB_FOREACH_SAFE(cur, bgp_es_evi_rb_head,
+			&vpn->es_evi_rb_tree, following)
+		bgp_evpn_local_es_evi_do_del(cur);
+
+	list_delete(&vpn->local_es_evi_list);
+}
+
+/* Render the VTEPs attached to an ES-EVI as a comma-separated list of
+ * "<vtep-ip>(<flags>)" items. The flags are E (EAD-per-ES route rxed) and
+ * V (EAD-per-EVI route rxed), or "-" when neither is set.
+ *
+ * vtep_str/vtep_str_size describe the caller's output buffer; anything that
+ * does not fit is truncated by strlcat(). The size is taken as size_t so
+ * that a caller's sizeof() above 255 is not silently narrowed.
+ *
+ * Returns vtep_str (an empty string if the ES-EVI has no VTEPs).
+ */
+static char *bgp_evpn_es_evi_vteps_str(char *vtep_str,
+		struct bgp_evpn_es_evi *es_evi,
+		size_t vtep_str_size)
+{
+	char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ];
+	struct listnode *node;
+	struct bgp_evpn_es_evi_vtep *evi_vtep;
+	bool first = true;
+
+	vtep_str[0] = '\0';
+	for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) {
+		vtep_flag_str[0] = '\0';
+		if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES)
+			strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str));
+		if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI)
+			strlcat(vtep_flag_str, "V", sizeof(vtep_flag_str));
+
+		/* placeholder when no EAD route has been seen */
+		if (!strnlen(vtep_flag_str, sizeof(vtep_flag_str)))
+			strlcpy(vtep_flag_str, "-", sizeof(vtep_flag_str));
+
+		/* separator goes in front of every item but the first */
+		if (first)
+			first = false;
+		else
+			strlcat(vtep_str, ",", vtep_str_size);
+		strlcat(vtep_str, inet_ntoa(evi_vtep->vtep_ip), vtep_str_size);
+		strlcat(vtep_str, "(", vtep_str_size);
+		strlcat(vtep_str, vtep_flag_str, vtep_str_size);
+		strlcat(vtep_str, ")", vtep_str_size);
+	}
+
+	return vtep_str;
+}
+
+/* Append one VTEP of an ES-EVI to the json array json_vteps.
+ *
+ * The entry carries "vtep_ip" and, if any EAD route has been received from
+ * the VTEP, a "flags" array with "ead-per-es" and/or "ead-per-evi".
+ */
+static void bgp_evpn_es_evi_json_vtep_fill(json_object *json_vteps,
+		struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+	json_object *json_vtep_entry;
+	json_object *json_flags;
+
+	json_vtep_entry = json_object_new_object();
+
+	json_object_string_add(json_vtep_entry,
+			"vtep_ip",
+			inet_ntoa(evi_vtep->vtep_ip));
+	if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD_PER_ES |
+				BGP_EVPN_EVI_VTEP_EAD_PER_EVI)) {
+		json_flags = json_object_new_array();
+		if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES)
+			json_array_string_add(json_flags, "ead-per-es");
+		/* was misspelt "ed-per-evi" */
+		if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI)
+			json_array_string_add(json_flags, "ead-per-evi");
+		json_object_object_add(json_vtep_entry,
+				"flags", json_flags);
+	}
+
+	json_object_array_add(json_vteps,
+			json_vtep_entry);
+}
+
+/* Show one ES-EVI in the brief format.
+ *
+ * With json set the entry's esi, vni, type and vteps are added to that
+ * object; otherwise a single table row is written to the vty.
+ */
+static void bgp_evpn_es_evi_show_entry(struct vty *vty,
+		struct bgp_evpn_es_evi *es_evi, json_object *json)
+{
+	struct bgp_evpn_es_evi_vtep *vtep;
+	struct listnode *ln;
+	json_object *json_vteps;
+	json_object *json_types;
+
+	if (!json) {
+		char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+		char type_str[4];
+
+		/* L local, R remote, I inconsistent */
+		type_str[0] = '\0';
+		if (es_evi->flags & BGP_EVPNES_EVI_LOCAL)
+			strlcat(type_str, "L", sizeof(type_str));
+		if (es_evi->flags & BGP_EVPNES_EVI_REMOTE)
+			strlcat(type_str, "R", sizeof(type_str));
+		if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST)
+			strlcat(type_str, "I", sizeof(type_str));
+
+		bgp_evpn_es_evi_vteps_str(vtep_str, es_evi, sizeof(vtep_str));
+
+		vty_out(vty, "%-8d %-30s %-5s %s\n",
+				es_evi->vpn->vni, es_evi->es->esi_str,
+				type_str, vtep_str);
+		return;
+	}
+
+	json_object_string_add(json, "esi", es_evi->es->esi_str);
+	json_object_int_add(json, "vni", es_evi->vpn->vni);
+
+	/* "type" is only present if the entry is local and/or remote */
+	if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE)) {
+		json_types = json_object_new_array();
+		if (es_evi->flags & BGP_EVPNES_EVI_LOCAL)
+			json_array_string_add(json_types, "local");
+		if (es_evi->flags & BGP_EVPNES_EVI_REMOTE)
+			json_array_string_add(json_types, "remote");
+		json_object_object_add(json, "type", json_types);
+	}
+
+	/* "vteps" is only present if at least one VTEP is attached */
+	if (listcount(es_evi->es_evi_vtep_list)) {
+		json_vteps = json_object_new_array();
+		for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, ln, vtep))
+			bgp_evpn_es_evi_json_vtep_fill(json_vteps, vtep);
+		json_object_object_add(json, "vteps", json_vteps);
+	}
+}
+
+/* Show one ES-EVI in the detailed format.
+ *
+ * json output: the brief fields plus a "flags" array holding
+ * "es-vtep-mismatch" when the VTEP list is inconsistent with the ES's.
+ * text output: a multi-line record with type, inconsistencies and VTEPs;
+ * an empty VTEP list is shown as "-".
+ */
+static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty,
+		struct bgp_evpn_es_evi *es_evi, json_object *json)
+{
+	if (json) {
+		json_object *json_flags;
+
+		/* Add the "brief" info first */
+		bgp_evpn_es_evi_show_entry(vty, es_evi, json);
+		if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) {
+			json_flags = json_object_new_array();
+			json_array_string_add(json_flags, "es-vtep-mismatch");
+			json_object_object_add(json, "flags", json_flags);
+		}
+	} else {
+		char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
+		char type_str[4];
+
+		type_str[0] = '\0';
+		if (es_evi->flags & BGP_EVPNES_EVI_LOCAL)
+			strlcat(type_str, "L", sizeof(type_str));
+		if (es_evi->flags & BGP_EVPNES_EVI_REMOTE)
+			strlcat(type_str, "R", sizeof(type_str));
+
+		bgp_evpn_es_evi_vteps_str(vtep_str, es_evi, sizeof(vtep_str));
+		/* bound the copy by the destination buffer, not by type_str */
+		if (!strlen(vtep_str))
+			strlcpy(vtep_str, "-", sizeof(vtep_str));
+
+		vty_out(vty, "VNI: %d ESI: %s\n",
+				es_evi->vpn->vni, es_evi->es->esi_str);
+		vty_out(vty, " Type: %s\n", type_str);
+		vty_out(vty, " Inconsistencies: %s\n",
+				(es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) ?
+				"es-vtep-mismatch":"-");
+		vty_out(vty, " VTEPs: %s\n", vtep_str);
+		vty_out(vty, "\n");
+	}
+}
+
+/* Show every ES-EVI of one VNI, in brief or detailed form. When json_array
+ * is set each ES-EVI gets its own json object appended to that array;
+ * otherwise the entries are written to the vty.
+ */
+static void bgp_evpn_es_evi_show_one_vni(struct bgpevpn *vpn, struct vty *vty,
+		json_object *json_array, bool detail)
+{
+	struct bgp_evpn_es_evi *entry;
+	json_object *json_entry = NULL;
+
+	RB_FOREACH(entry, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) {
+		/* a separate json object per ES */
+		if (json_array)
+			json_entry = json_object_new_object();
+
+		if (!detail)
+			bgp_evpn_es_evi_show_entry(vty, entry, json_entry);
+		else
+			bgp_evpn_es_evi_show_entry_detail(vty, entry,
+					json_entry);
+
+		/* hand the filled-in object over to the array */
+		if (json_array)
+			json_object_array_add(json_array, json_entry);
+	}
+}
+
+/* Walker context handed to bgp_evpn_es_evi_show_one_vni_hash_cb() while
+ * iterating over the VNI hash for the ES-EVI show command.
+ */
+struct es_evi_show_ctx {
+ /* vty the output is written to */
+ struct vty *vty;
+ /* json array collecting the entries; NULL for text output */
+ json_object *json;
+ /* non-zero selects the detailed per-entry format */
+ int detail;
+};
+
+/* hash_iterate() callback: show the ES-EVIs of the VNI stored in the bucket
+ * using the vty/json/detail settings from the walker context.
+ */
+static void bgp_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket,
+		void *ctxt)
+{
+	struct es_evi_show_ctx *show_ctx = ctxt;
+	struct bgpevpn *vni_entry = bucket->data;
+
+	bgp_evpn_es_evi_show_one_vni(vni_entry, show_ctx->vty,
+			show_ctx->json, show_ctx->detail);
+}
+
+/* Display the ES-EVIs of all VNIs.
+ * uj selects json output (an array of ES-EVI objects); detail selects the
+ * detailed per-entry format. The table legend and header are only printed
+ * for brief text output.
+ */
+void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail)
+{
+	struct es_evi_show_ctx wctx;
+	struct bgp *bgp_evpn;
+	json_object *json_array = NULL;
+
+	/* create an array of ES-EVIs */
+	if (uj)
+		json_array = json_object_new_array();
+
+	wctx.vty = vty;
+	wctx.json = json_array;
+	wctx.detail = detail;
+
+	bgp_evpn = bgp_get_evpn();
+
+	if (!(json_array || detail)) {
+		vty_out(vty, "Flags: L local, R remote, I inconsistent\n");
+		vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n");
+		vty_out(vty, "%-8s %-30s %-5s %s\n",
+				"VNI", "ESI", "Flags", "VTEPs");
+	}
+
+	/* nothing to walk without an EVPN instance */
+	if (bgp_evpn)
+		hash_iterate(bgp_evpn->vnihash,
+				bgp_evpn_es_evi_show_one_vni_hash_cb,
+				&wctx);
+
+	if (!uj)
+		return;
+
+	vty_out(vty, "%s\n", json_object_to_json_string_ext(
+				json_array, JSON_C_TO_STRING_PRETTY));
+	json_object_free(json_array);
+}
+
+/* Display the ES-EVIs of one specific VNI.
+ * uj selects json output; detail selects the detailed per-entry format.
+ * An unknown VNI yields "VNI not found" in text mode and an empty array in
+ * json mode.
+ */
+void bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni,
+		bool uj, bool detail)
+{
+	json_object *json_array = NULL;
+	struct bgpevpn *vpn = NULL;
+	struct bgp *bgp_evpn;
+
+	/* create an array of ES-EVIs */
+	if (uj)
+		json_array = json_object_new_array();
+
+	bgp_evpn = bgp_get_evpn();
+	if (bgp_evpn)
+		vpn = bgp_evpn_lookup_vni(bgp_evpn, vni);
+
+	if (!vpn) {
+		if (!uj)
+			vty_out(vty, "VNI not found\n");
+	} else {
+		/* legend and header only for brief text output */
+		if (!(json_array || detail)) {
+			vty_out(vty, "Flags: L local, R remote, I inconsistent\n");
+			vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n");
+			vty_out(vty, "%-8s %-30s %-5s %s\n",
+					"VNI", "ESI", "Flags", "VTEPs");
+		}
+
+		bgp_evpn_es_evi_show_one_vni(vpn, vty, json_array, detail);
+	}
+
+	if (!uj)
+		return;
+
+	vty_out(vty, "%s\n", json_object_to_json_string_ext(
+				json_array, JSON_C_TO_STRING_PRETTY));
+	json_object_free(json_array);
+}
+
+/*****************************************************************************
+ * Ethernet Segment Consistency checks
+ * Consistency checking is done to detect misconfig or mis-cabling. When
+ * an inconsistency is detected it is simply logged (and displayed via
+ * show commands) at this point. A more drastic action can be executed (based
+ * on user config) in the future.
+ */
+/* Queue the ES for a background consistency check by appending it to the
+ * tail of bgp_mh_info->pend_es_list. No-op if consistency checking is
+ * disabled or the ES is already queued.
+ */
+static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es)
+{
+	struct list *pend_list;
+
+	/* checking not enabled, or already pending: nothing to do */
+	if (!bgp_mh_info->consistency_checking
+	    || CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND))
+		return;
+
+	SET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND);
+
+	pend_list = bgp_mh_info->pend_es_list;
+	listnode_init(&es->pend_es_listnode, es);
+	listnode_add_after(pend_list, listtail_unchecked(pend_list),
+			&es->pend_es_listnode);
+}
+
+/* Take the ES off the pending consistency-check list, if it is on it */
+static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es)
+{
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND)) {
+		UNSET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND);
+		list_delete_node(bgp_mh_info->pend_es_list,
+				&es->pend_es_listnode);
+	}
+}
+
+/* Count the VTEPs on the ES-EVI's list that are flagged active */
+static uint32_t bgp_evpn_es_evi_get_active_vtep_cnt(
+		struct bgp_evpn_es_evi *es_evi)
+{
+	uint32_t active = 0;
+	struct listnode *ln;
+	struct bgp_evpn_es_evi_vtep *vtep;
+
+	for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, ln, vtep)) {
+		if (!CHECK_FLAG(vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE))
+			continue;
+		active++;
+	}
+
+	return active;
+}
+
+/* Count the VTEPs on the ES's list that are flagged active */
+static uint32_t bgp_evpn_es_get_active_vtep_cnt(struct bgp_evpn_es *es)
+{
+	uint32_t active = 0;
+	struct listnode *ln;
+	struct bgp_evpn_es_vtep *vtep;
+
+	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, ln, vtep)) {
+		if (!CHECK_FLAG(vtep->flags, BGP_EVPNES_VTEP_ACTIVE))
+			continue;
+		active++;
+	}
+
+	return active;
+}
+
+/* Return the first active VTEP on the ES's list that follows es_vtep, or
+ * the first active VTEP of the list when es_vtep is NULL. Returns NULL once
+ * there are no more active VTEPs.
+ */
+static struct bgp_evpn_es_vtep *bgp_evpn_es_get_next_active_vtep(
+		struct bgp_evpn_es *es, struct bgp_evpn_es_vtep *es_vtep)
+{
+	struct bgp_evpn_es_vtep *candidate;
+	struct listnode *cursor;
+
+	/* resume after the given entry, or start from the head */
+	cursor = es_vtep ? listnextnode_unchecked(&es_vtep->es_listnode)
+			 : listhead(es->es_vtep_list);
+
+	while (cursor) {
+		candidate = listgetdata(cursor);
+		if (CHECK_FLAG(candidate->flags, BGP_EVPNES_VTEP_ACTIVE))
+			return candidate;
+		cursor = listnextnode_unchecked(cursor);
+	}
+
+	return NULL;
+}
+
+/* Return the first active VTEP on the ES-EVI's list that follows evi_vtep,
+ * or the first active VTEP of the list when evi_vtep is NULL. Returns NULL
+ * once there are no more active VTEPs.
+ */
+static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_get_next_active_vtep(
+		struct bgp_evpn_es_evi *es_evi,
+		struct bgp_evpn_es_evi_vtep *evi_vtep)
+{
+	struct bgp_evpn_es_evi_vtep *candidate;
+	struct listnode *cursor;
+
+	/* resume after the given entry, or start from the head */
+	cursor = evi_vtep
+			 ? listnextnode_unchecked(&evi_vtep->es_evi_listnode)
+			 : listhead(es_evi->es_evi_vtep_list);
+
+	while (cursor) {
+		candidate = listgetdata(cursor);
+		if (CHECK_FLAG(candidate->flags, BGP_EVPN_EVI_VTEP_ACTIVE))
+			return candidate;
+		cursor = listnextnode_unchecked(cursor);
+	}
+
+	return NULL;
+}
+
+/* Flag the ES-EVI as having a VTEP list that does not match its ES and
+ * account for it on the parent ES. No-op if the ES-EVI is already flagged.
+ */
+static void bgp_evpn_es_evi_set_inconsistent(struct bgp_evpn_es_evi *es_evi)
+{
+	struct bgp_evpn_es *parent = es_evi->es;
+
+	if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST))
+		return;
+
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+		zlog_debug("inconsistency detected - es %s evi %u vtep list mismatch",
+				parent->esi_str,
+				es_evi->vpn->vni);
+	SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST);
+
+	/* update parent ES with the inconsistency setting; the ES-level
+	 * message is only logged for the first mismatching EVI
+	 */
+	if (BGP_DEBUG(evpn_mh, EVPN_MH_ES) && !parent->incons_evi_vtep_cnt)
+		zlog_debug("inconsistency detected - es %s vtep list mismatch",
+				parent->esi_str);
+	parent->incons_evi_vtep_cnt++;
+	SET_FLAG(parent->inconsistencies, BGP_EVPNES_INCONS_VTEP_LIST);
+}
+
+/* Re-evaluate the VTEP-list consistency of one ES.
+ *
+ * The ES-level inconsistency state is cleared first. Then, for every ES-EVI
+ * linked to the ES, the active VTEPs of the ES-EVI are compared against the
+ * active VTEPs of the ES: by count and, if the counts agree, pairwise by
+ * walking both lists in step. Any mismatch marks the ES-EVI (and through it
+ * the ES) as inconsistent.
+ *
+ * Returns the number of ES-EVIs examined.
+ */
+static uint32_t bgp_evpn_es_run_consistency_checks(struct bgp_evpn_es *es)
+{
+ int proc_cnt = 0;
+ int es_active_vtep_cnt;
+ int evi_active_vtep_cnt;
+ struct bgp_evpn_es_evi *es_evi;
+ struct listnode *evi_node;
+ struct bgp_evpn_es_vtep *es_vtep;
+ struct bgp_evpn_es_evi_vtep *evi_vtep;
+
+ /* reset the inconsistencies and re-evaluate */
+ es->incons_evi_vtep_cnt = 0;
+ es->inconsistencies = 0;
+
+ es_active_vtep_cnt = bgp_evpn_es_get_active_vtep_cnt(es);
+ for (ALL_LIST_ELEMENTS_RO(es->es_evi_list,
+ evi_node, es_evi)) {
+ ++proc_cnt;
+
+ /* reset the inconsistencies on the EVI and re-evaluate*/
+ UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST);
+
+ /* cheap check first: differing counts can never match */
+ evi_active_vtep_cnt =
+ bgp_evpn_es_evi_get_active_vtep_cnt(es_evi);
+ if (es_active_vtep_cnt != evi_active_vtep_cnt) {
+ bgp_evpn_es_evi_set_inconsistent(es_evi);
+ continue;
+ }
+
+ /* both lists are empty; nothing to compare */
+ if (!es_active_vtep_cnt)
+ continue;
+
+ /* walk the active VTEPs of the ES and of the ES-EVI in step and
+ * compare the IPs position by position.
+ * NOTE(review): this presumes both lists are kept in the same
+ * order - confirm against the list insertion code.
+ */
+ es_vtep = NULL;
+ evi_vtep = NULL;
+ while ((es_vtep = bgp_evpn_es_get_next_active_vtep(
+ es, es_vtep))) {
+ evi_vtep = bgp_evpn_es_evi_get_next_active_vtep(es_evi,
+ evi_vtep);
+ if (!evi_vtep) {
+ bgp_evpn_es_evi_set_inconsistent(es_evi);
+ break;
+ }
+ if (es_vtep->vtep_ip.s_addr !=
+ evi_vtep->vtep_ip.s_addr) {
+ /* inconsistency detected; set it and move
+ * to the next evi
+ */
+ bgp_evpn_es_evi_set_inconsistent(es_evi);
+ break;
+ }
+ }
+ }
+
+ return proc_cnt;
+}
+
+/* Timer callback that works through the ESs queued for consistency checks.
+ *
+ * ESs are taken off bgp_mh_info->pend_es_list one at a time. To bound the
+ * work done in one invocation the walk stops once more than 500 items (ESs
+ * plus the ES-EVIs examined for them) have been processed; ESs that were
+ * not reached stay queued. The timer is re-armed unconditionally.
+ *
+ * The thread argument is not used. Always returns 0.
+ */
+static int bgp_evpn_run_consistency_checks(struct thread *t)
+{
+	int proc_cnt = 0;
+	struct listnode *node;
+	struct listnode *nextnode;
+	struct bgp_evpn_es *es;
+
+	for (ALL_LIST_ELEMENTS(bgp_mh_info->pend_es_list,
+				node, nextnode, es)) {
+		/* the ES itself counts towards the per-run budget */
+		++proc_cnt;
+		/* run consistency checks on the ES and remove it from the
+		 * pending list
+		 */
+		proc_cnt += bgp_evpn_es_run_consistency_checks(es);
+		bgp_evpn_es_cons_checks_pend_del(es);
+		if (proc_cnt > 500)
+			break;
+	}
+
+	/* restart the timer */
+	thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, NULL,
+			BGP_EVPN_CONS_CHECK_INTERVAL,
+			&bgp_mh_info->t_cons_check);
+
+	return 0;
+}
+
+/*****************************************************************************/
+/* Set up the EVPN multihoming state kept in bgp_master (bm->mh_info):
+ * allocate the info block, initialize the ES RB tree and the local/pending
+ * ES lists, set the default config knobs and, with consistency checking
+ * enabled, arm the periodic consistency-check timer.
+ */
+void bgp_evpn_mh_init(void)
+{
+ /* bgp_mh_info used below is a macro for bm->mh_info */
+ bm->mh_info = XCALLOC(MTYPE_BGP_EVPN_MH_INFO, sizeof(*bm->mh_info));
+
+ /* setup ES tables */
+ RB_INIT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree);
+ /* local ES list */
+ bgp_mh_info->local_es_list = list_new();
+ listset_app_node_mem(bgp_mh_info->local_es_list);
+ /* list of ESs with pending processing */
+ bgp_mh_info->pend_es_list = list_new();
+ listset_app_node_mem(bgp_mh_info->pend_es_list);
+
+ /* config knobs - XXX add cli to control it */
+ bgp_mh_info->ead_evi_adv_for_down_links = true;
+ bgp_mh_info->consistency_checking = true;
+
+ if (bgp_mh_info->consistency_checking)
+ thread_add_timer(bm->master, bgp_evpn_run_consistency_checks,
+ NULL, BGP_EVPN_CONS_CHECK_INTERVAL,
+ &bgp_mh_info->t_cons_check);
+
+ /* zero_esi_buf is defined outside this function; presumably it is
+ * the storage behind zero_esi - TODO confirm
+ */
+ memset(&zero_esi_buf, 0, sizeof(esi_t));
+}
+
+/* Tear down the EVPN multihoming state: delete the local state of every ES
+ * in the global RB tree (only if an EVPN instance exists), cancel the
+ * consistency-check timer, release the local/pending ES lists and free the
+ * info block.
+ */
+void bgp_evpn_mh_finish(void)
+{
+ struct bgp_evpn_es *es;
+ struct bgp_evpn_es *es_next;
+ struct bgp *bgp;
+
+ bgp = bgp_get_evpn();
+ if (bgp) {
+ /* _SAFE walk: the next entry is saved before es is processed */
+ RB_FOREACH_SAFE(es, bgp_es_rb_head,
+ &bgp_mh_info->es_rb_tree, es_next) {
+ /* XXX - need to force free remote ESs here */
+ bgp_evpn_local_es_do_del(bgp, es);
+ }
+ }
+ /* stop the periodic timer before the lists it walks go away */
+ thread_cancel(bgp_mh_info->t_cons_check);
+ list_delete(&bgp_mh_info->local_es_list);
+ list_delete(&bgp_mh_info->pend_es_list);
+
+ XFREE(MTYPE_BGP_EVPN_MH_INFO, bgp_mh_info);
+}
diff --git a/bgpd/bgp_evpn_mh.h b/bgpd/bgp_evpn_mh.h
new file mode 100644
index 0000000000..93355d495a
--- /dev/null
+++ b/bgpd/bgp_evpn_mh.h
@@ -0,0 +1,299 @@
+/* EVPN header for multihoming procedures
+ *
+ * Copyright (C) 2019 Cumulus Networks
+ * Anuradha Karuppiah
+ *
+ * This file is part of FRRouting.
+ *
+ * FRRouting is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRRouting is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef _FRR_BGP_EVPN_MH_H
+#define _FRR_BGP_EVPN_MH_H
+
+#include "vxlan.h"
+#include "bgpd.h"
+#include "bgp_evpn.h"
+#include "bgp_evpn_private.h"
+
+/* Ethernet tag used when building the EAD-per-ES type-1 prefix */
+#define BGP_EVPN_AD_ES_ETH_TAG 0xffffffff
+/* Ethernet tag used when building the EAD-per-EVI type-1 prefix */
+#define BGP_EVPN_AD_EVI_ETH_TAG 0
+
+/* NOTE(review): presumably the buffer size for an ES inconsistency string;
+ * no user is visible near this definition - confirm
+ */
+#define BGP_EVPNES_INCONS_STR_SZ 80
+/* buffer size for the flag string of a single VTEP */
+#define BGP_EVPN_FLAG_STR_SZ 5
+/* extra room for the flag strings of ES_VTEP_MAX_CNT VTEPs */
+#define BGP_EVPN_VTEPS_FLAG_STR_SZ (BGP_EVPN_FLAG_STR_SZ * ES_VTEP_MAX_CNT)
+
+/* interval handed to thread_add_timer() for the periodic consistency
+ * checks (presumably seconds - confirm against the thread API)
+ */
+#define BGP_EVPN_CONS_CHECK_INTERVAL 60
+
+
+/* Ethernet Segment entry -
+ * - Local and remote ESs are maintained in a global RB tree,
+ * bgp_mh_info->es_rb_tree using ESI as key
+ * - Local ESs are received from zebra (BGP_EVPNES_LOCAL)
+ * - Remote ESs are implicitly created (by reference) by a remote ES-EVI
+ * (BGP_EVPNES_REMOTE)
+ * - An ES can be simultaneously LOCAL and REMOTE; in fact all LOCAL ESs are
+ * expected to have REMOTE ES peers.
+ */
+struct bgp_evpn_es {
+ /* Ethernet Segment Identifier */
+ esi_t esi;
+ /* string form of the ESI, used in logs and show output */
+ char esi_str[ESI_STR_LEN];
+
+ /* es flags */
+ uint32_t flags;
+ /* created via zebra config */
+#define BGP_EVPNES_LOCAL (1 << 0)
+ /* created implicitly by a remote ES-EVI reference */
+#define BGP_EVPNES_REMOTE (1 << 1)
+ /* local ES link is oper-up */
+#define BGP_EVPNES_OPER_UP (1 << 2)
+ /* enable generation of EAD-EVI routes */
+#define BGP_EVPNES_ADV_EVI (1 << 3)
+ /* consistency checks pending */
+#define BGP_EVPNES_CONS_CHECK_PEND (1 << 4)
+
+ /* memory used for adding the es to bgp_mh_info->es_rb_tree */
+ RB_ENTRY(bgp_evpn_es) rb_node;
+
+ /* [EVPNES_LOCAL] memory used for linking the es to
+ * bgp_mh_info->local_es_list
+ */
+ struct listnode es_listnode;
+
+ /* memory used for linking the es to "processing" pending list
+ * bgp_mh_info->pend_es_list
+ */
+ struct listnode pend_es_listnode;
+
+ /* [EVPNES_LOCAL] Id for deriving the RD automatically for this ESI */
+ uint16_t rd_id;
+
+ /* [EVPNES_LOCAL] RD for this ES */
+ struct prefix_rd prd;
+
+ /* [EVPNES_LOCAL] originator ip address */
+ struct in_addr originator_ip;
+
+ /* [EVPNES_LOCAL] Route table for EVPN routes for this ESI-
+ * - Type-4 local and remote routes
+ * - Type-1 local routes
+ */
+ struct bgp_table *route_table;
+
+ /* list of PEs (bgp_evpn_es_vtep) attached to the ES */
+ struct list *es_vtep_list;
+
+ /* List of ES-EVIs associated with this ES */
+ struct list *es_evi_list;
+
+ /* Number of remote VNIs referencing this ES */
+ uint32_t remote_es_evi_cnt;
+
+ /* bitmask of inconsistencies detected on this ES */
+ uint32_t inconsistencies;
+ /* there are one or more EVIs whose VTEP list doesn't match
+ * with the ES's VTEP list
+ */
+#define BGP_EVPNES_INCONS_VTEP_LIST (1 << 0)
+
+ /* number of es-evi entries whose VTEP list doesn't match
+ * with the ES's
+ */
+ uint32_t incons_evi_vtep_cnt;
+
+ QOBJ_FIELDS
+};
+DECLARE_QOBJ_TYPE(bgp_evpn_es)
+RB_HEAD(bgp_es_rb_head, bgp_evpn_es);
+RB_PROTOTYPE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp);
+
+/* PE attached to an ES; entries are linked on es->es_vtep_list */
+struct bgp_evpn_es_vtep {
+ struct bgp_evpn_es *es; /* parent ES */
+ /* IP address identifying the PE (VTEP) */
+ struct in_addr vtep_ip;
+
+ uint32_t flags;
+ /* Rxed a Type4 route from this PE */
+#define BGP_EVPNES_VTEP_ESR (1 << 0)
+ /* Active (rxed EAD-ES and EAD-EVI) and can be included as
+ * a nexthop
+ */
+#define BGP_EVPNES_VTEP_ACTIVE (1 << 1)
+
+ uint32_t evi_cnt; /* es_evis referencing this vtep as an active path */
+
+ /* memory used for adding the entry to es->es_vtep_list */
+ struct listnode es_listnode;
+};
+
+/* ES per-EVI info
+ * - ES-EVIs are maintained per-L2-VNI (vpn->es_evi_rb_tree)
+ * - ES-EVIs are also linked to the parent ES (es->es_evi_list)
+ * - Local ES-EVIs are created by zebra (via config). They are linked to a
+ * per-VNI list (vpn->local_es_evi_list) for quick access
+ * - Remote ES-EVIs are created implicitly when a bgp_evpn_es_evi_vtep
+ * references it.
+ */
+struct bgp_evpn_es_evi {
+ /* parent ES */
+ struct bgp_evpn_es *es;
+ /* L2-VNI this entry belongs to */
+ struct bgpevpn *vpn;
+
+ /* ES-EVI flags */
+ uint32_t flags;
+/* local ES-EVI, created by zebra */
+#define BGP_EVPNES_EVI_LOCAL (1 << 0)
+/* created via a remote VTEP imported by BGP */
+#define BGP_EVPNES_EVI_REMOTE (1 << 1)
+/* the active VTEP list of this ES-EVI doesn't match the parent ES's; set
+ * and cleared by the consistency checks
+ */
+#define BGP_EVPNES_EVI_INCONS_VTEP_LIST (1 << 2)
+
+ /* memory used for adding the es_evi to es_evi->vpn->es_evi_rb_tree */
+ RB_ENTRY(bgp_evpn_es_evi) rb_node;
+ /* memory used for linking the es_evi to
+ * es_evi->vpn->local_es_evi_list
+ */
+ struct listnode l2vni_listnode;
+ /* memory used for linking the es_evi to
+ * es_evi->es->es_evi_list
+ */
+ struct listnode es_listnode;
+
+ /* list of PEs (bgp_evpn_es_evi_vtep) attached to the ES for this VNI */
+ struct list *es_evi_vtep_list;
+};
+
+/* PE attached to an ES for a VNI. This entry is created when an EAD-per-ES
+ * or EAD-per-EVI Type1 route is imported into the VNI.
+ */
+struct bgp_evpn_es_evi_vtep {
+ struct bgp_evpn_es_evi *es_evi; /* parent ES-EVI */
+ /* IP address identifying the PE (VTEP) */
+ struct in_addr vtep_ip;
+
+ uint32_t flags;
+ /* Rxed an EAD-per-ES route from the PE */
+#define BGP_EVPN_EVI_VTEP_EAD_PER_ES (1 << 0) /* rxed EAD-per-ES */
+ /* Rxed an EAD-per-EVI route from the PE */
+#define BGP_EVPN_EVI_VTEP_EAD_PER_EVI (1 << 1) /* rxed EAD-per-EVI */
+ /* VTEP is active i.e. will result in the creation of an es-vtep */
+#define BGP_EVPN_EVI_VTEP_ACTIVE (1 << 2)
+ /* mask covering both EAD route flags */
+#define BGP_EVPN_EVI_VTEP_EAD (BGP_EVPN_EVI_VTEP_EAD_PER_ES |\
+ BGP_EVPN_EVI_VTEP_EAD_PER_EVI)
+
+ /* memory used for adding the entry to es_evi->es_evi_vtep_list */
+ struct listnode es_evi_listnode;
+ /* NOTE(review): presumably the ES-level VTEP entry created while
+ * this VTEP is active - confirm against the vtep add/del code
+ */
+ struct bgp_evpn_es_vtep *es_vtep;
+};
+
+/* multihoming information stored in bgp_master */
+/* shorthand for the single instance hanging off bgp_master */
+#define bgp_mh_info (bm->mh_info)
+struct bgp_evpn_mh_info {
+ /* RB tree of Ethernet segments (used for EVPN-MH) */
+ struct bgp_es_rb_head es_rb_tree;
+ /* List of local ESs */
+ struct list *local_es_list;
+ /* List of ESs with pending/periodic processing */
+ struct list *pend_es_list;
+ /* periodic timer for running background consistency checks */
+ struct thread *t_cons_check;
+
+ /* config knobs for optimizing or interop; both are set to true in
+ * bgp_evpn_mh_init()
+ */
+ /* Generate EAD-EVI routes even if the ES is oper-down. This can be
+ * enabled as an optimization to avoid a storm of updates when an ES
+ * link flaps.
+ */
+ bool ead_evi_adv_for_down_links;
+ /* Enable ES consistency checking */
+ bool consistency_checking;
+};
+
+/****************************************************************************/
+/* Returns 1 if the ES carries the BGP_EVPNES_LOCAL flag, else 0 */
+static inline int bgp_evpn_is_es_local(struct bgp_evpn_es *es)
+{
+	if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL))
+		return 1;
+
+	return 0;
+}
+
+extern esi_t *zero_esi;
+/* An ESI is valid if it differs from the ESI that zero_esi points to */
+static inline bool bgp_evpn_is_esi_valid(esi_t *esi)
+{
+	return memcmp(esi, zero_esi, sizeof(esi_t)) != 0;
+}
+
+/* ESI carried in the attribute; zero_esi if there is no attribute */
+static inline esi_t *bgp_evpn_attr_get_esi(struct attr *attr)
+{
+	if (!attr)
+		return zero_esi;
+
+	return &attr->esi;
+}
+
+/* True if the attribute has ATTR_ES_PEER_PROXY or ATTR_ES_PEER_ACTIVE set;
+ * false for a NULL attribute.
+ */
+static inline bool bgp_evpn_attr_is_sync(struct attr *attr)
+{
+	if (!attr)
+		return false;
+
+	return (attr->es_flags
+		& (ATTR_ES_PEER_PROXY | ATTR_ES_PEER_ACTIVE)) != 0;
+}
+
+/* mm_sync_seqnum of the attribute; 0 for a NULL attribute */
+static inline uint32_t bgp_evpn_attr_get_sync_seq(struct attr *attr)
+{
+	if (!attr)
+		return 0;
+
+	return attr->mm_sync_seqnum;
+}
+
+/* True if the attribute has ATTR_ES_PEER_ACTIVE set; false for NULL */
+static inline bool bgp_evpn_attr_is_active_on_peer(struct attr *attr)
+{
+	if (!attr)
+		return false;
+
+	return (attr->es_flags & ATTR_ES_PEER_ACTIVE) != 0;
+}
+
+/* True if the attribute has ATTR_ES_PEER_ROUTER set; false for NULL */
+static inline bool bgp_evpn_attr_is_router_on_peer(struct attr *attr)
+{
+	if (!attr)
+		return false;
+
+	return (attr->es_flags & ATTR_ES_PEER_ROUTER) != 0;
+}
+
+/* True if the attribute has ATTR_ES_PROXY_ADVERT set; false for NULL */
+static inline bool bgp_evpn_attr_is_proxy(struct attr *attr)
+{
+	if (!attr)
+		return false;
+
+	return (attr->es_flags & ATTR_ES_PROXY_ADVERT) != 0;
+}
+
+/* True if the attribute has ATTR_ES_IS_LOCAL set; false for NULL */
+static inline bool bgp_evpn_attr_is_local_es(struct attr *attr)
+{
+	if (!attr)
+		return false;
+
+	return (attr->es_flags & ATTR_ES_IS_LOCAL) != 0;
+}
+
+/****************************************************************************/
+extern int bgp_evpn_es_route_install_uninstall(struct bgp *bgp,
+ struct bgp_evpn_es *es, afi_t afi, safi_t safi,
+ struct prefix_evpn *evp, struct bgp_path_info *pi,
+ int install);
+int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi,
+ struct attr *attr, uint8_t *pfx, int psize,
+ uint32_t addpath_id);
+int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi,
+ struct attr *attr, uint8_t *pfx, int psize,
+ uint32_t addpath_id);
+extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
+ struct in_addr originator_ip, bool oper_up);
+extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi);
+/* create a local ES-EVI and advertise the corresponding EAD routes;
+ * returns 0 on success, -1 on failure
+ */
+extern int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni);
+/* remove the local ES-EVI; returns -1 if ES, VNI or ES-EVI is unknown */
+extern int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni);
+/* remote VTEP add for an ES-EVI; p is the EAD (type-1) prefix */
+extern int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn,
+ const struct prefix_evpn *p);
+/* remote VTEP withdraw for an ES-EVI; p is the EAD (type-1) prefix */
+extern int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn,
+ const struct prefix_evpn *p);
+/* setup/teardown of the global multihoming state (bm->mh_info) */
+extern void bgp_evpn_mh_init(void);
+extern void bgp_evpn_mh_finish(void);
+/* setup/teardown of the ES tables maintained per L2-VNI */
+void bgp_evpn_vni_es_init(struct bgpevpn *vpn);
+void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn);
+void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj);
+void bgp_evpn_es_show(struct vty *vty, bool uj, bool detail);
+/* show the ES-EVIs of one VNI / of all VNIs; uj selects json output */
+void bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni,
+ bool uj, bool detail);
+void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail);
+struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi);
+extern bool bgp_evpn_is_esi_local(esi_t *esi);
+
+#endif /* _FRR_BGP_EVPN_MH_H */
diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h
index c7ccf69f05..ca45b198a7 100644
--- a/bgpd/bgp_evpn_private.h
+++ b/bgpd/bgp_evpn_private.h
@@ -34,15 +34,23 @@
* in bits */
#define EVPN_ROUTE_PREFIXLEN (sizeof(struct evpn_addr) * 8)
-/* EVPN route types. */
-typedef enum {
- BGP_EVPN_AD_ROUTE = 1, /* Ethernet Auto-Discovery (A-D) route */
- BGP_EVPN_MAC_IP_ROUTE, /* MAC/IP Advertisement route */
- BGP_EVPN_IMET_ROUTE, /* Inclusive Multicast Ethernet Tag route */
- BGP_EVPN_ES_ROUTE, /* Ethernet Segment route */
- BGP_EVPN_IP_PREFIX_ROUTE, /* IP Prefix route */
-} bgp_evpn_route_type;
+/* EVPN route RD buffer length */
+#define BGP_EVPN_PREFIX_RD_LEN 100
+/* packet sizes for EVPN routes */
+/* Type-1 route should be 25 bytes
+ * RD (8), ESI (10), eth-tag (4), vni (3)
+ */
+#define BGP_EVPN_TYPE1_PSIZE 25
+/* Type-4 route should be either 23 or 35 bytes
+ * RD (8), ESI (10), ip-len (1), ip (4 or 16)
+ * i.e. 8 + 10 + 1 + 4 = 23 for IPv4 and 8 + 10 + 1 + 16 = 35 for IPv6
+ */
+#define BGP_EVPN_TYPE4_V4_PSIZE 23
+#define BGP_EVPN_TYPE4_V6_PSIZE 35
+
+RB_HEAD(bgp_es_evi_rb_head, bgp_evpn_es_evi);
+RB_PROTOTYPE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node,
+ bgp_es_evi_rb_cmp);
/*
* Hash table of EVIs. Right now, the only type of EVI supported is with
* VxLAN encapsulation, hence each EVI corresponds to a L2 VNI.
@@ -98,46 +106,16 @@ struct bgpevpn {
* this VNI. */
struct bgp_table *route_table;
- QOBJ_FIELDS
-};
-
-DECLARE_QOBJ_TYPE(bgpevpn)
+ /* RB tree of ES-EVIs */
+ struct bgp_es_evi_rb_head es_evi_rb_tree;
-struct evpnes {
-
- /* Ethernet Segment Identifier */
- esi_t esi;
-
- /* es flags */
- uint16_t flags;
-#define EVPNES_LOCAL 0x01
-#define EVPNES_REMOTE 0x02
-
- /*
- * Id for deriving the RD
- * automatically for this ESI
- */
- uint16_t rd_id;
-
- /* RD for this VNI. */
- struct prefix_rd prd;
-
- /* originator ip address */
- struct ipaddr originator_ip;
-
- /* list of VTEPs in the same site */
- struct list *vtep_list;
-
- /*
- * Route table for EVPN routes for
- * this ESI. - type4 routes
- */
- struct bgp_table *route_table;
+ /* List of local ES-EVIs */
+ struct list *local_es_evi_list;
QOBJ_FIELDS
};
-DECLARE_QOBJ_TYPE(evpnes)
+DECLARE_QOBJ_TYPE(bgpevpn)
/* Mapping of Import RT to VNIs.
* The Import RTs of all VNIs are maintained in a hash table with each
@@ -330,6 +308,16 @@ static inline void encode_es_rt_extcomm(struct ecommunity_val *eval,
memcpy(&eval->val[2], mac, ETH_ALEN);
}
+/* Fill eval with an EVPN ESI-label extended community. All bytes are zeroed
+ * first; bit 0 of the flags byte (val[2]) is set when single_active is true.
+ */
+static inline void encode_esi_label_extcomm(struct ecommunity_val *eval,
+		bool single_active)
+{
+	memset(eval, 0, sizeof(*eval));
+
+	eval->val[0] = ECOMMUNITY_ENCODE_EVPN;
+	eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL;
+
+	/* val[2] is zero after the memset, so OR-ing in the bit is enough */
+	if (!single_active)
+		return;
+	eval->val[2] |= (1 << 0);
+}
+
static inline void encode_rmac_extcomm(struct ecommunity_val *eval,
struct ethaddr *rmac)
{
@@ -361,13 +349,15 @@ static inline void encode_mac_mobility_extcomm(int static_mac, uint32_t seq,
}
static inline void encode_na_flag_extcomm(struct ecommunity_val *eval,
- uint8_t na_flag)
+ uint8_t na_flag, bool proxy)
{
memset(eval, 0, sizeof(*eval));
eval->val[0] = ECOMMUNITY_ENCODE_EVPN;
eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_ND;
if (na_flag)
eval->val[2] |= ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG;
+ if (proxy)
+ eval->val[2] |= ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG;
}
static inline void ip_prefix_from_type5_prefix(const struct prefix_evpn *evp,
@@ -487,6 +477,44 @@ static inline void build_evpn_type4_prefix(struct prefix_evpn *p,
memcpy(&p->prefix.es_addr.esi, esi, sizeof(esi_t));
}
+/*
+ * Fill 'p' with an EVPN type-1 (EAD) prefix.
+ *
+ * 'p' is zeroed and then populated with the route type, the ethernet
+ * tag, the ESI and the originator's IPv4 address.
+ */
+static inline void build_evpn_type1_prefix(struct prefix_evpn *p,
+					   uint32_t eth_tag,
+					   esi_t *esi,
+					   struct in_addr originator_ip)
+{
+	memset(p, 0, sizeof(*p));
+
+	/* generic EVPN prefix header */
+	p->family = AF_EVPN;
+	p->prefixlen = EVPN_ROUTE_PREFIXLEN;
+	p->prefix.route_type = BGP_EVPN_AD_ROUTE;
+
+	/* EAD specific fields */
+	memcpy(&p->prefix.ead_addr.esi, esi, sizeof(*esi));
+	p->prefix.ead_addr.eth_tag = eth_tag;
+	p->prefix.ead_addr.ip.ipa_type = IPADDR_V4;
+	p->prefix.ead_addr.ip.ipaddr_v4 = originator_ip;
+}
+
+/*
+ * Derive the global-table form of a type-1 prefix from the per-VNI one:
+ * 'global_p' becomes a byte copy of 'vni_p' with the IP address type and
+ * the IPv4 address of the EAD address reset to zero.
+ */
+static inline void evpn_type1_prefix_global_copy(struct prefix_evpn *global_p,
+						 const struct prefix_evpn *vni_p)
+{
+	memcpy(global_p, vni_p, sizeof(struct prefix_evpn));
+
+	/* wipe the originator address carried by the per-VNI prefix */
+	global_p->prefix.ead_addr.ip.ipaddr_v4.s_addr = 0;
+	global_p->prefix.ead_addr.ip.ipa_type = 0;
+}
+
+/*
+ * Build the per-VNI form of a type-1 prefix. The EAD prefix kept in the
+ * global table has no VTEP-IP, so the VNI copy is a byte copy of the
+ * global prefix with 'originator_ip' filled in as an IPv4 address.
+ *
+ * Returns 'vni_p'.
+ */
+static inline struct prefix_evpn *evpn_type1_prefix_vni_copy(
+	struct prefix_evpn *vni_p,
+	const struct prefix_evpn *global_p,
+	struct in_addr originator_ip)
+{
+	memcpy(vni_p, global_p, sizeof(struct prefix_evpn));
+
+	/* re-attach the VTEP-IP that the global copy leaves out */
+	vni_p->prefix.ead_addr.ip.ipaddr_v4 = originator_ip;
+	vni_p->prefix.ead_addr.ip.ipa_type = IPADDR_V4;
+
+	return vni_p;
+}
+
static inline int evpn_default_originate_set(struct bgp *bgp, afi_t afi,
safi_t safi)
{
@@ -511,11 +539,6 @@ static inline void es_get_system_mac(esi_t *esi,
memcpy(mac, &esi->val[1], ETH_ALEN);
}
-static inline int is_es_local(struct evpnes *es)
-{
- return CHECK_FLAG(es->flags, EVPNES_LOCAL) ? 1 : 0;
-}
-
static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn)
{
struct bgp *bgp_evpn = NULL;
@@ -526,6 +549,16 @@ static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn)
vpn->advertise_svi_macip);
}
+/*
+ * Tell whether 'pi' is a locally originated path of 'bgp': its peer is the
+ * instance's self peer, its type is ZEBRA_ROUTE_BGP and its sub-type is
+ * BGP_ROUTE_STATIC.
+ */
+static inline bool bgp_evpn_is_path_local(struct bgp *bgp,
+					  struct bgp_path_info *pi)
+{
+	if (pi->peer != bgp->peer_self)
+		return false;
+
+	if (pi->type != ZEBRA_ROUTE_BGP)
+		return false;
+
+	return pi->sub_type == BGP_ROUTE_STATIC;
+}
+
+extern struct zclient *zclient;
+
extern void bgp_evpn_install_uninstall_default_route(struct bgp *bgp_vrf,
afi_t afi, safi_t safi,
bool add);
@@ -563,10 +596,18 @@ extern struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni,
vrf_id_t tenant_vrf_id,
struct in_addr mcast_grp);
extern void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn);
-extern struct evpnes *bgp_evpn_lookup_es(struct bgp *bgp, esi_t *esi);
-extern struct evpnes *bgp_evpn_es_new(struct bgp *bgp, esi_t *esi,
- struct ipaddr *originator_ip);
-extern void bgp_evpn_es_free(struct bgp *bgp, struct evpnes *es);
extern bool bgp_evpn_lookup_l3vni_l2vni_table(vni_t vni);
extern int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn);
+extern void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi,
+ struct bgp_dest *dest,
+ struct bgp_path_info **pi);
+int vni_list_cmp(void *p1, void *p2);
+extern int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
+ struct bgp_node *rn);
+extern struct bgp_node *bgp_global_evpn_node_get(
+ struct bgp_table *table, afi_t afi, safi_t safi,
+ const struct prefix_evpn *evp, struct prefix_rd *prd);
+extern struct bgp_node *bgp_global_evpn_node_lookup(
+ struct bgp_table *table, afi_t afi, safi_t safi,
+ const struct prefix_evpn *evp, struct prefix_rd *prd);
#endif /* _BGP_EVPN_PRIVATE_H */
diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c
index 2584939378..3a198b20f6 100644
--- a/bgpd/bgp_evpn_vty.c
+++ b/bgpd/bgp_evpn_vty.c
@@ -33,6 +33,7 @@
#include "bgpd/bgp_evpn_vty.h"
#include "bgpd/bgp_evpn.h"
#include "bgpd/bgp_evpn_private.h"
+#include "bgpd/bgp_evpn_mh.h"
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_vty.h"
#include "bgpd/bgp_errors.h"
@@ -348,6 +349,8 @@ static void bgp_evpn_show_route_header(struct vty *vty, struct bgp *bgp,
"Status codes: s suppressed, d damped, h history, * valid, > best, i - internal\n");
vty_out(vty, "Origin codes: i - IGP, e - EGP, ? - incomplete\n");
vty_out(vty,
+ "EVPN type-1 prefix: [1]:[ESI]:[EthTag]:[IPlen]:[VTEP-IP]\n");
+ vty_out(vty,
"EVPN type-2 prefix: [2]:[EthTag]:[MAClen]:[MAC]:[IPlen]:[IP]\n");
vty_out(vty, "EVPN type-3 prefix: [3]:[EthTag]:[IPlen]:[OrigIP]\n");
vty_out(vty, "EVPN type-4 prefix: [4]:[ESI]:[IPlen]:[OrigIP]\n");
@@ -461,47 +464,6 @@ static void display_l3vni(struct vty *vty, struct bgp *bgp_vrf,
json_object_object_add(json, "exportRts", json_export_rtl);
}
-static void display_es(struct vty *vty, struct evpnes *es, json_object *json)
-{
- struct in_addr *vtep;
- char buf[ESI_STR_LEN];
- char buf1[RD_ADDRSTRLEN];
- char buf2[INET6_ADDRSTRLEN];
- struct listnode *node = NULL;
- json_object *json_vteps = NULL;
-
- if (json) {
- json_vteps = json_object_new_array();
- json_object_string_add(json, "esi",
- esi_to_str(&es->esi, buf, sizeof(buf)));
- json_object_string_add(json, "rd",
- prefix_rd2str(&es->prd, buf1,
- sizeof(buf1)));
- json_object_string_add(
- json, "originatorIp",
- ipaddr2str(&es->originator_ip, buf2, sizeof(buf2)));
- if (es->vtep_list) {
- for (ALL_LIST_ELEMENTS_RO(es->vtep_list, node, vtep))
- json_object_array_add(
- json_vteps, json_object_new_string(
- inet_ntoa(*vtep)));
- }
- json_object_object_add(json, "vteps", json_vteps);
- } else {
- vty_out(vty, "ESI: %s\n",
- esi_to_str(&es->esi, buf, sizeof(buf)));
- vty_out(vty, " RD: %s\n", prefix_rd2str(&es->prd, buf1,
- sizeof(buf1)));
- vty_out(vty, " Originator-IP: %s\n",
- ipaddr2str(&es->originator_ip, buf2, sizeof(buf2)));
- if (es->vtep_list) {
- vty_out(vty, " VTEP List:\n");
- for (ALL_LIST_ELEMENTS_RO(es->vtep_list, node, vtep))
- vty_out(vty, " %s\n", inet_ntoa(*vtep));
- }
- }
-}
-
static void display_vni(struct vty *vty, struct bgpevpn *vpn, json_object *json)
{
char buf1[RD_ADDRSTRLEN];
@@ -628,7 +590,7 @@ static void display_vni(struct vty *vty, struct bgpevpn *vpn, json_object *json)
}
static void show_esi_routes(struct bgp *bgp,
- struct evpnes *es,
+ struct bgp_evpn_es *es,
struct vty *vty,
json_object *json)
{
@@ -979,48 +941,6 @@ static void show_l3vni_entry(struct vty *vty, struct bgp *bgp,
}
}
-static void show_es_entry(struct hash_bucket *bucket, void *args[])
-{
- char buf[ESI_STR_LEN];
- char buf1[RD_ADDRSTRLEN];
- char buf2[INET6_ADDRSTRLEN];
- struct in_addr *vtep = NULL;
- struct vty *vty = args[0];
- json_object *json = args[1];
- json_object *json_vteps = NULL;
- struct listnode *node = NULL;
- struct evpnes *es = (struct evpnes *)bucket->data;
-
- if (json) {
- json_vteps = json_object_new_array();
- json_object_string_add(json, "esi",
- esi_to_str(&es->esi, buf, sizeof(buf)));
- json_object_string_add(json, "type",
- is_es_local(es) ? "Local" : "Remote");
- json_object_string_add(json, "rd",
- prefix_rd2str(&es->prd, buf1,
- sizeof(buf1)));
- json_object_string_add(
- json, "originatorIp",
- ipaddr2str(&es->originator_ip, buf2, sizeof(buf2)));
- if (es->vtep_list) {
- for (ALL_LIST_ELEMENTS_RO(es->vtep_list, node, vtep))
- json_object_array_add(json_vteps,
- json_object_new_string(
- inet_ntoa(*vtep)));
- }
- json_object_object_add(json, "vteps", json_vteps);
- } else {
- vty_out(vty, "%-30s %-6s %-21s %-15s %-6d\n",
- esi_to_str(&es->esi, buf, sizeof(buf)),
- is_es_local(es) ? "Local" : "Remote",
- prefix_rd2str(&es->prd, buf1, sizeof(buf1)),
- ipaddr2str(&es->originator_ip, buf2,
- sizeof(buf2)),
- es->vtep_list ? listcount(es->vtep_list) : 0);
- }
-}
-
static void show_vni_entry(struct hash_bucket *bucket, void *args[])
{
struct vty *vty;
@@ -2454,10 +2374,10 @@ static void evpn_show_route_vni_macip(struct vty *vty, struct bgp *bgp,
static void evpn_show_routes_esi(struct vty *vty, struct bgp *bgp,
esi_t *esi, json_object *json)
{
- struct evpnes *es = NULL;
+ struct bgp_evpn_es *es = NULL;
/* locate the ES */
- es = bgp_evpn_lookup_es(bgp, esi);
+ es = bgp_evpn_es_find(esi);
if (!es) {
if (!json)
vty_out(vty, "ESI not found\n");
@@ -2863,43 +2783,6 @@ static void evpn_show_all_routes(struct vty *vty, struct bgp *bgp, int type,
}
}
-/* Display specific ES */
-static void evpn_show_es(struct vty *vty, struct bgp *bgp, esi_t *esi,
- json_object *json)
-{
- struct evpnes *es = NULL;
-
- es = bgp_evpn_lookup_es(bgp, esi);
- if (es) {
- display_es(vty, es, json);
- } else {
- if (json) {
- vty_out(vty, "{}\n");
- } else {
- vty_out(vty, "ESI not found\n");
- return;
- }
- }
-}
-
-/* Display all ESs */
-static void evpn_show_all_es(struct vty *vty, struct bgp *bgp,
- json_object *json)
-{
- void *args[2];
-
- if (!json)
- vty_out(vty, "%-30s %-6s %-21s %-15s %-6s\n",
- "ESI", "Type", "RD", "Originator-IP", "#VTEPs");
-
- /* print all ESs */
- args[0] = vty;
- args[1] = json;
- hash_iterate(bgp->esihash,
- (void (*)(struct hash_bucket *, void *))show_es_entry,
- args);
-}
-
/*
* Display specified VNI (vty handler)
*/
@@ -4022,55 +3905,50 @@ DEFUN(show_bgp_l2vpn_evpn_vni,
return CMD_SUCCESS;
}
-/* Disaply ES */
-DEFUN(show_bgp_l2vpn_evpn_es,
+/*
+ * "show bgp l2vpn evpn es-evi": when a VNI is supplied hand off to the
+ * per-VNI display routine, otherwise to the unfiltered one. The json and
+ * detail keywords are passed on as booleans.
+ */
+DEFPY(show_bgp_l2vpn_evpn_es_evi,
+      show_bgp_l2vpn_evpn_es_evi_cmd,
+      "show bgp l2vpn evpn es-evi [vni (1-16777215)$vni] [json$uj] [detail$detail]",
+      SHOW_STR
+      BGP_STR
+      L2VPN_HELP_STR
+      EVPN_HELP_STR
+      "ES per EVI\n"
+      "VxLAN Network Identifier\n"
+      "VNI\n"
+      JSON_STR
+      "Detailed information\n")
+{
+	if (!vni) {
+		bgp_evpn_es_evi_show(vty, !!uj, !!detail);
+		return CMD_SUCCESS;
+	}
+
+	bgp_evpn_es_evi_show_vni(vty, vni, !!uj, !!detail);
+	return CMD_SUCCESS;
+}
+
+DEFPY(show_bgp_l2vpn_evpn_es,
show_bgp_l2vpn_evpn_es_cmd,
- "show bgp l2vpn evpn es [ESI] [json]",
+ "show bgp l2vpn evpn es [NAME$esi_str|detail$detail] [json$uj]",
SHOW_STR
BGP_STR
L2VPN_HELP_STR
EVPN_HELP_STR
- "ethernet-Segment\n"
- "Ethernet-Segment Identifier\n"
+ "Ethernet Segment\n"
+ "ES ID\n"
+ "Detailed information\n"
JSON_STR)
{
- int idx = 0;
- bool uj = false;
esi_t esi;
- json_object *json = NULL;
- struct bgp *bgp = NULL;
-
- memset(&esi, 0, sizeof(esi));
- uj = use_json(argc, argv);
- bgp = bgp_get_evpn();
- if (!bgp)
- return CMD_WARNING;
-
- if (!argv_find(argv, argc, "evpn", &idx))
- return CMD_WARNING;
-
- if ((uj && argc == ((idx + 1) + 2)) ||
- (!uj && argc == (idx + 1) + 1)) {
-
- /* show all ESs */
- evpn_show_all_es(vty, bgp, json);
- } else {
-
- /* show a specific ES */
-
- /* get the ESI - ESI-ID is at argv[5] */
- if (!str_to_esi(argv[idx + 2]->arg, &esi)) {
- vty_out(vty, "%% Malformed ESI\n");
+ if (esi_str) {
+ if (!str_to_esi(esi_str, &esi)) {
+ vty_out(vty, "%%Malformed ESI\n");
return CMD_WARNING;
}
- evpn_show_es(vty, bgp, &esi, json);
- }
+ bgp_evpn_es_show_esi(vty, &esi, uj);
+ } else {
- if (uj) {
- vty_out(vty, "%s\n", json_object_to_json_string_ext(
- json, JSON_C_TO_STRING_PRETTY));
- json_object_free(json);
+ bgp_evpn_es_show(vty, uj, !!detail);
}
return CMD_SUCCESS;
@@ -4115,7 +3993,7 @@ DEFUN(show_bgp_l2vpn_evpn_summary,
*/
DEFUN(show_bgp_l2vpn_evpn_route,
show_bgp_l2vpn_evpn_route_cmd,
- "show bgp l2vpn evpn route [detail] [type <macip|2|multicast|3|es|4|prefix|5>] [json]",
+ "show bgp l2vpn evpn route [detail] [type <ead|1|macip|2|multicast|3|es|4|prefix|5>] [json]",
SHOW_STR
BGP_STR
L2VPN_HELP_STR
@@ -4123,6 +4001,7 @@ DEFUN(show_bgp_l2vpn_evpn_route,
"EVPN route information\n"
"Display Detailed Information\n"
"Specify Route type\n"
+ "EAD (Type-1) route\n"
"MAC-IP (Type-2) route\n"
"MAC-IP (Type-2) route\n"
"Multicast (Type-3) route\n"
@@ -4158,9 +4037,12 @@ DEFUN(show_bgp_l2vpn_evpn_route,
else if ((strncmp(argv[type_idx + 1]->arg, "mu", 2) == 0)
|| (strmatch(argv[type_idx + 1]->arg, "3")))
type = BGP_EVPN_IMET_ROUTE;
- else if ((strncmp(argv[type_idx + 1]->arg, "e", 1) == 0)
+ else if ((strncmp(argv[type_idx + 1]->arg, "es", 2) == 0)
|| (strmatch(argv[type_idx + 1]->arg, "4")))
type = BGP_EVPN_ES_ROUTE;
+ else if ((strncmp(argv[type_idx + 1]->arg, "ea", 2) == 0)
+ || (strmatch(argv[type_idx + 1]->arg, "1")))
+ type = BGP_EVPN_AD_ROUTE;
else if ((strncmp(argv[type_idx + 1]->arg, "p", 1) == 0)
|| (strmatch(argv[type_idx + 1]->arg, "5")))
type = BGP_EVPN_IP_PREFIX_ROUTE;
@@ -4186,7 +4068,7 @@ DEFUN(show_bgp_l2vpn_evpn_route,
*/
DEFUN(show_bgp_l2vpn_evpn_route_rd,
show_bgp_l2vpn_evpn_route_rd_cmd,
- "show bgp l2vpn evpn route rd ASN:NN_OR_IP-ADDRESS:NN [type <macip|multicast|es|prefix>] [json]",
+ "show bgp l2vpn evpn route rd ASN:NN_OR_IP-ADDRESS:NN [type <ead|macip|multicast|es|prefix>] [json]",
SHOW_STR
BGP_STR
L2VPN_HELP_STR
@@ -4195,6 +4077,7 @@ DEFUN(show_bgp_l2vpn_evpn_route_rd,
"Route Distinguisher\n"
"ASN:XX or A.B.C.D:XX\n"
"Specify Route type\n"
+ "EAD (Type-1) route\n"
"MAC-IP (Type-2) route\n"
"Multicast (Type-3) route\n"
"Ethernet Segment route\n"
@@ -4236,6 +4119,10 @@ DEFUN(show_bgp_l2vpn_evpn_route_rd,
type = BGP_EVPN_MAC_IP_ROUTE;
else if (strncmp(argv[type_idx + 1]->arg, "mu", 2) == 0)
type = BGP_EVPN_IMET_ROUTE;
+ else if (strncmp(argv[type_idx + 1]->arg, "es", 2) == 0)
+ type = BGP_EVPN_ES_ROUTE;
+ else if (strncmp(argv[type_idx + 1]->arg, "ea", 2) == 0)
+ type = BGP_EVPN_AD_ROUTE;
else if (strncmp(argv[type_idx + 1]->arg, "pr", 2) == 0)
type = BGP_EVPN_IP_PREFIX_ROUTE;
else
@@ -4380,7 +4267,7 @@ DEFUN(show_bgp_l2vpn_evpn_route_esi,
* Display per-VNI EVPN routing table.
*/
DEFUN(show_bgp_l2vpn_evpn_route_vni, show_bgp_l2vpn_evpn_route_vni_cmd,
- "show bgp l2vpn evpn route vni " CMD_VNI_RANGE " [<type <macip|multicast> | vtep A.B.C.D>] [json]",
+ "show bgp l2vpn evpn route vni " CMD_VNI_RANGE " [<type <ead|macip|multicast> | vtep A.B.C.D>] [json]",
SHOW_STR
BGP_STR
L2VPN_HELP_STR
@@ -4389,6 +4276,7 @@ DEFUN(show_bgp_l2vpn_evpn_route_vni, show_bgp_l2vpn_evpn_route_vni_cmd,
"VXLAN Network Identifier\n"
"VNI number\n"
"Specify Route type\n"
+ "EAD (Type-1) route\n"
"MAC-IP (Type-2) route\n"
"Multicast (Type-3) route\n"
"Remote VTEP\n"
@@ -4426,6 +4314,8 @@ DEFUN(show_bgp_l2vpn_evpn_route_vni, show_bgp_l2vpn_evpn_route_vni_cmd,
type = BGP_EVPN_MAC_IP_ROUTE;
else if (strncmp(argv[idx + 5]->arg, "mu", 2) == 0)
type = BGP_EVPN_IMET_ROUTE;
+ else if (strncmp(argv[idx + 5]->arg, "ea", 2) == 0)
+ type = BGP_EVPN_AD_ROUTE;
else
return CMD_WARNING;
} else if (strncmp(argv[idx + 4]->arg, "vtep", 4) == 0) {
@@ -4711,17 +4601,22 @@ DEFUN(show_bgp_l2vpn_evpn_import_rt,
return CMD_SUCCESS;
}
-DEFUN(test_adv_evpn_type4_route,
- test_adv_evpn_type4_route_cmd,
- "advertise es ESI",
- "Advertise EVPN ES route\n"
+DEFPY(test_es_add,
+ test_es_add_cmd,
+ "[no$no] test es NAME$esi_str [state NAME$state_str]",
+ NO_STR
+ "Test\n"
"Ethernet-segment\n"
- "Ethernet-Segment Identifier\n")
+ "Ethernet-Segment Identifier\n"
+ "ES link state\n"
+ "up|down\n"
+)
{
int ret = 0;
esi_t esi;
struct bgp *bgp;
- struct ipaddr vtep_ip;
+ struct in_addr vtep_ip;
+ bool oper_up;
bgp = bgp_get_evpn();
if (!bgp) {
@@ -4729,33 +4624,47 @@ DEFUN(test_adv_evpn_type4_route,
return CMD_WARNING;
}
- if (!str_to_esi(argv[2]->arg, &esi)) {
+ if (!str_to_esi(esi_str, &esi)) {
vty_out(vty, "%%Malformed ESI\n");
return CMD_WARNING;
}
- vtep_ip.ipa_type = IPADDR_V4;
- vtep_ip.ipaddr_v4 = bgp->router_id;
+ if (no) {
+ ret = bgp_evpn_local_es_del(bgp, &esi);
+ if (ret == -1) {
+ vty_out(vty, "%%Failed to delete ES\n");
+ return CMD_WARNING;
+ }
+ } else {
+ if (state_str && !strcmp(state_str, "up"))
+ oper_up = true;
+ else
+ oper_up = false;
+ vtep_ip = bgp->router_id;
- ret = bgp_evpn_local_es_add(bgp, &esi, &vtep_ip);
- if (ret == -1) {
- vty_out(vty, "%%Failed to EVPN advertise type-4 route\n");
- return CMD_WARNING;
+ ret = bgp_evpn_local_es_add(bgp, &esi, vtep_ip, oper_up);
+ if (ret == -1) {
+ vty_out(vty, "%%Failed to add ES\n");
+ return CMD_WARNING;
+ }
}
return CMD_SUCCESS;
}
-DEFUN(test_withdraw_evpn_type4_route,
- test_withdraw_evpn_type4_route_cmd,
- "withdraw es ESI",
- "Advertise EVPN ES route\n"
+DEFPY(test_es_vni_add,
+ test_es_vni_add_cmd,
+ "[no$no] test es NAME$esi_str vni (1-16777215)$vni",
+ NO_STR
+ "Test\n"
"Ethernet-segment\n"
- "Ethernet-Segment Identifier\n")
+ "Ethernet-Segment Identifier\n"
+ "VNI\n"
+ "1-16777215\n"
+)
{
int ret = 0;
esi_t esi;
struct bgp *bgp;
- struct ipaddr vtep_ip;
bgp = bgp_get_evpn();
if (!bgp) {
@@ -4763,22 +4672,23 @@ DEFUN(test_withdraw_evpn_type4_route,
return CMD_WARNING;
}
- if (!bgp->peer_self) {
- vty_out(vty, "%%BGP instance doesn't have self peer\n");
- return CMD_WARNING;
- }
-
- if (!str_to_esi(argv[2]->arg, &esi)) {
+ if (!str_to_esi(esi_str, &esi)) {
vty_out(vty, "%%Malformed ESI\n");
return CMD_WARNING;
}
- vtep_ip.ipa_type = IPADDR_V4;
- vtep_ip.ipaddr_v4 = bgp->router_id;
- ret = bgp_evpn_local_es_del(bgp, &esi, &vtep_ip);
- if (ret == -1) {
- vty_out(vty, "%%Failed to withdraw EVPN type-4 route\n");
- return CMD_WARNING;
+ if (no) {
+ ret = bgp_evpn_local_es_evi_del(bgp, &esi, vni);
+ if (ret == -1) {
+ vty_out(vty, "%%Failed to deref ES VNI\n");
+ return CMD_WARNING;
+ }
+ } else {
+ ret = bgp_evpn_local_es_evi_add(bgp, &esi, vni);
+ if (ret == -1) {
+ vty_out(vty, "%%Failed to ref ES VNI\n");
+ return CMD_WARNING;
+ }
}
return CMD_SUCCESS;
}
@@ -5836,11 +5746,12 @@ void bgp_ethernetvpn_init(void)
install_element(BGP_EVPN_NODE, &bgp_evpn_advertise_pip_ip_mac_cmd);
/* test commands */
- install_element(BGP_EVPN_NODE, &test_adv_evpn_type4_route_cmd);
- install_element(BGP_EVPN_NODE, &test_withdraw_evpn_type4_route_cmd);
+ install_element(BGP_EVPN_NODE, &test_es_add_cmd);
+ install_element(BGP_EVPN_NODE, &test_es_vni_add_cmd);
/* "show bgp l2vpn evpn" commands. */
install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_es_cmd);
+ install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_es_evi_cmd);
install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_vni_cmd);
install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_summary_cmd);
install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_route_cmd);
diff --git a/bgpd/bgp_memory.c b/bgpd/bgp_memory.c
index 41c4108c0a..8bdab16680 100644
--- a/bgpd/bgp_memory.c
+++ b/bgpd/bgp_memory.c
@@ -116,8 +116,11 @@ DEFINE_MTYPE(BGPD, LCOMMUNITY_STR, "Large Community display string")
DEFINE_MTYPE(BGPD, LCOMMUNITY_VAL, "Large Community value")
DEFINE_MTYPE(BGPD, BGP_EVPN, "BGP EVPN Information")
-DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VTEP, "BGP EVPN ES VTEP Ip")
+DEFINE_MTYPE(BGPD, BGP_EVPN_MH_INFO, "BGP EVPN Multihoming Information")
+DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VTEP, "BGP EVPN ES VTEP")
+DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI_VTEP, "BGP EVPN ES-EVI VTEP")
DEFINE_MTYPE(BGPD, BGP_EVPN_ES, "BGP EVPN ESI Information")
+DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI, "BGP EVPN ES-per-EVI Information")
DEFINE_MTYPE(BGPD, BGP_EVPN_IMPORT_RT, "BGP EVPN Import RT")
DEFINE_MTYPE(BGPD, BGP_EVPN_VRF_IMPORT_RT, "BGP EVPN VRF Import RT")
DEFINE_MTYPE(BGPD, BGP_EVPN_MACIP, "BGP EVPN MAC IP")
diff --git a/bgpd/bgp_memory.h b/bgpd/bgp_memory.h
index 5428022551..d1ae392c65 100644
--- a/bgpd/bgp_memory.h
+++ b/bgpd/bgp_memory.h
@@ -111,8 +111,11 @@ DECLARE_MTYPE(LCOMMUNITY)
DECLARE_MTYPE(LCOMMUNITY_STR)
DECLARE_MTYPE(LCOMMUNITY_VAL)
+DECLARE_MTYPE(BGP_EVPN_MH_INFO)
DECLARE_MTYPE(BGP_EVPN_ES)
+DECLARE_MTYPE(BGP_EVPN_ES_EVI)
DECLARE_MTYPE(BGP_EVPN_ES_VTEP)
+DECLARE_MTYPE(BGP_EVPN_ES_EVI_VTEP)
DECLARE_MTYPE(BGP_EVPN)
DECLARE_MTYPE(BGP_EVPN_IMPORT_RT)
diff --git a/bgpd/bgp_rd.h b/bgpd/bgp_rd.h
index b5ad9d624d..2aee44c721 100644
--- a/bgpd/bgp_rd.h
+++ b/bgpd/bgp_rd.h
@@ -33,6 +33,7 @@
#endif
#define RD_ADDRSTRLEN 28
+#define RD_BYTES 8
struct rd_as {
uint16_t type;
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 6b2a5f55b7..80ffa18424 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -79,6 +79,7 @@
#include "bgpd/bgp_encap_types.h"
#include "bgpd/bgp_encap_tlv.h"
#include "bgpd/bgp_evpn.h"
+#include "bgpd/bgp_evpn_mh.h"
#include "bgpd/bgp_evpn_vty.h"
#include "bgpd/bgp_flowspec.h"
#include "bgpd/bgp_flowspec_util.h"
@@ -544,6 +545,11 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
uint32_t new_mm_seq;
uint32_t exist_mm_seq;
int nh_cmp;
+ esi_t *exist_esi;
+ esi_t *new_esi;
+ bool same_esi;
+ bool old_proxy;
+ bool new_proxy;
*paths_eq = 0;
@@ -620,6 +626,47 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
}
}
+ new_esi = bgp_evpn_attr_get_esi(newattr);
+ exist_esi = bgp_evpn_attr_get_esi(existattr);
+ if (bgp_evpn_is_esi_valid(new_esi) &&
+ !memcmp(new_esi, exist_esi, sizeof(esi_t))) {
+ same_esi = true;
+ } else {
+ same_esi = false;
+ }
+
+ /* If both paths have the same non-zero ES and
+ * one path is local it wins.
+ * PS: Note the local path wins even if the remote
+ * has the higher MM seq. The local path's
+ * MM seq will be fixed up to match the highest
+ * rem seq, subsequently.
+ */
+ if (same_esi) {
+ char esi_buf[ESI_STR_LEN];
+
+ if (bgp_evpn_is_path_local(bgp, new)) {
+ *reason = bgp_path_selection_evpn_local_path;
+ if (debug)
+ zlog_debug(
+ "%s: %s wins over %s as ES %s is same and local",
+ pfx_buf, new_buf, exist_buf,
+ esi_to_str(new_esi, esi_buf,
+ sizeof(esi_buf)));
+ return 1;
+ }
+ if (bgp_evpn_is_path_local(bgp, exist)) {
+ *reason = bgp_path_selection_evpn_local_path;
+ if (debug)
+ zlog_debug(
+ "%s: %s loses to %s as ES %s is same and local",
+ pfx_buf, new_buf, exist_buf,
+ esi_to_str(new_esi, esi_buf,
+ sizeof(esi_buf)));
+ return 0;
+ }
+ }
+
new_mm_seq = mac_mobility_seqnum(newattr);
exist_mm_seq = mac_mobility_seqnum(existattr);
@@ -643,6 +690,30 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
return 0;
}
+ /* if the sequence numbers and ESI are the same and one path
+ * is non-proxy it wins (over proxy)
+ */
+ new_proxy = bgp_evpn_attr_is_proxy(newattr);
+ old_proxy = bgp_evpn_attr_is_proxy(existattr);
+ if (same_esi && bgp_evpn_attr_is_local_es(newattr) &&
+ old_proxy != new_proxy) {
+ if (!new_proxy) {
+ *reason = bgp_path_selection_evpn_non_proxy;
+ if (debug)
+ zlog_debug(
+ "%s: %s wins over %s, same seq/es and non-proxy",
+ pfx_buf, new_buf, exist_buf);
+ return 1;
+ }
+
+ *reason = bgp_path_selection_evpn_non_proxy;
+ if (debug)
+ zlog_debug(
+ "%s: %s loses to %s, same seq/es and non-proxy",
+ pfx_buf, new_buf, exist_buf);
+ return 0;
+ }
+
/*
* if sequence numbers are the same path with the lowest IP
* wins
@@ -1175,6 +1246,17 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
return 1;
}
+
+/*
+ * EVPN wrapper around bgp_path_info_cmp(): compares 'new' against 'exist'
+ * for AFI_L2VPN/SAFI_EVPN and hands back that function's return value;
+ * '*paths_eq' is filled in by the callee. The selection reason is
+ * collected in a local and discarded.
+ *
+ * NULL and 0 are passed for the two arguments that precede pfx_buf
+ * (presumably the multipath config and the debug flag -- confirm against
+ * the bgp_path_info_cmp() prototype).
+ */
+int bgp_evpn_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+			   struct bgp_path_info *exist, int *paths_eq)
+{
+	enum bgp_path_selection_reason reason;
+	/* Hand the callee an empty string rather than an indeterminate
+	 * buffer, so that any read of pfx_buf is well defined.
+	 */
+	char pfx_buf[PREFIX2STR_BUFFER] = {0};
+
+	return bgp_path_info_cmp(bgp, new, exist, paths_eq, NULL, 0, pfx_buf,
+				 AFI_L2VPN, SAFI_EVPN, &reason);
+}
+
/* Compare two bgp route entity. Return -1 if new is preferred, 1 if exist
* is preferred, or 0 if they are the same (usually will only occur if
* multipath is enabled
@@ -3172,19 +3254,10 @@ struct bgp_path_info *info_make(int type, int sub_type, unsigned short instance,
}
static void overlay_index_update(struct attr *attr,
- struct eth_segment_id *eth_s_id,
union gw_addr *gw_ip)
{
if (!attr)
return;
-
- if (eth_s_id == NULL) {
- memset(&(attr->evpn_overlay.eth_s_id), 0,
- sizeof(struct eth_segment_id));
- } else {
- memcpy(&(attr->evpn_overlay.eth_s_id), eth_s_id,
- sizeof(struct eth_segment_id));
- }
if (gw_ip == NULL) {
memset(&(attr->evpn_overlay.gw_ip), 0, sizeof(union gw_addr));
} else {
@@ -3194,20 +3267,17 @@ static void overlay_index_update(struct attr *attr,
}
static bool overlay_index_equal(afi_t afi, struct bgp_path_info *path,
- struct eth_segment_id *eth_s_id,
union gw_addr *gw_ip)
{
- struct eth_segment_id *path_eth_s_id, *path_eth_s_id_remote;
union gw_addr *path_gw_ip, *path_gw_ip_remote;
union {
- struct eth_segment_id esi;
+ esi_t esi;
union gw_addr ip;
} temp;
if (afi != AFI_L2VPN)
return true;
- path_eth_s_id = &(path->attr->evpn_overlay.eth_s_id);
path_gw_ip = &(path->attr->evpn_overlay.gw_ip);
if (gw_ip == NULL) {
@@ -3216,17 +3286,7 @@ static bool overlay_index_equal(afi_t afi, struct bgp_path_info *path,
} else
path_gw_ip_remote = gw_ip;
- if (eth_s_id == NULL) {
- memset(&temp, 0, sizeof(temp));
- path_eth_s_id_remote = &temp.esi;
- } else
- path_eth_s_id_remote = eth_s_id;
-
- if (!memcmp(path_gw_ip, path_gw_ip_remote, sizeof(union gw_addr)))
- return false;
-
- return !memcmp(path_eth_s_id, path_eth_s_id_remote,
- sizeof(struct eth_segment_id));
+ return !!memcmp(path_gw_ip, path_gw_ip_remote, sizeof(union gw_addr));
}
/* Check if received nexthop is valid or not. */
@@ -3521,7 +3581,7 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
num_labels * sizeof(mpls_label_t))
== 0)
&& (overlay_index_equal(
- afi, pi, evpn == NULL ? NULL : &evpn->eth_s_id,
+ afi, pi,
evpn == NULL ? NULL : &evpn->gw_ip))) {
if (CHECK_FLAG(bgp->af_flags[afi][safi],
BGP_CONFIG_DAMPENING)
@@ -3746,7 +3806,7 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
/* Update Overlay Index */
if (afi == AFI_L2VPN) {
overlay_index_update(
- pi->attr, evpn == NULL ? NULL : &evpn->eth_s_id,
+ pi->attr,
evpn == NULL ? NULL : &evpn->gw_ip);
}
@@ -3912,7 +3972,6 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
/* Update Overlay Index */
if (afi == AFI_L2VPN) {
overlay_index_update(new->attr,
- evpn == NULL ? NULL : &evpn->eth_s_id,
evpn == NULL ? NULL : &evpn->gw_ip);
}
/* Nexthop reachability check. */
@@ -5301,7 +5360,7 @@ static void bgp_static_update_safi(struct bgp *bgp, const struct prefix *p,
else if (bgp_static->gatewayIp.family == AF_INET6)
memcpy(&(add.ipv6), &(bgp_static->gatewayIp.u.prefix6),
sizeof(struct in6_addr));
- overlay_index_update(&attr, bgp_static->eth_s_id, &add);
+ memcpy(&attr.esi, bgp_static->eth_s_id, sizeof(esi_t));
if (bgp_static->encap_tunneltype == BGP_ENCAP_TYPE_VXLAN) {
struct bgp_encap_type_vxlan bet;
memset(&bet, 0, sizeof(struct bgp_encap_type_vxlan));
@@ -5352,7 +5411,7 @@ static void bgp_static_update_safi(struct bgp *bgp, const struct prefix *p,
if (pi) {
memset(&add, 0, sizeof(union gw_addr));
if (attrhash_cmp(pi->attr, attr_new)
- && overlay_index_equal(afi, pi, bgp_static->eth_s_id, &add)
+ && overlay_index_equal(afi, pi, &add)
&& !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
bgp_dest_unlock_node(dest);
bgp_attr_unintern(&attr_new);
@@ -5856,7 +5915,7 @@ int bgp_static_set_safi(afi_t afi, safi_t safi, struct vty *vty,
if (esi) {
bgp_static->eth_s_id =
XCALLOC(MTYPE_ATTR,
- sizeof(struct eth_segment_id));
+ sizeof(esi_t));
str2esi(esi, bgp_static->eth_s_id);
}
if (routermac) {
@@ -7582,6 +7641,7 @@ void route_vty_out(struct vty *vty, const struct prefix *p,
const char *nexthop_vrfname = VRF_DEFAULT_NAME;
char *nexthop_hostname =
bgp_nexthop_hostname(path->peer, path->nexthop);
+ char esi_buf[ESI_STR_LEN];
if (json_paths)
json_path = json_object_new_object();
@@ -7959,6 +8019,11 @@ void route_vty_out(struct vty *vty, const struct prefix *p,
vty_out(vty, "%s", bgp_origin_str[attr->origin]);
if (json_paths) {
+ if (bgp_evpn_is_esi_valid(&attr->esi)) {
+ json_object_string_add(json_path, "esi",
+ esi_to_str(&attr->esi,
+ esi_buf, sizeof(esi_buf)));
+ }
if (safi == SAFI_EVPN &&
attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)) {
json_ext_community = json_object_new_object();
@@ -8004,10 +8069,18 @@ void route_vty_out(struct vty *vty, const struct prefix *p,
} else {
vty_out(vty, "\n");
- if (safi == SAFI_EVPN &&
- attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)) {
- vty_out(vty, "%*s", 20, " ");
- vty_out(vty, "%s\n", attr->ecommunity->str);
+ if (safi == SAFI_EVPN) {
+ if (bgp_evpn_is_esi_valid(&attr->esi)) {
+ vty_out(vty, "%*s", 20, " ");
+ vty_out(vty, "ESI:%s\n",
+ esi_to_str(&attr->esi,
+ esi_buf, sizeof(esi_buf)));
+ }
+ if (attr->flag &
+ ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)) {
+ vty_out(vty, "%*s", 20, " ");
+ vty_out(vty, "%s\n", attr->ecommunity->str);
+ }
}
#ifdef ENABLE_BGP_VNC
@@ -8340,15 +8413,6 @@ void route_vty_out_overlay(struct vty *vty, const struct prefix *p,
}
}
- char *str = esi2str(&(attr->evpn_overlay.eth_s_id));
-
- if (!json_path)
- vty_out(vty, "%s", str);
- else
- json_object_string_add(json_overlay, "esi", str);
-
- XFREE(MTYPE_TMP, str);
-
if (is_evpn_prefix_ipaddr_v4((struct prefix_evpn *)p)) {
inet_ntop(AF_INET, &(attr->evpn_overlay.gw_ip.ipv4), buf,
BUFSIZ);
@@ -8632,6 +8696,10 @@ static const char *bgp_path_selection_reason2str(
return "EVPN sequence number";
case bgp_path_selection_evpn_lower_ip:
return "EVPN lower IP";
+ case bgp_path_selection_evpn_local_path:
+ return "EVPN local ES path";
+ case bgp_path_selection_evpn_non_proxy:
+ return "EVPN non proxy";
case bgp_path_selection_weight:
return "Weight";
case bgp_path_selection_local_pref:
@@ -8670,9 +8738,67 @@ static const char *bgp_path_selection_reason2str(
return "Invalid (internal error)";
}
-void route_vty_out_detail(struct vty *vty, struct bgp *bgp, struct bgp_dest *bn,
- struct bgp_path_info *path, afi_t afi, safi_t safi,
- json_object *json_paths)
+/*
+ * Emit the ES details of one path, either as text on 'vty' or as JSON
+ * members of 'json_path' when it is non-NULL.
+ *
+ * JSON: always adds "esi"; adds an "es_info" object only when the ES is
+ * local or bgp_evpn_attr_is_sync() reports true for the attribute.
+ * Text: prints the ESI and the local-es marker, followed by the peer
+ * flags and sync sequence number when bgp_evpn_attr_is_sync() is true.
+ */
+static void route_vty_out_detail_es_info(struct vty *vty,
+					 struct attr *attr, json_object *json_path)
+{
+	char esi_buf[ESI_STR_LEN];
+	bool es_local = !!CHECK_FLAG(attr->es_flags, ATTR_ES_IS_LOCAL);
+	bool peer_active = !!CHECK_FLAG(attr->es_flags, ATTR_ES_PEER_ACTIVE);
+	bool peer_proxy = !!CHECK_FLAG(attr->es_flags, ATTR_ES_PEER_PROXY);
+	bool peer_router = !!CHECK_FLAG(attr->es_flags, ATTR_ES_PEER_ROUTER);
+	json_object *json_es_info;
+
+	esi_to_str(&attr->esi, esi_buf, sizeof(esi_buf));
+
+	/* plain text output */
+	if (!json_path) {
+		if (!bgp_evpn_attr_is_sync(attr)) {
+			vty_out(vty, " ESI %s %s\n",
+				esi_buf,
+				es_local ? "local-es":"");
+			return;
+		}
+
+		vty_out(vty,
+			" ESI %s %s peer-info: (%s%s%sMM: %d)\n",
+			esi_buf,
+			es_local ? "local-es":"",
+			peer_proxy ? "proxy " : "",
+			peer_active ? "active ":"",
+			peer_router ? "router ":"",
+			attr->mm_sync_seqnum);
+		return;
+	}
+
+	/* JSON output */
+	json_object_string_add(json_path, "esi", esi_buf);
+
+	/* nothing beyond the ESI unless the ES is local or synced */
+	if (!es_local && !bgp_evpn_attr_is_sync(attr))
+		return;
+
+	json_es_info = json_object_new_object();
+	if (es_local)
+		json_object_boolean_true_add(json_es_info, "localEs");
+	if (peer_active)
+		json_object_boolean_true_add(json_es_info, "peerActive");
+	if (peer_proxy)
+		json_object_boolean_true_add(json_es_info, "peerProxy");
+	if (peer_router)
+		json_object_boolean_true_add(json_es_info, "peerRouter");
+	if (attr->mm_sync_seqnum)
+		json_object_int_add(json_es_info, "peerSeq",
+				    attr->mm_sync_seqnum);
+	json_object_object_add(json_path, "es_info", json_es_info);
+}
+
+void route_vty_out_detail(struct vty *vty, struct bgp *bgp,
+ struct bgp_dest *bn, struct bgp_path_info *path,
+ afi_t afi, safi_t safi, json_object *json_paths)
{
char buf[INET6_ADDRSTRLEN];
char buf1[BUFSIZ];
@@ -9142,6 +9268,11 @@ void route_vty_out_detail(struct vty *vty, struct bgp *bgp, struct bgp_dest *bn,
"used");
}
+ if (safi == SAFI_EVPN &&
+ bgp_evpn_is_esi_valid(&attr->esi)) {
+ route_vty_out_detail_es_info(vty, attr, json_path);
+ }
+
/* Line 3 display Origin, Med, Locpref, Weight, Tag, valid,
* Int/Ext/Local, Atomic, best */
if (json_paths)
@@ -13316,6 +13447,7 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp,
char buf[PREFIX_STRLEN * 2];
char buf2[SU_ADDRSTRLEN];
char rdbuf[RD_ADDRSTRLEN];
+ char esi_buf[ESI_BYTES];
/* Network configuration. */
for (pdest = bgp_table_top(bgp->route[afi][safi]); pdest;
@@ -13331,13 +13463,13 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp,
continue;
char *macrouter = NULL;
- char *esi = NULL;
if (bgp_static->router_mac)
macrouter = prefix_mac2str(
bgp_static->router_mac, NULL, 0);
if (bgp_static->eth_s_id)
- esi = esi2str(bgp_static->eth_s_id);
+ esi_to_str(bgp_static->eth_s_id,
+ esi_buf, sizeof(esi_buf));
p = bgp_dest_get_prefix(dest);
prd = (struct prefix_rd *)bgp_dest_get_prefix(pdest);
@@ -13368,11 +13500,10 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp,
" network %s rd %s ethtag %u label %u esi %s gwip %s routermac %s\n",
buf, rdbuf,
p->u.prefix_evpn.prefix_addr.eth_tag,
- decode_label(&bgp_static->label), esi, buf2,
+ decode_label(&bgp_static->label), esi_buf, buf2,
macrouter);
XFREE(MTYPE_TMP, macrouter);
- XFREE(MTYPE_TMP, esi);
}
}
}
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index 32c65c8fac..3f734d2672 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -99,6 +99,7 @@ enum bgp_show_adj_route_type {
#define BGP_NLRI_PARSE_ERROR_FLOWSPEC_NLRI_SIZELIMIT -12
#define BGP_NLRI_PARSE_ERROR_FLOWSPEC_BAD_FORMAT -13
#define BGP_NLRI_PARSE_ERROR_ADDRESS_FAMILY -14
+#define BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE -15
#define BGP_NLRI_PARSE_ERROR -32
/* Ancillary information to struct bgp_path_info,
@@ -303,7 +304,7 @@ struct bgp_static {
mpls_label_t label;
/* EVPN */
- struct eth_segment_id *eth_s_id;
+ esi_t *eth_s_id;
struct ethaddr *router_mac;
uint16_t encap_tunneltype;
struct prefix gatewayIp;
@@ -681,4 +682,6 @@ extern int bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi);
extern bool bgp_update_martian_nexthop(struct bgp *bgp, afi_t afi, safi_t safi,
uint8_t type, uint8_t stype,
struct attr *attr, struct bgp_dest *dest);
+extern int bgp_evpn_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
+ struct bgp_path_info *exist, int *paths_eq);
#endif /* _QUAGGA_BGP_ROUTE_H */
diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h
index a9ec36d29b..cf0086b52e 100644
--- a/bgpd/bgp_table.h
+++ b/bgpd/bgp_table.h
@@ -51,6 +51,8 @@ enum bgp_path_selection_reason {
bgp_path_selection_first,
bgp_path_selection_evpn_sticky_mac,
bgp_path_selection_evpn_seq,
+ bgp_path_selection_evpn_local_path,
+ bgp_path_selection_evpn_non_proxy,
bgp_path_selection_evpn_lower_ip,
bgp_path_selection_weight,
bgp_path_selection_local_pref,
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index c53286cb36..0268b7ec9d 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -65,6 +65,7 @@
#include "bgpd/bgp_io.h"
#include "bgpd/bgp_evpn.h"
#include "bgpd/bgp_evpn_vty.h"
+#include "bgpd/bgp_evpn_mh.h"
#include "bgpd/bgp_addpath.h"
#include "bgpd/bgp_mac.h"
#include "bgpd/bgp_flowspec.h"
@@ -1093,7 +1094,8 @@ DEFUN_HIDDEN (bgp_local_mac,
return CMD_WARNING;
}
- rv = bgp_evpn_local_macip_add(bgp, vni, &mac, &ip, 0 /* flags */, seq);
+ rv = bgp_evpn_local_macip_add(bgp, vni, &mac, &ip, 0 /* flags */, seq,
+ zero_esi);
if (rv < 0) {
vty_out(vty, "Internal error\n");
return CMD_WARNING;
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 0a55a46ed4..87936f1dd6 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -60,6 +60,7 @@
#include "bgpd/bgp_labelpool.h"
#include "bgpd/bgp_pbr.h"
#include "bgpd/bgp_evpn_private.h"
+#include "bgpd/bgp_evpn_mh.h"
#include "bgpd/bgp_mac.h"
/* All information about zebra. */
@@ -1143,8 +1144,7 @@ static bool update_ipv6nh_for_route_install(int nh_othervrf, struct bgp *nh_bgp,
api_nh->ifindex = 0;
}
}
- if (nexthop)
- api_nh->gate.ipv6 = *nexthop;
+ api_nh->gate.ipv6 = *nexthop;
return true;
}
@@ -2499,17 +2499,66 @@ static void bgp_zebra_connected(struct zclient *zclient)
BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer);
}
-static int bgp_zebra_process_local_es(ZAPI_CALLBACK_ARGS)
+/*
+ * Handle a local ES add message from zebra.
+ *
+ * Reads, in this order, an ESI, an IPv4 originator address and a one-byte
+ * "active" value from zclient->ibuf and passes them to
+ * bgp_evpn_local_es_add(). Always returns 0; the message is silently
+ * dropped when no BGP instance matches vrf_id.
+ */
+static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS)
+{
+ esi_t esi;
+ struct bgp *bgp = NULL;
+ struct stream *s = NULL;
+ char buf[ESI_STR_LEN];
+ struct in_addr originator_ip;
+ uint8_t active;
+
+ bgp = bgp_lookup_by_vrf_id(vrf_id);
+ if (!bgp)
+ return 0;
+
+ /* the read order below must match the sender's encoding */
+ s = zclient->ibuf;
+ stream_get(&esi, s, sizeof(esi_t));
+ originator_ip.s_addr = stream_get_ipv4(s);
+ active = stream_getc(s);
+
+ if (BGP_DEBUG(zebra, ZEBRA))
+ zlog_debug("Rx add ESI %s originator-ip %s active %u",
+ esi_to_str(&esi, buf, sizeof(buf)),
+ inet_ntoa(originator_ip),
+ active);
+
+ bgp_evpn_local_es_add(bgp, &esi, originator_ip, active);
+
+ return 0;
+}
+
+/*
+ * Handle a local ES delete message from zebra.
+ *
+ * Only the ESI is read from zclient->ibuf; it is then handed to
+ * bgp_evpn_local_es_del(). Always returns 0; the message is silently
+ * dropped when no BGP instance matches vrf_id.
+ */
+static int bgp_zebra_process_local_es_del(ZAPI_CALLBACK_ARGS)
{
esi_t esi;
struct bgp *bgp = NULL;
struct stream *s = NULL;
char buf[ESI_STR_LEN];
- char buf1[INET6_ADDRSTRLEN];
- struct ipaddr originator_ip;
memset(&esi, 0, sizeof(esi_t));
- memset(&originator_ip, 0, sizeof(struct ipaddr));
+ bgp = bgp_lookup_by_vrf_id(vrf_id);
+ if (!bgp)
+ return 0;
+
+ s = zclient->ibuf;
+ stream_get(&esi, s, sizeof(esi_t));
+
+ if (BGP_DEBUG(zebra, ZEBRA))
+ zlog_debug("Rx del ESI %s",
+ esi_to_str(&esi, buf, sizeof(buf)));
+
+ bgp_evpn_local_es_del(bgp, &esi);
+
+ return 0;
+}
+
+/*
+ * Handle a local ES-EVI add or delete message from zebra.
+ *
+ * The same callback serves both messages: cmd selects between
+ * bgp_evpn_local_es_evi_add() and bgp_evpn_local_es_evi_del(). The
+ * payload read from zclient->ibuf is an ESI followed by a 32-bit VNI.
+ */
+static int bgp_zebra_process_local_es_evi(ZAPI_CALLBACK_ARGS)
+{
+ esi_t esi;
+ vni_t vni;
+ struct bgp *bgp;
+ struct stream *s;
+ char buf[ESI_STR_LEN];
bgp = bgp_lookup_by_vrf_id(vrf_id);
if (!bgp)
@@ -2517,18 +2566,18 @@ static int bgp_zebra_process_local_es(ZAPI_CALLBACK_ARGS)
s = zclient->ibuf;
stream_get(&esi, s, sizeof(esi_t));
- stream_get(&originator_ip, s, sizeof(struct ipaddr));
+ vni = stream_getl(s);
if (BGP_DEBUG(zebra, ZEBRA))
- zlog_debug("Rx %s ESI %s originator-ip %s",
- (cmd == ZEBRA_LOCAL_ES_ADD) ? "add" : "del",
- esi_to_str(&esi, buf, sizeof(buf)),
- ipaddr2str(&originator_ip, buf1, sizeof(buf1)));
+ zlog_debug("Rx %s ESI %s VNI %u",
+ (cmd == ZEBRA_LOCAL_ES_EVI_ADD) ? "add" : "del",
+ esi_to_str(&esi, buf, sizeof(buf)), vni);
- if (cmd == ZEBRA_LOCAL_ES_ADD)
- bgp_evpn_local_es_add(bgp, &esi, &originator_ip);
+ if (cmd == ZEBRA_LOCAL_ES_EVI_ADD)
+ bgp_evpn_local_es_evi_add(bgp, &esi, vni);
else
- bgp_evpn_local_es_del(bgp, &esi, &originator_ip);
+ bgp_evpn_local_es_evi_del(bgp, &esi, vni);
+
return 0;
}
@@ -2628,6 +2677,8 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS)
uint8_t flags = 0;
uint32_t seqnum = 0;
int state = 0;
+ char buf2[ESI_STR_LEN];
+ esi_t esi;
memset(&ip, 0, sizeof(ip));
s = zclient->ibuf;
@@ -2651,6 +2702,7 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS)
if (cmd == ZEBRA_MACIP_ADD) {
flags = stream_getc(s);
seqnum = stream_getl(s);
+ stream_get(&esi, s, sizeof(esi_t));
} else {
state = stream_getl(s);
+ /* the delete path reads no ESI from the stream; zero it so
+ * the debug log below never formats uninitialized bytes
+ */
+ memset(&esi, 0, sizeof(esi_t));
}
@@ -2660,15 +2712,15 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS)
return 0;
if (BGP_DEBUG(zebra, ZEBRA))
- zlog_debug("%u:Recv MACIP %s flags 0x%x MAC %s IP %s VNI %u seq %u state %d",
+ zlog_debug("%u:Recv MACIP %s f 0x%x MAC %s IP %s VNI %u seq %u state %d ESI %s",
vrf_id, (cmd == ZEBRA_MACIP_ADD) ? "Add" : "Del",
flags, prefix_mac2str(&mac, buf, sizeof(buf)),
ipaddr2str(&ip, buf1, sizeof(buf1)), vni, seqnum,
- state);
+ state, esi_to_str(&esi, buf2, sizeof(buf2)));
if (cmd == ZEBRA_MACIP_ADD)
return bgp_evpn_local_macip_add(bgp, vni, &mac, &ip,
- flags, seqnum);
+ flags, seqnum, &esi);
else
return bgp_evpn_local_macip_del(bgp, vni, &mac, &ip, state);
}
@@ -2801,9 +2853,11 @@ void bgp_zebra_init(struct thread_master *master, unsigned short instance)
zclient->nexthop_update = bgp_read_nexthop_update;
zclient->import_check_update = bgp_read_import_check_update;
zclient->fec_update = bgp_read_fec_update;
- zclient->local_es_add = bgp_zebra_process_local_es;
- zclient->local_es_del = bgp_zebra_process_local_es;
+ zclient->local_es_add = bgp_zebra_process_local_es_add;
+ zclient->local_es_del = bgp_zebra_process_local_es_del;
zclient->local_vni_add = bgp_zebra_process_local_vni;
+ zclient->local_es_evi_add = bgp_zebra_process_local_es_evi;
+ zclient->local_es_evi_del = bgp_zebra_process_local_es_evi;
zclient->local_vni_del = bgp_zebra_process_local_vni;
zclient->local_macip_add = bgp_zebra_process_local_macip;
zclient->local_macip_del = bgp_zebra_process_local_macip;
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index f0ee800287..c9e6fd2ac0 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -87,6 +87,7 @@
#include "bgpd/bgp_pbr.h"
#include "bgpd/bgp_addpath.h"
#include "bgpd/bgp_evpn_private.h"
+#include "bgpd/bgp_evpn_mh.h"
#include "bgpd/bgp_mac.h"
DEFINE_MTYPE_STATIC(BGPD, PEER_TX_SHUTDOWN_MSG, "Peer shutdown message (TX)");
@@ -1227,6 +1228,10 @@ struct peer *peer_new(struct bgp *bgp)
peer->addpath_type[afi][safi] = BGP_ADDPATH_NONE;
}
+ /* set nexthop-unchanged for l2vpn evpn by default */
+ SET_FLAG(peer->af_flags[AFI_L2VPN][SAFI_EVPN],
+ PEER_FLAG_NEXTHOP_UNCHANGED);
+
SET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
/* Initialize per peer bgp GR FSM */
@@ -6935,6 +6940,7 @@ void bgp_master_init(struct thread_master *master, const int buffer_size)
/* mpls label dynamic allocation pool */
bgp_lp_init(bm->master, &bm->labelpool);
+ bgp_evpn_mh_init();
QOBJ_REG(bm, bgp_master);
}
@@ -7134,6 +7140,7 @@ void bgp_terminate(void)
BGP_TIMER_OFF(bm->t_rmap_update);
bgp_mac_finish();
+ bgp_evpn_mh_finish();
}
struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp,
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 8eea2a5f60..966de87830 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -165,6 +165,9 @@ struct bgp_master {
/* How big should we set the socket buffer size */
uint32_t socket_buffer;
+ /* EVPN multihoming */
+ struct bgp_evpn_mh_info *mh_info;
+
bool terminating; /* global flag that sigint terminate seen */
QOBJ_FIELDS
};
@@ -661,9 +664,6 @@ struct bgp {
struct bgp_pbr_config *bgp_pbr_cfg;
- /* local esi hash table */
- struct hash *esihash;
-
/* Count of peers in established state */
uint32_t established_peers;
diff --git a/bgpd/subdir.am b/bgpd/subdir.am
index 6b5c0fe719..a5393e25ac 100644
--- a/bgpd/subdir.am
+++ b/bgpd/subdir.am
@@ -15,6 +15,7 @@ vtysh_scan += \
bgpd/bgp_bfd.c \
bgpd/bgp_debug.c \
bgpd/bgp_dump.c \
+ bgpd/bgp_evpn_mh.c \
bgpd/bgp_evpn_vty.c \
bgpd/bgp_filter.c \
bgpd/bgp_mplsvpn.c \
@@ -65,6 +66,7 @@ bgpd_libbgp_a_SOURCES = \
bgpd/bgp_encap_tlv.c \
bgpd/bgp_errors.c \
bgpd/bgp_evpn.c \
+ bgpd/bgp_evpn_mh.c \
bgpd/bgp_evpn_vty.c \
bgpd/bgp_filter.c \
bgpd/bgp_flowspec.c \
@@ -139,6 +141,7 @@ noinst_HEADERS += \
bgpd/bgp_encap_types.h \
bgpd/bgp_errors.h \
bgpd/bgp_evpn.h \
+ bgpd/bgp_evpn_mh.h \
bgpd/bgp_evpn_private.h \
bgpd/bgp_evpn_vty.h \
bgpd/bgp_filter.h \
diff --git a/doc/developer/building-frr-for-openwrt.rst b/doc/developer/building-frr-for-openwrt.rst
index 5d8f82f27e..9bd1296dad 100644
--- a/doc/developer/building-frr-for-openwrt.rst
+++ b/doc/developer/building-frr-for-openwrt.rst
@@ -1,6 +1,8 @@
-OpenWRT
+OpenWrt
=======
+General info about OpenWrt buildsystem: `link <https://openwrt.org/docs/guide-developer/build-system/start>`_.
+
Prepare build environment
-------------------------
@@ -13,16 +15,16 @@ For Debian based distributions, run:
For other environments, instructions can be found in the
`official documentation
-<https://wiki.openwrt.org/doc/howto/buildroot.exigence#examples_of_package_installations>`_.
+<https://openwrt.org/docs/guide-developer/build-system/install-buildsystem#examples_of_package_installations>`_.
-Get OpenWRT Sources (from Git)
+Get OpenWrt Sources (from Git)
------------------------------
.. note::
- The OpenWRT build will fail if you run it as root. So take care to run it as a nonprivileged user.
+ The OpenWrt build will fail if you run it as root. So take care to run it as a nonprivileged user.
-Clone the OpenWRT sources and retrieve the package feeds
+Clone the OpenWrt sources and retrieve the package feeds
::
@@ -30,21 +32,15 @@ Clone the OpenWRT sources and retrieve the package feeds
cd openwrt
./scripts/feeds update -a
./scripts/feeds install -a
- cd feeds/routing
- git fetch origin pull/319/head
- git read-tree --prefix=frr/ -u FETCH_HEAD:frr
- cd ../../package/feeds/routing/
- ln -sv ../../../feeds/routing/frr .
- cd ../../..
-
-Configure OpenWRT for your target and select the needed FRR packages in Network -> Routing and Redirection -> frr,
+
+Configure OpenWrt for your target and select the needed FRR packages in Network -> Routing and Redirection -> frr,
exit and save
::
make menuconfig
-Then, to compile either a complete OpenWRT image, or the FRR packages, run:
+Then, to compile either a complete OpenWrt image, or the FRR packages, run:
::
@@ -54,10 +50,16 @@ It may be possible that on first build ``make package/frr/compile`` not
to work and it may be needed to run a ``make`` for the entire build
environment. Add ``V=s`` to get more debugging output.
+More information about OpenWrt buildsystem can be found `here
+<https://openwrt.org/docs/guide-developer/build-system/use-buildsystem>`_.
+
Work with sources
-----------------
-To update to a newer version, or change other options, you need to edit the ``feeds/routing/frr/Makefile``.
+To update to a newer version, or change other options, you need to edit the ``feeds/packages/frr/Makefile``.
+
+More information about working with patches in OpenWrt buildsystem can be found `here
+<https://openwrt.org/docs/guide-developer/build-system/use-patches-with-buildsystem>`_.
Usage
-----
diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst
index cc27bc202d..db0776adba 100644
--- a/doc/user/bgp.rst
+++ b/doc/user/bgp.rst
@@ -2465,26 +2465,6 @@ the same behavior of using same next-hop and RMAC values.
Enables or disables advertise-pip feature, specifiy system-IP and/or system-MAC
parameters.
-Support with VRF network namespace backend
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-It is possible to separate overlay networks contained in VXLAN interfaces from
-underlay networks by using VRFs. VRF-lite and VRF-netns backends can be used for
-that. In the latter case, this is necessary to set both bridge and vxlan interface
-on the same network namespace, as below example illustrates:
-
-.. code-block:: shell
-
- # linux shell
- ip netns add vrf1
- ip link add name vxlan101 type vxlan id 101 dstport 4789 dev eth0 local 10.1.1.1
- ip link set dev vxlan101 netns vrf1
- ip netns exec vrf1 ip link set dev lo up
- ip netns exec vrf1 brctl addbr bridge101
- ip netns exec vrf1 brctl addif bridge101 vxlan101
-
-This makes possible to separate not only layer 3 networks like VRF-lite networks.
-Also, VRF netns based make possible to separate layer 2 networks on separate VRF
-instances.
.. _bgp-debugging:
diff --git a/doc/user/sharp.rst b/doc/user/sharp.rst
index 1c474193f2..76bdc48dc0 100644
--- a/doc/user/sharp.rst
+++ b/doc/user/sharp.rst
@@ -88,13 +88,13 @@ keyword. At present, no sharp commands will be preserved in the config.
may have been turned on.
.. index:: sharp lsp
-.. clicmd:: sharp lsp (0-100000) nexthop-group NAME [prefix A.B.C.D/M TYPE [instance (0-255)]]
+.. clicmd:: sharp lsp [update] (0-100000) nexthop-group NAME [prefix A.B.C.D/M TYPE [instance (0-255)]]
Install an LSP using the specified in-label, with nexthops as
- listed in nexthop-group ``NAME``. The LSP is installed as type
- ZEBRA_LSP_SHARP. If ``prefix`` is specified, an existing route with
- type ``TYPE`` (and optional ``instance`` id) will be updated to use
- the LSP.
+ listed in nexthop-group ``NAME``. If ``update`` is included, the
+ update path is used. The LSP is installed as type ZEBRA_LSP_SHARP.
+ If ``prefix`` is specified, an existing route with type ``TYPE``
+ (and optional ``instance`` id) will be updated to use the LSP.
.. index:: sharp remove lsp
.. clicmd:: sharp remove lsp (0-100000) nexthop-group NAME [prefix A.B.C.D/M TYPE [instance (0-255)]]
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index fb79481cb2..50011d55ec 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -293,4 +293,15 @@ struct br_mcast_stats {
__u64 mcast_bytes[BR_MCAST_DIR_SIZE];
__u64 mcast_packets[BR_MCAST_DIR_SIZE];
};
+
+/* FDB notification bits for NDA_NOTIFY:
+ * - BR_FDB_NFY_STATIC - notify on activity/expire even for a static entry
+ * - BR_FDB_NFY_INACTIVE - mark as inactive to avoid double notification,
+ * used with BR_FDB_NFY_STATIC (kernel controlled)
+ */
+enum {
+ BR_FDB_NFY_STATIC,
+ BR_FDB_NFY_INACTIVE,
+ BR_FDB_NFY_MAX
+};
#endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index cd144e3099..33c17af1cc 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -29,6 +29,8 @@ enum {
NDA_LINK_NETNSID,
NDA_SRC_VNI,
NDA_PROTOCOL, /* Originator of entry */
+ NDA_NH_ID,
+ NDA_NOTIFY,
__NDA_MAX
};
diff --git a/include/linux/net_namespace.h b/include/linux/net_namespace.h
index 0ed9dd61d3..0187c74d88 100644
--- a/include/linux/net_namespace.h
+++ b/include/linux/net_namespace.h
@@ -16,7 +16,6 @@ enum {
NETNSA_NSID,
NETNSA_PID,
NETNSA_FD,
- NETNSA_TARGET_NSID,
__NETNSA_MAX,
};
diff --git a/include/linux/nexthop.h b/include/linux/nexthop.h
index e4d6e256ef..ee2a15b9c7 100644
--- a/include/linux/nexthop.h
+++ b/include/linux/nexthop.h
@@ -50,6 +50,7 @@ enum {
*/
NHA_GROUPS, /* flag; only return nexthop groups in dump */
NHA_MASTER, /* u32; only return nexthops with given master dev */
+ NHA_FDB, /* nexthop belongs to a bridge fdb */
__NHA_MAX,
};
diff --git a/lib/bitfield.h b/lib/bitfield.h
index 72980165f9..244938933b 100644
--- a/lib/bitfield.h
+++ b/lib/bitfield.h
@@ -58,7 +58,7 @@ typedef unsigned int word_t;
* @n: The current word number that is being used.
* @m: total number of words in 'data'
*/
-#define bitfield_t struct { word_t *data; size_t n, m; }
+typedef struct {word_t *data; size_t n, m; } bitfield_t;
/**
* Initialize the bits.
@@ -97,6 +97,16 @@ typedef unsigned int word_t;
#define bf_release_index(v, id) \
(v).data[bf_index(id)] &= ~(1 << (bf_offset(id)))
+/* check if an id is in use: non-zero when the bit for id is set.
+ * The mask is built from a word_t so the shift stays unsigned even if
+ * bf_offset(id) selects the top bit of the word.
+ */
+#define bf_test_index(v, id) \
+ ((v).data[bf_index(id)] & ((word_t)1 << (bf_offset(id))))
+
+/* check if the bit field has been setup (its data pointer is non-NULL) */
+#define bf_is_inited(v) ((v).data)
+
+/* compare two bitmaps of the same length; memcmp() semantics, 0 when the
+ * first (v1).m words are identical
+ */
+#define bf_cmp(v1, v2) (memcmp((v1).data, (v2).data, ((v1).m * sizeof(word_t))))
+
/*
* return 0th index back to bitfield
*/
@@ -146,6 +156,37 @@ typedef unsigned int word_t;
(b) += (w * WORD_SIZE); \
} while (0)
+/*
+ * Return the index of the first set bit at or after start_index, or
+ * WORD_MAX when none of the remaining words (up to v.m) has a bit set.
+ */
+static inline unsigned int bf_find_next_set_bit(bitfield_t v,
+ word_t start_index)
+{
+ unsigned long word = bf_index(start_index);
+ unsigned long bit = start_index & (WORD_SIZE - 1);
+
+ while (word < v.m) {
+ /* an all-zero word is skipped without scanning its bits */
+ if (v.data[word] != 0) {
+ for (; bit < WORD_SIZE; ++bit) {
+ if ((v.data[word] >> bit) & 1)
+ return ((word * WORD_SIZE) + bit);
+ }
+ }
+ /* only the first word honours the starting offset */
+ bit = 0;
+ ++word;
+ }
+ return WORD_MAX;
+}
+
+/* iterate through all the set bits: b takes the index of each set bit of
+ * v in increasing order and the loop stops at the first index >= max
+ * (which includes the value bf_find_next_set_bit() returns when no bit
+ * is left, provided max does not exceed it)
+ */
+#define bf_for_each_set_bit(v, b, max) \
+ for ((b) = bf_find_next_set_bit((v), 0); \
+ (b) < (max); \
+ (b) = bf_find_next_set_bit((v), (b) + 1))
+
/*
* Free the allocated memory for data
* @v: an instance of bitfield_t struct.
diff --git a/lib/if.c b/lib/if.c
index 07e786c708..d8392708e1 100644
--- a/lib/if.c
+++ b/lib/if.c
@@ -217,16 +217,14 @@ struct interface *if_create_name(const char *name, vrf_id_t vrf_id)
return ifp;
}
-struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id,
- char *optional_name)
+struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id)
{
struct interface *ifp;
ifp = if_new(vrf_id);
if_set_index(ifp, ifindex);
- if (optional_name)
- if_set_name(ifp, optional_name);
+
hook_call(if_add, ifp);
return ifp;
}
@@ -573,8 +571,7 @@ struct interface *if_get_by_name(const char *name, vrf_id_t vrf_id)
return NULL;
}
-struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id,
- char *optional_name)
+struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id)
{
struct interface *ifp;
@@ -584,7 +581,7 @@ struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id,
ifp = if_lookup_by_ifindex(ifindex, vrf_id);
if (ifp)
return ifp;
- return if_create_ifindex(ifindex, vrf_id, optional_name);
+ return if_create_ifindex(ifindex, vrf_id);
case VRF_BACKEND_VRF_LITE:
ifp = if_lookup_by_index_all_vrf(ifindex);
if (ifp) {
@@ -596,7 +593,7 @@ struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id,
if_update_to_new_vrf(ifp, vrf_id);
return ifp;
}
- return if_create_ifindex(ifindex, vrf_id, optional_name);
+ return if_create_ifindex(ifindex, vrf_id);
}
return NULL;
diff --git a/lib/if.h b/lib/if.h
index 40e87c1e31..1fb0757db2 100644
--- a/lib/if.h
+++ b/lib/if.h
@@ -509,8 +509,7 @@ extern void if_update_to_new_vrf(struct interface *, vrf_id_t vrf_id);
extern struct interface *if_create_name(const char *name, vrf_id_t vrf_id);
/* Create new interface, adds to index list only */
-extern struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id,
- char *name);
+extern struct interface *if_create_ifindex(ifindex_t ifindex, vrf_id_t vrf_id);
extern struct interface *if_lookup_by_index(ifindex_t, vrf_id_t vrf_id);
extern struct interface *if_lookup_by_index_all_vrf(ifindex_t);
extern struct interface *if_lookup_exact_address(const void *matchaddr,
@@ -527,8 +526,8 @@ extern struct interface *if_lookup_by_name_all_vrf(const char *ifname);
extern struct interface *if_lookup_by_name_vrf(const char *name, struct vrf *vrf);
extern struct interface *if_lookup_by_name(const char *ifname, vrf_id_t vrf_id);
extern struct interface *if_get_by_name(const char *ifname, vrf_id_t vrf_id);
-extern struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id,
- char *optional_name);
+extern struct interface *if_get_by_ifindex(ifindex_t ifindex, vrf_id_t vrf_id);
+
/* Sets the index and adds to index list */
extern int if_set_index(struct interface *ifp, ifindex_t ifindex);
/* Sets the name and adds to name list */
diff --git a/lib/linklist.c b/lib/linklist.c
index 272e153276..2936c5b502 100644
--- a/lib/linklist.c
+++ b/lib/linklist.c
@@ -38,16 +38,30 @@ static void list_free_internal(struct list *l)
XFREE(MTYPE_LINK_LIST, l);
}
+
/* Allocate new listnode. Internal use only. */
+/* Returns the node to be linked into list: in app-managed mode that is
+ * val itself with its prev/next links reset (node->data is left as the
+ * caller set it); otherwise a zeroed allocation whose data is val.
+ */
-static struct listnode *listnode_new(void)
+static struct listnode *listnode_new(struct list *list, void *val)
{
- return XCALLOC(MTYPE_LINK_NODE, sizeof(struct listnode));
+ struct listnode *node;
+
+ /* if listnode memory is managed by the app then the val
+ * passed in is the listnode
+ */
+ if (list->flags & LINKLIST_FLAG_NODE_MEM_BY_APP) {
+ node = val;
+ node->prev = node->next = NULL;
+ } else {
+ node = XCALLOC(MTYPE_LINK_NODE, sizeof(struct listnode));
+ node->data = val;
+ }
+ return node;
}
/* Free listnode. */
-static void listnode_free(struct listnode *node)
+static void listnode_free(struct list *list, struct listnode *node)
{
- XFREE(MTYPE_LINK_NODE, node);
+ /* nodes of an app-managed list were not allocated by listnode_new(),
+ * so they are not released here
+ */
+ if (!(list->flags & LINKLIST_FLAG_NODE_MEM_BY_APP))
+ XFREE(MTYPE_LINK_NODE, node);
}
struct listnode *listnode_add(struct list *list, void *val)
@@ -56,10 +70,9 @@ struct listnode *listnode_add(struct list *list, void *val)
assert(val != NULL);
- node = listnode_new();
+ node = listnode_new(list, val);
node->prev = list->tail;
- node->data = val;
if (list->head == NULL)
list->head = node;
@@ -78,10 +91,9 @@ void listnode_add_head(struct list *list, void *val)
assert(val != NULL);
- node = listnode_new();
+ node = listnode_new(list, val);
node->next = list->head;
- node->data = val;
if (list->head == NULL)
list->head = node;
@@ -97,15 +109,22 @@ bool listnode_add_sort_nodup(struct list *list, void *val)
struct listnode *n;
struct listnode *new;
int ret;
+ void *data;
assert(val != NULL);
+ if (list->flags & LINKLIST_FLAG_NODE_MEM_BY_APP) {
+ n = val;
+ data = n->data;
+ } else {
+ data = val;
+ }
+
if (list->cmp) {
for (n = list->head; n; n = n->next) {
- ret = (*list->cmp)(val, n->data);
+ ret = (*list->cmp)(data, n->data);
if (ret < 0) {
- new = listnode_new();
- new->data = val;
+ new = listnode_new(list, val);
new->next = n;
new->prev = n->prev;
@@ -124,8 +143,7 @@ bool listnode_add_sort_nodup(struct list *list, void *val)
}
}
- new = listnode_new();
- new->data = val;
+ new = listnode_new(list, val);
LISTNODE_ATTACH(list, new);
@@ -139,8 +157,8 @@ void listnode_add_sort(struct list *list, void *val)
assert(val != NULL);
- new = listnode_new();
- new->data = val;
+ new = listnode_new(list, val);
+ val = new->data;
if (list->cmp) {
for (n = list->head; n; n = n->next) {
@@ -177,8 +195,7 @@ struct listnode *listnode_add_after(struct list *list, struct listnode *pp,
assert(val != NULL);
- nn = listnode_new();
- nn->data = val;
+ nn = listnode_new(list, val);
if (pp == NULL) {
if (list->head)
@@ -212,8 +229,7 @@ struct listnode *listnode_add_before(struct list *list, struct listnode *pp,
assert(val != NULL);
- nn = listnode_new();
- nn->data = val;
+ nn = listnode_new(list, val);
if (pp == NULL) {
if (list->tail)
@@ -276,7 +292,7 @@ void list_delete_all_node(struct list *list)
next = node->next;
if (*list->del)
(*list->del)(node->data);
- listnode_free(node);
+ listnode_free(list, node);
}
list->head = list->tail = NULL;
list->count = 0;
@@ -336,7 +352,7 @@ void list_delete_node(struct list *list, struct listnode *node)
else
list->tail = node->prev;
list->count--;
- listnode_free(node);
+ listnode_free(list, node);
}
void list_sort(struct list *list, int (*cmp)(const void **, const void **))
diff --git a/lib/linklist.h b/lib/linklist.h
index 00cb9f8714..94a1a1604a 100644
--- a/lib/linklist.h
+++ b/lib/linklist.h
@@ -43,6 +43,12 @@ struct list {
/* invariant: count is the number of listnodes in the list */
unsigned int count;
+ uint8_t flags;
+/* Indicates that listnode memory is managed by the application and
+ * doesn't need to be freed by this library via listnode_delete etc.
+ */
+#define LINKLIST_FLAG_NODE_MEM_BY_APP (1 << 0)
+
/*
* Returns -1 if val1 < val2, 0 if equal?, 1 if val1 > val2.
* Used as definition of sorted for listnode_add_sort
@@ -60,10 +66,14 @@ struct list {
#define listhead(X) ((X) ? ((X)->head) : NULL)
#define listhead_unchecked(X) ((X)->head)
#define listtail(X) ((X) ? ((X)->tail) : NULL)
+#define listtail_unchecked(X) ((X)->tail)
#define listcount(X) ((X)->count)
#define list_isempty(X) ((X)->head == NULL && (X)->tail == NULL)
/* return X->data only if X and X->data are not NULL */
#define listgetdata(X) (assert(X), assert((X)->data != NULL), (X)->data)
+/* App is going to manage listnode memory */
+#define listset_app_node_mem(X) ((X)->flags |= LINKLIST_FLAG_NODE_MEM_BY_APP)
+#define listnode_init(X, val) ((X)->data = (val))
/*
* Create a new linked list.
@@ -95,7 +105,7 @@ extern struct listnode *listnode_add(struct list *list, void *data);
* list to operate on
*
* data
- * element to add
+ * If MEM_BY_APP is set this is listnode. Otherwise it is element to add.
*/
extern void listnode_add_head(struct list *list, void *data);
@@ -112,7 +122,7 @@ extern void listnode_add_head(struct list *list, void *data);
* list to operate on
*
* val
- * element to add
+ * If MEM_BY_APP is set this is listnode. Otherwise it is element to add.
*/
extern void listnode_add_sort(struct list *list, void *val);
@@ -128,7 +138,7 @@ extern void listnode_add_sort(struct list *list, void *val);
* listnode to insert after
*
* data
- * data to insert
+ * If MEM_BY_APP is set this is listnode. Otherwise it is element to add.
*
* Returns:
* pointer to newly created listnode that contains the inserted data
@@ -148,7 +158,7 @@ extern struct listnode *listnode_add_after(struct list *list,
* listnode to insert before
*
* data
- * data to insert
+ * If MEM_BY_APP is set this is listnode. Otherwise it is element to add.
*
* Returns:
* pointer to newly created listnode that contains the inserted data
@@ -313,7 +323,7 @@ extern void list_filter_out_nodes(struct list *list, bool (*cond)(void *data));
* list to operate on
*
* val
- * element to add
+ * If MEM_BY_APP is set this is listnode. Otherwise it is element to add.
*/
extern bool listnode_add_sort_nodup(struct list *list, void *val);
diff --git a/lib/log.c b/lib/log.c
index 2baa09b6fd..bbce4eb793 100644
--- a/lib/log.c
+++ b/lib/log.c
@@ -398,6 +398,10 @@ static const struct zebra_desc_table command_types[] = {
DESC_ENTRY(ZEBRA_ADVERTISE_SUBNET),
DESC_ENTRY(ZEBRA_LOCAL_ES_ADD),
DESC_ENTRY(ZEBRA_LOCAL_ES_DEL),
+ DESC_ENTRY(ZEBRA_REMOTE_ES_VTEP_ADD),
+ DESC_ENTRY(ZEBRA_REMOTE_ES_VTEP_DEL),
+ DESC_ENTRY(ZEBRA_LOCAL_ES_EVI_ADD),
+ DESC_ENTRY(ZEBRA_LOCAL_ES_EVI_DEL),
DESC_ENTRY(ZEBRA_VNI_ADD),
DESC_ENTRY(ZEBRA_VNI_DEL),
DESC_ENTRY(ZEBRA_L3VNI_ADD),
diff --git a/lib/netns_linux.c b/lib/netns_linux.c
index e1c0159fc5..98f359401e 100644
--- a/lib/netns_linux.c
+++ b/lib/netns_linux.c
@@ -379,20 +379,12 @@ struct ns *ns_lookup(ns_id_t ns_id)
return ns_lookup_internal(ns_id);
}
-void ns_walk_func(int (*func)(struct ns *,
- void *param_in,
- void **param_out),
- void *param_in,
- void **param_out)
+/* Call func once for every ns in ns_tree. The value returned by func is
+ * ignored, so the walk cannot be stopped early.
+ */
+void ns_walk_func(int (*func)(struct ns *))
{
struct ns *ns = NULL;
- int ret;
- RB_FOREACH (ns, ns_head, &ns_tree) {
- ret = func(ns, param_in, param_out);
- if (ret == NS_WALK_STOP)
- return;
- }
+ RB_FOREACH (ns, ns_head, &ns_tree)
+ func(ns);
}
const char *ns_get_name(struct ns *ns)
@@ -592,33 +584,9 @@ int ns_socket(int domain, int type, int protocol, ns_id_t ns_id)
return ret;
}
-/* if relative link_nsid matches default netns,
- * then return default absolute netns value
- * otherwise, return NS_UNKNOWN
- */
-ns_id_t ns_id_get_absolute(ns_id_t ns_id_reference, ns_id_t link_nsid)
-{
- struct ns *ns;
-
- ns = ns_lookup(ns_id_reference);
- if (!ns)
- return NS_UNKNOWN;
- if (ns->relative_default_ns != link_nsid)
- return NS_UNKNOWN;
- ns = ns_get_default();
- assert(ns);
- return ns->ns_id;
-}
-
ns_id_t ns_get_default_id(void)
{
if (default_ns)
return default_ns->ns_id;
return NS_DEFAULT_INTERNAL;
}
-
-struct ns *ns_get_default(void)
-{
- return default_ns;
-}
-
diff --git a/lib/ns.h b/lib/ns.h
index 286ff5b295..20e0a38e3b 100644
--- a/lib/ns.h
+++ b/lib/ns.h
@@ -53,11 +53,6 @@ struct ns {
/* Identifier, mapped on the NSID value */
ns_id_t internal_ns_id;
- /* Identifier, value of NSID of default netns,
- * relative value in that local netns
- */
- ns_id_t relative_default_ns;
-
/* Name */
char *name;
@@ -125,14 +120,7 @@ int ns_socket(int domain, int type, int protocol, ns_id_t ns_id);
extern char *ns_netns_pathname(struct vty *vty, const char *name);
/* Parse and execute a function on all the NETNS */
-#define NS_WALK_CONTINUE 0
-#define NS_WALK_STOP 1
-
-extern void ns_walk_func(int (*func)(struct ns *,
- void *,
- void **),
- void *param_in,
- void **param_out);
+extern void ns_walk_func(int (*func)(struct ns *));
/* API to get the NETNS name, from the ns pointer */
extern const char *ns_get_name(struct ns *ns);
@@ -186,9 +174,7 @@ extern struct ns *ns_lookup_name(const char *name);
*/
extern int ns_enable(struct ns *ns, void (*func)(ns_id_t, void *));
extern struct ns *ns_get_created(struct ns *ns, char *name, ns_id_t ns_id);
-extern ns_id_t ns_id_get_absolute(ns_id_t ns_id_reference, ns_id_t link_nsid);
extern void ns_disable(struct ns *ns);
-extern struct ns *ns_get_default(void);
#ifdef __cplusplus
}
diff --git a/lib/prefix.h b/lib/prefix.h
index 53e9dc3cb3..400f07386f 100644
--- a/lib/prefix.h
+++ b/lib/prefix.h
@@ -43,9 +43,36 @@ extern "C" {
#define ETH_ALEN 6
#endif
+/* EVPN route types. */
+typedef enum {
+ BGP_EVPN_AD_ROUTE = 1, /* Ethernet Auto-Discovery (A-D) route */
+ BGP_EVPN_MAC_IP_ROUTE, /* MAC/IP Advertisement route */
+ BGP_EVPN_IMET_ROUTE, /* Inclusive Multicast Ethernet Tag route */
+ BGP_EVPN_ES_ROUTE, /* Ethernet Segment route */
+ BGP_EVPN_IP_PREFIX_ROUTE, /* IP Prefix route */
+} bgp_evpn_route_type;
+
+/* value of first byte of ESI */
+#define ESI_TYPE_ARBITRARY 0 /* */
+#define ESI_TYPE_LACP 1 /* <> */
+#define ESI_TYPE_BRIDGE 2 /* <Root bridge Mac-6B>:<Root Br Priority-2B>:00 */
+#define ESI_TYPE_MAC 3 /* <Syst Mac Add-6B>:<Local Discriminator Value-3B> */
+#define ESI_TYPE_ROUTER 4 /* <RouterId-4B>:<Local Discriminator Value-4B> */
+#define ESI_TYPE_AS 5 /* <AS-4B>:<Local Discriminator Value-4B> */
+
+#define MAX_ESI {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
+
+
+#define EVPN_ETH_TAG_BYTES 4
#define ESI_BYTES 10
#define ESI_STR_LEN (3 * ESI_BYTES)
+/* Maximum number of VTEPs per-ES -
+ * XXX - temporary limit for allocating strings etc.
+ */
+#define ES_VTEP_MAX_CNT 10
+#define ES_VTEP_LIST_STR_SZ (ES_VTEP_MAX_CNT * 16)
+
#define ETHER_ADDR_STRLEN (3*ETH_ALEN)
/*
* there isn't a portable ethernet address type. We define our
@@ -64,12 +91,13 @@ struct ethaddr {
#define PREFIX_LEN_ROUTE_TYPE_5_IPV6 (30*8)
typedef struct esi_t_ {
- uint8_t val[10];
+ uint8_t val[ESI_BYTES];
} esi_t;
struct evpn_ead_addr {
esi_t esi;
uint32_t eth_tag;
+ struct ipaddr ip;
};
struct evpn_macip_addr {
@@ -217,39 +245,45 @@ struct prefix_evpn {
static inline int is_evpn_prefix_ipaddr_none(const struct prefix_evpn *evp)
{
- if (evp->prefix.route_type == 2)
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ return IS_IPADDR_NONE(&(evp)->prefix.ead_addr.ip);
+ if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
return IS_IPADDR_NONE(&(evp)->prefix.macip_addr.ip);
- if (evp->prefix.route_type == 3)
+ if (evp->prefix.route_type == BGP_EVPN_IMET_ROUTE)
return IS_IPADDR_NONE(&(evp)->prefix.imet_addr.ip);
- if (evp->prefix.route_type == 4)
+ if (evp->prefix.route_type == BGP_EVPN_ES_ROUTE)
return IS_IPADDR_NONE(&(evp)->prefix.es_addr.ip);
- if (evp->prefix.route_type == 5)
+ if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE)
return IS_IPADDR_NONE(&(evp)->prefix.prefix_addr.ip);
return 0;
}
static inline int is_evpn_prefix_ipaddr_v4(const struct prefix_evpn *evp)
{
- if (evp->prefix.route_type == 2)
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ return IS_IPADDR_V4(&(evp)->prefix.ead_addr.ip);
+ if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
return IS_IPADDR_V4(&(evp)->prefix.macip_addr.ip);
- if (evp->prefix.route_type == 3)
+ if (evp->prefix.route_type == BGP_EVPN_IMET_ROUTE)
return IS_IPADDR_V4(&(evp)->prefix.imet_addr.ip);
- if (evp->prefix.route_type == 4)
+ if (evp->prefix.route_type == BGP_EVPN_ES_ROUTE)
return IS_IPADDR_V4(&(evp)->prefix.es_addr.ip);
- if (evp->prefix.route_type == 5)
+ if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE)
return IS_IPADDR_V4(&(evp)->prefix.prefix_addr.ip);
return 0;
}
static inline int is_evpn_prefix_ipaddr_v6(const struct prefix_evpn *evp)
{
- if (evp->prefix.route_type == 2)
+ if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE)
+ return IS_IPADDR_V6(&(evp)->prefix.ead_addr.ip);
+ if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
return IS_IPADDR_V6(&(evp)->prefix.macip_addr.ip);
- if (evp->prefix.route_type == 3)
+ if (evp->prefix.route_type == BGP_EVPN_IMET_ROUTE)
return IS_IPADDR_V6(&(evp)->prefix.imet_addr.ip);
- if (evp->prefix.route_type == 4)
+ if (evp->prefix.route_type == BGP_EVPN_ES_ROUTE)
return IS_IPADDR_V6(&(evp)->prefix.es_addr.ip);
- if (evp->prefix.route_type == 5)
+ if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE)
return IS_IPADDR_V6(&(evp)->prefix.prefix_addr.ip);
return 0;
}
diff --git a/lib/thread.c b/lib/thread.c
index 5c7c104842..1df4eee25c 100644
--- a/lib/thread.c
+++ b/lib/thread.c
@@ -634,6 +634,36 @@ struct timeval thread_timer_remain(struct thread *thread)
return remain;
}
+/* Format sec as "hh:mm:ss" into buf; returns 0 iff exactly 8 chars were produced. */
+static int time_hhmmss(char *buf, int buf_size, long sec)
+{
+	long hh;
+	long mm;
+	int wr;
+
+	/* "hh:mm:ss" is 8 chars; snprintf needs one more byte for the NUL */
+	zassert(buf_size >= 9);
+
+	hh = sec / 3600;
+	sec %= 3600;
+	mm = sec / 60;
+	sec %= 60;
+	wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
+	return wr != 8;
+}
+
+char *thread_timer_to_hhmmss(char *buf, int buf_size,
+ struct thread *t_timer)
+{
+ if (t_timer) {
+ time_hhmmss(buf, buf_size,
+ thread_timer_remain_second(t_timer));
+ } else {
+ snprintf(buf, buf_size, "--:--:--");
+ }
+ return buf;
+}
+
/* Get new thread. */
static struct thread *thread_get(struct thread_master *m, uint8_t type,
int (*func)(struct thread *), void *arg,
diff --git a/lib/thread.h b/lib/thread.h
index 412a4d93bf..c22b2105cd 100644
--- a/lib/thread.h
+++ b/lib/thread.h
@@ -140,6 +140,8 @@ struct cpu_thread_history {
/* Thread yield time. */
#define THREAD_YIELD_TIME_SLOT 10 * 1000L /* 10ms */
+#define THREAD_TIMER_STRLEN 12
+
/* Macros. */
#define THREAD_ARG(X) ((X)->arg)
#define THREAD_FD(X) ((X)->u.fd)
@@ -228,6 +230,8 @@ extern unsigned long thread_consumed_time(RUSAGE_T *after, RUSAGE_T *before,
/* only for use in logging functions! */
extern pthread_key_t thread_current;
+extern char *thread_timer_to_hhmmss(char *buf, int buf_size,
+ struct thread *t_timer);
#ifdef __cplusplus
}
diff --git a/lib/vrf.c b/lib/vrf.c
index 20e08b03d8..2a3ce2a315 100644
--- a/lib/vrf.c
+++ b/lib/vrf.c
@@ -653,8 +653,7 @@ int vrf_handler_create(struct vty *vty, const char *vrfname,
}
int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname,
- ns_id_t ns_id, ns_id_t internal_ns_id,
- ns_id_t rel_def_ns_id)
+ ns_id_t ns_id, ns_id_t internal_ns_id)
{
struct ns *ns = NULL;
@@ -701,7 +700,6 @@ int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname,
}
ns = ns_get_created(ns, pathname, ns_id);
ns->internal_ns_id = internal_ns_id;
- ns->relative_default_ns = rel_def_ns_id;
ns->vrf_ctxt = (void *)vrf;
vrf->ns_ctxt = (void *)ns;
/* update VRF netns NAME */
@@ -797,9 +795,7 @@ DEFUN_NOSH (vrf_netns,
frr_with_privs(vrf_daemon_privs) {
ret = vrf_netns_handler_create(vty, vrf, pathname,
- NS_UNKNOWN,
- NS_UNKNOWN,
- NS_UNKNOWN);
+ NS_UNKNOWN, NS_UNKNOWN);
}
return ret;
}
diff --git a/lib/vrf.h b/lib/vrf.h
index a8514d74ed..83ed16b48e 100644
--- a/lib/vrf.h
+++ b/lib/vrf.h
@@ -315,7 +315,7 @@ extern int vrf_handler_create(struct vty *vty, const char *name,
*/
extern int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf,
char *pathname, ns_id_t ext_ns_id,
- ns_id_t ns_id, ns_id_t rel_def_ns_id);
+ ns_id_t ns_id);
/* used internally to enable or disable VRF.
* Notify a change in the VRF ID of the VRF
diff --git a/lib/zclient.c b/lib/zclient.c
index 92ff2537d5..eb62350f4f 100644
--- a/lib/zclient.c
+++ b/lib/zclient.c
@@ -3519,6 +3519,16 @@ static int zclient_read(struct thread *thread)
(*zclient->local_es_del)(command, zclient, length,
vrf_id);
break;
+ case ZEBRA_LOCAL_ES_EVI_ADD:
+ if (zclient->local_es_evi_add)
+ (*zclient->local_es_evi_add)(command, zclient, length,
+ vrf_id);
+ break;
+ case ZEBRA_LOCAL_ES_EVI_DEL:
+ if (zclient->local_es_evi_del)
+ (*zclient->local_es_evi_del)(command, zclient, length,
+ vrf_id);
+ break;
case ZEBRA_VNI_ADD:
if (zclient->local_vni_add)
(*zclient->local_vni_add)(command, zclient, length,
diff --git a/lib/zclient.h b/lib/zclient.h
index 250824e612..da06239d01 100644
--- a/lib/zclient.h
+++ b/lib/zclient.h
@@ -157,6 +157,10 @@ typedef enum {
ZEBRA_ADVERTISE_ALL_VNI,
ZEBRA_LOCAL_ES_ADD,
ZEBRA_LOCAL_ES_DEL,
+ ZEBRA_REMOTE_ES_VTEP_ADD,
+ ZEBRA_REMOTE_ES_VTEP_DEL,
+ ZEBRA_LOCAL_ES_EVI_ADD,
+ ZEBRA_LOCAL_ES_EVI_DEL,
ZEBRA_VNI_ADD,
ZEBRA_VNI_DEL,
ZEBRA_L3VNI_ADD,
@@ -321,6 +325,8 @@ struct zclient {
int (*fec_update)(int, struct zclient *, uint16_t);
int (*local_es_add)(ZAPI_CALLBACK_ARGS);
int (*local_es_del)(ZAPI_CALLBACK_ARGS);
+ int (*local_es_evi_add)(ZAPI_CALLBACK_ARGS);
+ int (*local_es_evi_del)(ZAPI_CALLBACK_ARGS);
int (*local_vni_add)(ZAPI_CALLBACK_ARGS);
int (*local_vni_del)(ZAPI_CALLBACK_ARGS);
int (*local_l3vni_add)(ZAPI_CALLBACK_ARGS);
@@ -601,6 +607,11 @@ zapi_rule_notify_owner2str(enum zapi_rule_notify_owner note)
#define ZEBRA_MACIP_TYPE_ROUTER_FLAG 0x04 /* Router Flag - proxy NA */
#define ZEBRA_MACIP_TYPE_OVERRIDE_FLAG 0x08 /* Override Flag */
#define ZEBRA_MACIP_TYPE_SVI_IP 0x10 /* SVI MAC-IP */
+#define ZEBRA_MACIP_TYPE_PROXY_ADVERT 0x20 /* Not locally active */
+#define ZEBRA_MACIP_TYPE_SYNC_PATH 0x40 /* sync path */
+/* XXX - flags is an u8; that needs to be changed to u32 if you need
+ * to allocate past 0x80
+ */
enum zebra_neigh_state { ZEBRA_NEIGH_INACTIVE = 0, ZEBRA_NEIGH_ACTIVE = 1 };
diff --git a/sharpd/sharp_vty.c b/sharpd/sharp_vty.c
index 1d2b87b9ba..6a120c8eff 100644
--- a/sharpd/sharp_vty.c
+++ b/sharpd/sharp_vty.c
@@ -394,27 +394,31 @@ DEFUN_NOSH (show_debugging_sharpd,
return CMD_SUCCESS;
}
-DEFPY(sharp_lsp_prefix_v4, sharp_lsp_prefix_v4_cmd,
- "sharp lsp (0-100000)$inlabel\
+DEFPY (sharp_lsp_prefix_v4, sharp_lsp_prefix_v4_cmd,
+ "sharp lsp [update]$update (0-100000)$inlabel\
nexthop-group NHGNAME$nhgname\
[prefix A.B.C.D/M$pfx\
" FRR_IP_REDIST_STR_ZEBRA "$type_str [instance (0-255)$instance]]",
- "Sharp Routing Protocol\n"
- "Add an LSP\n"
- "The ingress label to use\n"
- "Use nexthops from a nexthop-group\n"
- "The nexthop-group name\n"
- "Label a prefix\n"
- "The v4 prefix to label\n"
- FRR_IP_REDIST_HELP_STR_ZEBRA
- "Instance to use\n"
- "Instance\n")
+ "Sharp Routing Protocol\n"
+ "Add an LSP\n"
+ "Update an LSP\n"
+ "The ingress label to use\n"
+ "Use nexthops from a nexthop-group\n"
+ "The nexthop-group name\n"
+ "Label a prefix\n"
+ "The v4 prefix to label\n"
+ FRR_IP_REDIST_HELP_STR_ZEBRA
+ "Instance to use\n"
+ "Instance\n")
{
struct nexthop_group_cmd *nhgc = NULL;
struct nexthop_group_cmd *backup_nhgc = NULL;
struct nexthop_group *backup_nhg = NULL;
struct prefix p = {};
int type = 0;
+ bool update_p;
+
+ update_p = (update != NULL);
/* We're offered a v4 prefix */
if (pfx->family > 0 && type_str) {
@@ -458,7 +462,8 @@ DEFPY(sharp_lsp_prefix_v4, sharp_lsp_prefix_v4_cmd,
backup_nhg = &(backup_nhgc->nhg);
}
- if (sharp_install_lsps_helper(true, pfx->family > 0 ? &p : NULL,
+ if (sharp_install_lsps_helper(true /*install*/, update_p,
+ pfx->family > 0 ? &p : NULL,
type, instance, inlabel,
&(nhgc->nhg), backup_nhg) == 0)
return CMD_SUCCESS;
@@ -523,7 +528,8 @@ DEFPY(sharp_remove_lsp_prefix_v4, sharp_remove_lsp_prefix_v4_cmd,
nhg = &(nhgc->nhg);
}
- if (sharp_install_lsps_helper(false, pfx->family > 0 ? &p : NULL,
+ if (sharp_install_lsps_helper(false /*!install*/, false,
+ pfx->family > 0 ? &p : NULL,
type, instance, inlabel, nhg, NULL) == 0)
return CMD_SUCCESS;
else {
diff --git a/sharpd/sharp_zebra.c b/sharpd/sharp_zebra.c
index 74e44014a9..e0f16d71f5 100644
--- a/sharpd/sharp_zebra.c
+++ b/sharpd/sharp_zebra.c
@@ -114,15 +114,16 @@ static int sharp_ifp_down(struct interface *ifp)
return 0;
}
-int sharp_install_lsps_helper(bool install_p, const struct prefix *p,
- uint8_t type, int instance, uint32_t in_label,
+int sharp_install_lsps_helper(bool install_p, bool update_p,
+ const struct prefix *p, uint8_t type,
+ int instance, uint32_t in_label,
const struct nexthop_group *nhg,
const struct nexthop_group *backup_nhg)
{
struct zapi_labels zl = {};
struct zapi_nexthop *znh;
const struct nexthop *nh;
- int i, ret;
+ int i, cmd, ret;
zl.type = ZEBRA_LSP_SHARP;
zl.local_label = in_label;
@@ -200,12 +201,17 @@ int sharp_install_lsps_helper(bool install_p, const struct prefix *p,
zl.backup_nexthop_num = i;
}
- if (install_p)
- ret = zebra_send_mpls_labels(zclient, ZEBRA_MPLS_LABELS_ADD,
- &zl);
- else
- ret = zebra_send_mpls_labels(zclient, ZEBRA_MPLS_LABELS_DELETE,
- &zl);
+
+ if (install_p) {
+ if (update_p)
+ cmd = ZEBRA_MPLS_LABELS_REPLACE;
+ else
+ cmd = ZEBRA_MPLS_LABELS_ADD;
+ } else {
+ cmd = ZEBRA_MPLS_LABELS_DELETE;
+ }
+
+ ret = zebra_send_mpls_labels(zclient, cmd, &zl);
return ret;
}
diff --git a/sharpd/sharp_zebra.h b/sharpd/sharp_zebra.h
index cb2f38a6ab..e40585aa6a 100644
--- a/sharpd/sharp_zebra.h
+++ b/sharpd/sharp_zebra.h
@@ -44,8 +44,9 @@ extern void sharp_install_routes_helper(struct prefix *p, vrf_id_t vrf_id,
extern void sharp_remove_routes_helper(struct prefix *p, vrf_id_t vrf_id,
uint8_t instance, uint32_t routes);
-int sharp_install_lsps_helper(bool install_p, const struct prefix *p,
- uint8_t type, int instance, uint32_t in_label,
+int sharp_install_lsps_helper(bool install_p, bool update_p,
+ const struct prefix *p, uint8_t type,
+ int instance, uint32_t in_label,
const struct nexthop_group *nhg,
const struct nexthop_group *backup_nhg);
diff --git a/tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf b/tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf
new file mode 100644
index 0000000000..8858e21496
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/evpn-mh-topo-tests.pdf
Binary files differ
diff --git a/tests/topotests/bgp-evpn-mh/hostd11/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd11/evpn.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd11/evpn.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd11/pim.conf b/tests/topotests/bgp-evpn-mh/hostd11/pim.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd11/pim.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd11/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd11/zebra.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd11/zebra.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd12/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd12/evpn.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd12/evpn.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd12/pim.conf b/tests/topotests/bgp-evpn-mh/hostd12/pim.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd12/pim.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd12/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd12/zebra.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd12/zebra.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd21/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd21/evpn.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd21/evpn.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd21/pim.conf b/tests/topotests/bgp-evpn-mh/hostd21/pim.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd21/pim.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd21/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd21/zebra.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd21/zebra.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd22/evpn.conf b/tests/topotests/bgp-evpn-mh/hostd22/evpn.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd22/evpn.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd22/pim.conf b/tests/topotests/bgp-evpn-mh/hostd22/pim.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd22/pim.conf
diff --git a/tests/topotests/bgp-evpn-mh/hostd22/zebra.conf b/tests/topotests/bgp-evpn-mh/hostd22/zebra.conf
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/hostd22/zebra.conf
diff --git a/tests/topotests/bgp-evpn-mh/spine1/evpn.conf b/tests/topotests/bgp-evpn-mh/spine1/evpn.conf
new file mode 100644
index 0000000000..2e26f60f44
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/spine1/evpn.conf
@@ -0,0 +1,17 @@
+frr defaults datacenter
+!
+router bgp 65001
+ bgp router-id 192.168.100.13
+ no bgp ebgp-requires-policy
+ neighbor 192.168.1.2 remote-as external
+ neighbor 192.168.2.2 remote-as external
+ neighbor 192.168.3.2 remote-as external
+ neighbor 192.168.4.2 remote-as external
+ redistribute connected
+ address-family l2vpn evpn
+ neighbor 192.168.1.2 activate
+ neighbor 192.168.2.2 activate
+ neighbor 192.168.3.2 activate
+ neighbor 192.168.4.2 activate
+ exit-address-family
+!
diff --git a/tests/topotests/bgp-evpn-mh/spine1/pim.conf b/tests/topotests/bgp-evpn-mh/spine1/pim.conf
new file mode 100644
index 0000000000..68e686e8c7
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/spine1/pim.conf
@@ -0,0 +1,18 @@
+ip pim rp 192.168.100.13
+ip pim spt-switchover infinity-and-beyond
+!
+int lo
+ ip pim
+!
+int spine1-eth0
+ ip pim
+!
+int spine1-eth1
+ ip pim
+!
+int spine1-eth2
+ ip pim
+!
+int spine1-eth3
+ ip pim
+!
diff --git a/tests/topotests/bgp-evpn-mh/spine1/zebra.conf b/tests/topotests/bgp-evpn-mh/spine1/zebra.conf
new file mode 100644
index 0000000000..80e9e5a263
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/spine1/zebra.conf
@@ -0,0 +1,15 @@
+int spine1-eth0
+ ip addr 192.168.1.1/24
+!
+int spine1-eth1
+ ip addr 192.168.2.1/24
+!
+int spine1-eth2
+ ip addr 192.168.3.1/24
+!
+int spine1-eth3
+ ip addr 192.168.4.1/24
+!
+int lo
+ ip addr 192.168.100.13/32
+ ip addr 192.168.100.100/32
diff --git a/tests/topotests/bgp-evpn-mh/spine2/evpn.conf b/tests/topotests/bgp-evpn-mh/spine2/evpn.conf
new file mode 100644
index 0000000000..ec2e789276
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/spine2/evpn.conf
@@ -0,0 +1,17 @@
+frr defaults datacenter
+!
+router bgp 65001
+ bgp router-id 192.168.100.14
+ no bgp ebgp-requires-policy
+ neighbor 192.168.5.2 remote-as external
+ neighbor 192.168.6.2 remote-as external
+ neighbor 192.168.7.2 remote-as external
+ neighbor 192.168.8.2 remote-as external
+ redistribute connected
+ address-family l2vpn evpn
+ neighbor 192.168.5.2 activate
+ neighbor 192.168.6.2 activate
+ neighbor 192.168.7.2 activate
+ neighbor 192.168.8.2 activate
+ exit-address-family
+!
diff --git a/tests/topotests/bgp-evpn-mh/spine2/pim.conf b/tests/topotests/bgp-evpn-mh/spine2/pim.conf
new file mode 100644
index 0000000000..c1566240e6
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/spine2/pim.conf
@@ -0,0 +1,18 @@
+ip pim rp 192.168.100.13
+ip pim spt-switchover infinity-and-beyond
+!
+int lo
+ ip pim
+!
+int spine2-eth0
+ ip pim
+!
+int spine2-eth1
+ ip pim
+!
+int spine2-eth2
+ ip pim
+!
+int spine2-eth3
+ ip pim
+!
diff --git a/tests/topotests/bgp-evpn-mh/spine2/zebra.conf b/tests/topotests/bgp-evpn-mh/spine2/zebra.conf
new file mode 100644
index 0000000000..1cd1df8c81
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/spine2/zebra.conf
@@ -0,0 +1,15 @@
+int spine2-eth0
+ ip addr 192.168.5.1/24
+!
+int spine2-eth1
+ ip addr 192.168.6.1/24
+!
+int spine2-eth2
+ ip addr 192.168.7.1/24
+!
+int spine2-eth3
+ ip addr 192.168.8.1/24
+!
+int lo
+ ip addr 192.168.100.14/32
+ ip addr 192.168.100.100/32
diff --git a/tests/topotests/bgp-evpn-mh/test_evpn_mh.py b/tests/topotests/bgp-evpn-mh/test_evpn_mh.py
new file mode 100755
index 0000000000..fe28f79bd4
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/test_evpn_mh.py
@@ -0,0 +1,651 @@
+#!/usr/bin/env python
+
+#
+# test_evpn_mh.py
+#
+# Copyright (c) 2020 by
+# Cumulus Networks, Inc.
+# Anuradha Karuppiah
+#
+# Permission to use, copy, modify, and/or distribute this software
+# for any purpose with or without fee is hereby granted, provided
+# that the above copyright notice and this permission notice appear
+# in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND NETDEF DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NETDEF BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+#
+
+"""
+test_evpn_mh.py: Testing EVPN multihoming
+
+"""
+
+import os
+import re
+import sys
+import pytest
+import json
+import platform
+from functools import partial
+
+# Save the Current Working Directory to find configuration files.
+CWD = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(CWD, "../"))
+
+# pylint: disable=C0413
+# Import topogen and topotest helpers
+from lib import topotest
+from lib.topogen import Topogen, TopoRouter, get_topogen
+from lib.topolog import logger
+
+# Required to instantiate the topology builder class.
+from mininet.topo import Topo
+
+#####################################################
+##
+## Network Topology Definition
+##
+## See topology picture at evpn-mh-topo-tests.pdf
+#####################################################
+
+
+class NetworkTopo(Topo):
+ '''
+ EVPN Multihoming Topology -
+ 1. Two level CLOS
+ 2. Two spine switches - spine1, spine2
+ 3. Two racks with Top-of-Rack switches per rack - tormx1, tormx2
+ 4. Two dual attached hosts per-rack - hostdx1, hostdx2
+ '''
+
+ def build(self, **_opts):
+ "Build function"
+
+ tgen = get_topogen(self)
+
+ tgen.add_router("spine1")
+ tgen.add_router("spine2")
+ tgen.add_router("torm11")
+ tgen.add_router("torm12")
+ tgen.add_router("torm21")
+ tgen.add_router("torm22")
+ tgen.add_router("hostd11")
+ tgen.add_router("hostd12")
+ tgen.add_router("hostd21")
+ tgen.add_router("hostd22")
+
+ # On main router
+ # First switch is for a dummy interface (for local network)
+
+
+ ##################### spine1 ########################
+ # spine1-eth0 is connected to torm11-eth0
+ switch = tgen.add_switch("sw1")
+ switch.add_link(tgen.gears["spine1"])
+ switch.add_link(tgen.gears["torm11"])
+
+ # spine1-eth1 is connected to torm12-eth0
+ switch = tgen.add_switch("sw2")
+ switch.add_link(tgen.gears["spine1"])
+ switch.add_link(tgen.gears["torm12"])
+
+ # spine1-eth2 is connected to torm21-eth0
+ switch = tgen.add_switch("sw3")
+ switch.add_link(tgen.gears["spine1"])
+ switch.add_link(tgen.gears["torm21"])
+
+ # spine1-eth3 is connected to torm22-eth0
+ switch = tgen.add_switch("sw4")
+ switch.add_link(tgen.gears["spine1"])
+ switch.add_link(tgen.gears["torm22"])
+
+ ##################### spine2 ########################
+ # spine2-eth0 is connected to torm11-eth1
+ switch = tgen.add_switch("sw5")
+ switch.add_link(tgen.gears["spine2"])
+ switch.add_link(tgen.gears["torm11"])
+
+ # spine2-eth1 is connected to torm12-eth1
+ switch = tgen.add_switch("sw6")
+ switch.add_link(tgen.gears["spine2"])
+ switch.add_link(tgen.gears["torm12"])
+
+ # spine2-eth2 is connected to torm21-eth1
+ switch = tgen.add_switch("sw7")
+ switch.add_link(tgen.gears["spine2"])
+ switch.add_link(tgen.gears["torm21"])
+
+ # spine2-eth3 is connected to torm22-eth1
+ switch = tgen.add_switch("sw8")
+ switch.add_link(tgen.gears["spine2"])
+ switch.add_link(tgen.gears["torm22"])
+
+ ##################### torm11 ########################
+ # torm11-eth2 is connected to hostd11-eth0
+ switch = tgen.add_switch("sw9")
+ switch.add_link(tgen.gears["torm11"])
+ switch.add_link(tgen.gears["hostd11"])
+
+ # torm11-eth3 is connected to hostd12-eth0
+ switch = tgen.add_switch("sw10")
+ switch.add_link(tgen.gears["torm11"])
+ switch.add_link(tgen.gears["hostd12"])
+
+ ##################### torm12 ########################
+ # torm12-eth2 is connected to hostd11-eth1
+ switch = tgen.add_switch("sw11")
+ switch.add_link(tgen.gears["torm12"])
+ switch.add_link(tgen.gears["hostd11"])
+
+ # torm12-eth3 is connected to hostd12-eth1
+ switch = tgen.add_switch("sw12")
+ switch.add_link(tgen.gears["torm12"])
+ switch.add_link(tgen.gears["hostd12"])
+
+ ##################### torm21 ########################
+ # torm21-eth2 is connected to hostd21-eth0
+ switch = tgen.add_switch("sw13")
+ switch.add_link(tgen.gears["torm21"])
+ switch.add_link(tgen.gears["hostd21"])
+
+ # torm21-eth3 is connected to hostd22-eth0
+ switch = tgen.add_switch("sw14")
+ switch.add_link(tgen.gears["torm21"])
+ switch.add_link(tgen.gears["hostd22"])
+
+ ##################### torm22 ########################
+ # torm22-eth2 is connected to hostd21-eth1
+ switch = tgen.add_switch("sw15")
+ switch.add_link(tgen.gears["torm22"])
+ switch.add_link(tgen.gears["hostd21"])
+
+ # torm22-eth3 is connected to hostd22-eth1
+ switch = tgen.add_switch("sw16")
+ switch.add_link(tgen.gears["torm22"])
+ switch.add_link(tgen.gears["hostd22"])
+
+
+#####################################################
+##
+## Tests starting
+##
+#####################################################
+
+tor_ips = {"torm11" : "192.168.100.15", \
+ "torm12" : "192.168.100.16", \
+ "torm21" : "192.168.100.17", \
+ "torm22" : "192.168.100.18"}
+
+svi_ips = {"torm11" : "45.0.0.2", \
+ "torm12" : "45.0.0.3", \
+ "torm21" : "45.0.0.4", \
+ "torm22" : "45.0.0.5"}
+
+tor_ips_rack_1 = {"torm11" : "192.168.100.15", \
+ "torm12" : "192.168.100.16"}
+
+tor_ips_rack_2 = {"torm21" : "192.168.100.17", \
+ "torm22" : "192.168.100.18"}
+
+host_es_map = {"hostd11" : "03:44:38:39:ff:ff:01:00:00:01",
+ "hostd12" : "03:44:38:39:ff:ff:01:00:00:02",
+ "hostd21" : "03:44:38:39:ff:ff:02:00:00:01",
+ "hostd22" : "03:44:38:39:ff:ff:02:00:00:02"}
+
+def config_bond(node, bond_name, bond_members, bond_ad_sys_mac, br):
+ '''
+ Used to setup bonds on the TORs and hosts for MH
+ '''
+ node.run("ip link add dev %s type bond mode 802.3ad" % bond_name)
+ node.run("ip link set dev %s type bond lacp_rate 1" % bond_name)
+ node.run("ip link set dev %s type bond miimon 100" % bond_name)
+ node.run("ip link set dev %s type bond xmit_hash_policy layer3+4" % bond_name)
+ node.run("ip link set dev %s type bond min_links 1" % bond_name)
+ node.run("ip link set dev %s type bond ad_actor_system %s" %\
+ (bond_name, bond_ad_sys_mac))
+
+ for bond_member in bond_members:
+ node.run("ip link set dev %s down" % bond_member)
+ node.run("ip link set dev %s master %s" % (bond_member, bond_name))
+ node.run("ip link set dev %s up" % bond_member)
+
+ node.run("ip link set dev %s up" % bond_name)
+
+ # if bridge is specified add the bond as a bridge member
+ if br:
+ node.run(" ip link set dev %s master bridge" % bond_name)
+ node.run("/sbin/bridge link set dev %s priority 8" % bond_name)
+ node.run("/sbin/bridge vlan del vid 1 dev %s" % bond_name)
+ node.run("/sbin/bridge vlan del vid 1 untagged pvid dev %s" % bond_name)
+ node.run("/sbin/bridge vlan add vid 1000 dev %s" % bond_name)
+ node.run("/sbin/bridge vlan add vid 1000 untagged pvid dev %s"\
+ % bond_name)
+
+
+def config_mcast_tunnel_termination_device(node):
+ '''
+ The kernel requires a device to terminate VxLAN multicast tunnels
+ when EVPN-PIM is used for flooded traffic
+ '''
+ node.run("ip link add dev ipmr-lo type dummy")
+ node.run("ip link set dev ipmr-lo mtu 16000")
+ node.run("ip link set dev ipmr-lo mode dormant")
+ node.run("ip link set dev ipmr-lo up")
+
+
+def config_bridge(node):
+ '''
+ Create a VLAN aware bridge
+ '''
+ node.run("ip link add dev bridge type bridge stp_state 0")
+ node.run("ip link set dev bridge type bridge vlan_filtering 1")
+ node.run("ip link set dev bridge mtu 9216")
+ node.run("ip link set dev bridge type bridge ageing_time 1800")
+ node.run("ip link set dev bridge type bridge mcast_snooping 0")
+ node.run("ip link set dev bridge type bridge vlan_stats_enabled 1")
+ node.run("ip link set dev bridge up")
+ node.run("/sbin/bridge vlan add vid 1000 dev bridge")
+
+
+def config_vxlan(node, node_ip):
+ '''
+ Create a VxLAN device for VNI 1000 and add it to the bridge.
+ VLAN-1000 is mapped to VNI-1000.
+ '''
+ node.run("ip link add dev vx-1000 type vxlan id 1000 dstport 4789")
+ node.run("ip link set dev vx-1000 type vxlan nolearning")
+ node.run("ip link set dev vx-1000 type vxlan local %s" % node_ip)
+ node.run("ip link set dev vx-1000 type vxlan ttl 64")
+ node.run("ip link set dev vx-1000 mtu 9152")
+ node.run("ip link set dev vx-1000 type vxlan dev ipmr-lo group 239.1.1.100")
+ node.run("ip link set dev vx-1000 up")
+
+ # bridge attrs
+ node.run("ip link set dev vx-1000 master bridge")
+ node.run("/sbin/bridge link set dev vx-1000 neigh_suppress on")
+ node.run("/sbin/bridge link set dev vx-1000 learning off")
+ node.run("/sbin/bridge link set dev vx-1000 priority 8")
+ node.run("/sbin/bridge vlan del vid 1 dev vx-1000")
+ node.run("/sbin/bridge vlan del vid 1 untagged pvid dev vx-1000")
+ node.run("/sbin/bridge vlan add vid 1000 dev vx-1000")
+ node.run("/sbin/bridge vlan add vid 1000 untagged pvid dev vx-1000")
+
+
+def config_svi(node, svi_pip):
+ '''
+ Create an SVI for VLAN 1000
+ '''
+ node.run("ip link add link bridge name vlan1000 type vlan id 1000 protocol 802.1q")
+ node.run("ip addr add %s/24 dev vlan1000" % svi_pip)
+ node.run("ip link set dev vlan1000 up")
+ node.run("/sbin/sysctl net.ipv4.conf.vlan1000.arp_accept=1")
+ node.run("ip link add link vlan1000 name vlan1000-v0 type macvlan mode private")
+ node.run("/sbin/sysctl net.ipv6.conf.vlan1000-v0.accept_dad=0")
+ node.run("/sbin/sysctl net.ipv6.conf.vlan1000-v0.dad_transmits")
+ node.run("/sbin/sysctl net.ipv6.conf.vlan1000-v0.dad_transmits=0")
+ node.run("ip link set dev vlan1000-v0 address 00:00:5e:00:01:01")
+ node.run("ip link set dev vlan1000-v0 up")
+ # metric 1024 is not working
+ node.run("ip addr add 45.0.0.1/24 dev vlan1000-v0")
+
+
+def config_tor(tor_name, tor, tor_ip, svi_pip):
+ '''
+ Create the bond/vxlan-bridge on the TOR which acts as VTEP and EVPN-PE
+ '''
+ # create a device for terminating VxLAN multicast tunnels
+ config_mcast_tunnel_termination_device(tor)
+
+ # create a vlan aware bridge
+ config_bridge(tor)
+
+ # create vxlan device and add it to bridge
+ config_vxlan(tor, tor_ip)
+
+ # create hostbonds and add them to the bridge
+ if "torm1" in tor_name:
+ sys_mac = "44:38:39:ff:ff:01"
+ else:
+ sys_mac = "44:38:39:ff:ff:02"
+ bond_member = tor_name + "-eth2"
+ config_bond(tor, "hostbond1", [bond_member], sys_mac, "bridge")
+
+ bond_member = tor_name + "-eth3"
+ config_bond(tor, "hostbond2", [bond_member], sys_mac, "bridge")
+
+ # create SVI
+ config_svi(tor, svi_pip)
+
+
+def config_tors(tgen, tors):
+ for tor_name in tors:
+ tor = tgen.gears[tor_name]
+ config_tor(tor_name, tor, tor_ips.get(tor_name), svi_ips.get(tor_name))
+
+def compute_host_ip_mac(host_name):
+ host_id = host_name.split("hostd")[1]
+ host_ip = "45.0.0."+ host_id + "/24"
+ host_mac = "00:00:00:00:00:" + host_id
+
+ return host_ip, host_mac
+
+def config_host(host_name, host):
+ '''
+ Create the dual-attached bond on host nodes for MH
+ '''
+ bond_members = []
+ bond_members.append(host_name + "-eth0")
+ bond_members.append(host_name + "-eth1")
+ bond_name = "torbond"
+ config_bond(host, bond_name, bond_members, "00:00:00:00:00:00", None)
+
+ host_ip, host_mac = compute_host_ip_mac(host_name)
+ host.run("ip addr add %s dev %s" % (host_ip, bond_name))
+ host.run("ip link set dev %s address %s" % (bond_name, host_mac))
+
+
+def config_hosts(tgen, hosts):
+ for host_name in hosts:
+ host = tgen.gears[host_name]
+ config_host(host_name, host)
+
+
+def setup_module(module):
+    "Setup topology: build it, configure TORs and hosts, load configs, start routers"
+    tgen = Topogen(NetworkTopo, module.__name__)
+    tgen.start_topology()
+
+    krel = platform.release()
+    if topotest.version_cmp(krel, "4.19") < 0:
+        tgen.errors = "kernel 4.19 needed for multihoming tests"
+        pytest.skip(tgen.errors)
+
+    tors = []
+    tors.append("torm11")
+    tors.append("torm12")
+    tors.append("torm21")
+    tors.append("torm22")
+    config_tors(tgen, tors)
+
+    hosts = []
+    hosts.append("hostd11")
+    hosts.append("hostd12")
+    hosts.append("hostd21")
+    hosts.append("hostd22")
+    config_hosts(tgen, hosts)
+
+    # tgen.mininet_cli()
+    # Load the per-router zebra/pim/bgp configs; items() works on python 2 and 3
+    router_list = tgen.routers()
+    for rname, router in router_list.items():
+        router.load_config(
+            TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname))
+        )
+        router.load_config(
+            TopoRouter.RD_PIM, os.path.join(CWD, "{}/pim.conf".format(rname))
+        )
+        router.load_config(
+            TopoRouter.RD_BGP, os.path.join(CWD, "{}/evpn.conf".format(rname))
+        )
+    tgen.start_router()
+    # tgen.mininet_cli()
+
+
+def teardown_module(_mod):
+ "Teardown the pytest environment"
+ tgen = get_topogen()
+
+ # This function tears down the whole topology.
+ tgen.stop_topology()
+
+
+def check_local_es(esi, vtep_ips, dut_name, down_vteps):
+    '''Check if the ES peers are set up correctly on a local ES.
+    Expected peers: the other TORs in the DUT's rack, minus down_vteps.
+    Returns None on a match, else (esi, set of mismatching VTEP IPs).'''
+    peer_ips = []
+    if "torm1" in dut_name:
+        tor_ips_rack = tor_ips_rack_1
+    else:
+        tor_ips_rack = tor_ips_rack_2
+
+    for tor_name, tor_ip in tor_ips_rack.items():
+        if dut_name not in tor_name:
+            peer_ips.append(tor_ip)
+
+    # remove down VTEPs from the peer check list
+    peer_set = set(peer_ips)
+    down_vtep_set = set(down_vteps)
+    peer_set = peer_set - down_vtep_set
+
+    vtep_set = set(vtep_ips)
+    diff = peer_set.symmetric_difference(vtep_set)
+
+    return (esi, diff) if diff else None
+
+
+def check_remote_es(esi, vtep_ips, dut_name, down_vteps):
+    '''Verify the list of PEs associated with a remote ES.
+    Expected PEs: all TORs of the rack the DUT is not in, minus down_vteps.
+    Returns None on a match, else (esi, set of mismatching VTEP IPs).'''
+    remote_ips = []
+
+    if "torm1" in dut_name:
+        tor_ips_rack = tor_ips_rack_2
+    else:
+        tor_ips_rack = tor_ips_rack_1
+
+    for tor_ip in tor_ips_rack.values():
+        remote_ips.append(tor_ip)
+
+    # remove down VTEPs from the remote check list
+    remote_set = set(remote_ips)
+    down_vtep_set = set(down_vteps)
+    remote_set = remote_set - down_vtep_set
+
+    vtep_set = set(vtep_ips)
+    diff = remote_set.symmetric_difference(vtep_set)
+
+    return (esi, diff) if diff else None
+
+def check_es(dut):
+    '''Verify the list of PEs associated with all ESs, local and remote.
+    Returns the first per-ES mismatch, else the set of missing/unexpected
+    ESIs (compared against host_es_map), else None when all is correct.'''
+    bgp_es = dut.vtysh_cmd("show bgp l2vp evpn es json")
+    bgp_es_json = json.loads(bgp_es)
+
+    result = None
+
+    expected_es_set = set(host_es_map.values())
+    curr_es_set = []
+
+    # check if ES content is correct
+    for es in bgp_es_json:
+        esi = es["esi"]
+        curr_es_set.append(esi)
+        types = es["type"]
+        vtep_ips = []
+        for vtep in es["vteps"]:
+            vtep_ips.append(vtep["vtep_ip"])
+
+        if "local" in types:
+            result = check_local_es(esi, vtep_ips, dut.name, [])
+        else:
+            result = check_remote_es(esi, vtep_ips, dut.name, [])
+
+        if result:
+            return result
+
+    # check if all ESs are present
+    curr_es_set = set(curr_es_set)
+    result = curr_es_set.symmetric_difference(expected_es_set)
+
+    return result if result else None
+
+def check_one_es(dut, esi, down_vteps):
+ '''
+ Verify the list of PEs associated with a single ES, local or remote
+ '''
+ bgp_es = dut.vtysh_cmd("show bgp l2vp evpn es %s json" % esi)
+ es = json.loads(bgp_es)
+
+ if not es:
+ return "esi %s not found" % esi
+
+ esi = es["esi"]
+ types = es["type"]
+ vtep_ips = []
+ for vtep in es["vteps"]:
+ vtep_ips.append(vtep["vtep_ip"])
+
+ if "local" in types:
+ result = check_local_es(esi, vtep_ips, dut.name, down_vteps)
+ else:
+ result = check_remote_es(esi, vtep_ips, dut.name, down_vteps)
+
+ return result
+
+def test_evpn_es():
+ '''
+ Two ES are setup on each rack. This test checks if -
+ 1. ES peer has been added to the local ES (via Type-1/EAD route)
+ 2. The remote ESs are setup with the right list of PEs (via Type-1)
+ '''
+
+ tgen = get_topogen()
+
+ if tgen.routers_have_failure():
+ pytest.skip(tgen.errors)
+
+ dut_name = "torm11"
+ dut = tgen.gears[dut_name]
+ test_fn = partial(check_es, dut)
+ _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3)
+
+ assertmsg = '"{}" ES content incorrect'.format(dut_name)
+ assert result is None, assertmsg
+ # tgen.mininet_cli()
+
+def test_evpn_ead_update():
+ '''
+ Flap a host link on the remote rack and check if the EAD updates
+ are sent/processed for the corresponding ESI
+ '''
+ tgen = get_topogen()
+
+ if tgen.routers_have_failure():
+ pytest.skip(tgen.errors)
+
+ # dut on rack1 and host link flap on rack2
+ dut_name = "torm11"
+ dut = tgen.gears[dut_name]
+
+ remote_tor_name = "torm21"
+ remote_tor = tgen.gears[remote_tor_name]
+
+ host_name = "hostd21"
+ host = tgen.gears[host_name]  # not used below; the link is flapped on the TOR side
+ esi = host_es_map.get(host_name)
+
+ # check if the VTEP list is right to start with
+ down_vteps = []
+ test_fn = partial(check_one_es, dut, esi, down_vteps)
+ _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3)
+ assertmsg = '"{}" ES content incorrect'.format(dut_name)
+ assert result is None, assertmsg
+
+ # down a remote host link and check if the EAD withdraw is rxed
+ # Note: LACP is not working as expected so I am temporarily shutting
+ # down the link on the remote TOR instead of the remote host
+ remote_tor.run("ip link set dev %s-%s down" % (remote_tor_name, "eth2"))
+ down_vteps.append(tor_ips.get(remote_tor_name))
+ _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3)
+ assertmsg = '"{}" ES incorrect after remote link down'.format(dut_name)
+ assert result is None, assertmsg
+
+ # bring up remote host link and check if the EAD update is rxed
+ down_vteps.remove(tor_ips.get(remote_tor_name))
+ remote_tor.run("ip link set dev %s-%s up" % (remote_tor_name, "eth2"))
+ _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3)
+ assertmsg = '"{}" ES incorrect after remote link flap'.format(dut_name)
+ assert result is None, assertmsg
+
+ # tgen.mininet_cli()
+
+def check_mac(dut, vni, mac, m_type, esi, intf):
+    '''
+    Check that mac is present in vni on dut with the expected type, ESI and
+    (for local MACs) access interface. Returns None on a match, else an
+    error string, so it can be polled with topotest.run_and_expect.
+    '''
+    out = dut.vtysh_cmd("show evpn mac vni %d mac %s json" % (vni, mac))
+    mac_js = json.loads(out)
+    for _, info in mac_js.iteritems():
+        tmp_esi = info.get("esi", "")
+        tmp_m_type = info.get("type", "")
+        # the interface is only compared for local MACs; otherwise use ""
+        tmp_intf = info.get("intf", "") if tmp_m_type == "local" else ""
+        if tmp_esi == esi and tmp_m_type == m_type and tmp_intf == intf:
+            return None
+    return "invalid vni %d mac %s out %s" % (vni, mac, mac_js)
+
+def test_evpn_mac():
+    '''
+    1. Add a MAC on hostd11 and check if the MAC is synced between
+       torm11 and torm12. And installed as a local MAC.
+    2. Add a MAC on hostd21 and check if the MAC is installed as a
+       remote MAC on torm11 and torm12
+    '''
+    tgen = get_topogen()
+    # skip, like the other tests in this file, if a router has failed
+    if tgen.routers_have_failure():
+        pytest.skip(tgen.errors)
+
+    local_host = tgen.gears["hostd11"]
+    remote_host = tgen.gears["hostd21"]
+    tors = [tgen.gears["torm11"], tgen.gears["torm12"]]
+
+    # ping the anycast gw from the local and remote hosts to populate
+    # the mac address on the PEs
+    local_host.run("arping -I torbond -c 1 45.0.0.1")
+    remote_host.run("arping -I torbond -c 1 45.0.0.1")
+
+    vni = 1000
+
+    # check if the rack-1 host MAC is present on all rack-1 PEs
+    # and points to local access port
+    m_type = "local"
+    _, mac = compute_host_ip_mac(local_host.name)
+    esi = host_es_map.get(local_host.name)
+    intf = "hostbond1"
+
+    for tor in tors:
+        test_fn = partial(check_mac, tor, vni, mac, m_type, esi, intf)
+        _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3)
+        assertmsg = '"{}" local MAC content incorrect'.format(tor.name)
+        assert result is None, assertmsg
+
+    # check if the rack-2 host MAC is present on all rack-1 PEs
+    # and points to the remote ES destination
+    m_type = "remote"
+    _, mac = compute_host_ip_mac(remote_host.name)
+    esi = host_es_map.get(remote_host.name)
+    intf = ""
+
+    for tor in tors:
+        test_fn = partial(check_mac, tor, vni, mac, m_type, esi, intf)
+        _, result = topotest.run_and_expect(test_fn, None, count=20, wait=3)
+        assertmsg = '"{}" remote MAC content incorrect'.format(tor.name)
+        assert result is None, assertmsg
+
+if __name__ == "__main__":
+ args = ["-s"] + sys.argv[1:]
+ sys.exit(pytest.main(args))
diff --git a/tests/topotests/bgp-evpn-mh/torm11/evpn.conf b/tests/topotests/bgp-evpn-mh/torm11/evpn.conf
new file mode 100644
index 0000000000..01f4b65704
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm11/evpn.conf
@@ -0,0 +1,21 @@
+!
+frr defaults datacenter
+!
+debug bgp evpn mh es
+debug bgp evpn mh route
+debug bgp zebra
+!
+!
+router bgp 65002
+ bgp router-id 192.168.100.15
+ no bgp ebgp-requires-policy
+ neighbor 192.168.1.1 remote-as external
+ neighbor 192.168.5.1 remote-as external
+ redistribute connected
+ address-family l2vpn evpn
+ neighbor 192.168.1.1 activate
+ neighbor 192.168.5.1 activate
+ advertise-all-vni
+ advertise-svi-ip
+ exit-address-family
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm11/pim.conf b/tests/topotests/bgp-evpn-mh/torm11/pim.conf
new file mode 100644
index 0000000000..fbba735873
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm11/pim.conf
@@ -0,0 +1,13 @@
+!
+ip pim rp 192.168.100.13 239.1.1.0/24
+ip pim spt-switchover infinity-and-beyond
+!
+interface lo
+ ip igmp
+ ip pim
+!
+interface torm11-eth0
+ ip pim
+!
+interface torm11-eth1
+ ip pim
diff --git a/tests/topotests/bgp-evpn-mh/torm11/zebra.conf b/tests/topotests/bgp-evpn-mh/torm11/zebra.conf
new file mode 100644
index 0000000000..ee4e87e1c2
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm11/zebra.conf
@@ -0,0 +1,23 @@
+debug zebra evpn mh es
+debug zebra evpn mh mac
+debug zebra evpn mh neigh
+debug zebra evpn mh nh
+debug zebra vxlan
+!
+int torm11-eth0
+ ip addr 192.168.1.2/24
+!
+int torm11-eth1
+ ip addr 192.168.5.2/24
+!
+int lo
+ ip addr 192.168.100.15/32
+!
+interface hostbond1
+ evpn mh es-id 1
+ evpn mh es-sys-mac 44:38:39:ff:ff:01
+!
+interface hostbond2
+ evpn mh es-id 2
+ evpn mh es-sys-mac 44:38:39:ff:ff:01
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm12/evpn.conf b/tests/topotests/bgp-evpn-mh/torm12/evpn.conf
new file mode 100644
index 0000000000..2c13024bbc
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm12/evpn.conf
@@ -0,0 +1,21 @@
+!
+frr defaults datacenter
+!
+debug bgp evpn mh es
+debug bgp evpn mh route
+debug bgp zebra
+!
+!
+router bgp 65003
+ bgp router-id 192.168.100.16
+ no bgp ebgp-requires-policy
+ neighbor 192.168.2.1 remote-as external
+ neighbor 192.168.6.1 remote-as external
+ redistribute connected
+ address-family l2vpn evpn
+ neighbor 192.168.2.1 activate
+ neighbor 192.168.6.1 activate
+ advertise-all-vni
+ advertise-svi-ip
+ exit-address-family
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm12/pim.conf b/tests/topotests/bgp-evpn-mh/torm12/pim.conf
new file mode 100644
index 0000000000..3dd63b44ca
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm12/pim.conf
@@ -0,0 +1,13 @@
+!
+ip pim rp 192.168.100.13 239.1.1.0/24
+ip pim spt-switchover infinity-and-beyond
+!
+interface lo
+ ip igmp
+ ip pim
+!
+interface torm12-eth0
+ ip pim
+!
+interface torm12-eth1
+ ip pim
diff --git a/tests/topotests/bgp-evpn-mh/torm12/zebra.conf b/tests/topotests/bgp-evpn-mh/torm12/zebra.conf
new file mode 100644
index 0000000000..736af4159e
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm12/zebra.conf
@@ -0,0 +1,23 @@
+debug zebra evpn mh es
+debug zebra evpn mh mac
+debug zebra evpn mh neigh
+debug zebra evpn mh nh
+debug zebra vxlan
+!
+int torm12-eth0
+ ip addr 192.168.2.2/24
+!
+int torm12-eth1
+ ip addr 192.168.6.2/24
+!
+int lo
+ ip addr 192.168.100.16/32
+!
+interface hostbond1
+ evpn mh es-id 1
+ evpn mh es-sys-mac 44:38:39:ff:ff:01
+!
+interface hostbond2
+ evpn mh es-id 2
+ evpn mh es-sys-mac 44:38:39:ff:ff:01
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm21/evpn.conf b/tests/topotests/bgp-evpn-mh/torm21/evpn.conf
new file mode 100644
index 0000000000..2a2ba061c6
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm21/evpn.conf
@@ -0,0 +1,21 @@
+!
+frr defaults datacenter
+!
+debug bgp evpn mh es
+debug bgp evpn mh route
+debug bgp zebra
+!
+!
+router bgp 65004
+ bgp router-id 192.168.100.17
+ no bgp ebgp-requires-policy
+ neighbor 192.168.3.1 remote-as external
+ neighbor 192.168.7.1 remote-as external
+ redistribute connected
+ address-family l2vpn evpn
+ neighbor 192.168.3.1 activate
+ neighbor 192.168.7.1 activate
+ advertise-all-vni
+ advertise-svi-ip
+ exit-address-family
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm21/pim.conf b/tests/topotests/bgp-evpn-mh/torm21/pim.conf
new file mode 100644
index 0000000000..71aa91a06d
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm21/pim.conf
@@ -0,0 +1,13 @@
+!
+ip pim rp 192.168.100.13 239.1.1.0/24
+ip pim spt-switchover infinity-and-beyond
+!
+interface lo
+ ip igmp
+ ip pim
+!
+interface torm21-eth0
+ ip pim
+!
+interface torm21-eth1
+ ip pim
diff --git a/tests/topotests/bgp-evpn-mh/torm21/zebra.conf b/tests/topotests/bgp-evpn-mh/torm21/zebra.conf
new file mode 100644
index 0000000000..0ebe6f2d95
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm21/zebra.conf
@@ -0,0 +1,23 @@
+debug zebra evpn mh es
+debug zebra evpn mh mac
+debug zebra evpn mh neigh
+debug zebra evpn mh nh
+debug zebra vxlan
+!
+int torm21-eth0
+ ip addr 192.168.3.2/24
+!
+int torm21-eth1
+ ip addr 192.168.7.2/24
+!
+int lo
+ ip addr 192.168.100.17/32
+!
+interface hostbond1
+ evpn mh es-id 1
+ evpn mh es-sys-mac 44:38:39:ff:ff:02
+!
+interface hostbond2
+ evpn mh es-id 2
+ evpn mh es-sys-mac 44:38:39:ff:ff:02
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm22/evpn.conf b/tests/topotests/bgp-evpn-mh/torm22/evpn.conf
new file mode 100644
index 0000000000..b4f4f1dc25
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm22/evpn.conf
@@ -0,0 +1,21 @@
+!
+frr defaults datacenter
+!
+debug bgp evpn mh es
+debug bgp evpn mh route
+debug bgp zebra
+!
+!
+router bgp 65005
+ bgp router-id 192.168.100.18
+ no bgp ebgp-requires-policy
+ neighbor 192.168.4.1 remote-as external
+ neighbor 192.168.8.1 remote-as external
+ redistribute connected
+ address-family l2vpn evpn
+ neighbor 192.168.4.1 activate
+ neighbor 192.168.8.1 activate
+ advertise-all-vni
+ advertise-svi-ip
+ exit-address-family
+!
diff --git a/tests/topotests/bgp-evpn-mh/torm22/pim.conf b/tests/topotests/bgp-evpn-mh/torm22/pim.conf
new file mode 100644
index 0000000000..46f330f5cd
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm22/pim.conf
@@ -0,0 +1,13 @@
+!
+ip pim rp 192.168.100.13 239.1.1.0/24
+ip pim spt-switchover infinity-and-beyond
+!
+interface lo
+ ip igmp
+ ip pim
+!
+interface torm22-eth0
+ ip pim
+!
+interface torm22-eth1
+ ip pim
diff --git a/tests/topotests/bgp-evpn-mh/torm22/zebra.conf b/tests/topotests/bgp-evpn-mh/torm22/zebra.conf
new file mode 100644
index 0000000000..356d8a43e7
--- /dev/null
+++ b/tests/topotests/bgp-evpn-mh/torm22/zebra.conf
@@ -0,0 +1,23 @@
+debug zebra evpn mh es
+debug zebra evpn mh mac
+debug zebra evpn mh neigh
+debug zebra evpn mh nh
+debug zebra vxlan
+!
+int torm22-eth0
+ ip addr 192.168.4.2/24
+!
+int torm22-eth1
+ ip addr 192.168.8.2/24
+!
+int lo
+ ip addr 192.168.100.18/32
+!
+interface hostbond1
+ evpn mh es-id 1
+ evpn mh es-sys-mac 44:38:39:ff:ff:02
+!
+interface hostbond2
+ evpn mh es-id 2
+ evpn mh es-sys-mac 44:38:39:ff:ff:02
+!
diff --git a/tests/topotests/lib/topogen.py b/tests/topotests/lib/topogen.py
index efd5b90685..37b9715010 100644
--- a/tests/topotests/lib/topogen.py
+++ b/tests/topotests/lib/topogen.py
@@ -819,7 +819,9 @@ class TopoRouter(TopoGear):
if memleak_file is None:
return
- self.stop()
+ self.stop(False, False)
+ self.stop(wait=True)
+
self.logger.info("running memory leak report")
self.tgen.net[self.name].report_memory_leaks(memleak_file, testname)
diff --git a/zebra/debug.c b/zebra/debug.c
index c920fca5ff..8c53ab73e4 100644
--- a/zebra/debug.c
+++ b/zebra/debug.c
@@ -40,6 +40,7 @@ unsigned long zebra_debug_pw;
unsigned long zebra_debug_dplane;
unsigned long zebra_debug_mlag;
unsigned long zebra_debug_nexthop;
+unsigned long zebra_debug_evpn_mh;
DEFINE_HOOK(zebra_debug_show_debugging, (struct vty *vty), (vty));
@@ -109,6 +110,18 @@ DEFUN_NOSH (show_debugging_zebra,
else if (IS_ZEBRA_DEBUG_NHG)
vty_out(vty, " Zebra nexthop debugging is on\n");
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ vty_out(vty, " Zebra EVPN-MH ethernet segment debugging is on\n");
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NH)
+ vty_out(vty, " Zebra EVPN-MH nexthop debugging is on\n");
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ vty_out(vty, " Zebra EVPN-MH MAC debugging is on\n");
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+ vty_out(vty, " Zebra EVPN-MH Neigh debugging is on\n");
+
hook_call(zebra_debug_show_debugging, vty);
return CMD_SUCCESS;
}
@@ -320,6 +333,53 @@ DEFPY (debug_zebra_mlag,
return CMD_SUCCESS;
}
+DEFPY (debug_zebra_evpn_mh,
+ debug_zebra_evpn_mh_cmd,
+ "[no$no] debug zebra evpn mh <es$es|mac$mac|neigh$neigh|nh$nh>",
+ NO_STR
+ DEBUG_STR
+ "Zebra configuration\n"
+ "EVPN\n"
+ "Multihoming\n"
+ "Ethernet Segment Debugging\n"
+ "MAC Debugging\n"
+ "Neigh Debugging\n"
+ "Nexthop Debugging\n")
+{
+ if (es) {
+ if (no)
+ UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES);
+ else
+ SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_ES);
+ }
+
+ if (mac) {
+ if (no)
+ UNSET_FLAG(zebra_debug_evpn_mh,
+ ZEBRA_DEBUG_EVPN_MH_MAC);
+ else
+ SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_MAC);
+ }
+
+ if (neigh) {
+ if (no)
+ UNSET_FLAG(zebra_debug_evpn_mh,
+ ZEBRA_DEBUG_EVPN_MH_NEIGH);
+ else
+ SET_FLAG(zebra_debug_evpn_mh,
+ ZEBRA_DEBUG_EVPN_MH_NEIGH);
+ }
+
+ if (nh) {
+ if (no)
+ UNSET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH);
+ else
+ SET_FLAG(zebra_debug_evpn_mh, ZEBRA_DEBUG_EVPN_MH_NH);
+ }
+
+ return CMD_SUCCESS;
+}
+
DEFUN (no_debug_zebra_events,
no_debug_zebra_events_cmd,
"no debug zebra events",
@@ -553,6 +613,22 @@ static int config_write_debug(struct vty *vty)
vty_out(vty, "debug zebra mlag\n");
write++;
}
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES) {
+ vty_out(vty, "debug zebra evpn mh es\n");
+ write++;
+ }
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NH) {
+ vty_out(vty, "debug zebra evpn mh nh\n");
+ write++;
+ }
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) {
+ vty_out(vty, "debug zebra evpn mh mac\n");
+ write++;
+ }
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) {
+ vty_out(vty, "debug zebra evpn mh neigh\n");
+ write++;
+ }
if (IS_ZEBRA_DEBUG_PW) {
vty_out(vty, "debug zebra pseudowires\n");
write++;
@@ -589,6 +665,7 @@ void zebra_debug_init(void)
zebra_debug_pw = 0;
zebra_debug_dplane = 0;
zebra_debug_mlag = 0;
+ zebra_debug_evpn_mh = 0;
zebra_debug_nht = 0;
zebra_debug_nexthop = 0;
@@ -619,6 +696,7 @@ void zebra_debug_init(void)
install_element(ENABLE_NODE, &no_debug_zebra_rib_cmd);
install_element(ENABLE_NODE, &no_debug_zebra_fpm_cmd);
install_element(ENABLE_NODE, &no_debug_zebra_dplane_cmd);
+ install_element(ENABLE_NODE, &debug_zebra_evpn_mh_cmd);
install_element(CONFIG_NODE, &debug_zebra_events_cmd);
install_element(CONFIG_NODE, &debug_zebra_nht_cmd);
@@ -643,4 +721,5 @@ void zebra_debug_init(void)
install_element(CONFIG_NODE, &no_debug_zebra_fpm_cmd);
install_element(CONFIG_NODE, &no_debug_zebra_dplane_cmd);
install_element(CONFIG_NODE, &debug_zebra_mlag_cmd);
+ install_element(CONFIG_NODE, &debug_zebra_evpn_mh_cmd);
}
diff --git a/zebra/debug.h b/zebra/debug.h
index e513f8865d..8402224f19 100644
--- a/zebra/debug.h
+++ b/zebra/debug.h
@@ -62,6 +62,11 @@ extern "C" {
#define ZEBRA_DEBUG_NHG 0x01
#define ZEBRA_DEBUG_NHG_DETAILED 0x02
+#define ZEBRA_DEBUG_EVPN_MH_ES 0x01
+#define ZEBRA_DEBUG_EVPN_MH_NH 0x02
+#define ZEBRA_DEBUG_EVPN_MH_MAC 0x04
+#define ZEBRA_DEBUG_EVPN_MH_NEIGH 0x08
+
/* Debug related macro. */
#define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT)
@@ -100,6 +105,15 @@ extern "C" {
#define IS_ZEBRA_DEBUG_NHG_DETAIL \
(zebra_debug_nexthop & ZEBRA_DEBUG_NHG_DETAILED)
+#define IS_ZEBRA_DEBUG_EVPN_MH_ES \
+ (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_ES)
+#define IS_ZEBRA_DEBUG_EVPN_MH_NH \
+ (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NH)
+#define IS_ZEBRA_DEBUG_EVPN_MH_MAC \
+ (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_MAC)
+#define IS_ZEBRA_DEBUG_EVPN_MH_NEIGH \
+ (zebra_debug_evpn_mh & ZEBRA_DEBUG_EVPN_MH_NEIGH)
+
extern unsigned long zebra_debug_event;
extern unsigned long zebra_debug_packet;
extern unsigned long zebra_debug_kernel;
@@ -112,6 +126,7 @@ extern unsigned long zebra_debug_pw;
extern unsigned long zebra_debug_dplane;
extern unsigned long zebra_debug_mlag;
extern unsigned long zebra_debug_nexthop;
+extern unsigned long zebra_debug_evpn_mh;
extern void zebra_debug_init(void);
diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c
index c81d451693..4165fa1b3a 100644
--- a/zebra/dplane_fpm_nl.c
+++ b/zebra/dplane_fpm_nl.c
@@ -1015,7 +1015,8 @@ static void fpm_enqueue_rmac_table(struct hash_bucket *backet, void *arg)
dplane_ctx_set_op(fra->ctx, DPLANE_OP_MAC_INSTALL);
dplane_mac_init(fra->ctx, fra->zl3vni->vxlan_if,
zif->brslave_info.br_if, vid,
- &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, sticky);
+ &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, sticky,
+ 0 /*nhg*/, 0 /*update_flags*/);
if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) {
thread_add_timer(zrouter.master, fpm_rmac_send,
fra->fnc, 1, &fra->fnc->t_rmacwalk);
diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c
index a15f932451..81f77d4f9b 100644
--- a/zebra/if_netlink.c
+++ b/zebra/if_netlink.c
@@ -70,6 +70,7 @@
#include "zebra/if_netlink.h"
#include "zebra/zebra_errors.h"
#include "zebra/zebra_vxlan.h"
+#include "zebra/zebra_evpn_mh.h"
extern struct zebra_privs_t zserv_privs;
@@ -245,6 +246,26 @@ static enum zebra_link_type netlink_to_zebra_link_type(unsigned int hwt)
}
}
+static inline void zebra_if_set_ziftype(struct interface *ifp,
+ zebra_iftype_t zif_type,
+ zebra_slave_iftype_t zif_slave_type)
+{
+ struct zebra_if *zif;
+
+ zif = (struct zebra_if *)ifp->info;
+ zif->zif_slave_type = zif_slave_type;
+
+ if (zif->zif_type != zif_type) {
+ zif->zif_type = zif_type;
+ /* zif_type changed: run the EVPN-MH if init (the bond check is
+ * presumably inside zebra_evpn_if_init - TODO confirm). XXX - note
+ * that we don't handle the case where a zif changes from bond to
+ * non-bond; it is really an unexpected/error condition.
+ */
+ zebra_evpn_if_init(zif);
+ }
+}
+
static void netlink_determine_zebra_iftype(const char *kind,
zebra_iftype_t *zif_type)
{
@@ -557,6 +578,74 @@ static void netlink_interface_update_l2info(struct interface *ifp,
}
}
+static int netlink_bridge_vxlan_update(struct interface *ifp,
+ struct rtattr *af_spec)
+{
+ struct rtattr *aftb[IFLA_BRIDGE_MAX + 1];
+ struct bridge_vlan_info *vinfo;
+ vlanid_t access_vlan;
+
+ /* There is a 1-to-1 mapping of VLAN to VxLAN - hence
+ * only 1 access VLAN is accepted.
+ */
+ memset(aftb, 0, sizeof(aftb));
+ parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, af_spec);
+ if (!aftb[IFLA_BRIDGE_VLAN_INFO])
+ return 0;
+
+ vinfo = RTA_DATA(aftb[IFLA_BRIDGE_VLAN_INFO]);
+ if (!(vinfo->flags & BRIDGE_VLAN_INFO_PVID))
+ return 0;
+
+ access_vlan = (vlanid_t)vinfo->vid;
+ if (IS_ZEBRA_DEBUG_KERNEL)
+ zlog_debug("Access VLAN %u for VxLAN IF %s(%u)", access_vlan,
+ ifp->name, ifp->ifindex);
+ zebra_l2_vxlanif_update_access_vlan(ifp, access_vlan);
+ return 0;
+}
+
+static void netlink_bridge_vlan_update(struct interface *ifp,
+ struct rtattr *af_spec)
+{
+ struct rtattr *i;
+ int rem;
+ uint16_t vid_range_start = 0;
+ struct zebra_if *zif;
+ bitfield_t old_vlan_bitmap;
+ struct bridge_vlan_info *vinfo;
+
+ zif = (struct zebra_if *)ifp->info;
+
+ /* cache the old bitmap addrs; it is freed after the re-eval below */
+ old_vlan_bitmap = zif->vlan_bitmap;
+ /* create a new bitmap space for re-eval */
+ bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX);
+
+ for (i = RTA_DATA(af_spec), rem = RTA_PAYLOAD(af_spec);
+ RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
+
+ if (i->rta_type != IFLA_BRIDGE_VLAN_INFO)
+ continue;
+
+ vinfo = RTA_DATA(i);
+
+ if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
+ vid_range_start = vinfo->vid; /* range start only; nothing to compute yet */
+ continue;
+ }
+
+ if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END))
+ vid_range_start = vinfo->vid; /* not a range end: start == end */
+
+ zebra_vlan_bitmap_compute(ifp, vid_range_start, vinfo->vid);
+ }
+
+ zebra_vlan_mbr_re_eval(ifp, old_vlan_bitmap);
+
+ bf_free(old_vlan_bitmap);
+}
+
static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id,
int startup)
{
@@ -564,12 +653,8 @@ static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id,
struct ifinfomsg *ifi;
struct rtattr *tb[IFLA_MAX + 1];
struct interface *ifp;
- struct rtattr *aftb[IFLA_BRIDGE_MAX + 1];
- struct {
- uint16_t flags;
- uint16_t vid;
- } * vinfo;
- vlanid_t access_vlan;
+ struct zebra_if *zif;
+ struct rtattr *af_spec;
/* Fetch name and ifindex */
ifi = NLMSG_DATA(h);
@@ -587,30 +672,22 @@ static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id,
ifi->ifi_index);
return 0;
}
- if (!IS_ZEBRA_IF_VXLAN(ifp))
- return 0;
/* We are only interested in the access VLAN i.e., AF_SPEC */
- if (!tb[IFLA_AF_SPEC])
- return 0;
+ af_spec = tb[IFLA_AF_SPEC];
+ if (!af_spec)
+ return 0;
- /* There is a 1-to-1 mapping of VLAN to VxLAN - hence
- * only 1 access VLAN is accepted.
- */
- memset(aftb, 0, sizeof(aftb));
- parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, tb[IFLA_AF_SPEC]);
- if (!aftb[IFLA_BRIDGE_VLAN_INFO])
- return 0;
+ if (IS_ZEBRA_IF_VXLAN(ifp))
+ return netlink_bridge_vxlan_update(ifp, af_spec);
- vinfo = RTA_DATA(aftb[IFLA_BRIDGE_VLAN_INFO]);
- if (!(vinfo->flags & BRIDGE_VLAN_INFO_PVID))
- return 0;
+ /* build vlan bitmap associated with this interface if that
+ * device type is interested in the vlans
+ */
+ zif = (struct zebra_if *)ifp->info;
+ if (bf_is_inited(zif->vlan_bitmap))
+ netlink_bridge_vlan_update(ifp, af_spec);
- access_vlan = (vlanid_t)vinfo->vid;
- if (IS_ZEBRA_DEBUG_KERNEL)
- zlog_debug("Access VLAN %u for VxLAN IF %s(%u)", access_vlan,
- name, ifi->ifi_index);
- zebra_l2_vxlanif_update_access_vlan(ifp, access_vlan);
return 0;
}
@@ -721,10 +798,8 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup)
if (tb[IFLA_LINK])
link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]);
- if (tb[IFLA_LINK_NETNSID]) {
+ if (tb[IFLA_LINK_NETNSID])
link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]);
- link_nsid = ns_id_get_absolute(ns_id, link_nsid);
- }
/* Add interface.
* We add by index first because in some cases such as the master
@@ -732,9 +807,11 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup)
* back references on the slave interfaces is painful if not done
* this way, i.e. by creating by ifindex.
*/
- ifp = if_get_by_ifindex(ifi->ifi_index, vrf_id, name);
+ ifp = if_get_by_ifindex(ifi->ifi_index, vrf_id);
set_ifindex(ifp, ifi->ifi_index, zns); /* add it to ns struct */
+ if_set_name(ifp, name);
+
ifp->flags = ifi->ifi_flags & 0x0000fffff;
ifp->mtu6 = ifp->mtu = *(uint32_t *)RTA_DATA(tb[IFLA_MTU]);
ifp->metric = 0;
@@ -771,7 +848,7 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup)
netlink_interface_update_l2info(ifp, linkinfo[IFLA_INFO_DATA],
1, link_nsid);
if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
- zebra_l2if_update_bridge_slave(ifp, bridge_ifindex, ns_id);
+ zebra_l2if_update_bridge_slave(ifp, bridge_ifindex);
else if (IS_ZEBRA_IF_BOND_SLAVE(ifp))
zebra_l2if_update_bond_slave(ifp, bond_ifindex);
@@ -1263,10 +1340,9 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
if (tb[IFLA_LINK])
link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]);
- if (tb[IFLA_LINK_NETNSID]) {
+ if (tb[IFLA_LINK_NETNSID])
link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]);
- link_nsid = ns_id_get_absolute(ns_id, link_nsid);
- }
+
if (tb[IFLA_IFALIAS]) {
desc = (char *)RTA_DATA(tb[IFLA_IFALIAS]);
}
@@ -1354,8 +1430,7 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
1, link_nsid);
if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
zebra_l2if_update_bridge_slave(ifp,
- bridge_ifindex,
- ns_id);
+ bridge_ifindex);
else if (IS_ZEBRA_IF_BOND_SLAVE(ifp))
zebra_l2if_update_bond_slave(ifp, bond_ifindex);
} else if (ifp->vrf_id != vrf_id) {
@@ -1456,8 +1531,7 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
0, link_nsid);
if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || was_bridge_slave)
zebra_l2if_update_bridge_slave(ifp,
- bridge_ifindex,
- ns_id);
+ bridge_ifindex);
else if (IS_ZEBRA_IF_BOND_SLAVE(ifp) || was_bond_slave)
zebra_l2if_update_bond_slave(ifp, bond_ifindex);
}
diff --git a/zebra/interface.c b/zebra/interface.c
index 53ae1d2089..b824e313ec 100644
--- a/zebra/interface.c
+++ b/zebra/interface.c
@@ -51,6 +51,7 @@
#include "zebra/interface.h"
#include "zebra/zebra_vxlan.h"
#include "zebra/zebra_errors.h"
+#include "zebra/zebra_evpn_mh.h"
DEFINE_MTYPE_STATIC(ZEBRA, ZINFO, "Zebra Interface Information")
@@ -127,6 +128,7 @@ static int if_zebra_new_hook(struct interface *ifp)
struct zebra_if *zebra_if;
zebra_if = XCALLOC(MTYPE_ZINFO, sizeof(struct zebra_if));
+ zebra_if->ifp = ifp;
zebra_if->multicast = IF_ZEBRA_MULTICAST_UNSPEC;
zebra_if->shutdown = IF_ZEBRA_SHUTDOWN_OFF;
@@ -238,6 +240,8 @@ static int if_zebra_delete_hook(struct interface *ifp)
list_delete(&rtadv->AdvDNSSLList);
#endif /* HAVE_RTADV */
+ zebra_evpn_if_cleanup(zebra_if);
+
if_nhg_dependents_release(ifp);
zebra_if_nhg_dependents_free(zebra_if);
@@ -831,6 +835,7 @@ void if_delete_update(struct interface *ifp)
memset(&zif->l2info, 0, sizeof(union zebra_l2if_info));
memset(&zif->brslave_info, 0,
sizeof(struct zebra_l2info_brslave));
+ zebra_evpn_if_cleanup(zif);
}
if (!ifp->configured) {
@@ -1072,6 +1077,8 @@ void if_up(struct interface *ifp)
} else if (IS_ZEBRA_IF_MACVLAN(ifp))
zebra_vxlan_macvlan_up(ifp);
+ if (zif->es_info.es)
+ zebra_evpn_es_if_oper_state_change(zif, true /*up*/);
}
/* Interface goes down. We have to manage different behavior of based
@@ -1106,6 +1113,8 @@ void if_down(struct interface *ifp)
} else if (IS_ZEBRA_IF_MACVLAN(ifp))
zebra_vxlan_macvlan_down(ifp);
+ if (zif->es_info.es)
+ zebra_evpn_es_if_oper_state_change(zif, false /*up*/);
/* Notify to the protocol daemons. */
zebra_interface_down_update(ifp);
@@ -1233,23 +1242,6 @@ static void nbr_connected_dump_vty(struct vty *vty,
vty_out(vty, "\n");
}
-static const char *zebra_zifslavetype_2str(zebra_slave_iftype_t zif_slave_type)
-{
- switch (zif_slave_type) {
- case ZEBRA_IF_SLAVE_BRIDGE:
- return "Bridge";
- case ZEBRA_IF_SLAVE_VRF:
- return "Vrf";
- case ZEBRA_IF_SLAVE_BOND:
- return "Bond";
- case ZEBRA_IF_SLAVE_OTHER:
- return "Other";
- case ZEBRA_IF_SLAVE_NONE:
- return "None";
- }
- return "None";
-}
-
static const char *zebra_ziftype_2str(zebra_iftype_t zif_type)
{
switch (zif_type) {
@@ -1477,9 +1469,6 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp)
vty_out(vty, " Interface Type %s\n",
zebra_ziftype_2str(zebra_if->zif_type));
- vty_out(vty, " Interface Slave Type %s\n",
- zebra_zifslavetype_2str(zebra_if->zif_slave_type));
-
if (IS_ZEBRA_IF_BRIDGE(ifp)) {
struct zebra_l2info_bridge *bridge_info;
@@ -1547,6 +1536,8 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp)
}
}
+ zebra_evpn_if_es_print(vty, zebra_if);
+
if (zebra_if->link_ifindex != IFINDEX_INTERNAL) {
if (zebra_if->link)
vty_out(vty, " Parent interface: %s\n", zebra_if->link->name);
@@ -3588,7 +3579,7 @@ static int if_config_write(struct vty *vty)
}
hook_call(zebra_if_config_wr, vty, ifp);
-
+ zebra_evpn_mh_if_write(vty, ifp);
link_params_config_write(vty, ifp);
vty_endframe(vty, "!\n");
@@ -3664,4 +3655,7 @@ void zebra_if_init(void)
install_element(LINK_PARAMS_NODE, &link_params_use_bw_cmd);
install_element(LINK_PARAMS_NODE, &no_link_params_use_bw_cmd);
install_element(LINK_PARAMS_NODE, &exit_link_params_cmd);
+
+ /* setup EVPN MH elements */
+ zebra_evpn_interface_init();
}
diff --git a/zebra/interface.h b/zebra/interface.h
index 2dad0c3bb2..1a8e3caed5 100644
--- a/zebra/interface.h
+++ b/zebra/interface.h
@@ -25,6 +25,7 @@
#include "redistribute.h"
#include "vrf.h"
#include "hook.h"
+#include "bitfield.h"
#include "zebra/zebra_l2.h"
#include "zebra/zebra_nhg_private.h"
@@ -42,6 +43,8 @@ extern "C" {
#define IF_ZEBRA_SHUTDOWN_OFF 0
#define IF_ZEBRA_SHUTDOWN_ON 1
+#define IF_VLAN_BITMAP_MAX 4096
+
#if defined(HAVE_RTADV)
/* Router advertisement parameter. From RFC4861, RFC6275 and RFC4191. */
struct rtadvconf {
@@ -272,8 +275,19 @@ typedef enum {
struct irdp_interface;
+/* Ethernet segment info used for setting up EVPN multihoming */
+struct zebra_evpn_es;
+struct zebra_es_if_info {
+ struct ethaddr sysmac;
+ uint32_t lid; /* local-id; has to be unique per-ES-sysmac */
+ struct zebra_evpn_es *es; /* local ES */
+};
+
/* `zebra' daemon local interface structure. */
struct zebra_if {
+ /* back pointer to the interface */
+ struct interface *ifp;
+
/* Shutdown configuration. */
uint8_t shutdown;
@@ -347,6 +361,12 @@ struct zebra_if {
struct zebra_l2info_bondslave bondslave_info;
+ /* ethernet segment */
+ struct zebra_es_if_info es_info;
+
+ /* bitmap of vlans associated with this interface */
+ bitfield_t vlan_bitmap;
+
/* Link fields - for sub-interfaces. */
ifindex_t link_ifindex;
struct interface *link;
@@ -370,17 +390,6 @@ DECLARE_HOOK(zebra_if_extra_info, (struct vty * vty, struct interface *ifp),
DECLARE_HOOK(zebra_if_config_wr, (struct vty * vty, struct interface *ifp),
(vty, ifp))
-static inline void zebra_if_set_ziftype(struct interface *ifp,
- zebra_iftype_t zif_type,
- zebra_slave_iftype_t zif_slave_type)
-{
- struct zebra_if *zif;
-
- zif = (struct zebra_if *)ifp->info;
- zif->zif_type = zif_type;
- zif->zif_slave_type = zif_slave_type;
-}
-
#define IS_ZEBRA_IF_VRF(ifp) \
(((struct zebra_if *)(ifp->info))->zif_type == ZEBRA_IF_VRF)
diff --git a/zebra/main.c b/zebra/main.c
index 9c5a1ef9b6..92e94c2a2a 100644
--- a/zebra/main.c
+++ b/zebra/main.c
@@ -186,7 +186,7 @@ static void sigint(void)
vrf_terminate();
rtadv_terminate();
- ns_walk_func(zebra_ns_early_shutdown, NULL, NULL);
+ ns_walk_func(zebra_ns_early_shutdown);
zebra_ns_notify_close();
access_list_reset();
@@ -217,7 +217,7 @@ int zebra_finalize(struct thread *dummy)
zlog_info("Zebra final shutdown");
/* Final shutdown of ns resources */
- ns_walk_func(zebra_ns_final_shutdown, NULL, NULL);
+ ns_walk_func(zebra_ns_final_shutdown);
/* Stop dplane thread and finish any cleanup */
zebra_dplane_shutdown();
diff --git a/zebra/rt.h b/zebra/rt.h
index 4b9a3f83fe..143e16b3ea 100644
--- a/zebra/rt.h
+++ b/zebra/rt.h
@@ -91,6 +91,11 @@ extern void neigh_read_for_vlan(struct zebra_ns *zns, struct interface *ifp);
extern void neigh_read_specific_ip(struct ipaddr *ip,
struct interface *vlan_if);
extern void route_read(struct zebra_ns *zns);
+extern int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip);
+extern int kernel_del_mac_nh(uint32_t nh_id);
+extern int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
+ struct nh_grp *nh_ids);
+extern int kernel_del_mac_nhg(uint32_t nhg_id);
#ifdef __cplusplus
}
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
index 8d38b6defe..4daef42d7a 100644
--- a/zebra/rt_netlink.c
+++ b/zebra/rt_netlink.c
@@ -68,11 +68,27 @@
#include "zebra/zebra_mroute.h"
#include "zebra/zebra_vxlan.h"
#include "zebra/zebra_errors.h"
+#include "zebra/zebra_evpn_mh.h"
#ifndef AF_MPLS
#define AF_MPLS 28
#endif
+/* Re-defining as I am unable to include <linux/if_bridge.h> which has the
+ * UAPI for MAC sync. */
+#ifndef _UAPI_LINUX_IF_BRIDGE_H
+/* FDB notification bits for NDA_NOTIFY:
+ * - BR_FDB_NFY_STATIC - notify on activity/expire even for a static entry
+ * - BR_FDB_NFY_INACTIVE - mark as inactive to avoid double notification,
+ * used with BR_FDB_NFY_STATIC (kernel controlled)
+ */
+/* These are bit positions, not masks: users in this file build the mask as
+ * (1 << BR_FDB_NFY_xxx) and carry it in the one-byte NDA_NOTIFY attribute.
+ */
+enum {
+ BR_FDB_NFY_STATIC, /* bit 0 */
+ BR_FDB_NFY_INACTIVE, /* bit 1 */
+ BR_FDB_NFY_MAX
+};
+#endif
+
static vlanid_t filter_vlan = 0;
/* We capture whether the current kernel supports nexthop ids; by
@@ -2521,6 +2537,15 @@ int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
/* We use the ID key'd nhg table for kernel updates */
id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
+ if (zebra_evpn_mh_is_fdb_nh(id)) {
+ /* If this is a L2 NH just ignore it */
+ if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
+ zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
+ h->nlmsg_type, id);
+ }
+ return 0;
+ }
+
family = nhm->nh_family;
afi = family2afi(family);
@@ -2676,7 +2701,9 @@ int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
static ssize_t netlink_neigh_update_msg_encode(
const struct zebra_dplane_ctx *ctx, int cmd, const struct ethaddr *mac,
const struct ipaddr *ip, bool replace_obj, uint8_t family, uint8_t type,
- uint8_t flags, uint16_t state, void *data, size_t datalen)
+ uint8_t flags, uint16_t state, uint32_t nhg_id,
+ bool nfy, uint8_t nfy_flags,
+ void *data, size_t datalen)
{
uint8_t protocol = RTPROT_ZEBRA;
struct {
@@ -2715,6 +2742,16 @@ static ssize_t netlink_neigh_update_msg_encode(
return 0;
}
+ if (nhg_id) {
+ if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
+ return 0;
+ }
+ if (nfy) {
+ if (!nl_attr_put(&req->n, datalen, NDA_NOTIFY,
+ &nfy_flags, sizeof(nfy_flags)))
+ return 0;
+ }
+
ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr, ipa_len))
return 0;
@@ -2747,8 +2784,9 @@ static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx,
if (netlink_neigh_update_msg_encode(
ctx, cmd, &dst_mac, dplane_ctx_neigh_get_ipaddr(ctx), false,
- PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT), nl_pkt,
- sizeof(nl_pkt))
+ PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT),
+ 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/,
+ nl_pkt, sizeof(nl_pkt))
<= 0)
return -1;
@@ -2777,6 +2815,9 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
char vid_buf[20];
char dst_buf[30];
bool sticky;
+ bool local_inactive = false;
+ bool dp_static = false;
+ uint32_t nhg_id = 0;
ndm = NLMSG_DATA(h);
@@ -2824,13 +2865,29 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
inet_ntoa(vtep_ip));
}
+ if (tb[NDA_NH_ID])
+ nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);
+
+ if (ndm->ndm_state & NUD_STALE)
+ local_inactive = true;
+
+ if (tb[NDA_NOTIFY]) {
+ uint8_t nfy_flags;
+
+ dp_static = true;
+ nfy_flags = *(uint8_t *)RTA_DATA(tb[NDA_NOTIFY]);
+ /* local activity has not been detected on the entry */
+ if (nfy_flags & (1 << BR_FDB_NFY_INACTIVE))
+ local_inactive = true;
+ }
+
if (IS_ZEBRA_DEBUG_KERNEL)
- zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s",
+ zlog_debug("Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %s%s nhg %d",
nl_msg_type_to_str(h->nlmsg_type),
ndm->ndm_ifindex, vid_present ? vid_buf : "",
ndm->ndm_state, ndm->ndm_flags,
prefix_mac2str(&mac, buf, sizeof(buf)),
- dst_present ? dst_buf : "");
+ dst_present ? dst_buf : "", nhg_id);
/* The interface should exist. */
ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
@@ -2853,7 +2910,7 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
return 0;
}
- sticky = !!(ndm->ndm_state & NUD_NOARP);
+ sticky = !!(ndm->ndm_flags & NTF_STICKY);
if (filter_vlan && vid != filter_vlan) {
if (IS_ZEBRA_DEBUG_KERNEL)
@@ -2881,7 +2938,7 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
vid);
return zebra_vxlan_local_mac_add_update(ifp, br_if, &mac, vid,
- sticky);
+ sticky, local_inactive, dp_static);
}
/* This is a delete notification.
@@ -2894,6 +2951,9 @@ static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
* Note: We will get notifications from both bridge driver and VxLAN
* driver.
*/
+ if (nhg_id)
+ return 0;
+
if (dst_present) {
u_char zero_mac[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
@@ -3091,18 +3151,43 @@ netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data,
int cmd;
uint8_t flags;
uint16_t state;
+ uint32_t nhg_id;
+ uint32_t update_flags;
+ bool nfy = false;
+ uint8_t nfy_flags = 0;
cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
? RTM_NEWNEIGH : RTM_DELNEIGH;
- flags = (NTF_SELF | NTF_MASTER);
+ flags = NTF_MASTER;
state = NUD_REACHABLE;
- if (dplane_ctx_mac_is_sticky(ctx))
- state |= NUD_NOARP;
- else
- flags |= NTF_EXT_LEARNED;
+ update_flags = dplane_ctx_mac_get_update_flags(ctx);
+ if (update_flags & DPLANE_MAC_REMOTE) {
+ flags |= NTF_SELF;
+ if (dplane_ctx_mac_is_sticky(ctx))
+ flags |= NTF_STICKY;
+ else
+ flags |= NTF_EXT_LEARNED;
+ /* if it was static-local previously we need to clear the
+ * notify flags on replace with remote
+ */
+ if (update_flags & DPLANE_MAC_WAS_STATIC)
+ nfy = true;
+ } else {
+ /* local mac */
+ if (update_flags & DPLANE_MAC_SET_STATIC) {
+ nfy_flags |= (1 << BR_FDB_NFY_STATIC);
+ state |= NUD_NOARP;
+ }
+
+ if (update_flags & DPLANE_MAC_SET_INACTIVE)
+ nfy_flags |= (1 << BR_FDB_NFY_INACTIVE);
+
+ nfy = true;
+ }
+ nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
SET_IPADDR_V4(&vtep_ip);
@@ -3110,6 +3195,7 @@ netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data,
char ipbuf[PREFIX_STRLEN];
char buf[ETHER_ADDR_STRLEN];
char vid_buf[20];
+ const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
vid = dplane_ctx_mac_get_vlan(ctx);
if (vid > 0)
@@ -3117,20 +3203,30 @@ netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data,
else
vid_buf[0] = '\0';
- const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
-
- zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s dst %s",
+ zlog_debug("Tx %s family %s IF %s(%u)%s %sMAC %s dst %s nhg %u%s%s%s%s%s",
nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
dplane_ctx_get_ifname(ctx),
dplane_ctx_get_ifindex(ctx), vid_buf,
dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
prefix_mac2str(mac, buf, sizeof(buf)),
- ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf)));
+ ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf)),
+ nhg_id,
+ (update_flags &
+ DPLANE_MAC_REMOTE) ? " rem" : "",
+ (update_flags &
+ DPLANE_MAC_WAS_STATIC) ? " clr_sync" : "",
+ (update_flags &
+ DPLANE_MAC_SET_STATIC) ? " static" : "",
+ (update_flags &
+ DPLANE_MAC_SET_INACTIVE) ? " inactive" : "",
+ /* nfy is a bool; masking it with the DPLANE_MAC_SET_INACTIVE
+ * flag bit always yielded 0, so " nfy" was never logged.
+ */
+ nfy ? " nfy" : "");
}
total = netlink_neigh_update_msg_encode(
ctx, cmd, dplane_ctx_mac_get_addr(ctx), &vtep_ip, true,
- AF_BRIDGE, 0, flags, state, data, datalen);
+ AF_BRIDGE, 0, flags, state, nhg_id, nfy, nfy_flags,
+ data, datalen);
return total;
}
@@ -3164,6 +3260,8 @@ static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
#define NUD_VALID \
(NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE \
| NUD_DELAY)
+#define NUD_LOCAL_ACTIVE \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE)
static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
{
@@ -3180,6 +3278,7 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
int mac_present = 0;
bool is_ext;
bool is_router;
+ bool local_inactive;
ndm = NLMSG_DATA(h);
@@ -3289,10 +3388,17 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
* result
* in re-adding the neighbor if it is a valid "remote" neighbor.
*/
- if (ndm->ndm_state & NUD_VALID)
+ if (ndm->ndm_state & NUD_VALID) {
+ local_inactive = !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
+
+ /* XXX - populate dp-static based on the sync flags
+ * in the kernel
+ */
return zebra_vxlan_handle_kernel_neigh_update(
ifp, link_if, &ip, &mac, ndm->ndm_state,
- is_ext, is_router);
+ is_ext, is_router, local_inactive,
+ false /* dp_static */);
+ }
return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
}
@@ -3547,8 +3653,9 @@ static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
}
if (netlink_neigh_update_msg_encode(ctx, cmd, mac, ip, true, family,
- RTN_UNICAST, flags, state, nl_pkt,
- sizeof(nl_pkt))
+ RTN_UNICAST, flags, state,
+ 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/,
+ nl_pkt, sizeof(nl_pkt))
<= 0)
return -1;
@@ -3757,4 +3864,172 @@ ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
return NLMSG_ALIGN(req->n.nlmsg_len);
}
+
+/****************************************************************************
+* This code was developed in a branch that didn't have dplane APIs for
+* MAC updates. Hence the use of the legacy style. It will be moved to
+* the new dplane style pre-merge to master. XXX
+*/
+/*
+ * Create or replace (NLM_F_CREATE | NLM_F_REPLACE) an FDB nexthop object in
+ * the kernel via RTM_NEWNEXTHOP.
+ *
+ * nh_id   - id carried in NHA_ID
+ * vtep_ip - IPv4 address carried in NHA_GATEWAY
+ *
+ * Returns -1 when no EVPN vrf is available or an attribute cannot be added to
+ * the request; otherwise whatever netlink_talk() returns.
+ */
+static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
+{
+	const int cmd = RTM_NEWNEXTHOP;
+	struct zebra_vrf *evpn_zvrf = zebra_vrf_get_evpn();
+	struct zebra_ns *zns;
+	struct {
+		struct nlmsghdr n;
+		struct nhmsg nhm;
+		char buf[256];
+	} req;
+
+	if (evpn_zvrf == NULL)
+		return -1;
+	zns = evpn_zvrf->zns;
+
+	/* header: fixed part only, attributes grow nlmsg_len below */
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_type = cmd;
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
+	req.nhm.nh_family = AF_INET;
+
+	/* attributes are appended in this order; stop at the first failure */
+	if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id)
+	    || !nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0)
+	    || !nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY, &vtep_ip,
+			    IPV4_MAX_BYTELEN))
+		return -1;
+
+	if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH)
+		zlog_debug("Tx %s fdb-nh 0x%x %s", nl_msg_type_to_str(cmd),
+			   nh_id, inet_ntoa(vtep_ip));
+
+	return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd,
+			    zns, 0);
+}
+
+/*
+ * Send RTM_DELNEXTHOP for the nexthop object whose id is nh_id. The group
+ * delete path (netlink_fdb_nhg_del) reuses this function, so the id may name
+ * either a single fdb nexthop or an fdb nexthop group.
+ *
+ * Returns -1 when no EVPN vrf is available or NHA_ID cannot be added to the
+ * request; otherwise whatever netlink_talk() returns.
+ */
+static int netlink_fdb_nh_del(uint32_t nh_id)
+{
+	const int cmd = RTM_DELNEXTHOP;
+	struct zebra_vrf *evpn_zvrf = zebra_vrf_get_evpn();
+	struct zebra_ns *zns;
+	struct {
+		struct nlmsghdr n;
+		struct nhmsg nhm;
+		char buf[256];
+	} req;
+
+	if (evpn_zvrf == NULL)
+		return -1;
+	zns = evpn_zvrf->zns;
+
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_type = cmd;
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
+	req.nhm.nh_family = AF_UNSPEC;
+
+	/* the id is the only attribute in a delete request */
+	if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
+		return -1;
+
+	if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH)
+		zlog_debug("Tx %s fdb-nh 0x%x", nl_msg_type_to_str(cmd), nh_id);
+
+	return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd,
+			    zns, 0);
+}
+
+/*
+ * Create or replace (NLM_F_CREATE | NLM_F_REPLACE) the FDB nexthop group
+ * nhg_id in the kernel via RTM_NEWNEXTHOP.
+ *
+ * nhg_id - id of the group object, carried in NHA_ID
+ * nh_cnt - number of entries in nh_ids; must be non-zero
+ * nh_ids - member ids and weights, copied as-is into the NHA_GROUP attribute
+ *
+ * Returns -1 when nh_ids is NULL, when nh_cnt is zero or larger than the
+ * request buffer could ever carry, when no EVPN vrf is available, or when an
+ * attribute cannot be added to the request; otherwise whatever
+ * netlink_talk() returns.
+ */
+static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
+				  struct nh_grp *nh_ids)
+{
+	struct {
+		struct nlmsghdr n;
+		struct nhmsg nhm;
+		char buf[256];
+	} req;
+	int cmd = RTM_NEWNEXTHOP;
+	struct zebra_vrf *zvrf;
+	struct zebra_ns *zns;
+	/* Fixed-size scratch array rather than a VLA sized by the caller: a
+	 * zero-length VLA is undefined behaviour and a huge nh_cnt would blow
+	 * the stack. A group with more members than this cannot fit in
+	 * req.buf anyway, so nothing that used to succeed is rejected.
+	 */
+	struct nexthop_grp grp[sizeof(req.buf) / sizeof(struct nexthop_grp)];
+	uint32_t i;
+
+	if (!nh_ids || nh_cnt == 0 || nh_cnt > sizeof(grp) / sizeof(grp[0]))
+		return -1;
+
+	zvrf = zebra_vrf_get_evpn();
+	if (!zvrf)
+		return -1;
+	zns = zvrf->zns;
+
+	memset(&req, 0, sizeof(req));
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
+	req.n.nlmsg_type = cmd;
+	req.nhm.nh_family = AF_UNSPEC;
+
+	if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
+		return -1;
+	if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
+		return -1;
+
+	/* translate zebra's nh_grp entries into the kernel's nexthop_grp */
+	memset(grp, 0, sizeof(grp));
+	for (i = 0; i < nh_cnt; ++i) {
+		grp[i].id = nh_ids[i].id;
+		grp[i].weight = nh_ids[i].weight;
+	}
+	if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
+			 grp, nh_cnt * sizeof(struct nexthop_grp)))
+		return -1;
+
+	if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
+		char vtep_str[ES_VTEP_LIST_STR_SZ];
+		char nh_buf[16];
+
+		/* space-separated list of member ids; strlcat truncates */
+		vtep_str[0] = '\0';
+		for (i = 0; i < nh_cnt; ++i) {
+			snprintf(nh_buf, sizeof(nh_buf), "%u ",
+				 grp[i].id);
+			strlcat(vtep_str, nh_buf, sizeof(vtep_str));
+		}
+
+		zlog_debug("Tx %s fdb-nhg 0x%x %s",
+			   nl_msg_type_to_str(cmd), nhg_id, vtep_str);
+	}
+
+	return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
+			    0);
+}
+
+/* Delete an fdb nexthop group; the request is the same id-only
+ * RTM_DELNEXTHOP used for a single nexthop, so this forwards to
+ * netlink_fdb_nh_del() and returns its result.
+ */
+static int netlink_fdb_nhg_del(uint32_t nhg_id)
+{
+ return netlink_fdb_nh_del(nhg_id);
+}
+
+/* Netlink implementation of the rt.h entry point: add or update the fdb
+ * nexthop nh_id pointing at vtep_ip. Returns the result of
+ * netlink_fdb_nh_update().
+ */
+int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
+{
+ return netlink_fdb_nh_update(nh_id, vtep_ip);
+}
+
+/* Netlink implementation of the rt.h entry point: delete the fdb nexthop
+ * nh_id. Returns the result of netlink_fdb_nh_del().
+ */
+int kernel_del_mac_nh(uint32_t nh_id)
+{
+ return netlink_fdb_nh_del(nh_id);
+}
+
+/* Netlink implementation of the rt.h entry point: add or update the fdb
+ * nexthop group nhg_id with the nh_cnt members listed in nh_ids. Returns the
+ * result of netlink_fdb_nhg_update().
+ */
+int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
+ struct nh_grp *nh_ids)
+{
+ return netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);
+}
+
+/* Netlink implementation of the rt.h entry point: delete the fdb nexthop
+ * group nhg_id. Returns the result of netlink_fdb_nhg_del().
+ */
+int kernel_del_mac_nhg(uint32_t nhg_id)
+{
+ return netlink_fdb_nhg_del(nhg_id);
+}
+
#endif /* HAVE_NETLINK */
diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c
index 2eadaf48f4..0271dc7f41 100644
--- a/zebra/rt_socket.c
+++ b/zebra/rt_socket.c
@@ -417,4 +417,25 @@ uint32_t kernel_get_speed(struct interface *ifp, int *error)
return ifp->speed;
}
+/* Stub for builds without netlink (!HAVE_NETLINK): ignores its arguments,
+ * programs nothing and returns 0.
+ */
+int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
+{
+ return 0;
+}
+
+/* Stub for builds without netlink (!HAVE_NETLINK): ignores nh_id and
+ * returns 0.
+ */
+int kernel_del_mac_nh(uint32_t nh_id)
+{
+ return 0;
+}
+
+/* Stub for builds without netlink (!HAVE_NETLINK): ignores its arguments,
+ * programs nothing and returns 0.
+ */
+int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
+ struct nh_grp *nh_ids)
+{
+ return 0;
+}
+
+/* Stub for builds without netlink (!HAVE_NETLINK): ignores nhg_id and
+ * returns 0.
+ */
+int kernel_del_mac_nhg(uint32_t nhg_id)
+{
+ return 0;
+}
+
#endif /* !HAVE_NETLINK */
diff --git a/zebra/subdir.am b/zebra/subdir.am
index 49e60820bc..c552ca513e 100644
--- a/zebra/subdir.am
+++ b/zebra/subdir.am
@@ -12,6 +12,7 @@ vtysh_scan += \
zebra/rtadv.c \
zebra/zebra_gr.c \
zebra/zebra_mlag_vty.c \
+ zebra/zebra_evpn_mh.c \
zebra/zebra_mpls_vty.c \
zebra/zebra_ptm.c \
zebra/zebra_pw.c \
@@ -108,6 +109,7 @@ zebra_zebra_SOURCES = \
zebra/zebra_vrf.c \
zebra/zebra_vty.c \
zebra/zebra_vxlan.c \
+ zebra/zebra_evpn_mh.c \
zebra/zserv.c \
# end
@@ -115,6 +117,7 @@ clippy_scan += \
zebra/debug.c \
zebra/interface.c \
zebra/rtadv.c \
+ zebra/zebra_evpn_mh.c \
zebra/zebra_mlag_vty.c \
zebra/zebra_routemap.c \
zebra/zebra_vty.c \
@@ -167,6 +170,7 @@ noinst_HEADERS += \
zebra/zebra_vrf.h \
zebra/zebra_vxlan.h \
zebra/zebra_vxlan_private.h \
+ zebra/zebra_evpn_mh.h \
zebra/zserv.h \
# end
diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c
index 2ca3e82fac..0a459b4d0a 100644
--- a/zebra/zapi_msg.c
+++ b/zebra/zapi_msg.c
@@ -51,6 +51,7 @@
#include "zebra/zebra_mpls.h"
#include "zebra/zebra_mroute.h"
#include "zebra/zebra_vxlan.h"
+#include "zebra/zebra_evpn_mh.h"
#include "zebra/rt.h"
#include "zebra/zebra_pbr.h"
#include "zebra/table_manager.h"
@@ -2892,6 +2893,8 @@ void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = {
[ZEBRA_ADVERTISE_SVI_MACIP] = zebra_vxlan_advertise_svi_macip,
[ZEBRA_ADVERTISE_SUBNET] = zebra_vxlan_advertise_subnet,
[ZEBRA_ADVERTISE_ALL_VNI] = zebra_vxlan_advertise_all_vni,
+ [ZEBRA_REMOTE_ES_VTEP_ADD] = zebra_evpn_proc_remote_es,
+ [ZEBRA_REMOTE_ES_VTEP_DEL] = zebra_evpn_proc_remote_es,
[ZEBRA_REMOTE_VTEP_ADD] = zebra_vxlan_remote_vtep_add,
[ZEBRA_REMOTE_VTEP_DEL] = zebra_vxlan_remote_vtep_del,
[ZEBRA_REMOTE_MACIP_ADD] = zebra_vxlan_remote_macip_add,
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index 53956e3aec..5dcf76db15 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -180,6 +180,8 @@ struct dplane_mac_info {
struct ethaddr mac;
struct in_addr vtep_ip;
bool is_sticky;
+ uint32_t nhg_id;
+ uint32_t update_flags;
};
/*
@@ -190,6 +192,7 @@ struct dplane_neigh_info {
struct ethaddr mac;
uint32_t flags;
uint16_t state;
+ uint32_t update_flags;
};
/*
@@ -441,13 +444,14 @@ static enum zebra_dplane_result mac_update_common(
enum dplane_op_e op, const struct interface *ifp,
const struct interface *br_ifp,
vlanid_t vid, const struct ethaddr *mac,
- struct in_addr vtep_ip, bool sticky);
+ struct in_addr vtep_ip, bool sticky, uint32_t nhg_id,
+ uint32_t update_flags);
static enum zebra_dplane_result neigh_update_internal(
enum dplane_op_e op,
const struct interface *ifp,
const struct ethaddr *mac,
const struct ipaddr *ip,
- uint32_t flags, uint16_t state);
+ uint32_t flags, uint16_t state, uint32_t update_flags);
/*
* Public APIs
@@ -1552,6 +1556,18 @@ bool dplane_ctx_mac_is_sticky(const struct zebra_dplane_ctx *ctx)
return ctx->u.macinfo.is_sticky;
}
+/* Accessor: nexthop-group id stored in the MAC info of the context
+ * (ctx->u.macinfo.nhg_id, filled in by dplane_mac_init()).
+ */
+uint32_t dplane_ctx_mac_get_nhg_id(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.macinfo.nhg_id;
+}
+
+/* Accessor: DPLANE_MAC_* update flags stored in the MAC info of the context
+ * (ctx->u.macinfo.update_flags, filled in by dplane_mac_init()).
+ */
+uint32_t dplane_ctx_mac_get_update_flags(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.macinfo.update_flags;
+}
+
const struct ethaddr *dplane_ctx_mac_get_addr(
const struct zebra_dplane_ctx *ctx)
{
@@ -1599,6 +1615,12 @@ uint16_t dplane_ctx_neigh_get_state(const struct zebra_dplane_ctx *ctx)
return ctx->u.neigh.state;
}
+/* Accessor: DPLANE_NEIGH_* update flags stored in the neighbor info of the
+ * context (ctx->u.neigh.update_flags).
+ */
+uint32_t dplane_ctx_neigh_get_update_flags(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->u.neigh.update_flags;
+}
+
/* Accessors for PBR rule information */
int dplane_ctx_rule_get_sock(const struct zebra_dplane_ctx *ctx)
{
@@ -2542,8 +2564,8 @@ dplane_route_notif_update(struct route_node *rn,
done:
if (ret == AOK)
result = ZEBRA_DPLANE_REQUEST_QUEUED;
- else if (ctx)
- dplane_ctx_free(&ctx);
+ else if (new_ctx)
+ dplane_ctx_free(&new_ctx);
return result;
}
@@ -2895,35 +2917,75 @@ static enum zebra_dplane_result intf_addr_update_internal(
/*
* Enqueue vxlan/evpn mac add (or update).
*/
-enum zebra_dplane_result dplane_mac_add(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_mac_add(const struct interface *ifp,
const struct interface *bridge_ifp,
vlanid_t vid,
const struct ethaddr *mac,
struct in_addr vtep_ip,
- bool sticky)
+ bool sticky,
+ uint32_t nhg_id,
+ bool was_static)
{
enum zebra_dplane_result result;
+ uint32_t update_flags = 0;
+
+ update_flags |= DPLANE_MAC_REMOTE;
+ if (was_static)
+ update_flags |= DPLANE_MAC_WAS_STATIC;
/* Use common helper api */
result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp,
- vid, mac, vtep_ip, sticky);
+ vid, mac, vtep_ip, sticky, nhg_id, update_flags);
return result;
}
/*
* Enqueue vxlan/evpn mac delete.
*/
-enum zebra_dplane_result dplane_mac_del(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_mac_del(const struct interface *ifp,
const struct interface *bridge_ifp,
vlanid_t vid,
const struct ethaddr *mac,
struct in_addr vtep_ip)
{
enum zebra_dplane_result result;
+ uint32_t update_flags = 0;
+
+ update_flags |= DPLANE_MAC_REMOTE;
/* Use common helper api */
result = mac_update_common(DPLANE_OP_MAC_DELETE, ifp, bridge_ifp,
- vid, mac, vtep_ip, false);
+ vid, mac, vtep_ip, false, 0, update_flags);
+ return result;
+}
+
+/*
+ * Enqueue local mac add (or update).
+ *
+ * Builds a DPLANE_OP_MAC_INSTALL request through mac_update_common() without
+ * DPLANE_MAC_REMOTE set. set_static and set_inactive are treated as booleans
+ * (any non-zero value) and map to DPLANE_MAC_SET_STATIC and
+ * DPLANE_MAC_SET_INACTIVE respectively. Returns the result of
+ * mac_update_common().
+ */
+enum zebra_dplane_result dplane_local_mac_add(const struct interface *ifp,
+ const struct interface *bridge_ifp,
+ vlanid_t vid,
+ const struct ethaddr *mac,
+ bool sticky,
+ uint32_t set_static,
+ uint32_t set_inactive)
+{
+ enum zebra_dplane_result result;
+ uint32_t update_flags = 0;
+ struct in_addr vtep_ip;
+
+ if (set_static)
+ update_flags |= DPLANE_MAC_SET_STATIC;
+
+ if (set_inactive)
+ update_flags |= DPLANE_MAC_SET_INACTIVE;
+
+ /* no VTEP address is supplied for a local entry; pass 0.0.0.0 */
+ vtep_ip.s_addr = 0;
+
+ /* Use common helper api */
+ /* nhg_id argument is 0 for local entries */
+ result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp,
+ vid, mac, vtep_ip, sticky, 0,
+ update_flags);
return result;
}
@@ -2937,7 +2999,9 @@ void dplane_mac_init(struct zebra_dplane_ctx *ctx,
vlanid_t vid,
const struct ethaddr *mac,
struct in_addr vtep_ip,
- bool sticky)
+ bool sticky,
+ uint32_t nhg_id,
+ uint32_t update_flags)
{
struct zebra_ns *zns;
@@ -2958,6 +3022,8 @@ void dplane_mac_init(struct zebra_dplane_ctx *ctx,
ctx->u.macinfo.mac = *mac;
ctx->u.macinfo.vid = vid;
ctx->u.macinfo.is_sticky = sticky;
+ ctx->u.macinfo.nhg_id = nhg_id;
+ ctx->u.macinfo.update_flags = update_flags;
}
/*
@@ -2970,7 +3036,9 @@ mac_update_common(enum dplane_op_e op,
vlanid_t vid,
const struct ethaddr *mac,
struct in_addr vtep_ip,
- bool sticky)
+ bool sticky,
+ uint32_t nhg_id,
+ uint32_t update_flags)
{
enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
int ret;
@@ -2990,7 +3058,8 @@ mac_update_common(enum dplane_op_e op,
ctx->zd_op = op;
/* Common init for the ctx */
- dplane_mac_init(ctx, ifp, br_ifp, vid, mac, vtep_ip, sticky);
+ dplane_mac_init(ctx, ifp, br_ifp, vid, mac, vtep_ip, sticky,
+ nhg_id, update_flags);
/* Enqueue for processing on the dplane pthread */
ret = dplane_update_enqueue(ctx);
@@ -3014,15 +3083,56 @@ mac_update_common(enum dplane_op_e op,
/*
* Enqueue evpn neighbor add for the dataplane.
*/
-enum zebra_dplane_result dplane_neigh_add(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_neigh_add(const struct interface *ifp,
const struct ipaddr *ip,
const struct ethaddr *mac,
- uint32_t flags)
+ uint32_t flags, bool was_static)
{
enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+ uint32_t update_flags = 0;
+
+ update_flags |= DPLANE_NEIGH_REMOTE;
+
+ if (was_static)
+ update_flags |= DPLANE_NEIGH_WAS_STATIC;
result = neigh_update_internal(DPLANE_OP_NEIGH_INSTALL,
- ifp, mac, ip, flags, DPLANE_NUD_NOARP);
+ ifp, mac, ip, flags, DPLANE_NUD_NOARP,
+ update_flags);
+
+ return result;
+}
+
+/*
+ * Enqueue local neighbor add for the dataplane.
+ *
+ * Builds a DPLANE_OP_NEIGH_INSTALL request through neigh_update_internal()
+ * without DPLANE_NEIGH_REMOTE set:
+ * set_router   - adds DPLANE_NTF_ROUTER to the neighbor flags
+ * set_static   - adds DPLANE_NEIGH_SET_STATIC to the update flags
+ * set_inactive - adds DPLANE_NEIGH_SET_INACTIVE and installs the entry as
+ *                DPLANE_NUD_STALE; otherwise it is DPLANE_NUD_REACHABLE
+ * Returns the result of neigh_update_internal().
+ */
+enum zebra_dplane_result dplane_local_neigh_add(const struct interface *ifp,
+ const struct ipaddr *ip,
+ const struct ethaddr *mac,
+ bool set_router, bool set_static,
+ bool set_inactive)
+{
+ enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
+ uint32_t update_flags = 0;
+ uint32_t ntf = 0;
+ uint16_t state;
+
+ if (set_static)
+ update_flags |= DPLANE_NEIGH_SET_STATIC;
+
+ /* state is assigned on both branches, so it is always initialised */
+ if (set_inactive) {
+ update_flags |= DPLANE_NEIGH_SET_INACTIVE;
+ state = DPLANE_NUD_STALE;
+ } else {
+ state = DPLANE_NUD_REACHABLE;
+ }
+
+ if (set_router)
+ ntf |= DPLANE_NTF_ROUTER;
+
+ result = neigh_update_internal(DPLANE_OP_NEIGH_INSTALL,
+ ifp, mac, ip, ntf,
+ state, update_flags);
return result;
}
@@ -3030,14 +3140,18 @@ enum zebra_dplane_result dplane_neigh_add(const struct interface *ifp,
/*
* Enqueue evpn neighbor update for the dataplane.
*/
-enum zebra_dplane_result dplane_neigh_update(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_neigh_update(const struct interface *ifp,
const struct ipaddr *ip,
const struct ethaddr *mac)
{
enum zebra_dplane_result result;
+ uint32_t update_flags = 0;
+
+ update_flags |= DPLANE_NEIGH_REMOTE;
result = neigh_update_internal(DPLANE_OP_NEIGH_UPDATE,
- ifp, mac, ip, 0, DPLANE_NUD_PROBE);
+ ifp, mac, ip, 0, DPLANE_NUD_PROBE,
+ update_flags);
return result;
}
@@ -3045,13 +3159,16 @@ enum zebra_dplane_result dplane_neigh_update(const struct interface *ifp,
/*
* Enqueue evpn neighbor delete for the dataplane.
*/
-enum zebra_dplane_result dplane_neigh_delete(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_neigh_delete(const struct interface *ifp,
const struct ipaddr *ip)
{
enum zebra_dplane_result result;
+ uint32_t update_flags = 0;
+
+ update_flags |= DPLANE_NEIGH_REMOTE;
result = neigh_update_internal(DPLANE_OP_NEIGH_DELETE,
- ifp, NULL, ip, 0, 0);
+ ifp, NULL, ip, 0, 0, update_flags);
return result;
}
@@ -3075,7 +3192,7 @@ enum zebra_dplane_result dplane_vtep_add(const struct interface *ifp,
addr.ipaddr_v4 = *ip;
result = neigh_update_internal(DPLANE_OP_VTEP_ADD,
- ifp, &mac, &addr, 0, 0);
+ ifp, &mac, &addr, 0, 0, 0);
return result;
}
@@ -3100,7 +3217,7 @@ enum zebra_dplane_result dplane_vtep_delete(const struct interface *ifp,
addr.ipaddr_v4 = *ip;
result = neigh_update_internal(DPLANE_OP_VTEP_DELETE,
- ifp, &mac, &addr, 0, 0);
+ ifp, &mac, &addr, 0, 0, 0);
return result;
}
@@ -3113,7 +3230,8 @@ neigh_update_internal(enum dplane_op_e op,
const struct interface *ifp,
const struct ethaddr *mac,
const struct ipaddr *ip,
- uint32_t flags, uint16_t state)
+ uint32_t flags, uint16_t state,
+ uint32_t update_flags)
{
enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE;
int ret;
@@ -3150,6 +3268,7 @@ neigh_update_internal(enum dplane_op_e op,
ctx->u.neigh.mac = *mac;
ctx->u.neigh.flags = flags;
ctx->u.neigh.state = state;
+ ctx->u.neigh.update_flags = update_flags;
/* Enqueue for processing on the dplane pthread */
ret = dplane_update_enqueue(ctx);
diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
index 0fa21f620d..32032ed77d 100644
--- a/zebra/zebra_dplane.h
+++ b/zebra/zebra_dplane.h
@@ -168,6 +168,18 @@ enum dplane_op_e {
#define DPLANE_NUD_NOARP 0x04
#define DPLANE_NUD_PROBE 0x08
+/* MAC update flags - dplane_mac_info.update_flags */
+#define DPLANE_MAC_REMOTE (1 << 0)
+#define DPLANE_MAC_WAS_STATIC (1 << 1)
+#define DPLANE_MAC_SET_STATIC (1 << 2)
+#define DPLANE_MAC_SET_INACTIVE (1 << 3)
+
+/* Neigh update flags - dplane_neigh_info.update_flags */
+#define DPLANE_NEIGH_REMOTE (1 << 0)
+#define DPLANE_NEIGH_WAS_STATIC (1 << 1)
+#define DPLANE_NEIGH_SET_STATIC (1 << 2)
+#define DPLANE_NEIGH_SET_INACTIVE (1 << 3)
+
/* Enable system route notifications */
void dplane_enable_sys_route_notifs(void);
@@ -386,6 +398,8 @@ const char *dplane_ctx_get_intf_label(const struct zebra_dplane_ctx *ctx);
/* Accessors for MAC information */
vlanid_t dplane_ctx_mac_get_vlan(const struct zebra_dplane_ctx *ctx);
bool dplane_ctx_mac_is_sticky(const struct zebra_dplane_ctx *ctx);
+uint32_t dplane_ctx_mac_get_update_flags(const struct zebra_dplane_ctx *ctx);
+uint32_t dplane_ctx_mac_get_nhg_id(const struct zebra_dplane_ctx *ctx);
const struct ethaddr *dplane_ctx_mac_get_addr(
const struct zebra_dplane_ctx *ctx);
const struct in_addr *dplane_ctx_mac_get_vtep_ip(
@@ -399,6 +413,7 @@ const struct ethaddr *dplane_ctx_neigh_get_mac(
const struct zebra_dplane_ctx *ctx);
uint32_t dplane_ctx_neigh_get_flags(const struct zebra_dplane_ctx *ctx);
uint16_t dplane_ctx_neigh_get_state(const struct zebra_dplane_ctx *ctx);
+uint32_t dplane_ctx_neigh_get_update_flags(const struct zebra_dplane_ctx *ctx);
/* Accessors for policy based routing rule information */
int dplane_ctx_rule_get_sock(const struct zebra_dplane_ctx *ctx);
@@ -497,20 +512,24 @@ enum zebra_dplane_result dplane_intf_addr_unset(const struct interface *ifp,
/*
* Enqueue evpn mac operations for the dataplane.
*/
-extern struct zebra_dplane_ctx *mac_update_internal(
- enum dplane_op_e op, const struct interface *ifp,
- const struct interface *br_ifp,
- vlanid_t vid, const struct ethaddr *mac,
- struct in_addr vtep_ip, bool sticky);
-
-enum zebra_dplane_result dplane_mac_add(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_mac_add(const struct interface *ifp,
const struct interface *bridge_ifp,
vlanid_t vid,
const struct ethaddr *mac,
struct in_addr vtep_ip,
- bool sticky);
+ bool sticky,
+ uint32_t nhg_id,
+ bool was_static);
+
+enum zebra_dplane_result dplane_local_mac_add(const struct interface *ifp,
+ const struct interface *bridge_ifp,
+ vlanid_t vid,
+ const struct ethaddr *mac,
+ bool sticky,
+ uint32_t set_static,
+ uint32_t set_inactive);
-enum zebra_dplane_result dplane_mac_del(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_mac_del(const struct interface *ifp,
const struct interface *bridge_ifp,
vlanid_t vid,
const struct ethaddr *mac,
@@ -523,19 +542,25 @@ void dplane_mac_init(struct zebra_dplane_ctx *ctx,
vlanid_t vid,
const struct ethaddr *mac,
struct in_addr vtep_ip,
- bool sticky);
+ bool sticky,
+ uint32_t nhg_id, uint32_t update_flags);
/*
* Enqueue evpn neighbor updates for the dataplane.
*/
-enum zebra_dplane_result dplane_neigh_add(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_neigh_add(const struct interface *ifp,
+ const struct ipaddr *ip,
+ const struct ethaddr *mac,
+ uint32_t flags, bool was_static);
+enum zebra_dplane_result dplane_local_neigh_add(const struct interface *ifp,
const struct ipaddr *ip,
const struct ethaddr *mac,
- uint32_t flags);
-enum zebra_dplane_result dplane_neigh_update(const struct interface *ifp,
+ bool set_router, bool set_static,
+ bool set_inactive);
+enum zebra_dplane_result dplane_rem_neigh_update(const struct interface *ifp,
const struct ipaddr *ip,
const struct ethaddr *mac);
-enum zebra_dplane_result dplane_neigh_delete(const struct interface *ifp,
+enum zebra_dplane_result dplane_rem_neigh_delete(const struct interface *ifp,
const struct ipaddr *ip);
/*
diff --git a/zebra/zebra_errors.h b/zebra/zebra_errors.h
index 5f2a7a12c6..03953ed17f 100644
--- a/zebra/zebra_errors.h
+++ b/zebra/zebra_errors.h
@@ -134,6 +134,7 @@ enum zebra_log_refs {
EC_ZEBRA_BAD_NHG_MESSAGE,
EC_ZEBRA_DUPLICATE_NHG_MESSAGE,
EC_ZEBRA_VRF_MISCONFIGURED,
+ EC_ZEBRA_ES_CREATE,
};
void zebra_error_init(void);
diff --git a/zebra/zebra_evpn_mh.c b/zebra/zebra_evpn_mh.c
new file mode 100644
index 0000000000..fae36ec6fa
--- /dev/null
+++ b/zebra/zebra_evpn_mh.c
@@ -0,0 +1,2145 @@
+/*
+ * Zebra EVPN multihoming code
+ *
+ * Copyright (C) 2019 Cumulus Networks, Inc.
+ * Anuradha Karuppiah
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "hash.h"
+#include "if.h"
+#include "jhash.h"
+#include "linklist.h"
+#include "log.h"
+#include "memory.h"
+#include "prefix.h"
+#include "stream.h"
+#include "table.h"
+#include "vlan.h"
+#include "vxlan.h"
+
+#include "zebra/zebra_router.h"
+#include "zebra/debug.h"
+#include "zebra/interface.h"
+#include "zebra/rib.h"
+#include "zebra/rt.h"
+#include "zebra/rt_netlink.h"
+#include "zebra/zebra_errors.h"
+#include "zebra/zebra_l2.h"
+#include "zebra/zebra_memory.h"
+#include "zebra/zebra_ns.h"
+#include "zebra/zebra_vrf.h"
+#include "zebra/zebra_vxlan.h"
+#include "zebra/zebra_vxlan_private.h"
+#include "zebra/zebra_router.h"
+#include "zebra/zebra_evpn_mh.h"
+#include "zebra/zebra_nhg.h"
+
+/* memory types used for the objects allocated in this file */
+DEFINE_MTYPE_STATIC(ZEBRA, ZACC_BD, "Access Broadcast Domain");
+DEFINE_MTYPE_STATIC(ZEBRA, ZES, "Ethernet Segment");
+DEFINE_MTYPE_STATIC(ZEBRA, ZES_EVI, "ES info per-EVI");
+DEFINE_MTYPE_STATIC(ZEBRA, ZMH_INFO, "MH global info");
+DEFINE_MTYPE_STATIC(ZEBRA, ZES_VTEP, "VTEP attached to the ES");
+
+/* forward declarations of file-local helpers that are referenced before
+ * their definitions
+ */
+static void zebra_evpn_es_get_one_base_vni(void);
+static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es,
+ zebra_vni_t *vni, bool add);
+static void zebra_evpn_local_es_del(struct zebra_evpn_es *es);
+static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid,
+ struct ethaddr *sysmac);
+
+/* file-scope ESI buffer (zero-initialized as a static-storage object) and
+ * a pointer to it; presumably used as the "no ESI" value - confirm with users
+ */
+esi_t zero_esi_buf, *zero_esi = &zero_esi_buf;
+
+/*****************************************************************************/
+/* Ethernet Segment to EVI association -
+ * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI
+ * (zebra_vni_t.es_evi_rb_tree).
+ * 2. Each local ES-EVI entry is sent to BGP which advertises it as an
+ * EAD-EVI (Type-1 EVPN) route
+ * 3. Local ES-EVI setup is re-evaluated on the following triggers -
+ * a. When an ESI is set or cleared on an access port.
+ * b. When an access port associated with an ESI is deleted.
+ * c. When VLAN membership changes on an access port.
+ * d. When a VXLAN_IF is set or cleared on an access broadcast domain.
+ * e. When a L2-VNI is added or deleted for a VxLAN_IF.
+ * 4. Currently zebra doesn't maintain remote ES-EVIs. Those are managed and maintained
+ * entirely in BGP which consolidates them into a remote ES. The remote ES
+ * is then sent to zebra which allocates a NHG for it.
+ */
+
+/* Ordering function for the per-VNI ES-EVI RB tree: entries are ordered
+ * by the raw bytes of the ESI of the ES they belong to.
+ */
+static int zebra_es_evi_rb_cmp(const struct zebra_evpn_es_evi *es_evi1,
+		const struct zebra_evpn_es_evi *es_evi2)
+{
+	const void *lhs_esi = &es_evi1->es->esi;
+	const void *rhs_esi = &es_evi2->es->esi;
+
+	return memcmp(lhs_esi, rhs_esi, ESI_BYTES);
+}
+RB_GENERATE(zebra_es_evi_rb_head, zebra_evpn_es_evi,
+		rb_node, zebra_es_evi_rb_cmp);
+
+/* Create the ES-EVI that ties an ES to an L2-VNI and link it into both
+ * the VNI's ESI-keyed RB tree and the ES's list of EVIs.
+ * Returns the new entry, or NULL (after freeing it) if the RB tree insert
+ * reports a non-NULL result.
+ */
+static struct zebra_evpn_es_evi *zebra_evpn_es_evi_new(struct zebra_evpn_es *es,
+		zebra_vni_t *zvni)
+{
+	struct zebra_evpn_es_evi *entry;
+
+	entry = XCALLOC(MTYPE_ZES_EVI, sizeof(*entry));
+	entry->es = es;
+	entry->zvni = zvni;
+
+	/* VNI side: RB tree keyed by the ESI of the parent ES */
+	if (RB_INSERT(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, entry)
+	    != NULL) {
+		XFREE(MTYPE_ZES_EVI, entry);
+		return NULL;
+	}
+
+	/* ES side: the list node is embedded in the entry itself */
+	listnode_init(&entry->es_listnode, entry);
+	listnode_add(es->es_evi_list, &entry->es_listnode);
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s evi %d new", es->esi_str, zvni->vni);
+
+	return entry;
+}
+
+/* true when the L2-VNI carries the ZVNI_READY_FOR_BGP flag */
+static inline bool zebra_evpn_vni_send_to_client_ok(zebra_vni_t *zvni)
+{
+	if (zvni->flags & ZVNI_READY_FOR_BGP)
+		return true;
+
+	return false;
+}
+
+/* Recompute the READY_FOR_BGP state of an ES-EVI and notify BGP only when
+ * that state flips -
+ * 1. not-ready => ready: an add is sent
+ * 2. ready => not-ready: a del is sent
+ * The entry is ready when it is local and both its ES and its L2-VNI are
+ * individually ready for BGP.
+ */
+static void zebra_evpn_es_evi_re_eval_send_to_client(
+		struct zebra_evpn_es_evi *es_evi)
+{
+	bool was_ready;
+	bool is_ready;
+
+	was_ready = (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP) != 0;
+
+	is_ready = (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)
+		   && (es_evi->es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
+		   && zebra_evpn_vni_send_to_client_ok(es_evi->zvni);
+
+	if (is_ready)
+		es_evi->flags |= ZEBRA_EVPNES_EVI_READY_FOR_BGP;
+	else
+		es_evi->flags &= ~ZEBRA_EVPNES_EVI_READY_FOR_BGP;
+
+	/* no transition, nothing to tell BGP */
+	if (was_ready == is_ready)
+		return;
+
+	/* the new state doubles as the add/del selector */
+	zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zvni, is_ready);
+}
+
+/* Unlink an ES-EVI from its parent ES and parent L2-VNI and release its
+ * memory. No BGP notification is done here.
+ */
+static void zebra_evpn_es_evi_free(struct zebra_evpn_es_evi *es_evi)
+{
+	struct zebra_evpn_es *parent_es = es_evi->es;
+	zebra_vni_t *parent_vni = es_evi->zvni;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s evi %d free",
+			   parent_es->esi_str, parent_vni->vni);
+
+	/* unlink from the ES's EVI list */
+	list_delete_node(parent_es->es_evi_list, &es_evi->es_listnode);
+
+	/* unlink from the VNI's ESI-keyed RB tree */
+	RB_REMOVE(zebra_es_evi_rb_head, &parent_vni->es_evi_rb_tree, es_evi);
+
+	/* release the entry itself */
+	XFREE(MTYPE_ZES_EVI, es_evi);
+}
+
+/* Look up the ES-EVI for (es, zvni) in the VNI's RB tree; NULL if absent.
+ * Only the ES pointer of the search key is filled in because the tree
+ * comparator looks at nothing but es->esi.
+ */
+static struct zebra_evpn_es_evi *zebra_evpn_es_evi_find(
+		struct zebra_evpn_es *es, zebra_vni_t *zvni)
+{
+	struct zebra_evpn_es_evi key = {.es = es};
+
+	return RB_FIND(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree, &key);
+}
+
+/* Tear down a local ES-EVI: withdraw it from BGP (if it had been
+ * advertised), drop it from the VNI's local list and free it.
+ * Entries that are not flagged local are left untouched.
+ */
+static void zebra_evpn_local_es_evi_do_del(struct zebra_evpn_es_evi *es_evi)
+{
+	bool advertised;
+
+	if (!(es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL))
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("local es %s evi %d del",
+			   es_evi->es->esi_str, es_evi->zvni->vni);
+
+	/* a del is only meaningful if an add went out earlier */
+	advertised = (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP) != 0;
+	if (advertised)
+		zebra_evpn_es_evi_send_to_client(es_evi->es, es_evi->zvni,
+						 false /* add */);
+
+	/* unlink from the VNI's list of local ES-EVIs */
+	list_delete_node(es_evi->zvni->local_es_evi_list,
+			 &es_evi->l2vni_listnode);
+
+	es_evi->flags &= ~ZEBRA_EVPNES_EVI_LOCAL;
+	zebra_evpn_es_evi_free(es_evi);
+}
+/* Delete the local ES-EVI for (es, zvni) if one exists */
+static void zebra_evpn_local_es_evi_del(struct zebra_evpn_es *es,
+		zebra_vni_t *zvni)
+{
+	struct zebra_evpn_es_evi *match = zebra_evpn_es_evi_find(es, zvni);
+
+	if (!match)
+		return;
+
+	zebra_evpn_local_es_evi_do_del(match);
+}
+
+/* Make sure a local ES-EVI exists for (es, zvni). An existing entry is
+ * left as is; a new one is flagged local, linked into the VNI's local
+ * list and evaluated for advertisement to BGP.
+ */
+static void zebra_evpn_local_es_evi_add(struct zebra_evpn_es *es,
+		zebra_vni_t *zvni)
+{
+	struct zebra_evpn_es_evi *entry;
+
+	/* already present, nothing to do */
+	if (zebra_evpn_es_evi_find(es, zvni))
+		return;
+
+	entry = zebra_evpn_es_evi_new(es, zvni);
+	if (!entry)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("local es %s evi %d add",
+			   entry->es->esi_str, entry->zvni->vni);
+
+	entry->flags |= ZEBRA_EVPNES_EVI_LOCAL;
+
+	/* link into the VNI's list of local ES-EVIs */
+	listnode_init(&entry->l2vni_listnode, entry);
+	listnode_add(zvni->local_es_evi_list, &entry->l2vni_listnode);
+
+	zebra_evpn_es_evi_re_eval_send_to_client(entry);
+}
+
+/* Print one ES-EVI as a single table row (VNI, ESI, type).
+ * JSON output is not implemented; nothing is emitted when json is set.
+ */
+static void zebra_evpn_es_evi_show_entry(struct vty *vty,
+		struct zebra_evpn_es_evi *es_evi, json_object *json)
+{
+	const char *type_str;
+
+	if (json) {
+		/* XXX */
+		return;
+	}
+
+	/* "L" marks a local entry; the column is blank otherwise */
+	type_str = (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) ? "L" : "";
+
+	vty_out(vty, "%-8d %-30s %-4s\n",
+		es_evi->zvni->vni, es_evi->es->esi_str, type_str);
+}
+
+/* Print one ES-EVI in the multi-line detailed format.
+ * JSON output is not implemented; nothing is emitted when json is set.
+ */
+static void zebra_evpn_es_evi_show_entry_detail(struct vty *vty,
+		struct zebra_evpn_es_evi *es_evi, json_object *json)
+{
+	const char *type_str;
+	const char *ready_str;
+
+	if (json) {
+		/* XXX */
+		return;
+	}
+
+	type_str = (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL) ? "L" : "";
+	ready_str = (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP)
+			    ? "yes"
+			    : "no";
+
+	vty_out(vty, "VNI %d ESI: %s\n",
+		es_evi->zvni->vni, es_evi->es->esi_str);
+	vty_out(vty, " Type: %s\n", type_str);
+	vty_out(vty, " Ready for BGP: %s\n", ready_str);
+	vty_out(vty, "\n");
+}
+
+/* Walk the ES-EVI RB tree of one L2-VNI and display every entry, using the
+ * detailed format when detail is non-zero and the one-line format otherwise.
+ * zvni is dereferenced unconditionally, so it must not be NULL.
+ */
+static void zebra_evpn_es_evi_show_one_vni(zebra_vni_t *zvni,
+ struct vty *vty, json_object *json, int detail)
+{
+ struct zebra_evpn_es_evi *es_evi;
+
+ RB_FOREACH(es_evi, zebra_es_evi_rb_head, &zvni->es_evi_rb_tree) {
+ if (detail)
+ zebra_evpn_es_evi_show_entry_detail(vty, es_evi, json);
+ else
+ zebra_evpn_es_evi_show_entry(vty, es_evi, json);
+ }
+}
+
+/* context handed to the hash-walk callbacks used by the show commands */
+struct evpn_mh_show_ctx {
+ /* vty the output is written to */
+ struct vty *vty;
+ /* JSON container; the show functions in this file currently pass NULL */
+ json_object *json;
+ /* non-zero selects the detailed per-entry format */
+ int detail;
+};
+
+/* hash-walk callback: display the ES-EVIs of the L2-VNI held in the bucket;
+ * ctxt is the struct evpn_mh_show_ctx supplied by the walker
+ */
+static void zebra_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket,
+		void *ctxt)
+{
+	struct evpn_mh_show_ctx *show = ctxt;
+	zebra_vni_t *vni_entry = bucket->data;
+
+	zebra_evpn_es_evi_show_one_vni(vni_entry, show->vty, show->json,
+				       show->detail);
+}
+
+/* Display the ES-EVI entries of every L2-VNI in the EVPN VRF. The table
+ * header is printed only for the non-detailed, non-JSON format.
+ * uj is accepted but JSON output is not implemented yet (json stays NULL).
+ */
+void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail)
+{
+	json_object *json = NULL;
+	struct zebra_vrf *zvrf = zebra_vrf_get_evpn();
+	struct evpn_mh_show_ctx wctx = {
+		.vty = vty,
+		.json = json,
+		.detail = detail,
+	};
+
+	if (!detail && !json) {
+		vty_out(vty, "Type: L local, R remote\n");
+		vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type");
+	}
+
+	/* one callback invocation per L2-VNI */
+	hash_iterate(zvrf->vni_table, zebra_evpn_es_evi_show_one_vni_hash_cb,
+		     &wctx);
+}
+
+/* Display the ES-EVI entries of a single L2-VNI.
+ *
+ * vty    - output terminal
+ * uj     - JSON requested; JSON output is not implemented yet, the flag
+ *          only suppresses the "doesn't exist" message
+ * vni    - L2-VNI to display
+ * detail - non-zero selects the detailed per-entry format
+ *
+ * If the VNI is unknown the function reports that (non-JSON only) and
+ * returns without walking anything.
+ */
+void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, vni_t vni, int detail)
+{
+	json_object *json = NULL;
+	zebra_vni_t *zvni;
+
+	zvni = zvni_lookup(vni);
+	if (!zvni) {
+		/* the per-VNI walker dereferences zvni, so it must not be
+		 * called for a VNI that was not found
+		 */
+		if (!uj)
+			vty_out(vty, "VNI %d doesn't exist\n", vni);
+		return;
+	}
+
+	if (!detail && !json) {
+		vty_out(vty, "Type: L local, R remote\n");
+		vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type");
+	}
+
+	zebra_evpn_es_evi_show_one_vni(zvni, vty, json, detail);
+}
+
+/* Initialize the ES tables maintained per-L2_VNI: the ES-EVI RB tree and
+ * the list of local ES-EVIs.
+ */
+void zebra_evpn_vni_es_init(zebra_vni_t *zvni)
+{
+ /* Initialize the ES-EVI RB tree */
+ RB_INIT(zebra_es_evi_rb_head, &zvni->es_evi_rb_tree);
+
+ /* Initialize the local ES-EVI list maintained for quick walks by
+ * type. Entries are linked via the l2vni_listnode embedded in each
+ * ES-EVI; listset_app_node_mem() presumably tells the list not to
+ * allocate/free nodes itself - confirm against lib/linklist.h
+ */
+ zvni->local_es_evi_list = list_new();
+ listset_app_node_mem(zvni->local_es_evi_list);
+}
+
+/* Cleanup the ES info maintained per-L2_VNI: delete every local ES-EVI of
+ * the VNI, free the local list and clear the VNI as EVPN-MH base VNI.
+ */
+void zebra_evpn_vni_es_cleanup(zebra_vni_t *zvni)
+{
+ struct zebra_evpn_es_evi *es_evi;
+ struct zebra_evpn_es_evi *es_evi_next;
+
+ /* the SAFE variant is needed because the delete removes the current
+ * entry from the tree being walked; entries not flagged local are
+ * skipped by zebra_evpn_local_es_evi_do_del()
+ */
+ RB_FOREACH_SAFE(es_evi, zebra_es_evi_rb_head,
+ &zvni->es_evi_rb_tree, es_evi_next) {
+ zebra_evpn_local_es_evi_do_del(es_evi);
+ }
+
+ /* the list is released only after all members have been unlinked */
+ list_delete(&zvni->local_es_evi_list);
+ zebra_evpn_es_clear_base_vni(zvni);
+}
+
+/* Called when the oper state or bridge membership changes for the vxlan
+ * device. Re-evaluates the VNI as a base VNI for EVPN-MH and then
+ * re-evaluates every local ES-EVI of the VNI for advertisement to BGP.
+ */
+void zebra_evpn_vni_update_all_es(zebra_vni_t *zvni)
+{
+	struct listnode *cursor;
+	struct zebra_evpn_es_evi *entry;
+
+	/* only a VNI that is ready for BGP is eligible as a base for
+	 * EVPN-MH
+	 */
+	if (!zebra_evpn_vni_send_to_client_ok(zvni))
+		zebra_evpn_es_clear_base_vni(zvni);
+	else
+		zebra_evpn_es_set_base_vni(zvni);
+
+	for (ALL_LIST_ELEMENTS_RO(zvni->local_es_evi_list, cursor, entry)) {
+		zebra_evpn_es_evi_re_eval_send_to_client(entry);
+	}
+}
+
+/*****************************************************************************/
+/* Access broadcast domains (BD)
+ * 1. These broadcast domains can be VLAN aware (in which case
+ * the key is VID) or VLAN unaware (the key for that case is not
+ * specified here; the table handled below is keyed by VID).
+ * 2. A VID-BD is created when a VLAN is associated with an access port or
+ * when the VLAN is associated with VXLAN_IF
+ * 3. A BD is translated into ES-EVI entries when a VNI is associated
+ * with the broadcast domain
+ */
+/* Hash key for VLAN based broadcast domains: a jhash of the BD's VID.
+ * p points to a struct zebra_evpn_access_bd (only vid is read).
+ */
+static unsigned int zebra_evpn_acc_vl_hash_keymake(const void *p)
+{
+ const struct zebra_evpn_access_bd *acc_bd = p;
+
+ return jhash_1word(acc_bd->vid, 0);
+}
+
+/* Equality function for VLAN based broadcast domains: two NULLs are
+ * equal, a NULL never equals a non-NULL, otherwise the VIDs decide.
+ */
+static bool zebra_evpn_acc_vl_cmp(const void *p1, const void *p2)
+{
+	const struct zebra_evpn_access_bd *lhs = p1;
+	const struct zebra_evpn_access_bd *rhs = p2;
+
+	/* with at least one NULL, the pointers are equal only if both
+	 * are NULL
+	 */
+	if (!lhs || !rhs)
+		return lhs == rhs;
+
+	return lhs->vid == rhs->vid;
+}
+
+/* Find the access broadcast domain for a VID; NULL if there is none.
+ * Only the vid of the search key matters to the hash/compare functions.
+ */
+static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_find(vlanid_t vid)
+{
+	struct zebra_evpn_access_bd key = {.vid = vid};
+
+	return hash_lookup(zmh_info->evpn_vlan_table, &key);
+}
+
+/* A new broadcast domain can be created when a VLAN member or VLAN<=>VxLAN_IF
+ * mapping is added.
+ *
+ * Allocates the BD for vid, gives it an empty member list and interns it
+ * in the VLAN hash table. Returns the new BD, or NULL if the hash insert
+ * fails; in that case everything allocated here is released again.
+ */
+static struct zebra_evpn_access_bd *zebra_evpn_acc_vl_new(vlanid_t vid)
+{
+	struct zebra_evpn_access_bd *acc_bd;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("access vlan %d add", vid);
+
+	acc_bd = XCALLOC(MTYPE_ZACC_BD, sizeof(struct zebra_evpn_access_bd));
+
+	acc_bd->vid = vid;
+
+	/* Initialize the mbr list */
+	acc_bd->mbr_zifs = list_new();
+
+	/* Add to hash */
+	if (!hash_get(zmh_info->evpn_vlan_table, acc_bd, hash_alloc_intern)) {
+		/* the member list was allocated above and must not be
+		 * leaked together with the BD
+		 */
+		list_delete(&acc_bd->mbr_zifs);
+		XFREE(MTYPE_ZACC_BD, acc_bd);
+		return NULL;
+	}
+
+	return acc_bd;
+}
+
+/* Free VLAN based broadcast domain -
+ * This just frees appropriate memory, caller should have taken other
+ * needed actions.
+ */
+static void zebra_evpn_acc_vl_free(struct zebra_evpn_access_bd *acc_bd)
+{
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ zlog_debug("access vlan %d del", acc_bd->vid);
+
+ /* release the member list maintained against the BD */
+ list_delete(&acc_bd->mbr_zifs);
+
+ /* remove the BD from the VLAN hash table */
+ hash_release(zmh_info->evpn_vlan_table, acc_bd);
+
+ XFREE(MTYPE_ZACC_BD, acc_bd);
+}
+
+/* hash-walk callback that frees the access BD stored in the bucket; arg is
+ * unused. NOTE(review): the free releases the entry from the table being
+ * walked - confirm the walker used by the caller tolerates that.
+ */
+static void zebra_evpn_acc_vl_cleanup_all(struct hash_bucket *bucket, void *arg)
+{
+ struct zebra_evpn_access_bd *acc_bd = bucket->data;
+
+ zebra_evpn_acc_vl_free(acc_bd);
+}
+
+/* Called when a BD member is removed or the VxLAN_IF is disassociated from
+ * the access VLAN. The BD is freed once it has neither members nor a
+ * VxLAN interface.
+ */
+static void zebra_evpn_acc_bd_free_on_deref(struct zebra_evpn_access_bd *acc_bd)
+{
+	/* no member and no vxlan device left => no references remain */
+	if (list_isempty(acc_bd->mbr_zifs) && !acc_bd->vxlan_zif)
+		zebra_evpn_acc_vl_free(acc_bd);
+}
+
+/* Called when an EVPN L2-VNI is set or cleared against a BD. For every
+ * member interface that has an ES -
+ * - zvni set: create the local ES-EVI for (ES, zvni)
+ * - zvni NULL and old_zvni set: delete the local ES-EVI for (ES, old_zvni)
+ */
+static void zebra_evpn_acc_bd_vni_set(struct zebra_evpn_access_bd *acc_bd,
+		zebra_vni_t *zvni, zebra_vni_t *old_zvni)
+{
+	struct listnode *cursor;
+	struct zebra_if *mbr;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("access vlan %d l2-vni %u set",
+			   acc_bd->vid, zvni ? zvni->vni : 0);
+
+	for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, cursor, mbr)) {
+		struct zebra_evpn_es *es = mbr->es_info.es;
+
+		/* members without an ES have no ES-EVI to maintain */
+		if (!es)
+			continue;
+
+		if (zvni) {
+			zebra_evpn_local_es_evi_add(es, zvni);
+			continue;
+		}
+
+		if (old_zvni)
+			zebra_evpn_local_es_evi_del(es, old_zvni);
+	}
+}
+
+/* Handle VLAN->VxLAN_IF association.
+ *
+ * Finds (or creates) the access BD for vid and records vxlan_zif against
+ * it. If that changes the L2-VNI the BD maps to, the ES-EVIs of the old
+ * VNI are removed and the ES-EVIs of the new VNI are created for all
+ * member interfaces that have an ES. A vid of 0 is ignored.
+ */
+void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif)
+{
+	struct zebra_evpn_access_bd *acc_bd;
+	struct zebra_if *old_vxlan_zif;
+	zebra_vni_t *old_zvni;
+
+	if (!vid)
+		return;
+
+	acc_bd = zebra_evpn_acc_vl_find(vid);
+	if (!acc_bd)
+		acc_bd = zebra_evpn_acc_vl_new(vid);
+
+	/* BD creation can fail; there is nothing to associate then */
+	if (!acc_bd)
+		return;
+
+	old_vxlan_zif = acc_bd->vxlan_zif;
+	acc_bd->vxlan_zif = vxlan_zif;
+	if (vxlan_zif == old_vxlan_zif)
+		return;
+
+	old_zvni = acc_bd->zvni;
+	acc_bd->zvni = zvni_lookup(vxlan_zif->l2info.vxl.vni);
+	if (acc_bd->zvni == old_zvni)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("access vlan %d vni %u ref",
+			   acc_bd->vid, vxlan_zif->l2info.vxl.vni);
+
+	/* withdraw the ES-EVIs of the previous VNI before adding the new */
+	if (old_zvni)
+		zebra_evpn_acc_bd_vni_set(acc_bd, NULL, old_zvni);
+
+	if (acc_bd->zvni)
+		zebra_evpn_acc_bd_vni_set(acc_bd, acc_bd->zvni, NULL);
+}
+
+/* Handle VLAN->VxLAN_IF deref. The association is cleared only when
+ * vxlan_zif is the device currently recorded against the BD; the ES-EVIs
+ * of the BD's VNI are removed and the BD is freed if nothing else
+ * references it. A vid of 0 is ignored.
+ */
+void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif)
+{
+	struct zebra_evpn_access_bd *bd;
+
+	if (!vid)
+		return;
+
+	bd = zebra_evpn_acc_vl_find(vid);
+
+	/* unknown BD, or the BD is bound to some other vxlan device */
+	if (!bd || bd->vxlan_zif != vxlan_zif)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("access vlan %d vni %u deref",
+			   bd->vid, vxlan_zif->l2info.vxl.vni);
+
+	if (bd->zvni)
+		zebra_evpn_acc_bd_vni_set(bd, NULL, bd->zvni);
+
+	bd->zvni = NULL;
+	bd->vxlan_zif = NULL;
+
+	/* the BD goes away once it has no members either */
+	zebra_evpn_acc_bd_free_on_deref(bd);
+}
+
+/* Handle EVPN L2-VNI add/del on a vxlan device. The access BD is located
+ * via the device's access VLAN -
+ * - set: zvni is offered as EVPN-MH base VNI and, if the BD was not
+ *   already bound to it, bound to the BD (creating ES-EVIs)
+ * - clear: the BD's current VNI, if any, is unbound (deleting ES-EVIs)
+ */
+void zebra_evpn_vxl_vni_set(struct zebra_if *zif, zebra_vni_t *zvni,
+		bool set)
+{
+	struct zebra_evpn_access_bd *bd;
+	zebra_vni_t *prev_zvni;
+
+	if (!zif)
+		return;
+
+	/* locate access_bd associated with the vxlan device */
+	bd = zebra_evpn_acc_vl_find(zif->l2info.vxl.access_vlan);
+	if (!bd)
+		return;
+
+	if (set) {
+		zebra_evpn_es_set_base_vni(zvni);
+		if (bd->zvni == zvni)
+			return;
+
+		bd->zvni = zvni;
+		zebra_evpn_acc_bd_vni_set(bd, zvni, NULL);
+		return;
+	}
+
+	prev_zvni = bd->zvni;
+	if (!prev_zvni)
+		return;
+
+	bd->zvni = NULL;
+	zebra_evpn_acc_bd_vni_set(bd, NULL, prev_zvni);
+}
+
+/* Handle addition of a new VLAN member.
+ *
+ * Finds (or creates) the access BD for vid and adds zif to its member
+ * list unless it is already there. If the BD maps to an L2-VNI and the
+ * interface has an ES, the local ES-EVI is created. A vid of 0 is ignored.
+ */
+void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif)
+{
+	struct zebra_evpn_access_bd *acc_bd;
+
+	if (!vid)
+		return;
+
+	acc_bd = zebra_evpn_acc_vl_find(vid);
+	if (!acc_bd)
+		acc_bd = zebra_evpn_acc_vl_new(vid);
+
+	/* BD creation can fail; there is nothing to join then */
+	if (!acc_bd)
+		return;
+
+	/* already a member */
+	if (listnode_lookup(acc_bd->mbr_zifs, zif))
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("access vlan %d mbr %s ref",
+			   vid, zif->ifp->name);
+
+	listnode_add(acc_bd->mbr_zifs, zif);
+	if (acc_bd->zvni && zif->es_info.es)
+		zebra_evpn_local_es_evi_add(zif->es_info.es, acc_bd->zvni);
+}
+
+/* Handle deletion of a VLAN member: drop zif from the BD's member list,
+ * delete its local ES-EVI (if the BD has a VNI and the interface an ES)
+ * and free the BD when nothing references it any more. A vid of 0, an
+ * unknown BD or a non-member interface is ignored.
+ */
+void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif)
+{
+	struct zebra_evpn_access_bd *bd;
+	struct listnode *mbr_node;
+
+	if (!vid)
+		return;
+
+	bd = zebra_evpn_acc_vl_find(vid);
+	if (!bd)
+		return;
+
+	mbr_node = listnode_lookup(bd->mbr_zifs, zif);
+	if (!mbr_node)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("access vlan %d mbr %s deref",
+			   vid, zif->ifp->name);
+
+	list_delete_node(bd->mbr_zifs, mbr_node);
+
+	if (bd->zvni && zif->es_info.es) {
+		zebra_evpn_local_es_evi_del(zif->es_info.es, bd->zvni);
+	}
+
+	/* if there are no other references the access_bd can be freed */
+	zebra_evpn_acc_bd_free_on_deref(bd);
+}
+
+/* Print one access BD in the multi-line detailed format, including the
+ * names of all member interfaces. JSON output is not implemented; nothing
+ * is emitted when json is set.
+ */
+static void zebra_evpn_acc_vl_show_entry_detail(struct vty *vty,
+		struct zebra_evpn_access_bd *acc_bd, json_object *json)
+{
+	struct listnode *cursor;
+	struct zebra_if *mbr;
+	const char *vxlan_name;
+
+	if (json) {
+		/* XXX */
+		return;
+	}
+
+	/* "-" stands in for a BD without a vxlan device */
+	vxlan_name = acc_bd->vxlan_zif ? acc_bd->vxlan_zif->ifp->name : "-";
+
+	vty_out(vty, "VLAN: %u\n", acc_bd->vid);
+	vty_out(vty, " VxLAN Interface: %s\n", vxlan_name);
+	vty_out(vty, " L2-VNI: %d\n",
+		acc_bd->zvni ? acc_bd->zvni->vni : 0);
+	vty_out(vty, " Member Count: %d\n",
+		listcount(acc_bd->mbr_zifs));
+	vty_out(vty, " Members: \n");
+	for (ALL_LIST_ELEMENTS_RO(acc_bd->mbr_zifs, cursor, mbr)) {
+		vty_out(vty, " %s\n", mbr->ifp->name);
+	}
+	vty_out(vty, "\n");
+}
+
+/* Print one access BD as a single table row (VLAN, vxlan device, L2-VNI,
+ * member count). Nothing is emitted when json is set.
+ */
+static void zebra_evpn_acc_vl_show_entry(struct vty *vty,
+		struct zebra_evpn_access_bd *acc_bd, json_object *json)
+{
+	const char *vxlan_name;
+
+	if (json)
+		return;
+
+	/* "-" stands in for a BD without a vxlan device */
+	vxlan_name = acc_bd->vxlan_zif ? acc_bd->vxlan_zif->ifp->name : "-";
+
+	vty_out(vty, "%-5u %21s %-8d %u\n",
+		acc_bd->vid, vxlan_name,
+		acc_bd->zvni ? acc_bd->zvni->vni : 0,
+		listcount(acc_bd->mbr_zifs));
+}
+
+/* hash-walk callback: display the access BD held in the bucket in the
+ * format selected by the show context passed in ctxt
+ */
+static void zebra_evpn_acc_vl_show_hash(struct hash_bucket *bucket, void *ctxt)
+{
+	struct zebra_evpn_access_bd *bd = bucket->data;
+	struct evpn_mh_show_ctx *show = ctxt;
+
+	if (show->detail) {
+		zebra_evpn_acc_vl_show_entry_detail(show->vty, bd, show->json);
+		return;
+	}
+
+	zebra_evpn_acc_vl_show_entry(show->vty, bd, show->json);
+}
+
+/* Display all access BDs as a table, one row per VLAN. uj is accepted but
+ * JSON output is not implemented yet (json stays NULL).
+ */
+void zebra_evpn_acc_vl_show(struct vty *vty, bool uj)
+{
+	json_object *json = NULL;
+	struct evpn_mh_show_ctx wctx = {
+		.vty = vty,
+		.json = json,
+		.detail = false,
+	};
+
+	if (!json) {
+		vty_out(vty, "%-5s %21s %-8s %s\n",
+			"VLAN", "VxLAN-IF", "L2-VNI", "# Members");
+	}
+
+	hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash,
+		     &wctx);
+}
+
+/* Display all access BDs in the detailed format. uj is accepted but JSON
+ * output is not implemented yet (json stays NULL).
+ */
+void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj)
+{
+	json_object *json = NULL;
+	struct evpn_mh_show_ctx wctx = {
+		.vty = vty,
+		.json = json,
+		.detail = true,
+	};
+
+	hash_iterate(zmh_info->evpn_vlan_table, zebra_evpn_acc_vl_show_hash,
+		     &wctx);
+}
+
+/* Display the access BD of a single VLAN in the detailed format.
+ *
+ * vty - output terminal
+ * uj  - JSON requested; JSON output is not implemented yet (json stays
+ *       NULL)
+ * vid - VLAN to display
+ *
+ * If no BD exists for the VLAN this is reported (non-JSON only) and the
+ * function returns without printing an entry.
+ */
+void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid)
+{
+	json_object *json = NULL;
+	struct zebra_evpn_access_bd *acc_bd;
+
+	acc_bd = zebra_evpn_acc_vl_find(vid);
+	if (!acc_bd) {
+		/* always bail out: the detail printer dereferences acc_bd
+		 * regardless of the output format
+		 */
+		if (!json)
+			vty_out(vty, "VLAN %u not present\n", vid);
+		return;
+	}
+
+	zebra_evpn_acc_vl_show_entry_detail(vty, acc_bd, json);
+}
+
+/* Initialize VLAN member bitmap on an interface. Although VLAN membership
+ * is independent of EVPN we only process it if it's of interest to EVPN-MH
+ * i.e. on access ports that can be setup as Ethernet Segments. And that is
+ * intended as an optimization.
+ */
+void zebra_evpn_if_init(struct zebra_if *zif)
+{
+ /* interfaces that cannot be an ES are not tracked at all */
+ if (!zebra_evpn_is_if_es_capable(zif))
+ return;
+
+ /* the bitmap may already exist; it is only created once */
+ if (!bf_is_inited(zif->vlan_bitmap))
+ bf_init(zif->vlan_bitmap, IF_VLAN_BITMAP_MAX);
+
+ /* if an es_id and sysmac are already present against the interface
+ * activate it
+ */
+ zebra_evpn_local_es_update(zif, zif->es_info.lid, &zif->es_info.sysmac);
+}
+
+/* handle deletion of an access port by removing it from all associated
+ * broadcast domains and deleting its Ethernet Segment.
+ */
+void zebra_evpn_if_cleanup(struct zebra_if *zif)
+{
+ vlanid_t vid;
+
+ /* no bitmap means the interface was never set up by
+ * zebra_evpn_if_init(); note that the ES delete below is skipped
+ * as well in that case
+ */
+ if (!bf_is_inited(zif->vlan_bitmap))
+ return;
+
+ /* drop the membership for every VLAN recorded in the bitmap */
+ bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) {
+ zebra_evpn_vl_mbr_deref(vid, zif);
+ }
+
+ bf_free(zif->vlan_bitmap);
+
+ /* Delete associated Ethernet Segment */
+ if (zif->es_info.es)
+ zebra_evpn_local_es_del(zif->es_info.es);
+}
+
+/*****************************************************************************
+ * L2 NH/NHG Management
+ * A L2 NH entry is programmed in the kernel for every ES-VTEP entry. This
+ * NH is then added to the L2-ECMP-NHG associated with the ES.
+ */
+/* Allocate an id from the EVPN-MH nexthop id bitmap and tag it with the
+ * NHG or NH type bit depending on is_nhg. Returns 0 if the bitmap hands
+ * out index 0, which this code treats as "no id available".
+ */
+static uint32_t zebra_evpn_nhid_alloc(bool is_nhg)
+{
+ uint32_t id;
+ int type;
+
+ bf_assign_index(zmh_info->nh_id_bitmap, id);
+
+ if (!id)
+ return 0;
+
+ /* the type bit distinguishes NH ids from NHG ids */
+ type = is_nhg ? EVPN_NHG_ID_TYPE_BIT : EVPN_NH_ID_TYPE_BIT;
+ return (id | type);
+}
+
+/* Return a NH/NHG id to the id bitmap. The bits outside
+ * EVPN_NH_ID_VAL_MASK are stripped first; a resulting index of 0 is
+ * ignored.
+ */
+static void zebra_evpn_nhid_free(uint32_t nh_id)
+{
+	uint32_t bitmap_idx = nh_id & EVPN_NH_ID_VAL_MASK;
+
+	if (bitmap_idx) {
+		bf_release_index(zmh_info->nh_id_bitmap, bitmap_idx);
+	}
+}
+
+/* The MAC ECMP group is activated on the first VTEP.
+ *
+ * Rebuild the ES's nexthop group from the VTEPs that currently have a NH
+ * id and push it to the kernel. If no VTEP has a NH id and the group was
+ * active, the group is removed from the kernel instead. Nothing is done
+ * for an ES without a NHG id.
+ */
+static void zebra_evpn_nhg_update(struct zebra_evpn_es *es)
+{
+ uint32_t nh_cnt = 0;
+ struct nh_grp nh_ids[ES_VTEP_MAX_CNT];
+ struct zebra_evpn_es_vtep *es_vtep;
+ struct listnode *node;
+
+ if (!es->nhg_id)
+ return;
+
+ /* collect the NH ids of the VTEPs, up to ES_VTEP_MAX_CNT entries */
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+ if (!es_vtep->nh_id)
+ continue;
+
+ /* VTEPs beyond the array capacity are silently left out */
+ if (nh_cnt >= ES_VTEP_MAX_CNT)
+ break;
+
+ memset(&nh_ids[nh_cnt], 0, sizeof(struct nh_grp));
+ nh_ids[nh_cnt].id = es_vtep->nh_id;
+ ++nh_cnt;
+ }
+
+ if (nh_cnt) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NH) {
+ char nh_str[ES_VTEP_LIST_STR_SZ];
+ uint32_t i;
+ char nh_buf[16];
+
+ /* build a space separated list of member ids for the log */
+ nh_str[0] = '\0';
+ for (i = 0; i < nh_cnt; ++i) {
+ snprintf(nh_buf, sizeof(nh_buf), "%u ",
+ nh_ids[i].id);
+ strlcat(nh_str, nh_buf, sizeof(nh_str));
+ }
+ zlog_debug("es %s nhg 0x%x add %s",
+ es->esi_str, es->nhg_id, nh_str);
+ }
+
+ /* (re)program the group with the current member set */
+ es->flags |= ZEBRA_EVPNES_NHG_ACTIVE;
+ kernel_upd_mac_nhg(es->nhg_id, nh_cnt, nh_ids);
+ } else {
+ /* no members left; remove the group only if it was installed */
+ if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NH)
+ zlog_debug("es %s nhg 0x%x del",
+ es->esi_str, es->nhg_id);
+ es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE;
+ kernel_del_mac_nhg(es->nhg_id);
+ }
+ }
+
+ /* XXX - update remote macs associated with the ES */
+}
+
+/* Allocate and install the L2 nexthop for an ES-VTEP, then refresh the
+ * parent ES's NHG so that it includes the new NH. Does nothing if the
+ * VTEP already has a NH id or if no id could be allocated.
+ */
+static void zebra_evpn_nh_add(struct zebra_evpn_es_vtep *es_vtep)
+{
+ if (es_vtep->nh_id)
+ return;
+
+ es_vtep->nh_id = zebra_evpn_nhid_alloc(false);
+
+ /* id allocation failed */
+ if (!es_vtep->nh_id)
+ return;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NH)
+ zlog_debug("es %s vtep %s nh 0x%x add",
+ es_vtep->es->esi_str,
+ inet_ntoa(es_vtep->vtep_ip), es_vtep->nh_id);
+ /* install the NH */
+ kernel_upd_mac_nh(es_vtep->nh_id, es_vtep->vtep_ip);
+ /* add the NH to the parent NHG */
+ zebra_evpn_nhg_update(es_vtep->es);
+}
+
+/* Remove the L2 nexthop of an ES-VTEP: take it out of the parent ES's NHG,
+ * uninstall it from the kernel and release its id. Does nothing if the
+ * VTEP has no NH id.
+ */
+static void zebra_evpn_nh_del(struct zebra_evpn_es_vtep *es_vtep)
+{
+ uint32_t nh_id;
+
+ if (!es_vtep->nh_id)
+ return;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NH)
+ zlog_debug("es %s vtep %s nh 0x%x del",
+ es_vtep->es->esi_str,
+ inet_ntoa(es_vtep->vtep_ip), es_vtep->nh_id);
+
+ /* clear the id first so that the NHG rebuild below skips this VTEP */
+ nh_id = es_vtep->nh_id;
+ es_vtep->nh_id = 0;
+
+ /* remove the NH from the parent NHG */
+ zebra_evpn_nhg_update(es_vtep->es);
+ /* uninstall the NH */
+ kernel_del_mac_nh(nh_id);
+ zebra_evpn_nhid_free(nh_id);
+
+}
+
+/*****************************************************************************/
+/* Ethernet Segment Management
+ * 1. Ethernet Segment is a collection of links attached to the same
+ * server (MHD) or switch (MHN)
+ * 2. An Ethernet Segment can span multiple PEs and is identified by the
+ * 10-byte ES-ID.
+ * 3. Zebra manages the local ESI configuration.
+ * 4. It also maintains the aliasing that maps an ESI (local or remote)
+ * to one or more PEs/VTEPs.
+ * 5. remote ESs are added by BGP (on rxing EAD Type-1 routes)
+ */
+/* A list of remote VTEPs is maintained for each ES. This list includes -
+ * 1. VTEPs for which we have imported the ESR i.e. ES-peers
+ * 2. VTEPs that have an "active" ES-EVI VTEP i.e. EAD-per-ES and EAD-per-EVI
+ * have been imported into one or more VNIs
+ */
+/* Sort comparator for an ES's VTEP list: orders two ES-VTEP entries by
+ * the raw (unconverted) value of their VTEP IP address.
+ *
+ * Returns -1, 0 or 1. The addresses are compared explicitly rather than
+ * subtracted: the difference of two 32-bit unsigned values does not fit
+ * an int, so a subtraction reports the wrong sign whenever the values
+ * are 2^31 or more apart.
+ */
+static int zebra_evpn_es_vtep_cmp(void *p1, void *p2)
+{
+	const struct zebra_evpn_es_vtep *es_vtep1 = p1;
+	const struct zebra_evpn_es_vtep *es_vtep2 = p2;
+	uint32_t ip1 = es_vtep1->vtep_ip.s_addr;
+	uint32_t ip2 = es_vtep2->vtep_ip.s_addr;
+
+	if (ip1 < ip2)
+		return -1;
+
+	if (ip1 > ip2)
+		return 1;
+
+	return 0;
+}
+
+/* Allocate an ES-VTEP entry for vtep_ip and insert it, in sorted
+ * position, into the ES's VTEP list. No nexthop is set up here.
+ */
+static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_new(
+		struct zebra_evpn_es *es, struct in_addr vtep_ip)
+{
+	struct zebra_evpn_es_vtep *vtep;
+
+	vtep = XCALLOC(MTYPE_ZES_VTEP, sizeof(struct zebra_evpn_es_vtep));
+	vtep->vtep_ip.s_addr = vtep_ip.s_addr;
+	vtep->es = es;
+
+	/* the list node is embedded in the entry itself */
+	listnode_init(&vtep->es_listnode, vtep);
+	listnode_add_sort(es->es_vtep_list, &vtep->es_listnode);
+
+	return vtep;
+}
+
+/* Unlink an ES-VTEP from its ES, remove its nexthop (which also refreshes
+ * the ES's NHG) and free the entry.
+ */
+static void zebra_evpn_es_vtep_free(struct zebra_evpn_es_vtep *es_vtep)
+{
+ struct zebra_evpn_es *es = es_vtep->es;
+
+ /* unlink first so the NHG rebuild no longer walks this entry */
+ list_delete_node(es->es_vtep_list, &es_vtep->es_listnode);
+ /* update the L2-NHG associated with the ES */
+ zebra_evpn_nh_del(es_vtep);
+ XFREE(MTYPE_ZES_VTEP, es_vtep);
+}
+
+
+/* Search the ES's VTEP list for the entry with the given VTEP IP;
+ * returns NULL if the VTEP is not part of the list.
+ */
+static struct zebra_evpn_es_vtep *zebra_evpn_es_vtep_find(
+		struct zebra_evpn_es *es, struct in_addr vtep_ip)
+{
+	struct zebra_evpn_es_vtep *match = NULL;
+	struct zebra_evpn_es_vtep *cur;
+	struct listnode *cursor = NULL;
+
+	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, cursor, cur)) {
+		if (cur->vtep_ip.s_addr != vtep_ip.s_addr)
+			continue;
+
+		match = cur;
+		break;
+	}
+
+	return match;
+}
+
+/* Add a VTEP to the ES unless it is already present; a newly added VTEP
+ * gets its L2 nexthop set up right away.
+ */
+static void zebra_evpn_es_vtep_add(struct zebra_evpn_es *es,
+		struct in_addr vtep_ip)
+{
+	struct zebra_evpn_es_vtep *vtep;
+
+	/* already known */
+	if (zebra_evpn_es_vtep_find(es, vtep_ip))
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s vtep %s add",
+			   es->esi_str, inet_ntoa(vtep_ip));
+
+	vtep = zebra_evpn_es_vtep_new(es, vtep_ip);
+
+	/* update the L2-NHG associated with the ES */
+	zebra_evpn_nh_add(vtep);
+}
+
+/* Remove a VTEP from the ES if it is present */
+static void zebra_evpn_es_vtep_del(struct zebra_evpn_es *es,
+		struct in_addr vtep_ip)
+{
+	struct zebra_evpn_es_vtep *vtep = zebra_evpn_es_vtep_find(es, vtep_ip);
+
+	if (!vtep)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s vtep %s del",
+			   es->esi_str, inet_ntoa(vtep_ip));
+
+	zebra_evpn_es_vtep_free(vtep);
+}
+
+/* Ordering function for the global ES RB tree: ESs are ordered by the
+ * raw bytes of their ESI.
+ */
+static int zebra_es_rb_cmp(const struct zebra_evpn_es *es1,
+		const struct zebra_evpn_es *es2)
+{
+	const void *lhs_esi = &es1->esi;
+	const void *rhs_esi = &es2->esi;
+
+	return memcmp(lhs_esi, rhs_esi, ESI_BYTES);
+}
+RB_GENERATE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp);
+
+/* Look up an ES by ESI in the global ES RB tree; NULL if not found.
+ * Only the esi of the search key is filled in since that is all the tree
+ * comparator reads.
+ */
+struct zebra_evpn_es *zebra_evpn_es_find(esi_t *esi)
+{
+	struct zebra_evpn_es key;
+
+	memcpy(&key.esi, esi, sizeof(esi_t));
+
+	return RB_FIND(zebra_es_rb_head, &zmh_info->es_rb_tree, &key);
+}
+
+/* A new local es is created when a local-es-id and sysmac is configured
+ * against an interface.
+ *
+ * Allocates the ES for esi, inserts it into the global ES RB tree, sets up
+ * its ES-EVI, VTEP and MAC lists and reserves a NHG id for it. Returns
+ * NULL (after freeing the ES) if the RB tree insert reports a non-NULL
+ * result.
+ */
+static struct zebra_evpn_es *zebra_evpn_es_new(esi_t *esi)
+{
+ struct zebra_evpn_es *es;
+
+ es = XCALLOC(MTYPE_ZES, sizeof(struct zebra_evpn_es));
+
+ /* fill in ESI and cache its string form for logs and show output */
+ memcpy(&es->esi, esi, sizeof(esi_t));
+ esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str));
+
+ /* Add to rb_tree; done before any list is allocated so the failure
+ * path only has the ES itself to free
+ */
+ if (RB_INSERT(zebra_es_rb_head, &zmh_info->es_rb_tree, es)) {
+ XFREE(MTYPE_ZES, es);
+ return NULL;
+ }
+
+ /* Initialise the ES-EVI list */
+ es->es_evi_list = list_new();
+ listset_app_node_mem(es->es_evi_list);
+
+ /* Initialise the VTEP list; kept sorted via the comparator */
+ es->es_vtep_list = list_new();
+ listset_app_node_mem(es->es_vtep_list);
+ es->es_vtep_list->cmp = zebra_evpn_es_vtep_cmp;
+
+ /* mac entries associated with the ES */
+ es->mac_list = list_new();
+ listset_app_node_mem(es->mac_list);
+
+ /* reserve a NHG; the id is 0 if the allocation fails */
+ es->nhg_id = zebra_evpn_nhid_alloc(true);
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ zlog_debug("es %s nhg 0x%x new", es->esi_str, es->nhg_id);
+
+ return es;
+}
+
+/* Free a given ES -
+ * The ES is only freed when it is neither local nor remote and no MAC
+ * references it; otherwise it is returned unchanged. On free the NHG is
+ * uninstalled (if active), its id released, the ES's lists deleted and
+ * the ES removed from the global RB tree. Returns NULL once freed.
+ * Other cleanup is the caller's responsibility.
+ */
+static struct zebra_evpn_es *zebra_evpn_es_free(struct zebra_evpn_es *es)
+{
+ /* If the ES has a local or remote reference it cannot be freed.
+ * Free is also prevented if there are MAC entries referencing
+ * it.
+ */
+ if ((es->flags & (ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_REMOTE)) ||
+ listcount(es->mac_list))
+ return es;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ zlog_debug("es %s free", es->esi_str);
+
+ /* If the NHG is still installed uninstall it and free the id */
+ if (es->flags & ZEBRA_EVPNES_NHG_ACTIVE) {
+ es->flags &= ~ZEBRA_EVPNES_NHG_ACTIVE;
+ kernel_del_mac_nhg(es->nhg_id);
+ }
+ zebra_evpn_nhid_free(es->nhg_id);
+
+ /* cleanup resources maintained against the ES */
+ list_delete(&es->es_evi_list);
+ list_delete(&es->es_vtep_list);
+ list_delete(&es->mac_list);
+
+ /* remove from the global ES rb tree */
+ RB_REMOVE(zebra_es_rb_head, &zmh_info->es_rb_tree, es);
+
+ XFREE(MTYPE_ZES, es);
+
+ return NULL;
+}
+
+/* Inform BGP about local ES addition.
+ * The ZEBRA_LOCAL_ES_ADD message carries, in order: the ESI, the ES
+ * originator IP and one byte telling whether the ES is oper-up.
+ * Returns 0 without sending if no BGP client is connected, otherwise the
+ * result of zserv_send_message().
+ */
+static int zebra_evpn_es_send_add_to_client(struct zebra_evpn_es *es)
+{
+ struct zserv *client;
+ struct stream *s;
+ uint8_t oper_up;
+
+ client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
+ /* BGP may not be running. */
+ if (!client)
+ return 0;
+
+ s = stream_new(ZEBRA_MAX_PACKET_SIZ);
+
+ zclient_create_header(s, ZEBRA_LOCAL_ES_ADD, zebra_vrf_get_evpn_id());
+ stream_put(s, &es->esi, sizeof(esi_t));
+ stream_put_ipv4(s, zmh_info->es_originator_ip.s_addr);
+ /* normalize the flag to 0/1 for the wire */
+ oper_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP);
+ stream_putc(s, oper_up);
+
+ /* Write packet size. */
+ stream_putw_at(s, 0, stream_get_endp(s));
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ zlog_debug("send add local es %s %s to %s",
+ es->esi_str,
+ inet_ntoa(zmh_info->es_originator_ip),
+ zebra_route_string(client->proto));
+
+ /* per-client counter of ES adds sent */
+ client->local_es_add_cnt++;
+ return zserv_send_message(client, s);
+}
+
+/* Inform BGP about local ES deletion.
+ * The ZEBRA_LOCAL_ES_DEL message carries only the ESI.
+ * Returns 0 without sending if no BGP client is connected, otherwise the
+ * result of zserv_send_message().
+ */
+static int zebra_evpn_es_send_del_to_client(struct zebra_evpn_es *es)
+{
+ struct zserv *client;
+ struct stream *s;
+
+ client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
+ /* BGP may not be running. */
+ if (!client)
+ return 0;
+
+ s = stream_new(ZEBRA_MAX_PACKET_SIZ);
+ /* NOTE(review): the add path does not reset a freshly created
+ * stream; this reset looks redundant - confirm and drop
+ */
+ stream_reset(s);
+
+ zclient_create_header(s, ZEBRA_LOCAL_ES_DEL, zebra_vrf_get_evpn_id());
+ stream_put(s, &es->esi, sizeof(esi_t));
+
+ /* Write packet size. */
+ stream_putw_at(s, 0, stream_get_endp(s));
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ zlog_debug("send del local es %s to %s", es->esi_str,
+ zebra_route_string(client->proto));
+
+ /* per-client counter of ES dels sent */
+ client->local_es_del_cnt++;
+ return zserv_send_message(client, s);
+}
+
+/* Recompute ZEBRA_EVPNES_READY_FOR_BGP for an ES and, if the readiness
+ * changed, send the matching add or del to BGP. An ES is ready when it is
+ * local and an ES originator IP is available. When es_evi_re_reval is set
+ * the local ES-EVIs of the ES are re-evaluated after a transition.
+ * XXX - call any time ZEBRA_EVPNES_LOCAL gets set or cleared
+ */
+static void zebra_evpn_es_re_eval_send_to_client(struct zebra_evpn_es *es,
+		bool es_evi_re_reval)
+{
+	struct zebra_evpn_es_evi *es_evi;
+	struct listnode *node;
+	bool was_ready = !!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP);
+	bool is_ready = (es->flags & ZEBRA_EVPNES_LOCAL) &&
+			zmh_info->es_originator_ip.s_addr;
+
+	if (is_ready)
+		es->flags |= ZEBRA_EVPNES_READY_FOR_BGP;
+	else
+		es->flags &= ~ZEBRA_EVPNES_READY_FOR_BGP;
+
+	/* no transition; nothing to tell BGP */
+	if (was_ready == is_ready)
+		return;
+
+	if (is_ready)
+		zebra_evpn_es_send_add_to_client(es);
+	else
+		zebra_evpn_es_send_del_to_client(es);
+
+	if (!es_evi_re_reval)
+		return;
+
+	/* re-eval associated local EVIs */
+	for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, node, es_evi)) {
+		if (es_evi->flags & ZEBRA_EVPNES_EVI_LOCAL)
+			zebra_evpn_es_evi_re_eval_send_to_client(es_evi);
+	}
+}
+
+/* Replay every BGP-ready local ES, and the BGP-ready ES-EVIs under it, to
+ * BGP. On add the ES is sent ahead of its ES-EVIs; on delete the ES-EVIs
+ * are withdrawn first and the ES last.
+ */
+void zebra_evpn_es_send_all_to_client(bool add)
+{
+	struct listnode *es_node;
+	struct listnode *evi_node;
+	struct zebra_evpn_es *es;
+	struct zebra_evpn_es_evi *es_evi;
+
+	if (!zmh_info)
+		return;
+
+	for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, es_node, es)) {
+		if (!(es->flags & ZEBRA_EVPNES_READY_FOR_BGP))
+			continue;
+
+		if (add)
+			zebra_evpn_es_send_add_to_client(es);
+
+		for (ALL_LIST_ELEMENTS_RO(es->es_evi_list,
+					  evi_node, es_evi)) {
+			if (es_evi->flags & ZEBRA_EVPNES_EVI_READY_FOR_BGP)
+				zebra_evpn_es_evi_send_to_client(
+					es, es_evi->zvni, add);
+		}
+
+		if (!add)
+			zebra_evpn_es_send_del_to_client(es);
+	}
+}
+
+/* Walk the vlan bitmap associated with the ES's zif and add a local
+ * ES-EVI for every VLAN whose access broadcast domain is mapped to a VNI.
+ * XXX: This API is really expensive. optimize later if possible.
+ */
+static void zebra_evpn_es_setup_evis(struct zebra_evpn_es *es)
+{
+	struct zebra_if *zif = es->zif;
+	uint16_t vid;
+	struct zebra_evpn_access_bd *acc_bd;
+
+	bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) {
+		acc_bd = zebra_evpn_acc_vl_find(vid);
+		/* The lookup result is checked before use so that a VLAN
+		 * without an access-BD entry is skipped rather than
+		 * dereferenced.
+		 */
+		if (acc_bd && acc_bd->zvni)
+			zebra_evpn_local_es_evi_add(es, acc_bd->zvni);
+	}
+}
+
+/* Re-install, in the dataplane, every MAC on the ES's mac_list that is
+ * flagged ZEBRA_MAC_ES_PEER_ACTIVE. force_clear_static is passed through
+ * to the install routine unchanged.
+ */
+static void zebra_evpn_es_local_mac_update(struct zebra_evpn_es *es,
+		bool force_clear_static)
+{
+	struct listnode *node;
+	zebra_mac_t *mac;
+
+	for (ALL_LIST_ELEMENTS_RO(es->mac_list, node, mac)) {
+		if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+			continue;
+
+		zebra_vxlan_sync_mac_dp_install(mac,
+				false /* set_inactive */,
+				force_clear_static, __func__);
+	}
+}
+
+/* Attach an ES to a local interface: flag it local, link it into
+ * zmh_info->local_es_list, cross-link ES and zif, pick up the oper state,
+ * notify BGP and set up the ES-EVIs and MACs that depend on it.
+ * No-op if the ES is already local.
+ */
+static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es,
+		struct zebra_if *zif)
+{
+	if (es->flags & ZEBRA_EVPNES_LOCAL)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("local es %s add; nhg 0x%x if %s",
+			   es->esi_str, es->nhg_id, zif->ifp->name);
+
+	es->flags |= ZEBRA_EVPNES_LOCAL;
+	listnode_init(&es->local_es_listnode, es);
+	listnode_add(zmh_info->local_es_list, &es->local_es_listnode);
+
+	/* cross-link the ES and the interface */
+	zif->es_info.es = es;
+	es->zif = zif;
+
+	if (if_is_operative(zif->ifp))
+		es->flags |= ZEBRA_EVPNES_OPER_UP;
+
+	if (zmh_info->es_base_vni) {
+		/* send notification to bgp */
+		zebra_evpn_es_re_eval_send_to_client(es,
+				false /* es_evi_re_reval */);
+	} else {
+		/* setup base-vni if one doesn't already exist; the ES
+		 * will get sent to BGP as a part of that process
+		 */
+		zebra_evpn_es_get_one_base_vni();
+	}
+
+	/* Setup ES-EVIs for all VxLAN stretched VLANs associated with
+	 * the zif
+	 */
+	zebra_evpn_es_setup_evis(es);
+
+	/* if there any local macs referring to the ES as dest we
+	 * need to set the static reference on them if the MAC is
+	 * synced from an ES peer
+	 */
+	zebra_evpn_es_local_mac_update(es, false /* force_clear_static */);
+}
+
+/* Detach the local (interface) side of an ES: clear the local-only flags,
+ * refresh the MACs that point at the ES, unlink the ES from its interface
+ * and from zmh_info->local_es_list, and finally try to free it.
+ * No-op if the ES is not local. The ES may have been freed on return.
+ */
+static void zebra_evpn_es_local_info_clear(struct zebra_evpn_es *es)
+{
+	struct zebra_if *zif;
+
+	if (!(es->flags & ZEBRA_EVPNES_LOCAL))
+		return;
+
+	/* READY_FOR_BGP and OPER_UP describe the local attachment only.
+	 * They must not survive it: the ES can live on through a remote or
+	 * MAC reference, and a stale READY_FOR_BGP would make a later
+	 * re-attach look like "no change" so the ES would never be
+	 * re-announced to BGP; a stale OPER_UP would be reported as the
+	 * oper state of the next attachment.
+	 */
+	es->flags &= ~(ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_READY_FOR_BGP |
+		       ZEBRA_EVPNES_OPER_UP);
+
+	/* if there any local macs referring to the ES as dest we
+	 * need to clear the static reference on them
+	 */
+	zebra_evpn_es_local_mac_update(es,
+			true /* force_clear_static */);
+
+	/* clear the es from the parent interface */
+	zif = es->zif;
+	zif->es_info.es = NULL;
+	es->zif = NULL;
+
+	/* remove from the ES list */
+	list_delete_node(zmh_info->local_es_list, &es->local_es_listnode);
+
+	/* free up the ES if there is no remote reference */
+	zebra_evpn_es_free(es);
+}
+
+/* Delete the local side of an ethernet segment and inform BGP.
+ * No-op if the ES is not local. The ES may have been freed on return.
+ */
+static void zebra_evpn_local_es_del(struct zebra_evpn_es *es)
+{
+	struct zebra_evpn_es_evi *es_evi;
+	struct listnode *cur = NULL;
+	struct listnode *next = NULL;
+
+	if (!CHECK_FLAG(es->flags, ZEBRA_EVPNES_LOCAL))
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("local es %s del; nhg 0x%x if %s",
+			   es->esi_str, es->nhg_id,
+			   es->zif ? es->zif->ifp->name : "-");
+
+	/* remove all ES-EVIs associated with the ES; the safe iterator
+	 * is used because entries are deleted while walking
+	 */
+	for (ALL_LIST_ELEMENTS(es->es_evi_list, cur, next, es_evi))
+		zebra_evpn_local_es_evi_do_del(es_evi);
+
+	/* send a del if the ES had been sent to BGP earlier */
+	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
+		zebra_evpn_es_send_del_to_client(es);
+
+	zebra_evpn_es_local_info_clear(es);
+}
+
+/* Re-evaluate the remote state of the ES: it is classified as "remote"
+ * for as long as its VTEP list is non-empty. When the last VTEP is gone
+ * the remote flag is cleared and the ES is freed if nothing else
+ * references it, so the ES may have been freed on return.
+ */
+static void zebra_evpn_es_remote_info_re_eval(struct zebra_evpn_es *es)
+{
+	bool has_vteps = !!listcount(es->es_vtep_list);
+	bool is_remote = !!(es->flags & ZEBRA_EVPNES_REMOTE);
+
+	/* flag already matches the VTEP list */
+	if (has_vteps == is_remote)
+		return;
+
+	if (has_vteps) {
+		es->flags |= ZEBRA_EVPNES_REMOTE;
+		if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+			zlog_debug("remote es %s add; nhg 0x%x",
+				   es->esi_str, es->nhg_id);
+		return;
+	}
+
+	es->flags &= ~ZEBRA_EVPNES_REMOTE;
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("remote es %s del; nhg 0x%x",
+			   es->esi_str, es->nhg_id);
+	zebra_evpn_es_free(es);
+}
+
+/* A new local es is created when a local-es-id and sysmac is configured
+ * against an interface.
+ *
+ * An incomplete config (zero lid or zero sysmac) deletes the ES currently
+ * attached to zif, if any. A complete config builds the type-3 ESI and
+ * moves zif from its old ES (if any) to the ES with that ESI, creating
+ * the ES when needed.
+ *
+ * Returns 0 on success (including no-ops) and -1 if the resulting ESI is
+ * already attached to a different interface; in that case the ES
+ * currently attached to zif is left untouched.
+ */
+static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid,
+		struct ethaddr *sysmac)
+{
+	struct zebra_evpn_es *old_es = zif->es_info.es;
+	struct zebra_evpn_es *es;
+	esi_t esi;
+	int offset = 0;
+	int field_bytes = 0;
+
+	/* Complete config of the ES-ID bootstraps the ES */
+	if (!lid || is_zero_mac(sysmac)) {
+		/* if an ES is attached to zif delete it */
+		if (old_es)
+			zebra_evpn_local_es_del(old_es);
+		return 0;
+	}
+
+	/* build 10-byte type-3-ESI -
+	 * Type(1-byte), MAC(6-bytes), ES-LID (3-bytes)
+	 */
+	field_bytes = 1;
+	esi.val[offset] = ESI_TYPE_MAC;
+	offset += field_bytes;
+
+	field_bytes = ETH_ALEN;
+	memcpy(&esi.val[offset], (uint8_t *)sysmac, field_bytes);
+	offset += field_bytes;
+
+	/* ES-LID is written most significant byte first */
+	esi.val[offset++] = (uint8_t)(lid >> 16);
+	esi.val[offset++] = (uint8_t)(lid >> 8);
+	esi.val[offset++] = (uint8_t)lid;
+
+	if (old_es && !memcmp(&old_es->esi, &esi, sizeof(esi_t)))
+		/* dup - nothing to be done */
+		return 0;
+
+	/* Look for a conflict BEFORE the old ES is released; a rejected
+	 * update must not tear down the working ES on this interface.
+	 */
+	es = zebra_evpn_es_find(&esi);
+	if (es && es->zif && es->zif != zif)
+		/* exists against another interface; flag an error */
+		return -1;
+
+	/* release the old_es against the zif; old_es carries a different
+	 * ESI, so the lookup result above is not affected by this
+	 */
+	if (old_es)
+		zebra_evpn_local_es_del(old_es);
+
+	/* create new es */
+	if (!es)
+		es = zebra_evpn_es_new(&esi);
+
+	zebra_evpn_es_local_info_set(es, zif);
+
+	return 0;
+}
+
+/* Remove a remote VTEP from the ES identified by esi and re-evaluate the
+ * remote state of that ES. Returns -1 if no such ES exists, 0 otherwise.
+ */
+static int zebra_evpn_remote_es_del(esi_t *esi, struct in_addr vtep_ip)
+{
+	struct zebra_evpn_es *es;
+	char esi_buf[ESI_STR_LEN];
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("remote es %s vtep %s del",
+			   esi_to_str(esi, esi_buf, sizeof(esi_buf)),
+			   inet_ntoa(vtep_ip));
+
+	es = zebra_evpn_es_find(esi);
+	if (es == NULL) {
+		/* XXX - error log */
+		return -1;
+	}
+
+	zebra_evpn_es_vtep_del(es, vtep_ip);
+	/* may free the ES if this was its last reference */
+	zebra_evpn_es_remote_info_re_eval(es);
+
+	return 0;
+}
+
+/* Force delete a remote ES on the way down: every VTEP on the ES is freed
+ * and the remote state is re-evaluated after each removal.
+ * NOTE(review): the re-eval calls zebra_evpn_es_free() once the VTEP list
+ * is empty, so the ES may have been freed by the time this returns;
+ * callers must not touch it afterwards.
+ */
+static void zebra_evpn_remote_es_flush(struct zebra_evpn_es *es)
+{
+ struct zebra_evpn_es_vtep *es_vtep;
+ struct listnode *node;
+ struct listnode *nnode;
+
+ /* deletion-safe walk; entries are freed inside the loop */
+ for (ALL_LIST_ELEMENTS(es->es_vtep_list, node, nnode, es_vtep)) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+ zlog_debug("es %s vtep %s flush",
+ es->esi_str,
+ inet_ntoa(es_vtep->vtep_ip));
+ /* presumably also unlinks the entry from es->es_vtep_list -
+ * TODO confirm against zebra_evpn_es_vtep_free()
+ */
+ zebra_evpn_es_vtep_free(es_vtep);
+ zebra_evpn_es_remote_info_re_eval(es);
+ }
+}
+
+/* Add a remote VTEP to the ES identified by esi, creating the ES if it
+ * does not exist yet, and re-evaluate the remote state of the ES.
+ * Returns -1 if the ES could not be created, 0 otherwise.
+ */
+static int zebra_evpn_remote_es_add(esi_t *esi, struct in_addr vtep_ip)
+{
+	struct zebra_evpn_es *es;
+	char esi_buf[ESI_STR_LEN];
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("remote es %s vtep %s add",
+			   esi_to_str(esi, esi_buf, sizeof(esi_buf)),
+			   inet_ntoa(vtep_ip));
+
+	es = zebra_evpn_es_find(esi);
+	if (es == NULL)
+		es = zebra_evpn_es_new(esi);
+	if (es == NULL) {
+		/* XXX - error log */
+		return -1;
+	}
+
+	zebra_evpn_es_vtep_add(es, vtep_ip);
+	zebra_evpn_es_remote_info_re_eval(es);
+
+	return 0;
+}
+
+/* ZAPI handler for remote ES VTEP add/del messages. The payload is an ESI
+ * followed by the IPv4 address of the remote VTEP;
+ * ZEBRA_REMOTE_ES_VTEP_ADD adds the VTEP, any other command handled here
+ * removes it.
+ */
+void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS)
+{
+	struct stream *s;
+	struct in_addr vtep_ip;
+	esi_t esi;
+
+	if (!is_evpn_enabled()) {
+		zlog_debug(
+			"%s: EVPN not enabled yet we received a es_add zapi call",
+			__PRETTY_FUNCTION__);
+		return;
+	}
+
+	memset(&esi, 0, sizeof(esi_t));
+	s = msg;
+
+	/* Use the checked getters: a truncated message from the client
+	 * bails out through stream_failure instead of tripping the
+	 * assertions in the unchecked stream_get*() variants. The address
+	 * is copied as-is, like stream_get_ipv4() does.
+	 */
+	STREAM_GET(&esi, s, sizeof(esi_t));
+	STREAM_GET(&vtep_ip.s_addr, s, sizeof(vtep_ip.s_addr));
+
+	if (hdr->command == ZEBRA_REMOTE_ES_VTEP_ADD)
+		zebra_evpn_remote_es_add(&esi, vtep_ip);
+	else
+		zebra_evpn_remote_es_del(&esi, vtep_ip);
+
+stream_failure:
+	return;
+}
+
+/* Drop the MAC's reference to its ES, if it has one. The ES is offered
+ * up for freeing once no MAC entries reference it any more.
+ */
+void zebra_evpn_es_mac_deref_entry(zebra_mac_t *mac)
+{
+	struct zebra_evpn_es *es = mac->es;
+
+	/* not associated with an ES; nothing to undo */
+	if (!es)
+		return;
+
+	mac->es = NULL;
+	list_delete_node(es->mac_list, &mac->es_listnode);
+
+	if (listcount(es->mac_list))
+		return;
+
+	/* last MAC reference gone; free is refused if the ES is still
+	 * local or remote
+	 */
+	zebra_evpn_es_free(es);
+}
+
+/* Associate a MAC entry with a local or remote ES; a NULL es only drops
+ * the current association. Returns false if there was no ES change and
+ * true otherwise.
+ */
+bool zebra_evpn_es_mac_ref_entry(zebra_mac_t *mac, struct zebra_evpn_es *es)
+{
+	if (mac->es == es)
+		return false;
+
+	/* release the previous ES first */
+	if (mac->es)
+		zebra_evpn_es_mac_deref_entry(mac);
+
+	if (es) {
+		mac->es = es;
+		listnode_init(&mac->es_listnode, mac);
+		listnode_add(es->mac_list, &mac->es_listnode);
+	}
+
+	return true;
+}
+
+/* Associate a MAC entry with the ES identified by esi, auto-creating the
+ * ES if it is not known yet. Returns what zebra_evpn_es_mac_ref_entry()
+ * returns, i.e. false if the MAC already referenced that ES.
+ */
+bool zebra_evpn_es_mac_ref(zebra_mac_t *mac, esi_t *esi)
+{
+	struct zebra_evpn_es *es = zebra_evpn_es_find(esi);
+
+	if (es)
+		return zebra_evpn_es_mac_ref_entry(mac, es);
+
+	es = zebra_evpn_es_new(esi);
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("auto es %s add on mac ref", es->esi_str);
+
+	return zebra_evpn_es_mac_ref_entry(mac, es);
+}
+
+/* Inform BGP about local ES-EVI add or del. The message carries the ESI
+ * and the VNI.
+ * Returns 0 without sending anything if no BGP client is connected,
+ * otherwise the result of zserv_send_message().
+ */
+static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es,
+		zebra_vni_t *zvni, bool add)
+{
+	struct zserv *client;
+	struct stream *s;
+
+	client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
+	/* BGP may not be running. */
+	if (!client)
+		return 0;
+
+	s = stream_new(ZEBRA_MAX_PACKET_SIZ);
+
+	zclient_create_header(s,
+			add ? ZEBRA_LOCAL_ES_EVI_ADD : ZEBRA_LOCAL_ES_EVI_DEL,
+			zebra_vrf_get_evpn_id());
+	stream_put(s, &es->esi, sizeof(esi_t));
+	stream_putl(s, zvni->vni);
+
+	/* Write packet size. */
+	stream_putw_at(s, 0, stream_get_endp(s));
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("send %s local es %s evi %u to %s",
+			   add ? "add" : "del",
+			   es->esi_str, zvni->vni,
+			   zebra_route_string(client->proto));
+
+	/* Account the message under the counter matching its direction;
+	 * deletes used to be counted as adds.
+	 * NOTE(review): these are the per-client ES counters; dedicated
+	 * ES-EVI counters would be more precise if the client has them.
+	 */
+	if (add)
+		client->local_es_add_cnt++;
+	else
+		client->local_es_del_cnt++;
+
+	return zserv_send_message(client, s);
+}
+
+/* The sysmac part of a local ESI has changed: re-run the ES update with
+ * the interface's current local-id and the new sysmac. The new sysmac is
+ * stored on the interface only if the update succeeded.
+ * Returns the result of zebra_evpn_local_es_update().
+ */
+static int zebra_evpn_es_sys_mac_update(struct zebra_if *zif,
+		struct ethaddr *sysmac)
+{
+	int rv = zebra_evpn_local_es_update(zif, zif->es_info.lid, sysmac);
+
+	if (rv)
+		return rv;
+
+	memcpy(&zif->es_info.sysmac, sysmac, sizeof(struct ethaddr));
+	return 0;
+}
+
+/* The local-ID part of a local ESI has changed: re-run the ES update with
+ * the new local-id and the interface's current sysmac. The new local-id
+ * is stored on the interface only if the update succeeded.
+ * Returns the result of zebra_evpn_local_es_update().
+ */
+static int zebra_evpn_es_lid_update(struct zebra_if *zif, uint32_t lid)
+{
+	int rv = zebra_evpn_local_es_update(zif, lid, &zif->es_info.sysmac);
+
+	if (rv)
+		return rv;
+
+	zif->es_info.lid = lid;
+	return 0;
+}
+
+/* Tear down every ES in zmh_info->es_rb_tree: the local side is deleted
+ * (which informs BGP) and the remote VTEPs are flushed.
+ */
+void zebra_evpn_es_cleanup(void)
+{
+	struct zebra_evpn_es *es;
+	struct zebra_evpn_es *es_next;
+	bool remote;
+
+	RB_FOREACH_SAFE(es, zebra_es_rb_head,
+			&zmh_info->es_rb_tree, es_next) {
+		/* Sample the remote flag first. Deleting the local side
+		 * frees an ES that has no remote or MAC reference, and
+		 * flushing it afterwards would read freed memory. While
+		 * the remote flag is set the local delete cannot free the
+		 * ES, so the flush is safe exactly when it is needed.
+		 */
+		remote = !!(es->flags & ZEBRA_EVPNES_REMOTE);
+
+		zebra_evpn_local_es_del(es);
+		if (remote)
+			zebra_evpn_remote_es_flush(es);
+	}
+}
+
+/* Only certain types of access ports can be setup as an Ethernet Segment;
+ * currently that is bond interfaces only.
+ * XXX: allow swpX i.e. a regular ethernet port to be an ES link too
+ */
+bool zebra_evpn_is_if_es_capable(struct zebra_if *zif)
+{
+	return zif->zif_type == ZEBRA_IF_BOND;
+}
+
+/* Print the interface's ES config (local-id and sysmac) to the vty.
+ * Nothing is printed when neither of the two is configured.
+ */
+void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif)
+{
+	char mac_buf[ETHER_ADDR_STRLEN];
+
+	if (!zif->es_info.lid && is_zero_mac(&zif->es_info.sysmac))
+		return;
+
+	vty_out(vty, " EVPN MH: ES id %u ES sysmac %s\n",
+		zif->es_info.lid,
+		prefix_mac2str(&zif->es_info.sysmac,
+			       mac_buf, sizeof(mac_buf)));
+}
+
+/* Track an oper state change of the interface backing a local ES and, if
+ * the ES has been sent to BGP, refresh it there so BGP sees the new state.
+ * No-op if the interface has no ES or the state did not change.
+ */
+void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up)
+{
+	struct zebra_evpn_es *es = zif->es_info.es;
+	bool old_up;
+
+	/* interface is not (or no longer) an ES link */
+	if (!es)
+		return;
+
+	old_up = !!(es->flags & ZEBRA_EVPNES_OPER_UP);
+	if (old_up == up)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es %s state changed to %s ",
+			   es->esi_str,
+			   up ? "up" : "down");
+	if (up)
+		es->flags |= ZEBRA_EVPNES_OPER_UP;
+	else
+		es->flags &= ~ZEBRA_EVPNES_OPER_UP;
+
+	/* inform BGP of the ES oper state change */
+	if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
+		zebra_evpn_es_send_add_to_client(es);
+}
+
+/* Format the remote VTEPs of an ES as a comma separated list of IPv4
+ * addresses into vtep_str (at most vtep_str_size bytes, truncated by
+ * strlcat if it does not fit). Returns vtep_str.
+ */
+static char *zebra_evpn_es_vtep_str(char *vtep_str, struct zebra_evpn_es *es,
+		uint8_t vtep_str_size)
+{
+	struct zebra_evpn_es_vtep *zvtep;
+	struct listnode *node;
+	const char *sep = "";
+
+	vtep_str[0] = '\0';
+	for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep)) {
+		/* empty separator in front of the first entry only */
+		strlcat(vtep_str, sep, vtep_str_size);
+		strlcat(vtep_str, inet_ntoa(zvtep->vtep_ip), vtep_str_size);
+		sep = ",";
+	}
+	return vtep_str;
+}
+
+/* Print one row of the ES summary table: ESI, type ("L", "R" or "LR"),
+ * the ES interface name (or "-" if there is none) and the comma separated
+ * list of remote VTEPs. Nothing is emitted when json is non-NULL.
+ */
+static void zebra_evpn_es_show_entry(struct vty *vty,
+ struct zebra_evpn_es *es, json_object *json)
+{
+ char type_str[4];
+ char vtep_str[ES_VTEP_LIST_STR_SZ];
+
+ if (json) {
+ /* XXX - JSON output is not implemented yet */
+ } else {
+ type_str[0] = '\0';
+ if (es->flags & ZEBRA_EVPNES_LOCAL)
+ strlcat(type_str, "L", sizeof(type_str));
+ if (es->flags & ZEBRA_EVPNES_REMOTE)
+ strlcat(type_str, "R", sizeof(type_str));
+
+ zebra_evpn_es_vtep_str(vtep_str, es, sizeof(vtep_str));
+
+ /* column widths match the header printed by zebra_evpn_es_show() */
+ vty_out(vty, "%-30s %-4s %-21s %s\n",
+ es->esi_str, type_str,
+ es->zif ? es->zif->ifp->name : "-",
+ vtep_str);
+ }
+}
+
+/* Print the detailed, multi-line view of one ES: type, interface, oper
+ * state, BGP readiness, VNI and MAC counts, nexthop group id and the
+ * remote VTEPs with their nexthop ids. Nothing is emitted when json is
+ * non-NULL.
+ */
+static void zebra_evpn_es_show_entry_detail(struct vty *vty,
+ struct zebra_evpn_es *es, json_object *json)
+{
+ char type_str[80];
+ struct zebra_evpn_es_vtep *zvtep;
+ struct listnode *node;
+
+ if (json) {
+ /* XXX - JSON output is not implemented yet */
+ } else {
+ /* type is "Local", "Remote" or "Local,Remote" */
+ type_str[0] = '\0';
+ if (es->flags & ZEBRA_EVPNES_LOCAL)
+ strlcat(type_str, "Local", sizeof(type_str));
+ if (es->flags & ZEBRA_EVPNES_REMOTE) {
+ if (strnlen(type_str, sizeof(type_str)))
+ strlcat(type_str, ",", sizeof(type_str));
+ strlcat(type_str, "Remote", sizeof(type_str));
+ }
+
+ vty_out(vty, "ESI: %s\n", es->esi_str);
+ vty_out(vty, " Type: %s\n", type_str);
+ vty_out(vty, " Interface: %s\n",
+ (es->zif) ?
+ es->zif->ifp->name : "-");
+ vty_out(vty, " State: %s\n",
+ (es->flags & ZEBRA_EVPNES_OPER_UP) ?
+ "up" : "down");
+ vty_out(vty, " Ready for BGP: %s\n",
+ (es->flags & ZEBRA_EVPNES_READY_FOR_BGP) ?
+ "yes" : "no");
+ vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list));
+ vty_out(vty, " MAC Count: %d\n", listcount(es->mac_list));
+ vty_out(vty, " Nexthop group: 0x%x\n", es->nhg_id);
+ vty_out(vty, " VTEPs:\n");
+ /* one line per remote VTEP */
+ for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, zvtep))
+ vty_out(vty, " %s nh: 0x%x\n",
+ inet_ntoa(zvtep->vtep_ip),
+ zvtep->nh_id);
+
+ vty_out(vty, "\n");
+ }
+}
+
+/* Show the summary table of all ESs in zmh_info->es_rb_tree. The legend
+ * and column header are printed for text output only.
+ */
+void zebra_evpn_es_show(struct vty *vty, bool uj)
+{
+	struct zebra_evpn_es *es;
+	/* XXX - JSON output is not implemented; the object stays NULL */
+	json_object *json = NULL;
+
+	if (!uj) {
+		vty_out(vty, "Type: L local, R remote\n");
+		vty_out(vty, "%-30s %-4s %-21s %s\n",
+			"ESI", "Type", "ES-IF", "VTEPs");
+	}
+
+	RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree)
+		zebra_evpn_es_show_entry(vty, es, json);
+}
+
+/* Show the detailed view of all ESs in zmh_info->es_rb_tree. uj is
+ * accepted but not acted upon; the entries are always printed as text.
+ */
+void zebra_evpn_es_show_detail(struct vty *vty, bool uj)
+{
+	struct zebra_evpn_es *es;
+
+	RB_FOREACH(es, zebra_es_rb_head, &zmh_info->es_rb_tree) {
+		/* no JSON object is built yet */
+		zebra_evpn_es_show_entry_detail(vty, es, NULL);
+	}
+}
+
+/* Show the detailed view of the ES identified by esi, or a one-line
+ * message if no such ES exists. uj is accepted but not acted upon.
+ */
+void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi)
+{
+	char esi_str[ESI_STR_LEN];
+	struct zebra_evpn_es *es = zebra_evpn_es_find(esi);
+
+	if (es) {
+		/* no JSON object is built yet */
+		zebra_evpn_es_show_entry_detail(vty, es, NULL);
+		return;
+	}
+
+	esi_to_str(esi, esi_str, sizeof(esi_str));
+	vty_out(vty, "ESI %s does not exist\n", esi_str);
+}
+
+/* Write the per-interface EVPN-MH config (es-id and es-sys-mac) to the
+ * vty; values that are not configured are omitted. Always returns 0.
+ */
+int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp)
+{
+	struct zebra_if *zif = ifp->info;
+	struct ethaddr *sysmac = &zif->es_info.sysmac;
+	char mac_buf[ETHER_ADDR_STRLEN];
+
+	if (zif->es_info.lid)
+		vty_out(vty, " evpn mh es-id %u\n", zif->es_info.lid);
+
+	if (!is_zero_mac(sysmac))
+		vty_out(vty, " evpn mh es-sys-mac %s\n",
+			prefix_mac2str(sysmac, mac_buf, sizeof(mac_buf)));
+
+	return 0;
+}
+
+#ifndef VTYSH_EXTRACT_PL
+#include "zebra/zebra_evpn_mh_clippy.c"
+#endif
+/* CLI for setting up sysmac part of ESI on an access port.
+ * The "no" form clears the sysmac. The set form requires an ES capable
+ * interface and a non-zero MAC. Returns CMD_WARNING with a message on any
+ * failure and CMD_SUCCESS otherwise.
+ */
+DEFPY(zebra_evpn_es_sys_mac,
+ zebra_evpn_es_sys_mac_cmd,
+ "[no$no] evpn mh es-sys-mac [X:X:X:X:X:X$mac]",
+ NO_STR
+ "EVPN\n"
+ EVPN_MH_VTY_STR
+ "Ethernet segment system MAC\n"
+ MAC_STR
+)
+{
+ VTY_DECLVAR_CONTEXT(interface, ifp);
+ struct zebra_if *zif;
+ int ret = 0;
+
+ zif = ifp->info;
+
+ if (no) {
+ /* static storage, hence all-zero: an all-zero sysmac means "unset" */
+ static struct ethaddr zero_mac;
+
+ ret = zebra_evpn_es_sys_mac_update(zif, &zero_mac);
+ if (ret == -1) {
+ vty_out(vty, "%%Failed to clear ES sysmac\n");
+ return CMD_WARNING;
+ }
+ } else {
+
+ if (!zebra_evpn_is_if_es_capable(zif)) {
+ vty_out(vty,
+ "%%ESI cannot be associated with this interface type\n");
+ return CMD_WARNING;
+ }
+
+ /* the MAC token is optional in the grammar but mandatory for a set */
+ if (!mac || is_zero_mac(&mac->eth_addr)) {
+ vty_out(vty, "%%ES sysmac value is invalid\n");
+ return CMD_WARNING;
+ }
+
+ /* -1 means the resulting ESI is attached to another interface */
+ ret = zebra_evpn_es_sys_mac_update(zif, &mac->eth_addr);
+ if (ret == -1) {
+ vty_out(vty, "%%ESI already exists on a different interface\n");
+ return CMD_WARNING;
+ }
+ }
+ return CMD_SUCCESS;
+}
+
+/* CLI for setting up local-ID part of ESI on an access port.
+ * The "no" form clears the local-id (sets it to 0). The set form requires
+ * an ES capable interface and a non-zero id. Returns CMD_WARNING with a
+ * message on any failure and CMD_SUCCESS otherwise.
+ */
+DEFPY(zebra_evpn_es_id,
+ zebra_evpn_es_id_cmd,
+ "[no$no] evpn mh es-id [(1-16777215)$es_lid]",
+ NO_STR
+ "EVPN\n"
+ EVPN_MH_VTY_STR
+ "Ethernet segment local identifier\n"
+ "ID\n"
+)
+{
+ VTY_DECLVAR_CONTEXT(interface, ifp);
+ struct zebra_if *zif;
+ int ret;
+
+ zif = ifp->info;
+
+ if (no) {
+ /* a local-id of 0 means "unset" */
+ ret = zebra_evpn_es_lid_update(zif, 0);
+ if (ret == -1) {
+ vty_out(vty, "%%Failed to clear ES local id\n");
+ return CMD_WARNING;
+ }
+ } else {
+ if (!zebra_evpn_is_if_es_capable(zif)) {
+ vty_out(vty,
+ "%%ESI cannot be associated with this interface type\n");
+ return CMD_WARNING;
+ }
+
+ /* the id token is optional in the grammar but mandatory for a set */
+ if (!es_lid) {
+ vty_out(vty, "%%Specify local ES ID\n");
+ return CMD_WARNING;
+ }
+ /* -1 means the resulting ESI is attached to another interface */
+ ret = zebra_evpn_es_lid_update(zif, es_lid);
+ if (ret == -1) {
+ vty_out(vty,
+ "%%ESI already exists on a different interface\n");
+ return CMD_WARNING;
+ }
+ }
+ return CMD_SUCCESS;
+}
+
+/*****************************************************************************/
+/* A base L2-VNI is maintained to derive parameters such as ES originator-IP.
+ * XXX: once single vxlan device model becomes available this will not be
+ * necessary
+ */
+/* Called when a new vni is added or becomes oper up or becomes a bridge
+ * port. Adopts zvni as the base VNI if there is none yet and zvni is
+ * eligible, then refreshes the ES originator IP from the base VNI's local
+ * VTEP IP and pushes any change to BGP for all local ESs.
+ */
+void zebra_evpn_es_set_base_vni(zebra_vni_t *zvni)
+{
+	struct zebra_evpn_es *es;
+	struct listnode *node;
+	struct in_addr new_ip;
+
+	if (!zmh_info->es_base_vni) {
+		/* check if the VNI can be used as base VNI */
+		if (!zebra_evpn_vni_send_to_client_ok(zvni))
+			return;
+
+		if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+			zlog_debug("es base vni set to %d",
+				   zvni->vni);
+		zmh_info->es_base_vni = zvni;
+	} else if (zmh_info->es_base_vni != zvni) {
+		/* unrelated VNI; ignore it */
+		return;
+	}
+
+	/* zvni is the base VNI here; check if the local vtep-ip changed */
+	new_ip = zmh_info->es_base_vni->local_vtep_ip;
+	if (zmh_info->es_originator_ip.s_addr == new_ip.s_addr)
+		return;
+
+	zmh_info->es_originator_ip.s_addr = new_ip.s_addr;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es originator ip set to %s",
+			   inet_ntoa(new_ip));
+
+	/* if originator ip changes we need to update bgp: ESs already
+	 * known to BGP are re-sent, the rest are re-evaluated
+	 */
+	for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) {
+		if (es->flags & ZEBRA_EVPNES_READY_FOR_BGP)
+			zebra_evpn_es_send_add_to_client(es);
+		else
+			zebra_evpn_es_re_eval_send_to_client(es,
+					true /* es_evi_re_reval */);
+	}
+}
+
+/* Called when a vni is removed or becomes oper down or is removed from a
+ * bridge. If zvni was the base VNI a replacement is looked for; if none
+ * is found the ES originator IP is cleared and all local ESs are
+ * re-evaluated.
+ */
+void zebra_evpn_es_clear_base_vni(zebra_vni_t *zvni)
+{
+	struct zebra_evpn_es *es;
+	struct listnode *node;
+
+	if (zmh_info->es_base_vni != zvni)
+		return;
+
+	/* lost current base VNI; try to find a new one */
+	zmh_info->es_base_vni = NULL;
+	zebra_evpn_es_get_one_base_vni();
+
+	/* found a replacement; nothing more to do here */
+	if (zmh_info->es_base_vni)
+		return;
+
+	/* no originator ip was set, so there is nothing to clear */
+	if (!zmh_info->es_originator_ip.s_addr)
+		return;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_ES)
+		zlog_debug("es originator ip cleared");
+
+	/* lost originator ip */
+	zmh_info->es_originator_ip.s_addr = 0;
+	for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es))
+		zebra_evpn_es_re_eval_send_to_client(es,
+				true /* es_evi_re_reval */);
+}
+
+/* Hash walker used to locate an "eligible" L2-VNI to follow: each VNI is
+ * offered as base VNI and the walk stops as soon as one has been adopted.
+ */
+static int zebra_evpn_es_get_one_base_vni_cb(struct hash_bucket *b, void *data)
+{
+	zebra_evpn_es_set_base_vni((zebra_vni_t *)b->data);
+
+	return zmh_info->es_base_vni ? HASHWALK_ABORT : HASHWALK_CONTINUE;
+}
+
+/* Locate a base_vni to follow for the purposes of common params like
+ * originator IP, by walking the VNI table of the EVPN VRF.
+ */
+static void zebra_evpn_es_get_one_base_vni(void)
+{
+	struct zebra_vrf *evpn_vrf = zebra_vrf_get_evpn();
+
+	hash_walk(evpn_vrf->vni_table,
+		  zebra_evpn_es_get_one_base_vni_cb, NULL);
+}
+
+/*****************************************************************************/
+/* Write the global EVPN-MH hold timers to the vty; a timer that is at
+ * its default value is omitted.
+ */
+void zebra_evpn_mh_config_write(struct vty *vty)
+{
+	long mac_hold = zmh_info->mac_hold_time;
+	long neigh_hold = zmh_info->neigh_hold_time;
+
+	if (mac_hold != EVPN_MH_MAC_HOLD_TIME_DEF)
+		vty_out(vty, "evpn mh mac-holdtime %ld\n", mac_hold);
+
+	if (neigh_hold != EVPN_MH_NEIGH_HOLD_TIME_DEF)
+		vty_out(vty, "evpn mh neigh-holdtime %ld\n", neigh_hold);
+}
+
+/* Set the neigh hold time to duration, or to
+ * EVPN_MH_NEIGH_HOLD_TIME_DEF when set_default is true (duration is
+ * ignored in that case). Always returns 0.
+ */
+int zebra_evpn_mh_neigh_holdtime_update(struct vty *vty,
+		uint32_t duration, bool set_default)
+{
+	/* Replace the requested value with the default up front; storing
+	 * the default and then unconditionally storing duration would
+	 * overwrite it again.
+	 */
+	if (set_default)
+		duration = EVPN_MH_NEIGH_HOLD_TIME_DEF;
+
+	zmh_info->neigh_hold_time = duration;
+
+	return 0;
+}
+
+/* Set the MAC hold time to duration, or to EVPN_MH_MAC_HOLD_TIME_DEF
+ * when set_default is true (duration is ignored in that case).
+ * Always returns 0.
+ */
+int zebra_evpn_mh_mac_holdtime_update(struct vty *vty,
+		uint32_t duration, bool set_default)
+{
+	uint32_t hold_time = duration;
+
+	if (set_default)
+		hold_time = EVPN_MH_MAC_HOLD_TIME_DEF;
+
+	zmh_info->mac_hold_time = hold_time;
+
+	return 0;
+}
+
+/* Register the per-interface EVPN-MH CLI commands (es-id and es-sys-mac)
+ * under INTERFACE_NODE.
+ */
+void zebra_evpn_interface_init(void)
+{
+ install_element(INTERFACE_NODE, &zebra_evpn_es_id_cmd);
+ install_element(INTERFACE_NODE, &zebra_evpn_es_sys_mac_cmd);
+}
+
+/* Allocate and initialise the global EVPN-MH state (zmh_info, i.e.
+ * zrouter.mh_info): default hold timers, the ES RB tree and local ES
+ * list, the nexthop id bitmap and the access VLAN hash table.
+ */
+void zebra_evpn_mh_init(void)
+{
+ zrouter.mh_info = XCALLOC(MTYPE_ZMH_INFO, sizeof(*zrouter.mh_info));
+
+ zmh_info->mac_hold_time = EVPN_MH_MAC_HOLD_TIME_DEF;
+ zmh_info->neigh_hold_time = EVPN_MH_NEIGH_HOLD_TIME_DEF;
+ /* setup ES tables */
+ RB_INIT(zebra_es_rb_head, &zmh_info->es_rb_tree);
+ zmh_info->local_es_list = list_new();
+ /* list nodes are embedded in the ES (es->local_es_listnode) rather
+ * than allocated by the list
+ */
+ listset_app_node_mem(zmh_info->local_es_list);
+
+ bf_init(zmh_info->nh_id_bitmap, EVPN_NH_ID_MAX);
+ /* presumably reserves index 0 so that it is never handed out as an
+ * id - TODO confirm against bitfield.h
+ */
+ bf_assign_zero_index(zmh_info->nh_id_bitmap);
+
+ /* setup broadcast domain tables */
+ zmh_info->evpn_vlan_table = hash_create(zebra_evpn_acc_vl_hash_keymake,
+ zebra_evpn_acc_vl_cmp, "access VLAN hash table");
+}
+
+/* Release the global EVPN-MH tables: the local ES list and the access
+ * VLAN hash table together with its entries.
+ * NOTE(review): the nh_id_bitmap and the zmh_info structure itself are
+ * not released here - confirm whether that is intentional.
+ */
+void zebra_evpn_mh_terminate(void)
+{
+ list_delete(&zmh_info->local_es_list);
+
+ /* clean up every access VLAN entry before freeing the table itself */
+ hash_iterate(zmh_info->evpn_vlan_table,
+ zebra_evpn_acc_vl_cleanup_all, NULL);
+ hash_free(zmh_info->evpn_vlan_table);
+}
diff --git a/zebra/zebra_evpn_mh.h b/zebra/zebra_evpn_mh.h
new file mode 100644
index 0000000000..46c25a04bc
--- /dev/null
+++ b/zebra/zebra_evpn_mh.h
@@ -0,0 +1,239 @@
+/*
+ * Zebra EVPN MH Data structures and definitions
+ *
+ * Copyright (C) 2019 Cumulus Networks, Inc.
+ * Anuradha Karuppiah
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ZEBRA_EVPN_MH_H
+#define _ZEBRA_EVPN_MH_H
+
+#include <zebra.h>
+
+#include "if.h"
+#include "linklist.h"
+#include "bitfield.h"
+#include "zebra_vxlan.h"
+#include "zebra_vxlan_private.h"
+
+#define EVPN_MH_VTY_STR "Multihoming\n"
+
+/* Ethernet Segment entry -
+ * - Local and remote ESs are maintained in a global RB tree,
+ * zmh_info->es_rb_tree using ESI as key
+ * - Local ESs are added via zebra config (ZEBRA_EVPNES_LOCAL) when an
+ * access port is associated with an ES-ID
+ * - Remote ESs are added by BGP based on received/remote EAD/Type-1 routes
+ * (ZEBRA_EVPNES_REMOTE)
+ * - An ES can be simultaneously LOCAL and REMOTE; in fact all LOCAL ESs are
+ * expected to have REMOTE ES peers.
+ */
+struct zebra_evpn_es {
+ esi_t esi;
+ char esi_str[ESI_STR_LEN];
+
+ /* ES flags */
+ uint32_t flags;
+#define ZEBRA_EVPNES_LOCAL (1 << 0) /* configured in zebra */
+#define ZEBRA_EVPNES_REMOTE (1 << 1) /* added by bgp */
+#define ZEBRA_EVPNES_OPER_UP (1 << 2) /* es->zif->ifp is oper-up */
+#define ZEBRA_EVPNES_READY_FOR_BGP (1 << 3) /* ready to be sent to BGP */
+#define ZEBRA_EVPNES_NHG_ACTIVE (1 << 4) /* NHG has been installed */
+
+ /* memory used for adding the es to zmh_info->es_rb_tree */
+ RB_ENTRY(zebra_evpn_es) rb_node;
+
+ /* [EVPNES_LOCAL] memory used for linking the es to
+ * zmh_info->local_es_list
+ */
+ struct listnode local_es_listnode;
+
+ /* [EVPNES_LOCAL] corresponding interface */
+ struct zebra_if *zif;
+
+ /* list of ES-EVIs associated with the ES */
+ struct list *es_evi_list;
+
+ /* [EVPNES_REMOTE] List of remote VTEPs (zebra_evpn_es_vtep); the ES
+ * is flagged remote for as long as this list is non-empty
+ */
+ struct list *es_vtep_list;
+
+ /* list of zebra_mac entries using this ES as destination */
+ struct list *mac_list;
+
+ /* Nexthop group id */
+ uint32_t nhg_id;
+};
+RB_HEAD(zebra_es_rb_head, zebra_evpn_es);
+RB_PROTOTYPE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp);
+
+/* ES per-EVI info
+ * - ES-EVIs are maintained per-VNI (vni->es_evi_rb_tree)
+ * - Local ES-EVIs are linked to per-VNI list for quick access
+ * - Although some infrastructure is present for remote ES-EVIs, currently
+ * BGP does NOT send remote ES-EVIs to zebra. This may change in the
+ * future (but must be changed thoughtfully and only if needed as ES-EVI
+ * can get prolific and come in the way of rapid failovers)
+ */
+struct zebra_evpn_es_evi {
+ /* the ES and the VNI this entry ties together */
+ struct zebra_evpn_es *es;
+ zebra_vni_t *zvni;
+
+ /* ES-EVI flags */
+ uint32_t flags;
+ /* local ES-EVI */
+#define ZEBRA_EVPNES_EVI_LOCAL (1 << 0) /* created by zebra */
+#define ZEBRA_EVPNES_EVI_READY_FOR_BGP (1 << 1) /* ready to be sent to BGP */
+
+ /* memory used for adding the es_evi to
+ * es_evi->zvni->es_evi_rb_tree
+ */
+ RB_ENTRY(zebra_evpn_es_evi) rb_node;
+ /* memory used for linking the es_evi to
+ * es_evi->zvni->local_es_evi_list
+ */
+ struct listnode l2vni_listnode;
+ /* memory used for linking the es_evi to
+ * es_evi->es->es_evi_list
+ */
+ struct listnode es_listnode;
+};
+
+/* PE attached to an ES, identified by its VTEP IP; entries live on
+ * es->es_vtep_list of the parent ES
+ */
+struct zebra_evpn_es_vtep {
+ struct zebra_evpn_es *es; /* parent ES */
+ struct in_addr vtep_ip;
+
+ /* memory used for adding the entry to es->es_vtep_list */
+ struct listnode es_listnode;
+
+ /* MAC nexthop id */
+ uint32_t nh_id;
+
+ /* XXX - maintain a backpointer to zebra_vtep_t */
+};
+
+/* Local/access-side broadcast domain - zebra_evpn_access_bd is added to -
+ * zmh_info->evpn_vlan_table (for VLAN aware bridges) OR
+ * a per-bridge table (for VLAN unaware bridges)
+ * XXX - support for VLAN unaware bridges is yet to be fleshed out; only
+ * the VLAN table exists in zebra_evpn_mh_info at this point
+ */
+struct zebra_evpn_access_bd {
+ /* VLAN id of the broadcast domain */
+ vlanid_t vid;
+
+ struct zebra_if *vxlan_zif; /* vxlan device */
+ /* list of members associated with the BD i.e. (potential) ESs */
+ struct list *mbr_zifs;
+ /* presence of zvni activates the EVI on all the ESs in mbr_zifs */
+ zebra_vni_t *zvni;
+};
+
+/* multihoming information stored in zrouter; allocated by
+ * zebra_evpn_mh_init() and reached through the zmh_info shorthand
+ */
+#define zmh_info (zrouter.mh_info)
+struct zebra_evpn_mh_info {
+ /* RB tree of Ethernet segments (used for EVPN-MH) */
+ struct zebra_es_rb_head es_rb_tree;
+ /* List of local ESs */
+ struct list *local_es_list;
+
+ /* EVPN MH broadcast domains indexed by the VID */
+ struct hash *evpn_vlan_table;
+
+ /* A base L2-VNI is maintained to derive parameters such as
+ * ES originator-IP.
+ * XXX: once single vxlan device model becomes available this will
+ * not be necessary
+ */
+ zebra_vni_t *es_base_vni;
+ /* local VTEP IP of the base VNI; 0 while no base VNI is available */
+ struct in_addr es_originator_ip;
+
+ /* L2 NH and NHG ids -
+ * Most significant 8 bits is type. Lower 24 bits is the value
+ * allocated from the nh_id_bitmap.
+ */
+ bitfield_t nh_id_bitmap;
+#define EVPN_NH_ID_MAX (16*1024)
+#define EVPN_NH_ID_VAL_MASK 0xffffff
+#define EVPN_NH_ID_TYPE_POS 24
+/* The purpose of using different types for NHG and NH is NOT to manage the
+ * id space separately. It is simply to make debugging easier.
+ */
+#define EVPN_NH_ID_TYPE_BIT (1 << EVPN_NH_ID_TYPE_POS)
+#define EVPN_NHG_ID_TYPE_BIT (2 << EVPN_NH_ID_TYPE_POS)
+
+ /* XXX - re-visit the default hold timer value */
+#define EVPN_MH_MAC_HOLD_TIME_DEF (18 * 60)
+ long mac_hold_time;
+#define EVPN_MH_NEIGH_HOLD_TIME_DEF (18 * 60)
+ long neigh_hold_time;
+};
+
+/* Returns true if the MAC is associated with an ES and that ES is local */
+static inline bool zebra_evpn_mac_is_es_local(zebra_mac_t *mac)
+{
+	if (!mac->es)
+		return false;
+
+	return (mac->es->flags & ZEBRA_EVPNES_LOCAL) != 0;
+}
+
+/* Returns true if the id is of L2-NHG or L2-NH type, i.e. if either of
+ * the two type bits is set in it
+ */
+static inline bool zebra_evpn_mh_is_fdb_nh(uint32_t id)
+{
+	return (id & (EVPN_NHG_ID_TYPE_BIT | EVPN_NH_ID_TYPE_BIT)) != 0;
+}
+
+/*****************************************************************************/
+extern esi_t *zero_esi;
+extern void zebra_evpn_mh_init(void);
+extern void zebra_evpn_mh_terminate(void);
+extern bool zebra_evpn_is_if_es_capable(struct zebra_if *zif);
+extern void zebra_evpn_if_init(struct zebra_if *zif);
+extern void zebra_evpn_if_cleanup(struct zebra_if *zif);
+extern void zebra_evpn_vni_es_init(zebra_vni_t *zvni);
+extern void zebra_evpn_vni_es_cleanup(zebra_vni_t *zvni);
+extern void zebra_evpn_vxl_vni_set(struct zebra_if *zif, zebra_vni_t *zvni,
+ bool set);
+extern void zebra_evpn_es_set_base_vni(zebra_vni_t *zvni);
+extern void zebra_evpn_es_clear_base_vni(zebra_vni_t *zvni);
+extern void zebra_evpn_vl_vxl_ref(uint16_t vid, struct zebra_if *vxlan_zif);
+extern void zebra_evpn_vl_vxl_deref(uint16_t vid, struct zebra_if *vxlan_zif);
+extern void zebra_evpn_vl_mbr_ref(uint16_t vid, struct zebra_if *zif);
+extern void zebra_evpn_vl_mbr_deref(uint16_t vid, struct zebra_if *zif);
+extern void zebra_evpn_es_send_all_to_client(bool add);
+extern void zebra_evpn_es_if_oper_state_change(struct zebra_if *zif, bool up);
+extern void zebra_evpn_es_show(struct vty *vty, bool uj);
+extern void zebra_evpn_es_show_detail(struct vty *vty, bool uj);
+extern void zebra_evpn_es_show_esi(struct vty *vty, bool uj, esi_t *esi);
+extern void zebra_evpn_vni_update_all_es(zebra_vni_t *zvni);
+extern void zebra_evpn_proc_remote_es(ZAPI_HANDLER_ARGS);
+extern void zebra_evpn_es_evi_show(struct vty *vty, bool uj, int detail);
+extern void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj,
+ vni_t vni, int detail);
+extern void zebra_evpn_es_mac_deref_entry(zebra_mac_t *mac);
+extern bool zebra_evpn_es_mac_ref_entry(zebra_mac_t *mac,
+ struct zebra_evpn_es *es);
+extern bool zebra_evpn_es_mac_ref(zebra_mac_t *mac, esi_t *esi);
+extern struct zebra_evpn_es *zebra_evpn_es_find(esi_t *esi);
+extern void zebra_evpn_interface_init(void);
+extern int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp);
+extern void zebra_evpn_acc_vl_show(struct vty *vty, bool uj);
+extern void zebra_evpn_acc_vl_show_detail(struct vty *vty, bool uj);
+extern void zebra_evpn_acc_vl_show_vid(struct vty *vty, bool uj, vlanid_t vid);
+extern void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif);
+extern void zebra_evpn_es_cleanup(void);
+extern int zebra_evpn_mh_mac_holdtime_update(struct vty *vty,
+ uint32_t duration, bool set_default);
+void zebra_evpn_mh_config_write(struct vty *vty);
+int zebra_evpn_mh_neigh_holdtime_update(struct vty *vty,
+ uint32_t duration, bool set_default);
+
+#endif /* _ZEBRA_EVPN_MH_H */
diff --git a/zebra/zebra_l2.c b/zebra/zebra_l2.c
index 4c0cc62fbf..1758c8f96a 100644
--- a/zebra/zebra_l2.c
+++ b/zebra/zebra_l2.c
@@ -43,6 +43,7 @@
#include "zebra/rt_netlink.h"
#include "zebra/zebra_l2.h"
#include "zebra/zebra_vxlan.h"
+#include "zebra/zebra_evpn_mh.h"
/* definitions */
@@ -53,13 +54,7 @@ static void map_slaves_to_bridge(struct interface *br_if, int link)
{
struct vrf *vrf;
struct interface *ifp;
- struct zebra_vrf *zvrf;
- struct zebra_ns *zns;
- zvrf = zebra_vrf_lookup_by_id(br_if->vrf_id);
- assert(zvrf);
- zns = zvrf->zns;
- assert(zns);
RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) {
FOR_ALL_INTERFACES (vrf, ifp) {
struct zebra_if *zif;
@@ -78,8 +73,7 @@ static void map_slaves_to_bridge(struct interface *br_if, int link)
br_slave = &zif->brslave_info;
if (link) {
- if (br_slave->bridge_ifindex == br_if->ifindex &&
- br_slave->ns_id == zns->ns_id)
+ if (br_slave->bridge_ifindex == br_if->ifindex)
br_slave->br_if = br_if;
} else {
if (br_slave->br_if == br_if)
@@ -90,14 +84,12 @@ static void map_slaves_to_bridge(struct interface *br_if, int link)
}
/* Public functions */
-void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave,
- struct zebra_ns *zns)
+void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave)
{
struct interface *br_if;
/* TODO: Handle change of master */
- assert(zns);
- br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(zns->ns_id),
+ br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
br_slave->bridge_ifindex);
if (br_if)
br_slave->br_if = br_if;
@@ -119,7 +111,7 @@ void zebra_l2_map_slave_to_bond(struct zebra_l2info_bondslave *bond_slave,
bond_slave->bond_if = bond_if;
else
bond_slave->bond_if = if_create_ifindex(bond_slave->bond_ifindex,
- vrf_id, NULL);
+ vrf_id);
}
void zebra_l2_unmap_slave_from_bond(struct zebra_l2info_bondslave *bond_slave)
@@ -191,6 +183,7 @@ void zebra_l2_vxlanif_add_update(struct interface *ifp,
if (add) {
memcpy(&zif->l2info.vxl, vxlan_info, sizeof(*vxlan_info));
+ zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif);
zebra_vxlan_if_add(ifp);
return;
}
@@ -229,6 +222,9 @@ void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,
return;
zif->l2info.vxl.access_vlan = access_vlan;
+
+ zebra_evpn_vl_vxl_deref(old_access_vlan, zif);
+ zebra_evpn_vl_vxl_ref(zif->l2info.vxl.access_vlan, zif);
zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_VLAN_CHANGE);
}
@@ -237,6 +233,12 @@ void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,
*/
void zebra_l2_vxlanif_del(struct interface *ifp)
{
+ struct zebra_if *zif;
+
+ zif = ifp->info;
+ assert(zif);
+
+ zebra_evpn_vl_vxl_deref(zif->l2info.vxl.access_vlan, zif);
zebra_vxlan_if_del(ifp);
}
@@ -246,32 +248,23 @@ void zebra_l2_vxlanif_del(struct interface *ifp)
* from a bridge before it can be mapped to another bridge.
*/
void zebra_l2if_update_bridge_slave(struct interface *ifp,
- ifindex_t bridge_ifindex,
- ns_id_t ns_id)
+ ifindex_t bridge_ifindex)
{
struct zebra_if *zif;
ifindex_t old_bridge_ifindex;
- ns_id_t old_ns_id;
- struct zebra_vrf *zvrf;
zif = ifp->info;
assert(zif);
- zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id);
- if (!zvrf)
- return;
-
old_bridge_ifindex = zif->brslave_info.bridge_ifindex;
- old_ns_id = zif->brslave_info.ns_id;
- if (old_bridge_ifindex == bridge_ifindex &&
- old_ns_id == zif->brslave_info.ns_id)
+ if (old_bridge_ifindex == bridge_ifindex)
return;
- zif->brslave_info.ns_id = ns_id;
zif->brslave_info.bridge_ifindex = bridge_ifindex;
+
/* Set up or remove link with master */
if (bridge_ifindex != IFINDEX_INTERNAL) {
- zebra_l2_map_slave_to_bridge(&zif->brslave_info, zvrf->zns);
+ zebra_l2_map_slave_to_bridge(&zif->brslave_info);
/* In the case of VxLAN, invoke the handler for EVPN. */
if (zif->zif_type == ZEBRA_IF_VXLAN)
zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_MASTER_CHANGE);
@@ -307,3 +300,43 @@ void zebra_l2if_update_bond_slave(struct interface *ifp, ifindex_t bond_ifindex)
else if (old_bond_ifindex != IFINDEX_INTERNAL)
zebra_l2_unmap_slave_from_bond(&zif->bondslave_info);
}
+
+/*
+ * Record that the interface is a member of every VLAN in the inclusive
+ * range [vid_start, vid_end] by setting the matching bits in
+ * zif->vlan_bitmap. An empty range (vid_start > vid_end) sets nothing.
+ *
+ * VIDs at or above IF_VLAN_BITMAP_MAX are ignored. That is the bound this
+ * file uses when walking vlan_bitmap, so stopping there keeps a bogus
+ * range from indexing past it.
+ * NOTE(review): presumes vlan_bitmap is sized to IF_VLAN_BITMAP_MAX bits;
+ * confirm against the bitmap's allocation.
+ */
+void zebra_vlan_bitmap_compute(struct interface *ifp,
+			       uint32_t vid_start, uint16_t vid_end)
+{
+	uint32_t vid;
+	struct zebra_if *zif;
+
+	zif = (struct zebra_if *)ifp->info;
+	assert(zif);
+
+	for (vid = vid_start; vid <= vid_end; ++vid) {
+		/* everything from here on is out of range as well */
+		if (vid >= IF_VLAN_BITMAP_MAX)
+			break;
+
+		bf_set_bit(zif->vlan_bitmap, vid);
+	}
+}
+
+/*
+ * Bring the EVPN access-VLAN member references in line with the
+ * interface's current VLAN bitmap.
+ *
+ * ifp             - interface whose zif->vlan_bitmap holds the new VLAN set
+ * old_vlan_bitmap - VLAN set the interface had before; its bits are
+ *                   released here as they are matched against the new set
+ *
+ * VLANs present only in the new set gain a reference, VLANs present only
+ * in the old set lose theirs.
+ */
+void zebra_vlan_mbr_re_eval(struct interface *ifp, bitfield_t old_vlan_bitmap)
+{
+	struct zebra_if *zif = (struct zebra_if *)ifp->info;
+	uint32_t vid;
+
+	assert(zif);
+
+	/* old and new sets are the same - nothing to re-evaluate */
+	if (!bf_cmp(zif->vlan_bitmap, old_vlan_bitmap))
+		return;
+
+	bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) {
+		/* VLAN was not a member before - take a new reference */
+		if (!bf_test_index(old_vlan_bitmap, vid))
+			zebra_evpn_vl_mbr_ref(vid, zif);
+
+		/* accounted for; drop it from the old set */
+		bf_release_index(old_vlan_bitmap, vid);
+	}
+
+	/* whatever is still set in the old bitmap is no longer a member */
+	bf_for_each_set_bit(old_vlan_bitmap, vid, IF_VLAN_BITMAP_MAX)
+		zebra_evpn_vl_mbr_deref(vid, zif);
+}
diff --git a/zebra/zebra_l2.h b/zebra/zebra_l2.h
index a3c780ee09..2735d915ec 100644
--- a/zebra/zebra_l2.h
+++ b/zebra/zebra_l2.h
@@ -37,7 +37,6 @@ extern "C" {
struct zebra_l2info_brslave {
ifindex_t bridge_ifindex; /* Bridge Master */
struct interface *br_if; /* Pointer to master */
- ns_id_t ns_id; /* network namespace where bridge is */
};
/* zebra L2 interface information - bridge interface */
@@ -82,8 +81,7 @@ union zebra_l2if_info {
#define IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) ((zif)->l2info.br.vlan_aware == 1)
-extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave,
- struct zebra_ns *zns);
+extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave);
extern void
zebra_l2_unmap_slave_from_bridge(struct zebra_l2info_brslave *br_slave);
extern void
@@ -103,11 +101,14 @@ extern void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,
vlanid_t access_vlan);
extern void zebra_l2_vxlanif_del(struct interface *ifp);
extern void zebra_l2if_update_bridge_slave(struct interface *ifp,
- ifindex_t bridge_ifindex,
- ns_id_t ns_id);
+ ifindex_t bridge_ifindex);
extern void zebra_l2if_update_bond_slave(struct interface *ifp,
ifindex_t bond_ifindex);
+extern void zebra_vlan_bitmap_compute(struct interface *ifp,
+ uint32_t vid_start, uint16_t vid_end);
+extern void zebra_vlan_mbr_re_eval(struct interface *ifp,
+ bitfield_t vlan_bitmap);
#ifdef __cplusplus
}
diff --git a/zebra/zebra_memory.c b/zebra/zebra_memory.c
index a9c2c5fe58..da8121774e 100644
--- a/zebra/zebra_memory.c
+++ b/zebra/zebra_memory.c
@@ -28,3 +28,5 @@
DEFINE_MGROUP(ZEBRA, "zebra")
DEFINE_MTYPE(ZEBRA, RE, "Route Entry")
DEFINE_MTYPE(ZEBRA, RIB_DEST, "RIB destination")
+DEFINE_MTYPE(ZEBRA, ZVLAN, "VLAN")
+DEFINE_MTYPE(ZEBRA, ZVLAN_BITMAP, "VLAN bitmap")
diff --git a/zebra/zebra_netns_id.c b/zebra/zebra_netns_id.c
index 79121bb086..8de4daf439 100644
--- a/zebra/zebra_netns_id.c
+++ b/zebra/zebra_netns_id.c
@@ -159,34 +159,27 @@ static ns_id_t extract_nsid(struct nlmsghdr *nlh, char *buf)
return ns_id;
}
-/* fd_param = -1 is ignored.
- * netnspath set to null is ignored.
- * one of the 2 params is mandatory. netnspath is looked in priority
- */
-ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
+ns_id_t zebra_ns_id_get(const char *netnspath)
{
int ns_id = -1;
struct sockaddr_nl snl;
- int fd = -1, sock, ret;
+ int fd, sock, ret;
unsigned int seq;
ns_id_t return_nsid = NS_UNKNOWN;
/* netns path check */
- if (!netnspath && fd_param == -1)
+ if (!netnspath)
return NS_UNKNOWN;
- if (netnspath) {
- fd = open(netnspath, O_RDONLY);
- if (fd == -1)
- return NS_UNKNOWN;
- } else if (fd_param != -1)
- fd = fd_param;
+ fd = open(netnspath, O_RDONLY);
+ if (fd == -1)
+ return NS_UNKNOWN;
+
/* netlink socket */
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
flog_err_sys(EC_LIB_SOCKET, "netlink( %u) socket() error: %s",
sock, safe_strerror(errno));
- if (fd_param == -1)
- close(fd);
+ close(fd);
return NS_UNKNOWN;
}
memset(&snl, 0, sizeof(snl));
@@ -199,8 +192,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
"netlink( %u) socket() bind error: %s", sock,
safe_strerror(errno));
close(sock);
- if (fd_param == -1)
- close(fd);
+ close(fd);
return NS_UNKNOWN;
}
@@ -222,8 +214,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
ret = send_receive(sock, nlh, seq, buf);
if (ret < 0) {
close(sock);
- if (fd_param == -1)
- close(fd);
+ close(fd);
return NS_UNKNOWN;
}
nlh = (struct nlmsghdr *)buf;
@@ -267,8 +258,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
"netlink( %u) recvfrom() error 2 when reading: %s",
fd, safe_strerror(errno));
close(sock);
- if (fd_param == -1)
- close(fd);
+ close(fd);
if (errno == ENOTSUP) {
zlog_debug("NEWNSID locally generated");
return zebra_ns_id_get_fallback(netnspath);
@@ -289,8 +279,7 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
ret = send_receive(sock, nlh, seq, buf);
if (ret < 0) {
close(sock);
- if (fd_param == -1)
- close(fd);
+ close(fd);
return NS_UNKNOWN;
}
nlh = (struct nlmsghdr *)buf;
@@ -321,18 +310,16 @@ ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param)
} while (len != 0 && ret == 0);
}
- if (fd_param == -1)
- close(fd);
+ close(fd);
close(sock);
return return_nsid;
}
#else
-ns_id_t zebra_ns_id_get(const char *netnspath, int fd __attribute__ ((unused)))
+ns_id_t zebra_ns_id_get(const char *netnspath)
{
return zebra_ns_id_get_fallback(netnspath);
}
-
#endif /* ! defined(HAVE_NETLINK) */
#ifdef HAVE_NETNS
@@ -368,7 +355,7 @@ ns_id_t zebra_ns_id_get_default(void)
return NS_DEFAULT_INTERNAL;
}
close(fd);
- return zebra_ns_id_get((char *)NS_DEFAULT_NAME, -1);
+ return zebra_ns_id_get((char *)NS_DEFAULT_NAME);
#else /* HAVE_NETNS */
return NS_DEFAULT_INTERNAL;
#endif /* !HAVE_NETNS */
diff --git a/zebra/zebra_netns_id.h b/zebra/zebra_netns_id.h
index dd9eab18e0..7a5f6851f4 100644
--- a/zebra/zebra_netns_id.h
+++ b/zebra/zebra_netns_id.h
@@ -24,7 +24,7 @@
extern "C" {
#endif
-extern ns_id_t zebra_ns_id_get(const char *netnspath, int fd);
+extern ns_id_t zebra_ns_id_get(const char *netnspath);
extern ns_id_t zebra_ns_id_get_default(void);
#ifdef __cplusplus
diff --git a/zebra/zebra_netns_notify.c b/zebra/zebra_netns_notify.c
index 995fa6fb5a..ec7681bf23 100644
--- a/zebra/zebra_netns_notify.c
+++ b/zebra/zebra_netns_notify.c
@@ -72,14 +72,13 @@ static void zebra_ns_notify_create_context_from_entry_name(const char *name)
char *netnspath = ns_netns_pathname(NULL, name);
struct vrf *vrf;
int ret;
- ns_id_t ns_id, ns_id_external, ns_id_relative = NS_UNKNOWN;
- struct ns *default_ns;
+ ns_id_t ns_id, ns_id_external;
if (netnspath == NULL)
return;
frr_with_privs(&zserv_privs) {
- ns_id = zebra_ns_id_get(netnspath, -1);
+ ns_id = zebra_ns_id_get(netnspath);
}
if (ns_id == NS_UNKNOWN)
return;
@@ -98,21 +97,9 @@ static void zebra_ns_notify_create_context_from_entry_name(const char *name)
ns_map_nsid_with_external(ns_id, false);
return;
}
-
- default_ns = ns_get_default();
-
- /* force kernel ns_id creation in that new vrf */
- frr_with_privs(&zserv_privs) {
- ns_switch_to_netns(netnspath);
- ns_id_relative = zebra_ns_id_get(NULL, default_ns->fd);
- ns_switchback_to_initial();
- }
-
frr_with_privs(&zserv_privs) {
ret = vrf_netns_handler_create(NULL, vrf, netnspath,
- ns_id_external,
- ns_id,
- ns_id_relative);
+ ns_id_external, ns_id);
}
if (ret != CMD_SUCCESS) {
flog_warn(EC_ZEBRA_NS_VRF_CREATION_FAILED,
diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c
index 6462daf687..4e51437337 100644
--- a/zebra/zebra_ns.c
+++ b/zebra/zebra_ns.c
@@ -153,25 +153,20 @@ static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete)
/* During zebra shutdown, do partial cleanup while the async dataplane
* is still running.
*/
-int zebra_ns_early_shutdown(struct ns *ns,
- void *param_in __attribute__((unused)),
- void **param_out __attribute__((unused)))
+int zebra_ns_early_shutdown(struct ns *ns)
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
return 0;
- zebra_ns_disable_internal(zns, false);
- return NS_WALK_CONTINUE;
+ return zebra_ns_disable_internal(zns, false);
}
/* During zebra shutdown, do final cleanup
* after all dataplane work is complete.
*/
-int zebra_ns_final_shutdown(struct ns *ns,
- void *param_in __attribute__((unused)),
- void **param_out __attribute__((unused)))
+int zebra_ns_final_shutdown(struct ns *ns)
{
struct zebra_ns *zns = ns->info;
@@ -180,7 +175,7 @@ int zebra_ns_final_shutdown(struct ns *ns,
kernel_terminate(zns, true);
- return NS_WALK_CONTINUE;
+ return 0;
}
int zebra_ns_init(const char *optional_default_name)
@@ -188,16 +183,12 @@ int zebra_ns_init(const char *optional_default_name)
struct ns *default_ns;
ns_id_t ns_id;
ns_id_t ns_id_external;
- struct ns *ns;
frr_with_privs(&zserv_privs) {
ns_id = zebra_ns_id_get_default();
}
ns_id_external = ns_map_nsid_with_external(ns_id, true);
ns_init_management(ns_id_external, ns_id);
- ns = ns_get_default();
- if (ns)
- ns->relative_default_ns = ns_id;
default_ns = ns_lookup(ns_get_default_id());
if (!default_ns) {
diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
index f7d1f40782..dc79a83db0 100644
--- a/zebra/zebra_ns.h
+++ b/zebra/zebra_ns.h
@@ -67,12 +67,9 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id);
int zebra_ns_init(const char *optional_default_name);
int zebra_ns_enable(ns_id_t ns_id, void **info);
int zebra_ns_disabled(struct ns *ns);
-int zebra_ns_early_shutdown(struct ns *ns,
- void *param_in __attribute__((unused)),
- void **param_out __attribute__((unused)));
-int zebra_ns_final_shutdown(struct ns *ns,
- void *param_in __attribute__((unused)),
- void **param_out __attribute__((unused)));
+int zebra_ns_early_shutdown(struct ns *ns);
+int zebra_ns_final_shutdown(struct ns *ns);
+
int zebra_ns_config_write(struct vty *vty, struct ns *ns);
#ifdef __cplusplus
diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h
index 863c5fa71c..f73a8f2d59 100644
--- a/zebra/zebra_router.h
+++ b/zebra/zebra_router.h
@@ -125,6 +125,12 @@ struct zebra_router {
/* L3-VNI hash table (for EVPN). Only in default instance */
struct hash *l3vni_table;
+ /* Tables and other global info maintained for EVPN multihoming */
+ struct zebra_evpn_mh_info *mh_info;
+
+ /* EVPN MH broadcast domains indexed by the VID */
+ struct hash *evpn_vlan_table;
+
struct hash *rules_hash;
struct hash *ipset_hash;
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 2ca57f1c56..2ea04eee2e 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -44,6 +44,7 @@
#include "zebra/zebra_routemap.h"
#include "lib/json.h"
#include "zebra/zebra_vxlan.h"
+#include "zebra/zebra_evpn_mh.h"
#ifndef VTYSH_EXTRACT_PL
#include "zebra/zebra_vty_clippy.c"
#endif
@@ -2516,6 +2517,81 @@ DEFUN (show_evpn_global,
return CMD_SUCCESS;
}
+/*
+ * "show evpn es": display a single Ethernet Segment when an ESI is given,
+ * otherwise all of them in brief or detailed form.
+ */
+DEFPY(show_evpn_es,
+      show_evpn_es_cmd,
+      "show evpn es [NAME$esi_str] [json$json] [detail$detail]",
+      SHOW_STR
+      "EVPN\n"
+      "Ethernet Segment\n"
+      "ES ID\n"
+      JSON_STR
+      "Detailed information\n")
+{
+	bool use_json = json ? true : false;
+	esi_t esi;
+
+	/* no ESI on the command line - show every ES */
+	if (!esi_str) {
+		if (detail)
+			zebra_evpn_es_show_detail(vty, use_json);
+		else
+			zebra_evpn_es_show(vty, use_json);
+
+		return CMD_SUCCESS;
+	}
+
+	if (!str_to_esi(esi_str, &esi)) {
+		vty_out(vty, "%% Malformed ESI\n");
+		return CMD_WARNING;
+	}
+
+	zebra_evpn_es_show_esi(vty, use_json, &esi);
+
+	return CMD_SUCCESS;
+}
+
+/*
+ * "show evpn es-evi": display ES-per-EVI state for one VNI when a VNI is
+ * given, otherwise for all of them.
+ */
+DEFPY(show_evpn_es_evi,
+      show_evpn_es_evi_cmd,
+      "show evpn es-evi [vni (1-16777215)$vni] [json$json] [detail$detail]",
+      SHOW_STR
+      "EVPN\n"
+      "Ethernet Segment per EVI\n"
+      "VxLAN Network Identifier\n"
+      "VNI\n"
+      JSON_STR
+      "Detailed information\n")
+{
+	bool use_json = json ? true : false;
+	bool want_detail = detail ? true : false;
+
+	/* no VNI on the command line - show all of them */
+	if (!vni) {
+		zebra_evpn_es_evi_show(vty, use_json, want_detail);
+		return CMD_SUCCESS;
+	}
+
+	zebra_evpn_es_evi_show_vni(vty, use_json, vni, want_detail);
+
+	return CMD_SUCCESS;
+}
+
+/*
+ * "show evpn access-vlan": display one access VLAN when a VID is given,
+ * otherwise all of them in brief or detailed form.
+ */
+DEFPY(show_evpn_access_vlan,
+      show_evpn_access_vlan_cmd,
+      "show evpn access-vlan [(1-4094)$vid] [json$json] [detail$detail]",
+      SHOW_STR
+      "EVPN\n"
+      "Access VLANs\n"
+      "VLAN ID\n"
+      JSON_STR
+      "Detailed information\n")
+{
+	bool use_json = json ? true : false;
+
+	/* a specific VLAN was asked for */
+	if (vid) {
+		zebra_evpn_acc_vl_show_vid(vty, use_json, vid);
+		return CMD_SUCCESS;
+	}
+
+	if (detail)
+		zebra_evpn_acc_vl_show_detail(vty, use_json);
+	else
+		zebra_evpn_acc_vl_show(vty, use_json);
+
+	return CMD_SUCCESS;
+}
+
DEFUN (show_evpn_vni,
show_evpn_vni_cmd,
"show evpn vni [json]",
@@ -3734,6 +3810,9 @@ void zebra_vty_init(void)
install_element(VIEW_NODE, &show_evpn_vni_cmd);
install_element(VIEW_NODE, &show_evpn_vni_detail_cmd);
install_element(VIEW_NODE, &show_evpn_vni_vni_cmd);
+ install_element(VIEW_NODE, &show_evpn_es_cmd);
+ install_element(VIEW_NODE, &show_evpn_es_evi_cmd);
+ install_element(VIEW_NODE, &show_evpn_access_vlan_cmd);
install_element(VIEW_NODE, &show_evpn_rmac_vni_mac_cmd);
install_element(VIEW_NODE, &show_evpn_rmac_vni_cmd);
install_element(VIEW_NODE, &show_evpn_rmac_vni_all_cmd);
diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c
index 1bb673c940..ff09b48dcf 100644
--- a/zebra/zebra_vxlan.c
+++ b/zebra/zebra_vxlan.c
@@ -50,6 +50,7 @@
#include "zebra/zebra_vrf.h"
#include "zebra/zebra_vxlan.h"
#include "zebra/zebra_vxlan_private.h"
+#include "zebra/zebra_evpn_mh.h"
#include "zebra/zebra_router.h"
DEFINE_MTYPE_STATIC(ZEBRA, HOST_PREFIX, "host prefix");
@@ -74,7 +75,6 @@ static const struct message zvtep_flood_str[] = {
{0}
};
-
/* static function declarations */
static int ip_prefix_send_to_client(vrf_id_t vrf_id, struct prefix *p,
uint16_t cmd);
@@ -95,22 +95,26 @@ static void zvni_print_hash(struct hash_bucket *bucket, void *ctxt[]);
static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,
struct ipaddr *ip, uint8_t flags,
- uint32_t seq, int state, uint16_t cmd);
+ uint32_t seq, int state,
+ struct zebra_evpn_es *es,
+ uint16_t cmd);
static unsigned int neigh_hash_keymake(const void *p);
static void *zvni_neigh_alloc(void *p);
static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip,
- struct ethaddr *mac);
+ struct ethaddr *mac, zebra_mac_t *zmac,
+ uint32_t n_flags);
static int zvni_neigh_del(zebra_vni_t *zvni, zebra_neigh_t *n);
static void zvni_neigh_del_all(zebra_vni_t *zvni, int uninstall, int upd_client,
uint32_t flags);
static zebra_neigh_t *zvni_neigh_lookup(zebra_vni_t *zvni, struct ipaddr *ip);
static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip,
- struct ethaddr *macaddr,
- uint8_t flags, uint32_t seq);
+ struct ethaddr *mac, zebra_mac_t *zmac,
+ uint32_t flags, uint32_t seq);
static int zvni_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip,
- struct ethaddr *macaddr,
- uint8_t flags, int state);
-static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n);
+ struct ethaddr *mac,
+ uint32_t flags, int state, bool force);
+static int zvni_rem_neigh_install(zebra_vni_t *zvni,
+ zebra_neigh_t *n, bool was_static);
static int zvni_neigh_uninstall(zebra_vni_t *zvni, zebra_neigh_t *n);
static int zvni_neigh_probe(zebra_vni_t *zvni, zebra_neigh_t *n);
static zebra_vni_t *zvni_from_svi(struct interface *ifp,
@@ -157,21 +161,22 @@ static void zvni_mac_del_all(zebra_vni_t *zvni, int uninstall, int upd_client,
uint32_t flags);
static zebra_mac_t *zvni_mac_lookup(zebra_vni_t *zvni, struct ethaddr *macaddr);
static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr,
- uint8_t flags, uint32_t seq);
-static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr);
+ uint32_t flags, uint32_t seq, struct zebra_evpn_es *es);
+static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr,
+ uint32_t flags, bool force);
static zebra_vni_t *zvni_map_vlan(struct interface *ifp,
struct interface *br_if, vlanid_t vid);
-static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac);
-static int zvni_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac);
+static int zvni_rem_mac_install(zebra_vni_t *zvni,
+ zebra_mac_t *mac, bool was_static);
+static int zvni_rem_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac);
static void zvni_install_mac_hash(struct hash_bucket *bucket, void *ctxt);
static unsigned int vni_hash_keymake(const void *p);
static void *zvni_alloc(void *p);
-static zebra_vni_t *zvni_lookup(vni_t vni);
static zebra_vni_t *zvni_add(vni_t vni);
static int zvni_del(zebra_vni_t *zvni);
static int zvni_send_add_to_client(zebra_vni_t *zvni);
-static int zvni_send_del_to_client(vni_t vni);
+static int zvni_send_del_to_client(zebra_vni_t *zvni);
static void zvni_build_hash_table(void);
static int zvni_vtep_match(struct in_addr *vtep_ip, zebra_vtep_t *zvtep);
static zebra_vtep_t *zvni_vtep_find(zebra_vni_t *zvni, struct in_addr *vtep_ip);
@@ -224,6 +229,22 @@ static void zebra_vxlan_sg_cleanup(struct hash_bucket *bucket, void *arg);
static void zvni_send_mac_to_client(zebra_vni_t *zvn);
static void zvni_send_neigh_to_client(zebra_vni_t *zvni);
+static void zebra_vxlan_rem_mac_del(zebra_vni_t *zvni,
+ zebra_mac_t *zmac);
+static inline void zebra_vxlan_mac_stop_hold_timer(zebra_mac_t *mac);
+static inline bool zebra_vxlan_mac_is_static(zebra_mac_t *mac);
+static void zebra_vxlan_local_neigh_ref_mac(zebra_neigh_t *n,
+ struct ethaddr *macaddr, zebra_mac_t *mac,
+ bool send_mac_update);
+static void zebra_vxlan_local_neigh_deref_mac(zebra_neigh_t *n,
+ bool send_mac_update);
+static inline bool zebra_vxlan_neigh_is_ready_for_bgp(zebra_neigh_t *n);
+static inline bool zebra_vxlan_neigh_clear_sync_info(zebra_neigh_t *n);
+static void zebra_vxlan_sync_neigh_dp_install(zebra_neigh_t *n,
+ bool set_inactive, bool force_clear_static, const char *caller);
+static inline bool zebra_vxlan_neigh_is_static(zebra_neigh_t *neigh);
+static void zebra_vxlan_neigh_send_add_del_to_client(zebra_neigh_t *n,
+ bool old_bgp_ready, bool new_bgp_ready);
/* Private functions */
static int host_rb_entry_compare(const struct host_rb_entry *hle1,
@@ -730,6 +751,7 @@ static void zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json)
struct zebra_vrf *zvrf = NULL;
struct timeval detect_start_time = {0, 0};
char timebuf[MONOTIME_STRLEN];
+ char thread_buf[THREAD_TIMER_STRLEN];
zvrf = zebra_vrf_get_evpn();
if (!zvrf)
@@ -742,25 +764,75 @@ static void zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json)
state_str = IS_ZEBRA_NEIGH_ACTIVE(n) ? "active" : "inactive";
vty = (struct vty *)ctxt;
if (json == NULL) {
+ bool sync_info = false;
+
vty_out(vty, "IP: %s\n",
- ipaddr2str(&n->ip, buf2, sizeof(buf2)));
+ ipaddr2str(&n->ip, buf2, sizeof(buf2)));
vty_out(vty, " Type: %s\n", type_str);
vty_out(vty, " State: %s\n", state_str);
vty_out(vty, " MAC: %s\n",
- prefix_mac2str(&n->emac, buf1, sizeof(buf1)));
+ prefix_mac2str(&n->emac, buf1, sizeof(buf1)));
+ vty_out(vty, " Sync-info:");
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) {
+ vty_out(vty, " local-inactive");
+ sync_info = true;
+ }
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY)) {
+ vty_out(vty, " peer-proxy");
+ sync_info = true;
+ }
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE)) {
+ vty_out(vty, " peer-active");
+ sync_info = true;
+ }
+ if (n->hold_timer) {
+ vty_out(vty, " (ht: %s)",
+ thread_timer_to_hhmmss(
+ thread_buf,
+ sizeof(thread_buf),
+ n->hold_timer));
+ sync_info = true;
+ }
+ if (!sync_info)
+ vty_out(vty, " -");
+ vty_out(vty, "\n");
} else {
json_object_string_add(json, "ip", buf2);
json_object_string_add(json, "type", type_str);
json_object_string_add(json, "state", state_str);
json_object_string_add(json, "mac", buf1);
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE))
+ json_object_boolean_true_add(json,
+ "localInactive");
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY))
+ json_object_boolean_true_add(json,
+ "peerProxy");
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE))
+ json_object_boolean_true_add(json,
+ "peerActive");
+ if (n->hold_timer)
+ json_object_string_add(json, "peerActiveHold",
+ thread_timer_to_hhmmss(
+ thread_buf,
+ sizeof(thread_buf),
+ n->hold_timer));
}
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) {
- if (json == NULL) {
- vty_out(vty, " Remote VTEP: %s\n",
- inet_ntoa(n->r_vtep_ip));
- } else
- json_object_string_add(json, "remoteVtep",
- inet_ntoa(n->r_vtep_ip));
+ if (n->mac->es) {
+ if (json)
+ json_object_string_add(json, "remoteEs",
+ n->mac->es->esi_str);
+ else
+ vty_out(vty, " Remote ES: %s\n",
+ n->mac->es->esi_str);
+ } else {
+ if (json)
+ json_object_string_add(json, "remoteVtep",
+ inet_ntoa(n->r_vtep_ip));
+ else
+ vty_out(vty, " Remote VTEP: %s\n",
+ inet_ntoa(n->r_vtep_ip));
+ }
}
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW)) {
if (!json) {
@@ -811,6 +883,30 @@ static void zvni_print_neigh(zebra_neigh_t *n, void *ctxt, json_object *json)
}
}
+/*
+ * Emit the flag legend and the column header line of the neighbor table.
+ * The first column is left-justified to wctx->addr_width.
+ */
+static void zvni_print_neigh_hdr(struct vty *vty,
+				 struct neigh_walk_ctx *wctx)
+{
+	/* legend for the "Flags" column */
+	vty_out(vty,
+		"Flags: I=local-inactive, P=peer-active, X=peer-proxy\n");
+
+	/* column titles; a negative width asks "%*s" to left-justify */
+	vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %s\n",
+		-wctx->addr_width,
+		"Neighbor", "Type", "Flags", "State", "MAC",
+		"Remote ES/VTEP", "Seq #'s");
+}
+
+/*
+ * Build the sync-flag string for a neighbor: "P" for peer-active, "X" for
+ * peer-proxy and "I" for local-inactive, in that order, each present only
+ * when the matching flag is set.
+ *
+ * The string is formatted into flags_buf (bounded by flags_buf_sz) and
+ * flags_buf is returned.
+ */
+static char *zvni_print_neigh_flags(zebra_neigh_t *n, char *flags_buf,
+				    uint32_t flags_buf_sz)
+{
+	const char *peer_active = "";
+	const char *peer_proxy = "";
+	const char *local_inactive = "";
+
+	if (n->flags & ZEBRA_NEIGH_ES_PEER_ACTIVE)
+		peer_active = "P";
+	if (n->flags & ZEBRA_NEIGH_ES_PEER_PROXY)
+		peer_proxy = "X";
+	if (n->flags & ZEBRA_NEIGH_LOCAL_INACTIVE)
+		local_inactive = "I";
+
+	snprintf(flags_buf, flags_buf_sz, "%s%s%s",
+		 peer_active, peer_proxy, local_inactive);
+
+	return flags_buf;
+}
+
/*
* Print neighbor hash entry - called for display of all neighbors.
*/
@@ -823,6 +919,7 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt)
char buf2[INET6_ADDRSTRLEN];
struct neigh_walk_ctx *wctx = ctxt;
const char *state_str;
+ char flags_buf[6];
vty = wctx->vty;
json_vni = wctx->json;
@@ -839,9 +936,11 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt)
return;
if (json_vni == NULL) {
- vty_out(vty, "%*s %-6s %-8s %-17s %u/%u\n",
+ vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %u/%u\n",
-wctx->addr_width, buf2, "local",
- state_str, buf1, n->loc_seq, n->rem_seq);
+ zvni_print_neigh_flags(n, flags_buf,
+ sizeof(flags_buf)), state_str,
+ buf1, "", n->loc_seq, n->rem_seq);
} else {
json_object_string_add(json_row, "type", "local");
json_object_string_add(json_row, "state", state_str);
@@ -871,19 +970,25 @@ static void zvni_print_neigh_hash(struct hash_bucket *bucket, void *ctxt)
if (json_vni == NULL) {
if ((wctx->flags & SHOW_REMOTE_NEIGH_FROM_VTEP) &&
(wctx->count == 0))
- vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n",
- -wctx->addr_width, "Neighbor", "Type",
- "State", "MAC", "Remote VTEP",
- "Seq #'s");
- vty_out(vty, "%*s %-6s %-8s %-17s %-21s %u/%u\n",
- -wctx->addr_width, buf2, "remote", state_str,
- buf1, inet_ntoa(n->r_vtep_ip), n->loc_seq, n->rem_seq);
+ zvni_print_neigh_hdr(vty, wctx);
+ vty_out(vty, "%*s %-6s %-5s %-8s %-17s %-30s %u/%u\n",
+ -wctx->addr_width, buf2, "remote",
+ zvni_print_neigh_flags(n, flags_buf,
+ sizeof(flags_buf)),
+ state_str, buf1,
+ n->mac->es ? n->mac->es->esi_str :
+ inet_ntoa(n->r_vtep_ip),
+ n->loc_seq, n->rem_seq);
} else {
json_object_string_add(json_row, "type", "remote");
json_object_string_add(json_row, "state", state_str);
json_object_string_add(json_row, "mac", buf1);
- json_object_string_add(json_row, "remoteVtep",
- inet_ntoa(n->r_vtep_ip));
+ if (n->mac->es)
+ json_object_string_add(json_row, "remoteEs",
+ n->mac->es->esi_str);
+ else
+ json_object_string_add(json_row, "remoteVtep",
+ inet_ntoa(n->r_vtep_ip));
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_DEF_GW))
json_object_boolean_true_add(json_row,
"defaultGateway");
@@ -986,11 +1091,9 @@ static void zvni_print_neigh_hash_all_vni(struct hash_bucket *bucket,
wctx.json = json_vni;
hash_iterate(zvni->neigh_table, zvni_find_neigh_addr_width, &wctx);
- if (json == NULL) {
- vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n",
- -wctx.addr_width, "IP", "Type",
- "State", "MAC", "Remote VTEP", "Seq #'s");
- }
+ if (json == NULL)
+ zvni_print_neigh_hdr(vty, &wctx);
+
if (print_dup)
hash_iterate(zvni->neigh_table, zvni_print_dad_neigh_hash,
&wctx);
@@ -1163,6 +1266,35 @@ static void zl3vni_print_rmac(zebra_mac_t *zrmac, struct vty *vty,
}
}
+/*
+ * Look up the access interface and access VLAN of a MAC entry.
+ *
+ * mac  - MAC entry to inspect
+ * ifpP - out: access interface; NULL is stored for an ES-attached MAC
+ *        whose ES has no interface
+ * vid  - out: access VLAN; 0 is stored for an ES-attached MAC whose VNI
+ *        has no VxLAN interface
+ */
+static void
+zebra_vxlan_mac_get_access_info(zebra_mac_t *mac,
+				struct interface **ifpP, vlanid_t *vid)
+{
+	struct zebra_if *vxlan_zif;
+
+	/* not on an ES - the MAC's own forwarding info has both answers */
+	if (!mac->es) {
+		struct zebra_ns *zns;
+
+		*vid = mac->fwd_info.local.vid;
+		zns = zebra_ns_lookup(NS_DEFAULT);
+		*ifpP = if_lookup_by_index_per_ns(
+			zns, mac->fwd_info.local.ifindex);
+		return;
+	}
+
+	/* on an ES - the access port is the ES's interface, if it has one */
+	if (mac->es->zif)
+		*ifpP = mac->es->zif->ifp;
+	else
+		*ifpP = NULL;
+
+	/* ... and the VLAN is the access VLAN of the VNI's VxLAN interface */
+	if (!mac->zvni->vxlan_if) {
+		*vid = 0;
+		return;
+	}
+
+	vxlan_zif = mac->zvni->vxlan_if->info;
+	*vid = vxlan_zif->l2info.vxl.access_vlan;
+}
+
/*
* Print a specific MAC entry.
*/
@@ -1176,6 +1308,7 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)
struct zebra_vrf *zvrf;
struct timeval detect_start_time = {0, 0};
char timebuf[MONOTIME_STRLEN];
+ char thread_buf[THREAD_TIMER_STRLEN];
zvrf = zebra_vrf_get_evpn();
if (!zvrf)
@@ -1188,21 +1321,21 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)
json_object *json_mac = json_object_new_object();
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
- struct zebra_ns *zns;
struct interface *ifp;
- ifindex_t ifindex;
+ vlanid_t vid;
- ifindex = mac->fwd_info.local.ifindex;
- zns = zebra_ns_lookup(mac->fwd_info.local.ns_id);
- ifp = if_lookup_by_index_per_ns(zns, ifindex);
- if (!ifp)
- return;
+ zebra_vxlan_mac_get_access_info(mac,
+ &ifp, &vid);
json_object_string_add(json_mac, "type", "local");
- json_object_string_add(json_mac, "intf", ifp->name);
- json_object_int_add(json_mac, "ifindex", ifindex);
- if (mac->fwd_info.local.vid)
+ if (ifp) {
+ json_object_string_add(json_mac,
+ "intf", ifp->name);
+ json_object_int_add(json_mac,
+ "ifindex", ifp->ifindex);
+ }
+ if (vid)
json_object_int_add(json_mac, "vlan",
- mac->fwd_info.local.vid);
+ vid);
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
json_object_string_add(json_mac, "type", "remote");
json_object_string_add(
@@ -1231,6 +1364,25 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)
else
json_object_boolean_false_add(json_mac, "isDuplicate");
+ json_object_int_add(json_mac, "syncNeighCount", mac->sync_neigh_cnt);
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE))
+ json_object_boolean_true_add(json_mac,
+ "localInactive");
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY))
+ json_object_boolean_true_add(json_mac,
+ "peerProxy");
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+ json_object_boolean_true_add(json_mac,
+ "peerActive");
+ if (mac->hold_timer)
+ json_object_string_add(json_mac, "peerActiveHold",
+ thread_timer_to_hhmmss(
+ thread_buf,
+ sizeof(thread_buf),
+ mac->hold_timer));
+ if (mac->es)
+ json_object_string_add(json_mac, "esi",
+ mac->es->esi_str);
/* print all the associated neigh */
if (!listcount(mac->neigh_list))
json_object_string_add(json_mac, "neighbors", "none");
@@ -1270,22 +1422,28 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)
vty_out(vty, "MAC: %s\n", buf1);
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
- struct zebra_ns *zns;
struct interface *ifp;
- ifindex_t ifindex;
+ vlanid_t vid;
- ifindex = mac->fwd_info.local.ifindex;
- zns = zebra_ns_lookup(mac->fwd_info.local.ns_id);
- ifp = if_lookup_by_index_per_ns(zns, ifindex);
- if (!ifp)
- return;
- vty_out(vty, " Intf: %s(%u)", ifp->name, ifindex);
- if (mac->fwd_info.local.vid)
- vty_out(vty, " VLAN: %u",
- mac->fwd_info.local.vid);
+ zebra_vxlan_mac_get_access_info(mac,
+ &ifp, &vid);
+
+ if (mac->es)
+ vty_out(vty, " ESI: %s\n", mac->es->esi_str);
+
+ if (ifp)
+ vty_out(vty, " Intf: %s(%u)",
+ ifp->name, ifp->ifindex);
+ else
+ vty_out(vty, " Intf: -");
+ vty_out(vty, " VLAN: %u", vid);
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
- vty_out(vty, " Remote VTEP: %s",
- inet_ntoa(mac->fwd_info.r_vtep_ip));
+ if (mac->es)
+ vty_out(vty, " Remote ES: %s",
+ mac->es->esi_str);
+ else
+ vty_out(vty, " Remote VTEP: %s",
+ inet_ntoa(mac->fwd_info.r_vtep_ip));
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) {
vty_out(vty, " Auto Mac ");
}
@@ -1300,8 +1458,22 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)
vty_out(vty, " Remote-gateway Mac ");
vty_out(vty, "\n");
- vty_out(vty, " Local Seq: %u Remote Seq: %u", mac->loc_seq,
- mac->rem_seq);
+ vty_out(vty, " Sync-info: neigh#: %u", mac->sync_neigh_cnt);
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE))
+ vty_out(vty, " local-inactive");
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY))
+ vty_out(vty, " peer-proxy");
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+ vty_out(vty, " peer-active");
+ if (mac->hold_timer)
+ vty_out(vty, " (ht: %s)",
+ thread_timer_to_hhmmss(
+ thread_buf,
+ sizeof(thread_buf),
+ mac->hold_timer));
+ vty_out(vty, "\n");
+ vty_out(vty, " Local Seq: %u Remote Seq: %u",
+ mac->loc_seq, mac->rem_seq);
vty_out(vty, "\n");
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_DUPLICATE)) {
@@ -1338,6 +1510,22 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json)
}
}
+/* Render the MAC's sync state as a flag string in flags_buf and return it:
+ * N=sync-neighs, P=peer-active, X=peer-proxy, I=local-inactive.
+ */
+static char *zvni_print_mac_flags(zebra_mac_t *mac, char *flags_buf,
+		uint32_t flags_buf_sz)
+{
+	const char *syn = mac->sync_neigh_cnt ? "N" : "";
+	const char *act = (mac->flags & ZEBRA_MAC_ES_PEER_ACTIVE) ? "P" : "";
+	const char *prx = (mac->flags & ZEBRA_MAC_ES_PEER_PROXY) ? "X" : "";
+	const char *ina = (mac->flags & ZEBRA_MAC_LOCAL_INACTIVE) ? "I" : "";
+
+	snprintf(flags_buf, flags_buf_sz, "%s%s%s%s", syn, act, prx, ina);
+
+	return flags_buf;
+}
+
/*
* Print MAC hash entry - called for display of all MACs.
*/
@@ -1348,6 +1536,7 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt)
zebra_mac_t *mac;
char buf1[ETHER_ADDR_STRLEN];
struct mac_walk_ctx *wctx = ctxt;
+ char flags_buf[6];
vty = wctx->vty;
json_mac_hdr = wctx->json;
@@ -1359,26 +1548,24 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt)
json_mac = json_object_new_object();
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
- struct zebra_ns *zns;
- ifindex_t ifindex;
struct interface *ifp;
vlanid_t vid;
if (wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP)
return;
- zns = zebra_ns_lookup(mac->fwd_info.local.ns_id);
- ifindex = mac->fwd_info.local.ifindex;
- ifp = if_lookup_by_index_per_ns(zns, ifindex);
- if (!ifp) // unexpected
- return;
- vid = mac->fwd_info.local.vid;
- if (json_mac_hdr == NULL)
- vty_out(vty, "%-17s %-6s %-21s", buf1, "local",
- ifp->name);
- else {
+ zebra_vxlan_mac_get_access_info(mac,
+ &ifp, &vid);
+ if (json_mac_hdr == NULL) {
+ vty_out(vty, "%-17s %-6s %-5s %-30s", buf1, "local",
+ zvni_print_mac_flags(mac, flags_buf,
+ sizeof(flags_buf)),
+ ifp ? ifp->name : "-");
+ } else {
json_object_string_add(json_mac, "type", "local");
- json_object_string_add(json_mac, "intf", ifp->name);
+ if (ifp)
+ json_object_string_add(json_mac,
+ "intf", ifp->name);
}
if (vid) {
if (json_mac_hdr == NULL)
@@ -1418,14 +1605,19 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt)
if (json_mac_hdr == NULL) {
if ((wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) &&
- (wctx->count == 0)) {
+ (wctx->count == 0)) {
vty_out(vty, "\nVNI %u\n\n", wctx->zvni->vni);
- vty_out(vty, "%-17s %-6s %-21s %-5s %s\n",
- "MAC", "Type", "Intf/Remote VTEP",
+ vty_out(vty, "%-17s %-6s %-5s%-30s %-5s %s\n",
+ "MAC", "Type", "Flags",
+ "Intf/Remote ES/VTEP",
"VLAN", "Seq #'s");
}
- vty_out(vty, "%-17s %-6s %-21s %-5s %u/%u\n", buf1,
- "remote", inet_ntoa(mac->fwd_info.r_vtep_ip),
+ vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %u/%u\n", buf1,
+ "remote",
+ zvni_print_mac_flags(mac, flags_buf,
+ sizeof(flags_buf)),
+ mac->es ? mac->es->esi_str :
+ inet_ntoa(mac->fwd_info.r_vtep_ip),
"", mac->loc_seq, mac->rem_seq);
} else {
json_object_string_add(json_mac, "type", "remote");
@@ -1540,8 +1732,11 @@ static void zvni_print_mac_hash_all_vni(struct hash_bucket *bucket, void *ctxt)
if (json == NULL) {
vty_out(vty, "\nVNI %u #MACs (local and remote) %u\n\n",
zvni->vni, num_macs);
- vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", "MAC",
- "Type", "Intf/Remote VTEP", "VLAN", "Seq #'s");
+ vty_out(vty,
+ "Flags: N=sync-neighs, I=local-inactive, P=peer-active, X=peer-proxy\n");
+ vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %s\n", "MAC",
+ "Type", "Flags", "Intf/Remote ES/VTEP",
+ "VLAN", "Seq #'s");
} else
json_object_int_add(json_vni, "numMacs", num_macs);
}
@@ -2106,13 +2301,16 @@ static void zvni_print_hash_detail(struct hash_bucket *bucket, void *data)
*/
static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,
struct ipaddr *ip, uint8_t flags,
- uint32_t seq, int state, uint16_t cmd)
+ uint32_t seq, int state,
+ struct zebra_evpn_es *es,
+ uint16_t cmd)
{
char buf[ETHER_ADDR_STRLEN];
char buf2[INET6_ADDRSTRLEN];
int ipa_len;
struct zserv *client = NULL;
struct stream *s = NULL;
+ esi_t *esi = es ? &es->esi : zero_esi;
client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
/* BGP may not be running. */
@@ -2140,6 +2338,7 @@ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,
if (cmd == ZEBRA_MACIP_ADD) {
stream_putc(s, flags); /* sticky mac/gateway mac */
stream_putl(s, seq); /* sequence number */
+ stream_put(s, esi, sizeof(esi_t));
} else {
stream_putl(s, state); /* state - active/inactive */
}
@@ -2150,10 +2349,11 @@ static int zvni_macip_send_msg_to_client(vni_t vni, struct ethaddr *macaddr,
if (IS_ZEBRA_DEBUG_VXLAN)
zlog_debug(
- "Send MACIP %s flags 0x%x MAC %s IP %s seq %u L2-VNI %u to %s",
+ "Send MACIP %s f 0x%x MAC %s IP %s seq %u L2-VNI %u ESI %s to %s",
(cmd == ZEBRA_MACIP_ADD) ? "Add" : "Del", flags,
prefix_mac2str(macaddr, buf, sizeof(buf)),
ipaddr2str(ip, buf2, sizeof(buf2)), seq, vni,
+ es ? es->esi_str : "-",
zebra_route_string(client->proto));
if (cmd == ZEBRA_MACIP_ADD)
@@ -2222,26 +2422,26 @@ static void *zvni_neigh_alloc(void *p)
* Add neighbor entry.
*/
static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip,
- struct ethaddr *mac)
+ struct ethaddr *mac, zebra_mac_t *zmac,
+ uint32_t n_flags)
{
zebra_neigh_t tmp_n;
zebra_neigh_t *n = NULL;
- zebra_mac_t *zmac = NULL;
memset(&tmp_n, 0, sizeof(zebra_neigh_t));
memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr));
n = hash_get(zvni->neigh_table, &tmp_n, zvni_neigh_alloc);
assert(n);
- memcpy(&n->emac, mac, ETH_ALEN);
n->state = ZEBRA_NEIGH_INACTIVE;
n->zvni = zvni;
n->dad_ip_auto_recovery_timer = NULL;
+ n->flags = n_flags;
- /* Associate the neigh to mac */
- zmac = zvni_mac_lookup(zvni, mac);
- if (zmac)
- listnode_add_sort(zmac->neigh_list, n);
+ if (!zmac)
+ zmac = zvni_mac_lookup(zvni, mac);
+ zebra_vxlan_local_neigh_ref_mac(n, mac,
+ zmac, false /* send_mac_update */);
return n;
}
@@ -2252,11 +2452,9 @@ static zebra_neigh_t *zvni_neigh_add(zebra_vni_t *zvni, struct ipaddr *ip,
static int zvni_neigh_del(zebra_vni_t *zvni, zebra_neigh_t *n)
{
zebra_neigh_t *tmp_n;
- zebra_mac_t *zmac = NULL;
- zmac = zvni_mac_lookup(zvni, &n->emac);
- if (zmac)
- listnode_delete(zmac->neigh_list, n);
+ if (n->mac)
+ listnode_delete(n->mac->neigh_list, n);
/* Cancel auto recovery */
THREAD_OFF(n->dad_ip_auto_recovery_timer);
@@ -2284,10 +2482,18 @@ static void zvni_neigh_del_hash_entry(struct hash_bucket *bucket, void *arg)
&& IPV4_ADDR_SAME(&n->r_vtep_ip, &wctx->r_vtep_ip))) {
if (wctx->upd_client && (n->flags & ZEBRA_NEIGH_LOCAL))
zvni_neigh_send_del_to_client(wctx->zvni->vni, &n->ip,
- &n->emac, 0, n->state);
-
- if (wctx->uninstall)
- zvni_neigh_uninstall(wctx->zvni, n);
+ &n->emac, n->flags, n->state,
+ false /*force*/);
+
+ if (wctx->uninstall) {
+ if (zebra_vxlan_neigh_is_static(n))
+ zebra_vxlan_sync_neigh_dp_install(n,
+ false /* set_inactive */,
+ true /* force_clear_static */,
+ __func__);
+ if ((n->flags & ZEBRA_NEIGH_REMOTE))
+ zvni_neigh_uninstall(wctx->zvni, n);
+ }
zvni_neigh_del(wctx->zvni, n);
}
@@ -2335,8 +2541,7 @@ static zebra_neigh_t *zvni_neigh_lookup(zebra_vni_t *zvni, struct ipaddr *ip)
* locally or undergoing any other change (such as sequence number).
*/
static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni,
- zebra_mac_t *zmac,
- bool seq_change)
+ zebra_mac_t *zmac, bool seq_change, bool es_change)
{
zebra_neigh_t *n = NULL;
struct listnode *node = NULL;
@@ -2358,7 +2563,8 @@ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni,
*/
for (ALL_LIST_ELEMENTS_RO(zmac->neigh_list, node, n)) {
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) {
- if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change) {
+ if (IS_ZEBRA_NEIGH_INACTIVE(n) || seq_change ||
+ es_change) {
ZEBRA_NEIGH_SET_ACTIVE(n);
n->loc_seq = zmac->loc_seq;
if (!(zvrf->dup_addr_detect &&
@@ -2366,7 +2572,7 @@ static void zvni_process_neigh_on_local_mac_change(zebra_vni_t *zvni,
ZEBRA_NEIGH_DUPLICATE)))
zvni_neigh_send_add_to_client(
zvni->vni, &n->ip, &n->emac,
- n->flags, n->loc_seq);
+ n->mac, n->flags, n->loc_seq);
}
}
}
@@ -2400,7 +2606,9 @@ static void zvni_process_neigh_on_local_mac_del(zebra_vni_t *zvni,
ZEBRA_NEIGH_SET_INACTIVE(n);
n->loc_seq = 0;
zvni_neigh_send_del_to_client(zvni->vni, &n->ip,
- &n->emac, 0, ZEBRA_NEIGH_ACTIVE);
+ &n->emac, n->flags,
+ ZEBRA_NEIGH_ACTIVE,
+ false /*force*/);
}
}
}
@@ -2431,7 +2639,9 @@ static void zvni_process_neigh_on_remote_mac_add(zebra_vni_t *zvni,
ZEBRA_NEIGH_SET_INACTIVE(n);
n->loc_seq = 0;
zvni_neigh_send_del_to_client(zvni->vni, &n->ip,
- &n->emac, 0, ZEBRA_NEIGH_ACTIVE);
+ &n->emac, n->flags,
+ ZEBRA_NEIGH_ACTIVE,
+ false /* force */);
}
}
}
@@ -2464,11 +2674,27 @@ static void zvni_probe_neigh_on_mac_add(zebra_vni_t *zvni, zebra_mac_t *zmac)
*/
static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip,
struct ethaddr *macaddr,
- uint8_t neigh_flags,
+ zebra_mac_t *zmac,
+ uint32_t neigh_flags,
uint32_t seq)
{
uint8_t flags = 0;
+ if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_LOCAL_INACTIVE)) {
+ /* host reachability has not been verified locally */
+
+ /* if no ES peer is claiming reachability we can't advertise
+ * the entry
+ */
+ if (!CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE))
+ return 0;
+
+ /* ES peers are claiming reachability; we will
+ * advertise the entry but with a proxy flag
+ */
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT);
+ }
+
if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_DEF_GW))
SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW);
/* Set router flag (R-bit) based on local neigh entry add */
@@ -2478,24 +2704,34 @@ static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip,
SET_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP);
return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags,
- seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD);
+ seq, ZEBRA_NEIGH_ACTIVE,
+ zmac ? zmac->es : NULL,
+ ZEBRA_MACIP_ADD);
}
/*
* Inform BGP about local neighbor deletion.
*/
static int zvni_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip,
- struct ethaddr *macaddr, uint8_t flags,
- int state)
+ struct ethaddr *macaddr, uint32_t flags,
+ int state, bool force)
{
+ if (!force) {
+ if (CHECK_FLAG(flags, ZEBRA_NEIGH_LOCAL_INACTIVE) &&
+ !CHECK_FLAG(flags, ZEBRA_NEIGH_ES_PEER_ACTIVE))
+ /* the neigh was not advertised - nothing to delete */
+ return 0;
+ }
+
return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags,
- 0, state, ZEBRA_MACIP_DEL);
+ 0, state, NULL, ZEBRA_MACIP_DEL);
}
/*
* Install remote neighbor into the kernel.
*/
-static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n)
+static int zvni_rem_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n,
+ bool was_static)
{
struct zebra_if *zif;
struct zebra_l2info_vxlan *vxl;
@@ -2520,7 +2756,8 @@ static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n)
flags |= DPLANE_NTF_ROUTER;
ZEBRA_NEIGH_SET_ACTIVE(n);
- dplane_neigh_add(vlan_if, &n->ip, &n->emac, flags);
+ dplane_rem_neigh_add(vlan_if, &n->ip, &n->emac, flags,
+ was_static);
return ret;
}
@@ -2555,7 +2792,7 @@ static int zvni_neigh_uninstall(zebra_vni_t *zvni, zebra_neigh_t *n)
ZEBRA_NEIGH_SET_INACTIVE(n);
n->loc_seq = 0;
- dplane_neigh_delete(vlan_if, &n->ip);
+ dplane_rem_neigh_delete(vlan_if, &n->ip);
return 0;
}
@@ -2578,7 +2815,7 @@ static int zvni_neigh_probe(zebra_vni_t *zvni, zebra_neigh_t *n)
if (!vlan_if)
return -1;
- dplane_neigh_update(vlan_if, &n->ip, &n->emac);
+ dplane_rem_neigh_update(vlan_if, &n->ip, &n->emac);
return 0;
}
@@ -2594,7 +2831,7 @@ static void zvni_install_neigh_hash(struct hash_bucket *bucket, void *ctxt)
n = (zebra_neigh_t *)bucket->data;
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE))
- zvni_neigh_install(wctx->zvni, n);
+ zvni_rem_neigh_install(wctx->zvni, n, false /*was_static*/);
}
/* Get the VRR interface for SVI if any */
@@ -2729,12 +2966,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,
zebra_mac_t *mac = NULL;
struct zebra_if *zif = NULL;
struct zebra_l2info_vxlan *vxl = NULL;
- struct zebra_vrf *zvrf;
- ns_id_t local_ns_id = NS_DEFAULT;
- zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id);
- if (zvrf && zvrf->zns)
- local_ns_id = zvrf->zns->ns_id;
zif = zvni->vxlan_if->info;
if (!zif)
return -1;
@@ -2759,12 +2991,11 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,
SET_FLAG(mac->flags, ZEBRA_MAC_DEF_GW);
memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
mac->fwd_info.local.ifindex = ifp->ifindex;
- mac->fwd_info.local.ns_id = local_ns_id;
mac->fwd_info.local.vid = vxl->access_vlan;
n = zvni_neigh_lookup(zvni, ip);
if (!n) {
- n = zvni_neigh_add(zvni, ip, macaddr);
+ n = zvni_neigh_add(zvni, ip, macaddr, mac, 0);
if (!n) {
flog_err(
EC_ZEBRA_MAC_ADD_FAILED,
@@ -2798,7 +3029,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,
prefix_mac2str(macaddr, buf, sizeof(buf)),
ipaddr2str(ip, buf2, sizeof(buf2)), n->flags);
- zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr,
+ zvni_neigh_send_add_to_client(zvni->vni, ip, &n->emac, n->mac,
n->flags, n->loc_seq);
} else if (advertise_svi_macip_enabled(zvni)) {
@@ -2810,7 +3041,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni,
prefix_mac2str(macaddr, buf, sizeof(buf)),
ipaddr2str(ip, buf2, sizeof(buf2)), n->flags);
- zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr,
+ zvni_neigh_send_add_to_client(zvni->vni, ip, &n->emac, n->mac,
n->flags, n->loc_seq);
}
@@ -2859,7 +3090,8 @@ static int zvni_gw_macip_del(struct interface *ifp, zebra_vni_t *zvni,
/* Remove neighbor from BGP. */
zvni_neigh_send_del_to_client(zvni->vni, &n->ip, &n->emac,
- ZEBRA_MACIP_TYPE_GW, ZEBRA_NEIGH_ACTIVE);
+ n->flags, ZEBRA_NEIGH_ACTIVE,
+ false /*force*/);
/* Delete this neighbor entry. */
zvni_neigh_del(zvni, n);
@@ -3007,11 +3239,36 @@ static void zvni_svi_macip_del_for_vni_hash(struct hash_bucket *bucket,
return;
}
+/* Debug-log a local neigh update; no-op unless EVPN-MH neigh debug is on. */
+static inline void zvni_local_neigh_update_log(const char *pfx,
+		zebra_neigh_t *n, bool is_router, bool local_inactive,
+		bool old_bgp_ready, bool new_bgp_ready,
+		bool inform_dataplane, bool inform_bgp, const char *sfx)
+{
+	char ip_str[INET6_ADDRSTRLEN];
+	char mac_str[ETHER_ADDR_STRLEN];
+
+	if (!IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		return;
+
+	zlog_debug("%s neigh vni %u ip %s mac %s f 0x%x%s%s%s%s%s%s %s",
+		   pfx, n->zvni->vni,
+		   ipaddr2str(&n->ip, ip_str, sizeof(ip_str)),
+		   prefix_mac2str(&n->emac, mac_str, sizeof(mac_str)),
+		   n->flags, is_router ? " router" : "",
+		   local_inactive ? " local-inactive" : "",
+		   old_bgp_ready ? " old_bgp_ready" : "",
+		   new_bgp_ready ? " new_bgp_ready" : "",
+		   inform_dataplane ? " inform_dp" : "",
+		   inform_bgp ? " inform_bgp" : "", sfx);
+}
+
static int zvni_local_neigh_update(zebra_vni_t *zvni,
struct interface *ifp,
struct ipaddr *ip,
struct ethaddr *macaddr,
- bool is_router)
+ bool is_router,
+ bool local_inactive, bool dp_static)
{
char buf[ETHER_ADDR_STRLEN];
char buf2[INET6_ADDRSTRLEN];
@@ -3025,6 +3282,11 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
bool neigh_was_remote = false;
bool do_dad = false;
struct in_addr vtep_ip = {.s_addr = 0};
+ bool inform_dataplane = false;
+ bool created = false;
+ bool new_static = false;
+ bool old_bgp_ready = false;
+ bool new_bgp_ready;
/* Check if the MAC exists. */
zmac = zvni_mac_lookup(zvni, macaddr);
@@ -3072,7 +3334,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
n = zvni_neigh_lookup(zvni, ip);
if (!n) {
/* New neighbor - create */
- n = zvni_neigh_add(zvni, ip, macaddr);
+ n = zvni_neigh_add(zvni, ip, macaddr, zmac, 0);
if (!n) {
flog_err(
EC_ZEBRA_MAC_ADD_FAILED,
@@ -3085,17 +3347,28 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
/* Set "local" forwarding info. */
SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL);
n->ifindex = ifp->ifindex;
+ created = true;
} else {
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) {
bool mac_different;
bool cur_is_router;
+ bool old_local_inactive;
+
+ old_local_inactive = !!CHECK_FLAG(n->flags,
+ ZEBRA_NEIGH_LOCAL_INACTIVE);
+
+ old_bgp_ready =
+ zebra_vxlan_neigh_is_ready_for_bgp(n);
/* Note any changes and see if of interest to BGP. */
- mac_different = (memcmp(n->emac.octet,
- macaddr->octet, ETH_ALEN) != 0) ? 1 : 0;
+ mac_different = !!memcmp(&n->emac,
+ macaddr, ETH_ALEN);
cur_is_router = !!CHECK_FLAG(n->flags,
ZEBRA_NEIGH_ROUTER_FLAG);
- if (!mac_different && is_router == cur_is_router) {
+ new_static = zebra_vxlan_neigh_is_static(n);
+ if (!mac_different && is_router == cur_is_router &&
+ old_local_inactive == local_inactive &&
+ dp_static != new_static) {
if (IS_ZEBRA_DEBUG_VXLAN)
zlog_debug(
" Ignoring entry mac is the same and is_router == cur_is_router");
@@ -3103,7 +3376,9 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
return 0;
}
+ old_zmac = n->mac;
if (!mac_different) {
+ /* XXX - cleanup this code duplication */
bool is_neigh_freezed = false;
/* Only the router flag has changed. */
@@ -3114,6 +3389,15 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
UNSET_FLAG(n->flags,
ZEBRA_NEIGH_ROUTER_FLAG);
+ if (local_inactive)
+ SET_FLAG(n->flags,
+ ZEBRA_NEIGH_LOCAL_INACTIVE);
+ else
+ UNSET_FLAG(n->flags,
+ ZEBRA_NEIGH_LOCAL_INACTIVE);
+ new_bgp_ready =
+ zebra_vxlan_neigh_is_ready_for_bgp(n);
+
/* Neigh is in freeze state and freeze action
* is enabled, do not send update to client.
*/
@@ -3122,13 +3406,20 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
CHECK_FLAG(n->flags,
ZEBRA_NEIGH_DUPLICATE));
- if (IS_ZEBRA_NEIGH_ACTIVE(n) &&
- !is_neigh_freezed)
- return zvni_neigh_send_add_to_client(
- zvni->vni, ip, macaddr,
- n->flags, n->loc_seq);
- else {
- if (IS_ZEBRA_DEBUG_VXLAN)
+ zvni_local_neigh_update_log("local", n,
+ is_router, local_inactive,
+ old_bgp_ready, new_bgp_ready,
+ false, false, "flag-update");
+
+ /* if the neigh can no longer be advertised
+ * remove it from bgp
+ */
+ if (!is_neigh_freezed) {
+ zebra_vxlan_neigh_send_add_del_to_client(
+ n, old_bgp_ready, new_bgp_ready);
+ } else {
+ if (IS_ZEBRA_DEBUG_VXLAN &&
+ IS_ZEBRA_NEIGH_ACTIVE(n))
zlog_debug(
" Neighbor active and frozen");
}
@@ -3141,25 +3432,32 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
* We also need to update the MAC's sequence number
* in different situations.
*/
- if (IS_ZEBRA_NEIGH_ACTIVE(n))
+ if (old_bgp_ready) {
zvni_neigh_send_del_to_client(zvni->vni, &n->ip,
- &n->emac, 0, n->state);
- old_zmac = zvni_mac_lookup(zvni, &n->emac);
+ &n->emac, n->flags, n->state,
+ false /*force*/);
+ old_bgp_ready = false;
+ }
if (old_zmac) {
old_mac_seq = CHECK_FLAG(old_zmac->flags,
ZEBRA_MAC_REMOTE) ?
old_zmac->rem_seq : old_zmac->loc_seq;
neigh_mac_change = upd_mac_seq = true;
- listnode_delete(old_zmac->neigh_list, n);
- zvni_deref_ip2mac(zvni, old_zmac);
+ zebra_vxlan_local_neigh_deref_mac(n,
+ true /* send_mac_update */);
}
+ /* if mac changes abandon peer flags and tell
+ * dataplane to clear the static flag
+ */
+ if (zebra_vxlan_neigh_clear_sync_info(n))
+ inform_dataplane = true;
/* Update the forwarding info. */
n->ifindex = ifp->ifindex;
- memcpy(&n->emac, macaddr, ETH_ALEN);
/* Link to new MAC */
- listnode_add_sort(zmac->neigh_list, n);
+ zebra_vxlan_local_neigh_ref_mac(n, macaddr, zmac,
+ true /* send_mac_update */);
} else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) {
/*
* Neighbor has moved from remote to local. Its
@@ -3167,7 +3465,7 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
*/
if (memcmp(n->emac.octet, macaddr->octet,
ETH_ALEN) != 0) {
- old_zmac = zvni_mac_lookup(zvni, &n->emac);
+ old_zmac = n->mac;
if (old_zmac) {
old_mac_seq = CHECK_FLAG(
old_zmac->flags,
@@ -3175,14 +3473,13 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
old_zmac->rem_seq :
old_zmac->loc_seq;
neigh_mac_change = upd_mac_seq = true;
- listnode_delete(old_zmac->neigh_list,
- n);
- zvni_deref_ip2mac(zvni, old_zmac);
+ zebra_vxlan_local_neigh_deref_mac(n,
+ true /* send_update */);
}
/* Link to new MAC */
- memcpy(&n->emac, macaddr, ETH_ALEN);
- listnode_add_sort(zmac->neigh_list, n);
+ zebra_vxlan_local_neigh_ref_mac(n, macaddr,
+ zmac, true /*send_update*/);
}
/* Based on Mobility event Scenario-B from the
* draft, neigh's previous state was remote treat this
@@ -3211,12 +3508,27 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
MAX(seq1, seq2) : zmac->loc_seq;
}
+ if (local_inactive)
+ SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE);
+ else
+ UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE);
+
/* Mark Router flag (R-bit) */
if (is_router)
SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
else
UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
+ /* if the dataplane thinks that this is a sync entry but
+ * zebra doesn't, we need to reconcile the difference
+ * by re-installing the dataplane entry
+ */
+ if (dp_static) {
+ new_static = zebra_vxlan_neigh_is_static(n);
+ if (!new_static)
+ inform_dataplane = true;
+ }
+
/* Check old and/or new MAC detected as duplicate mark
* the neigh as duplicate
*/
@@ -3239,16 +3551,28 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
zebra_vxlan_dup_addr_detect_for_neigh(zvrf, n, vtep_ip, do_dad,
&neigh_on_hold, true);
+ if (inform_dataplane)
+ zebra_vxlan_sync_neigh_dp_install(n, false /* set_inactive */,
+ false /* force_clear_static */, __func__);
+
/* Before we program this in BGP, we need to check if MAC is locally
* learnt. If not, force neighbor to be inactive and reset its seq.
*/
if (!CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL)) {
+ zvni_local_neigh_update_log("local",
+ n, is_router, local_inactive,
+ false, false, inform_dataplane, false,
+ "auto-mac");
ZEBRA_NEIGH_SET_INACTIVE(n);
n->loc_seq = 0;
zmac->loc_seq = mac_new_seq;
return 0;
}
+ zvni_local_neigh_update_log("local",
+ n, is_router, local_inactive, false, false, inform_dataplane,
+ true, created ? "created" : "updated");
+
/* If the MAC's sequence number has changed, inform the MAC and all
* neighbors associated with the MAC to BGP, else just inform this
* neighbor.
@@ -3260,9 +3584,10 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
zvni->vni, zmac->loc_seq, mac_new_seq);
zmac->loc_seq = mac_new_seq;
if (zvni_mac_send_add_to_client(zvni->vni, macaddr,
- zmac->flags, zmac->loc_seq))
+ zmac->flags, zmac->loc_seq, zmac->es))
return -1;
- zvni_process_neigh_on_local_mac_change(zvni, zmac, 1);
+ zvni_process_neigh_on_local_mac_change(zvni, zmac, 1,
+ 0 /*es_change*/);
return 0;
}
@@ -3270,9 +3595,10 @@ static int zvni_local_neigh_update(zebra_vni_t *zvni,
if (!neigh_on_hold) {
ZEBRA_NEIGH_SET_ACTIVE(n);
-
- return zvni_neigh_send_add_to_client(zvni->vni, ip, macaddr,
- n->flags, n->loc_seq);
+ new_bgp_ready =
+ zebra_vxlan_neigh_is_ready_for_bgp(n);
+ zebra_vxlan_neigh_send_add_del_to_client(n,
+ old_bgp_ready, new_bgp_ready);
} else {
if (IS_ZEBRA_DEBUG_VXLAN)
zlog_debug(" Neighbor on hold not sending");
@@ -3300,7 +3626,7 @@ static int zvni_remote_neigh_update(zebra_vni_t *zvni,
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) {
#ifdef GNU_LINUX
if (state & NUD_STALE)
- zvni_neigh_install(zvni, n);
+ zvni_rem_neigh_install(zvni, n, false /*was_static*/);
#endif
} else {
/* We got a "remote" neighbor notification for an entry
@@ -3318,7 +3644,7 @@ static int zvni_remote_neigh_update(zebra_vni_t *zvni,
return -1;
}
- UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL);
+ UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_LOCAL_FLAGS);
SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE);
ZEBRA_NEIGH_SET_ACTIVE(n);
n->r_vtep_ip = zmac->fwd_info.r_vtep_ip;
@@ -3389,6 +3715,15 @@ static zebra_mac_t *zvni_mac_add(zebra_vni_t *zvni, struct ethaddr *macaddr)
mac->neigh_list = list_new();
mac->neigh_list->cmp = neigh_list_cmp;
+ if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) {
+ char buf[ETHER_ADDR_STRLEN];
+
+ zlog_debug("%s: MAC %s flags 0x%x",
+ __func__,
+ prefix_mac2str(&mac->macaddr,
+ buf, sizeof(buf)),
+ mac->flags);
+ }
return mac;
}
@@ -3399,6 +3734,22 @@ static int zvni_mac_del(zebra_vni_t *zvni, zebra_mac_t *mac)
{
zebra_mac_t *tmp_mac;
+ if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC) {
+ char buf[ETHER_ADDR_STRLEN];
+
+ zlog_debug("%s: MAC %s flags 0x%x",
+ __func__,
+ prefix_mac2str(&mac->macaddr,
+ buf, sizeof(buf)),
+ mac->flags);
+ }
+
+ /* force de-ref any ES entry linked to the MAC */
+ zebra_evpn_es_mac_deref_entry(mac);
+
+ /* Cancel proxy hold timer */
+ zebra_vxlan_mac_stop_hold_timer(mac);
+
/* Cancel auto recovery */
THREAD_OFF(mac->dad_mac_auto_recovery_timer);
@@ -3454,10 +3805,18 @@ static void zvni_mac_del_hash_entry(struct hash_bucket *bucket, void *arg)
if (zvni_check_mac_del_from_db(wctx, mac)) {
if (wctx->upd_client && (mac->flags & ZEBRA_MAC_LOCAL)) {
zvni_mac_send_del_to_client(wctx->zvni->vni,
- &mac->macaddr);
+ &mac->macaddr, mac->flags, false);
+ }
+ if (wctx->uninstall) {
+ if (zebra_vxlan_mac_is_static(mac))
+ zebra_vxlan_sync_mac_dp_install(mac,
+ false /* set_inactive */,
+ true /* force_clear_static */,
+ __func__);
+
+ if (mac->flags & ZEBRA_MAC_REMOTE)
+ zvni_rem_mac_uninstall(wctx->zvni, mac);
}
- if (wctx->uninstall)
- zvni_mac_uninstall(wctx->zvni, mac);
zvni_mac_del(wctx->zvni, mac);
}
@@ -3504,88 +3863,51 @@ static zebra_mac_t *zvni_mac_lookup(zebra_vni_t *zvni, struct ethaddr *mac)
* Inform BGP about local MAC addition.
*/
static int zvni_mac_send_add_to_client(vni_t vni, struct ethaddr *macaddr,
- uint8_t mac_flags, uint32_t seq)
+ uint32_t mac_flags, uint32_t seq, struct zebra_evpn_es *es)
{
uint8_t flags = 0;
+ if (CHECK_FLAG(mac_flags, ZEBRA_MAC_LOCAL_INACTIVE)) {
+ /* host reachability has not been verified locally */
+
+ /* if no ES peer is claiming reachability we can't advertise the
+ * entry
+ */
+ if (!CHECK_FLAG(mac_flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+ return 0;
+
+ /* ES peers are claiming reachability; we will
+ * advertise the entry but with a proxy flag
+ */
+ SET_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT);
+ }
+
if (CHECK_FLAG(mac_flags, ZEBRA_MAC_STICKY))
SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY);
if (CHECK_FLAG(mac_flags, ZEBRA_MAC_DEF_GW))
SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW);
return zvni_macip_send_msg_to_client(vni, macaddr, NULL, flags,
- seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD);
+ seq, ZEBRA_NEIGH_ACTIVE, es,
+ ZEBRA_MACIP_ADD);
}
/*
* Inform BGP about local MAC deletion.
*/
-static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr)
+static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr,
+ uint32_t flags, bool force)
{
- return zvni_macip_send_msg_to_client(vni, macaddr, NULL, 0 /* flags */,
- 0 /* seq */, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_DEL);
-}
-
-struct zvni_from_svi_param {
- struct interface *br_if;
- struct interface *svi_if;
- struct zebra_if *zif;
- uint8_t bridge_vlan_aware;
- vlanid_t vid;
-};
-
-static int zvni_map_vlan_ns(struct ns *ns,
- void *_in_param,
- void **_p_zvni)
-{
- struct zebra_ns *zns = ns->info;
- struct route_node *rn;
- struct interface *br_if;
- zebra_vni_t **p_zvni = (zebra_vni_t **)_p_zvni;
- zebra_vni_t *zvni;
- struct interface *tmp_if = NULL;
- struct zebra_if *zif;
- struct zebra_l2info_vxlan *vxl = NULL;
- struct zvni_from_svi_param *in_param =
- (struct zvni_from_svi_param *)_in_param;
- int found = 0;
-
- if (!in_param)
- return NS_WALK_STOP;
- br_if = in_param->br_if;
- zif = in_param->zif;
- assert(zif);
- assert(br_if);
-
- /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */
- /* TODO: Optimize with a hash. */
- for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
- tmp_if = (struct interface *)rn->info;
- if (!tmp_if)
- continue;
- zif = tmp_if->info;
- if (!zif || zif->zif_type != ZEBRA_IF_VXLAN)
- continue;
- if (!if_is_operative(tmp_if))
- continue;
- vxl = &zif->l2info.vxl;
-
- if (zif->brslave_info.br_if != br_if)
- continue;
-
- if (!in_param->bridge_vlan_aware
- || vxl->access_vlan == in_param->vid) {
- found = 1;
- break;
- }
+ if (!force) {
+ if (CHECK_FLAG(flags, ZEBRA_MAC_LOCAL_INACTIVE) &&
+ !CHECK_FLAG(flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+ /* the host was not advertised - nothing to delete */
+ return 0;
}
- if (!found)
- return NS_WALK_CONTINUE;
- zvni = zvni_lookup(vxl->vni);
- if (p_zvni)
- *p_zvni = zvni;
- return NS_WALK_STOP;
+ return zvni_macip_send_msg_to_client(vni, macaddr, NULL, 0 /* flags */,
+ 0 /* seq */, ZEBRA_NEIGH_ACTIVE, NULL,
+ ZEBRA_MACIP_DEL);
}
/*
@@ -3595,51 +3917,25 @@ static int zvni_map_vlan_ns(struct ns *ns,
static zebra_vni_t *zvni_map_vlan(struct interface *ifp,
struct interface *br_if, vlanid_t vid)
{
- struct zebra_if *zif;
- struct zebra_l2info_bridge *br;
- zebra_vni_t **p_zvni;
- zebra_vni_t *zvni = NULL;
- struct zvni_from_svi_param in_param;
-
- /* Determine if bridge is VLAN-aware or not */
- zif = br_if->info;
- assert(zif);
- br = &zif->l2info.br;
- in_param.bridge_vlan_aware = br->vlan_aware;
- in_param.vid = vid;
- in_param.br_if = br_if;
- in_param.zif = zif;
- p_zvni = &zvni;
-
- ns_walk_func(zvni_map_vlan_ns,
- (void *)&in_param,
- (void **)p_zvni);
- return zvni;
-}
-
-static int zvni_from_svi_ns(struct ns *ns,
- void *_in_param,
- void **_p_zvni)
-{
- struct zebra_ns *zns = ns->info;
+ struct zebra_ns *zns;
struct route_node *rn;
- struct interface *br_if;
- zebra_vni_t **p_zvni = (zebra_vni_t **)_p_zvni;
- zebra_vni_t *zvni;
struct interface *tmp_if = NULL;
struct zebra_if *zif;
+ struct zebra_l2info_bridge *br;
struct zebra_l2info_vxlan *vxl = NULL;
- struct zvni_from_svi_param *in_param =
- (struct zvni_from_svi_param *)_in_param;
+ uint8_t bridge_vlan_aware;
+ zebra_vni_t *zvni;
int found = 0;
- if (!in_param)
- return NS_WALK_STOP;
- br_if = in_param->br_if;
- zif = in_param->zif;
+ /* Determine if bridge is VLAN-aware or not */
+ zif = br_if->info;
assert(zif);
+ br = &zif->l2info.br;
+ bridge_vlan_aware = br->vlan_aware;
+ /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */
/* TODO: Optimize with a hash. */
+ zns = zebra_ns_lookup(NS_DEFAULT);
for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
tmp_if = (struct interface *)rn->info;
if (!tmp_if)
@@ -3654,20 +3950,17 @@ static int zvni_from_svi_ns(struct ns *ns,
if (zif->brslave_info.br_if != br_if)
continue;
- if (!in_param->bridge_vlan_aware
- || vxl->access_vlan == !in_param->vid) {
+ if (!bridge_vlan_aware || vxl->access_vlan == vid) {
found = 1;
break;
}
}
if (!found)
- return NS_WALK_CONTINUE;
+ return NULL;
zvni = zvni_lookup(vxl->vni);
- if (p_zvni)
- *p_zvni = zvni;
- return NS_WALK_STOP;
+ return zvni;
}
/*
@@ -3677,11 +3970,16 @@ static int zvni_from_svi_ns(struct ns *ns,
static zebra_vni_t *zvni_from_svi(struct interface *ifp,
struct interface *br_if)
{
- struct zebra_l2info_bridge *br;
- zebra_vni_t *zvni = NULL;
- zebra_vni_t **p_zvni;
+ struct zebra_ns *zns;
+ struct route_node *rn;
+ struct interface *tmp_if = NULL;
struct zebra_if *zif;
- struct zvni_from_svi_param in_param;
+ struct zebra_l2info_bridge *br;
+ struct zebra_l2info_vxlan *vxl = NULL;
+ uint8_t bridge_vlan_aware;
+ vlanid_t vid = 0;
+ zebra_vni_t *zvni;
+ int found = 0;
if (!br_if)
return NULL;
@@ -3694,10 +3992,8 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp,
zif = br_if->info;
assert(zif);
br = &zif->l2info.br;
- in_param.bridge_vlan_aware = br->vlan_aware;
- in_param.vid = 0;
-
- if (in_param.bridge_vlan_aware) {
+ bridge_vlan_aware = br->vlan_aware;
+ if (bridge_vlan_aware) {
struct zebra_l2info_vlan *vl;
if (!IS_ZEBRA_IF_VLAN(ifp))
@@ -3706,54 +4002,37 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp,
zif = ifp->info;
assert(zif);
vl = &zif->l2info.vl;
- in_param.vid = vl->vid;
+ vid = vl->vid;
}
- in_param.br_if = br_if;
- in_param.zif = zif;
- p_zvni = &zvni;
/* See if this interface (or interface plus VLAN Id) maps to a VxLAN */
- ns_walk_func(zvni_from_svi_ns,
- (void *)&in_param,
- (void **)p_zvni);
- return zvni;
-}
-
-static int zvni_map_to_svi_ns(struct ns *ns,
- void *_in_param,
- void **_p_ifp)
-{
- struct zebra_ns *zns = ns->info;
- struct route_node *rn;
- struct zvni_from_svi_param *in_param =
- (struct zvni_from_svi_param *)_in_param;
- struct zebra_l2info_vlan *vl;
- struct interface *tmp_if = NULL;
- struct interface **p_ifp = (struct interface **)_p_ifp;
- struct zebra_if *zif;
-
- if (!in_param)
- return NS_WALK_STOP;
-
/* TODO: Optimize with a hash. */
+ zns = zebra_ns_lookup(NS_DEFAULT);
for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
tmp_if = (struct interface *)rn->info;
- /* Check oper status of the SVI. */
- if (!tmp_if || !if_is_operative(tmp_if))
+ if (!tmp_if)
continue;
zif = tmp_if->info;
- if (!zif || zif->zif_type != ZEBRA_IF_VLAN
- || zif->link != in_param->br_if)
+ if (!zif || zif->zif_type != ZEBRA_IF_VXLAN)
+ continue;
+ if (!if_is_operative(tmp_if))
+ continue;
+ vxl = &zif->l2info.vxl;
+
+ if (zif->brslave_info.br_if != br_if)
continue;
- vl = (struct zebra_l2info_vlan *)&zif->l2info.vl;
- if (vl->vid == in_param->vid) {
- if (p_ifp)
- *p_ifp = tmp_if;
- return NS_WALK_STOP;
+ if (!bridge_vlan_aware || vxl->access_vlan == vid) {
+ found = 1;
+ break;
}
}
- return NS_WALK_CONTINUE;
+
+ if (!found)
+ return NULL;
+
+ zvni = zvni_lookup(vxl->vni);
+ return zvni;
}
/* Map to SVI on bridge corresponding to specified VLAN. This can be one
@@ -3765,11 +4044,15 @@ static int zvni_map_to_svi_ns(struct ns *ns,
*/
static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if)
{
+ struct zebra_ns *zns;
+ struct route_node *rn;
struct interface *tmp_if = NULL;
struct zebra_if *zif;
struct zebra_l2info_bridge *br;
- struct zvni_from_svi_param in_param;
- struct interface **p_ifp;
+ struct zebra_l2info_vlan *vl;
+ uint8_t bridge_vlan_aware;
+ int found = 0;
+
/* Defensive check, caller expected to invoke only with valid bridge. */
if (!br_if)
return NULL;
@@ -3778,56 +4061,33 @@ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if)
zif = br_if->info;
assert(zif);
br = &zif->l2info.br;
- in_param.bridge_vlan_aware = br->vlan_aware;
+ bridge_vlan_aware = br->vlan_aware;
+
/* Check oper status of the SVI. */
- if (!in_param.bridge_vlan_aware)
+ if (!bridge_vlan_aware)
return if_is_operative(br_if) ? br_if : NULL;
- in_param.vid = vid;
- in_param.br_if = br_if;
- in_param.zif = NULL;
- p_ifp = &tmp_if;
- /* Identify corresponding VLAN interface. */
- ns_walk_func(zvni_map_to_svi_ns,
- (void *)&in_param,
- (void **)p_ifp);
- return tmp_if;
-}
-
-static int zvni_map_to_macvlan_ns(struct ns *ns,
- void *_in_param,
- void **_p_ifp)
-{
- struct zebra_ns *zns = ns->info;
- struct zvni_from_svi_param *in_param =
- (struct zvni_from_svi_param *)_in_param;
- struct interface **p_ifp = (struct interface **)_p_ifp;
- struct route_node *rn;
- struct interface *tmp_if = NULL;
- struct zebra_if *zif;
-
- if (!in_param)
- return NS_WALK_STOP;
-
/* Identify corresponding VLAN interface. */
+ /* TODO: Optimize with a hash. */
+ zns = zebra_ns_lookup(NS_DEFAULT);
for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
tmp_if = (struct interface *)rn->info;
/* Check oper status of the SVI. */
if (!tmp_if || !if_is_operative(tmp_if))
continue;
zif = tmp_if->info;
-
- if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN)
+ if (!zif || zif->zif_type != ZEBRA_IF_VLAN
+ || zif->link != br_if)
continue;
+ vl = &zif->l2info.vl;
- if (zif->link == in_param->svi_if) {
- if (p_ifp)
- *p_ifp = tmp_if;
- return NS_WALK_STOP;
+ if (vl->vid == vid) {
+ found = 1;
+ break;
}
}
- return NS_WALK_CONTINUE;
+ return found ? tmp_if : NULL;
}
/* Map to MAC-VLAN interface corresponding to specified SVI interface.
@@ -3835,10 +4095,11 @@ static int zvni_map_to_macvlan_ns(struct ns *ns,
static struct interface *zvni_map_to_macvlan(struct interface *br_if,
struct interface *svi_if)
{
+ struct zebra_ns *zns;
+ struct route_node *rn;
struct interface *tmp_if = NULL;
struct zebra_if *zif;
- struct interface **p_ifp;
- struct zvni_from_svi_param in_param;
+ int found = 0;
/* Defensive check, caller expected to invoke only with valid bridge. */
if (!br_if)
@@ -3853,23 +4114,33 @@ static struct interface *zvni_map_to_macvlan(struct interface *br_if,
zif = br_if->info;
assert(zif);
- in_param.vid = 0;
- in_param.br_if = br_if;
- in_param.zif = NULL;
- in_param.svi_if = svi_if;
- p_ifp = &tmp_if;
-
/* Identify corresponding VLAN interface. */
- ns_walk_func(zvni_map_to_macvlan_ns,
- (void *)&in_param,
- (void **)p_ifp);
- return tmp_if;
+ zns = zebra_ns_lookup(NS_DEFAULT);
+ for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
+ tmp_if = (struct interface *)rn->info;
+ /* Check oper status of the candidate interface. */
+ if (!tmp_if || !if_is_operative(tmp_if))
+ continue;
+ zif = tmp_if->info;
+
+ if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN)
+ continue;
+
+ if (zif->link == svi_if) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found ? tmp_if : NULL;
}
+
/*
* Install remote MAC into the forwarding plane.
*/
-static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
+static int zvni_rem_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac,
+ bool was_static)
{
const struct zebra_if *zif, *br_zif;
const struct zebra_l2info_vxlan *vxl;
@@ -3877,6 +4148,8 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
enum zebra_dplane_result res;
const struct interface *br_ifp;
vlanid_t vid;
+ uint32_t nhg_id;
+ struct in_addr vtep_ip;
if (!(mac->flags & ZEBRA_MAC_REMOTE))
return 0;
@@ -3894,6 +4167,19 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
sticky = !!CHECK_FLAG(mac->flags,
(ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW));
+ /* If nexthop group for the FDB entry is inactive (not programmed in
+ * the dataplane) the MAC entry cannot be installed
+ */
+ if (mac->es) {
+ if (!(mac->es->flags & ZEBRA_EVPNES_NHG_ACTIVE))
+ return -1;
+ nhg_id = mac->es->nhg_id;
+ vtep_ip.s_addr = 0;
+ } else {
+ nhg_id = 0;
+ vtep_ip = mac->fwd_info.r_vtep_ip;
+ }
+
br_zif = (const struct zebra_if *)(br_ifp->info);
if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
@@ -3901,8 +4187,9 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
else
vid = 0;
- res = dplane_mac_add(zvni->vxlan_if, br_ifp, vid,
- &mac->macaddr, mac->fwd_info.r_vtep_ip, sticky);
+ res = dplane_rem_mac_add(zvni->vxlan_if, br_ifp, vid,
+ &mac->macaddr, vtep_ip, sticky,
+ nhg_id, was_static);
if (res != ZEBRA_DPLANE_REQUEST_FAILURE)
return 0;
else
@@ -3912,7 +4199,7 @@ static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
/*
* Uninstall remote MAC from the forwarding plane.
*/
-static int zvni_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac)
+static int zvni_rem_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac)
{
const struct zebra_if *zif, *br_zif;
const struct zebra_l2info_vxlan *vxl;
@@ -3951,7 +4238,7 @@ static int zvni_mac_uninstall(zebra_vni_t *zvni, zebra_mac_t *mac)
ifp = zvni->vxlan_if;
vtep_ip = mac->fwd_info.r_vtep_ip;
- res = dplane_mac_del(ifp, br_ifp, vid, &mac->macaddr, vtep_ip);
+ res = dplane_rem_mac_del(ifp, br_ifp, vid, &mac->macaddr, vtep_ip);
if (res != ZEBRA_DPLANE_REQUEST_FAILURE)
return 0;
else
@@ -3969,7 +4256,7 @@ static void zvni_install_mac_hash(struct hash_bucket *bucket, void *ctxt)
mac = (zebra_mac_t *)bucket->data;
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE))
- zvni_mac_install(wctx->zvni, mac);
+ zvni_rem_mac_install(wctx->zvni, mac, false);
}
/*
@@ -4003,7 +4290,8 @@ static void zvni_deref_ip2mac(zebra_vni_t *zvni, zebra_mac_t *mac)
*/
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) &&
remote_neigh_count(mac) == 0) {
- zvni_mac_uninstall(zvni, mac);
+ zvni_rem_mac_uninstall(zvni, mac);
+ zebra_evpn_es_mac_deref_entry(mac);
UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);
}
@@ -4018,7 +4306,6 @@ static void zvni_deref_ip2mac(zebra_vni_t *zvni, zebra_mac_t *mac)
static void zvni_read_mac_neigh(zebra_vni_t *zvni, struct interface *ifp)
{
struct zebra_ns *zns;
- struct zebra_vrf *zvrf;
struct zebra_if *zif;
struct interface *vlan_if;
struct zebra_l2info_vxlan *vxl;
@@ -4026,10 +4313,7 @@ static void zvni_read_mac_neigh(zebra_vni_t *zvni, struct interface *ifp)
zif = ifp->info;
vxl = &zif->l2info.vxl;
- zvrf = zebra_vrf_lookup_by_id(zvni->vrf_id);
- if (!zvrf || !zvrf->zns)
- return;
- zns = zvrf->zns;
+ zns = zebra_ns_lookup(NS_DEFAULT);
if (IS_ZEBRA_DEBUG_VXLAN)
zlog_debug(
@@ -4074,7 +4358,7 @@ static bool vni_hash_cmp(const void *p1, const void *p2)
return (zvni1->vni == zvni2->vni);
}
-static int vni_list_cmp(void *p1, void *p2)
+int vni_list_cmp(void *p1, void *p2)
{
const zebra_vni_t *zvni1 = p1;
const zebra_vni_t *zvni2 = p2;
@@ -4100,7 +4384,7 @@ static void *zvni_alloc(void *p)
/*
* Look up VNI hash entry.
*/
-static zebra_vni_t *zvni_lookup(vni_t vni)
+zebra_vni_t *zvni_lookup(vni_t vni)
{
struct zebra_vrf *zvrf;
zebra_vni_t tmp_vni;
@@ -4131,6 +4415,8 @@ static zebra_vni_t *zvni_add(vni_t vni)
zvni = hash_get(zvrf->vni_table, &tmp_zvni, zvni_alloc);
assert(zvni);
+ zebra_evpn_vni_es_init(zvni);
+
/* Create hash table for MAC */
zvni->mac_table =
hash_create(mac_hash_keymake, mac_cmp, "Zebra VNI MAC Table");
@@ -4142,6 +4428,30 @@ static zebra_vni_t *zvni_add(vni_t vni)
return zvni;
}
+/* vni<=>vxlan_zif association */
+static void zvni_vxlan_if_set(zebra_vni_t *zvni, struct interface *ifp,
+ bool set)
+{
+ struct zebra_if *zif;
+
+ if (set) {
+ if (zvni->vxlan_if == ifp)
+ return;
+ zvni->vxlan_if = ifp;
+ } else {
+ if (!zvni->vxlan_if)
+ return;
+ zvni->vxlan_if = NULL;
+ }
+
+ if (ifp)
+ zif = ifp->info;
+ else
+ zif = NULL;
+
+ zebra_evpn_vxl_vni_set(zif, zvni, set);
+}
+
/*
* Delete VNI hash entry.
*/
@@ -4153,7 +4463,7 @@ static int zvni_del(zebra_vni_t *zvni)
zvrf = zebra_vrf_get_evpn();
assert(zvrf);
- zvni->vxlan_if = NULL;
+ zvni_vxlan_if_set(zvni, zvni->vxlan_if, false /* set */);
/* Remove references to the BUM mcast grp */
zebra_vxlan_sg_deref(zvni->local_vtep_ip, zvni->mcast_grp);
@@ -4166,6 +4476,8 @@ static int zvni_del(zebra_vni_t *zvni)
hash_free(zvni->mac_table);
zvni->mac_table = NULL;
+ zebra_evpn_vni_es_cleanup(zvni);
+
/* Free the VNI hash entry and allocated memory. */
tmp_zvni = hash_release(zvrf->vni_table, zvni);
XFREE(MTYPE_ZVNI, tmp_zvni);
@@ -4180,6 +4492,7 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni)
{
struct zserv *client;
struct stream *s;
+ int rc;
client = zserv_find_client(ZEBRA_ROUTE_BGP, 0);
/* BGP may not be running. */
@@ -4204,13 +4517,22 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni)
zebra_route_string(client->proto));
client->vniadd_cnt++;
- return zserv_send_message(client, s);
+ rc = zserv_send_message(client, s);
+
+ if (!(zvni->flags & ZVNI_READY_FOR_BGP)) {
+ zvni->flags |= ZVNI_READY_FOR_BGP;
+ /* once the VNI is sent the ES-EVIs can also be replayed
+ * to BGP
+ */
+ zebra_evpn_vni_update_all_es(zvni);
+ }
+ return rc;
}
/*
* Inform BGP about local VNI deletion.
*/
-static int zvni_send_del_to_client(vni_t vni)
+static int zvni_send_del_to_client(zebra_vni_t *zvni)
{
struct zserv *client;
struct stream *s;
@@ -4220,38 +4542,41 @@ static int zvni_send_del_to_client(vni_t vni)
if (!client)
return 0;
+ if (zvni->flags & ZVNI_READY_FOR_BGP) {
+ zvni->flags &= ~ZVNI_READY_FOR_BGP;
+ /* the ES-EVIs must be removed from BGP before the VNI is */
+ zebra_evpn_vni_update_all_es(zvni);
+ }
+
s = stream_new(ZEBRA_MAX_PACKET_SIZ);
stream_reset(s);
zclient_create_header(s, ZEBRA_VNI_DEL, zebra_vrf_get_evpn_id());
- stream_putl(s, vni);
+ stream_putl(s, zvni->vni);
/* Write packet size. */
stream_putw_at(s, 0, stream_get_endp(s));
if (IS_ZEBRA_DEBUG_VXLAN)
- zlog_debug("Send VNI_DEL %u to %s", vni,
+ zlog_debug("Send VNI_DEL %u to %s", zvni->vni,
zebra_route_string(client->proto));
client->vnidel_cnt++;
return zserv_send_message(client, s);
}
-static int zvni_build_hash_table_ns(struct ns *ns,
- void *param_in __attribute__((unused)),
- void **param_out __attribute__((unused)))
+/*
+ * Build the VNI hash table by going over the VxLAN interfaces. This
+ * is called when EVPN (advertise-all-vni) is enabled.
+ */
+static void zvni_build_hash_table(void)
{
- struct zebra_ns *zns = ns->info;
+ struct zebra_ns *zns;
struct route_node *rn;
struct interface *ifp;
- struct zebra_vrf *zvrf;
-
- zvrf = zebra_vrf_get_evpn();
-
- if (!zvrf)
- return NS_WALK_STOP;
/* Walk VxLAN interfaces and create VNI hash. */
+ zns = zebra_ns_lookup(NS_DEFAULT);
for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
vni_t vni;
zebra_vni_t *zvni = NULL;
@@ -4268,14 +4593,7 @@ static int zvni_build_hash_table_ns(struct ns *ns,
vxl = &zif->l2info.vxl;
vni = vxl->vni;
- /* link of VXLAN interface should be in zebra_evpn_vrf */
- if (zvrf->zns->ns_id != vxl->link_nsid) {
- if (IS_ZEBRA_DEBUG_VXLAN)
- zlog_debug(
- "Intf %s(%u) VNI %u, link not in same namespace than BGP EVPN core instance ",
- ifp->name, ifp->ifindex, vni);
- continue;
- }
+
/* L3-VNI and L2-VNI are handled seperately */
zl3vni = zl3vni_lookup(vni);
if (zl3vni) {
@@ -4344,7 +4662,7 @@ static int zvni_build_hash_table_ns(struct ns *ns,
zlog_debug(
"Failed to add VNI hash, IF %s(%u) L2-VNI %u",
ifp->name, ifp->ifindex, vni);
- return NS_WALK_CONTINUE;
+ return;
}
if (zvni->local_vtep_ip.s_addr !=
@@ -4358,8 +4676,12 @@ static int zvni_build_hash_table_ns(struct ns *ns,
vxl->mcast_grp);
zvni->local_vtep_ip = vxl->vtep_ip;
zvni->mcast_grp = vxl->mcast_grp;
+ /* on local vtep-ip change, check if the ES
+ * orig-ip needs to be updated
+ */
+ zebra_evpn_es_set_base_vni(zvni);
}
- zvni->vxlan_if = ifp;
+ zvni_vxlan_if_set(zvni, ifp, true /* set */);
vlan_if = zvni_map_to_svi(vxl->access_vlan,
zif->brslave_info.br_if);
if (vlan_if) {
@@ -4381,19 +4703,6 @@ static int zvni_build_hash_table_ns(struct ns *ns,
}
}
}
- return NS_WALK_CONTINUE;
-}
-
-/*
- * Build the VNI hash table by going over the VxLAN interfaces. This
- * is called when EVPN (advertise-all-vni) is enabled.
- */
-
-static void zvni_build_hash_table(void)
-{
- ns_walk_func(zvni_build_hash_table_ns,
- (void *)NULL,
- (void **)NULL);
}
/*
@@ -4719,8 +5028,9 @@ static int zl3vni_rmac_install(zebra_l3vni_t *zl3vni, zebra_mac_t *zrmac)
else
vid = 0;
- res = dplane_mac_add(zl3vni->vxlan_if, br_ifp, vid,
- &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0);
+ res = dplane_rem_mac_add(zl3vni->vxlan_if, br_ifp, vid,
+ &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, 0, 0,
+ false /*was_static*/);
if (res != ZEBRA_DPLANE_REQUEST_FAILURE)
return 0;
else
@@ -4769,7 +5079,7 @@ static int zl3vni_rmac_uninstall(zebra_l3vni_t *zl3vni, zebra_mac_t *zrmac)
else
vid = 0;
- res = dplane_mac_del(zl3vni->vxlan_if, br_ifp, vid,
+ res = dplane_rem_mac_del(zl3vni->vxlan_if, br_ifp, vid,
&zrmac->macaddr, zrmac->fwd_info.r_vtep_ip);
if (res != ZEBRA_DPLANE_REQUEST_FAILURE)
return 0;
@@ -4948,7 +5258,8 @@ static int zl3vni_nh_install(zebra_l3vni_t *zl3vni, zebra_neigh_t *n)
if (n->flags & ZEBRA_NEIGH_ROUTER_FLAG)
flags |= DPLANE_NTF_ROUTER;
- dplane_neigh_add(zl3vni->svi_if, &n->ip, &n->emac, flags);
+ dplane_rem_neigh_add(zl3vni->svi_if, &n->ip, &n->emac, flags,
+ false /*was_static*/);
return ret;
}
@@ -4965,7 +5276,7 @@ static int zl3vni_nh_uninstall(zebra_l3vni_t *zl3vni, zebra_neigh_t *n)
if (!zl3vni->svi_if || !if_is_operative(zl3vni->svi_if))
return 0;
- dplane_neigh_delete(zl3vni->svi_if, &n->ip);
+ dplane_rem_neigh_delete(zl3vni->svi_if, &n->ip);
return 0;
}
@@ -5178,22 +5489,14 @@ static int zl3vni_del(zebra_l3vni_t *zl3vni)
return 0;
}
-static int zl3vni_map_to_vxlan_if_ns(struct ns *ns,
- void *_zl3vni,
- void **_pifp)
+struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni)
{
- struct zebra_ns *zns = ns->info;
- zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)_zl3vni;
+ struct zebra_ns *zns = NULL;
struct route_node *rn = NULL;
struct interface *ifp = NULL;
- struct zebra_vrf *zvrf;
-
- zvrf = zebra_vrf_get_evpn();
-
- if (!zvrf)
- return NS_WALK_STOP;
/* loop through all vxlan-interface */
+ zns = zebra_ns_lookup(NS_DEFAULT);
for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) {
struct zebra_if *zif = NULL;
@@ -5208,38 +5511,13 @@ static int zl3vni_map_to_vxlan_if_ns(struct ns *ns,
continue;
vxl = &zif->l2info.vxl;
- if (vxl->vni != zl3vni->vni)
- continue;
-
- /* link of VXLAN interface should be in zebra_evpn_vrf */
- if (zvrf->zns->ns_id != vxl->link_nsid) {
- if (IS_ZEBRA_DEBUG_VXLAN)
- zlog_debug(
- "Intf %s(%u) VNI %u, link not in same namespace than BGP EVPN core instance ",
- ifp->name, ifp->ifindex, vxl->vni);
- continue;
+ if (vxl->vni == zl3vni->vni) {
+ zl3vni->local_vtep_ip = vxl->vtep_ip;
+ return ifp;
}
-
-
- zl3vni->local_vtep_ip = vxl->vtep_ip;
- if (_pifp)
- *_pifp = (void *)ifp;
- return NS_WALK_STOP;
}
- return NS_WALK_CONTINUE;
-}
-
-struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni)
-{
- struct interface **p_ifp;
- struct interface *ifp = NULL;
-
- p_ifp = &ifp;
-
- ns_walk_func(zl3vni_map_to_vxlan_if_ns,
- (void *)zl3vni, (void **)p_ifp);
- return ifp;
+ return NULL;
}
struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni)
@@ -5524,7 +5802,7 @@ static int zebra_vxlan_handle_vni_transition(struct zebra_vrf *zvrf, vni_t vni,
zlog_debug("Del L2-VNI %u - transition to L3-VNI", vni);
/* Delete VNI from BGP. */
- zvni_send_del_to_client(zvni->vni);
+ zvni_send_del_to_client(zvni);
/* Free up all neighbors and MAC, if any. */
zvni_neigh_del_all(zvni, 0, 0, DEL_ALL_NEIGH);
@@ -5632,6 +5910,1167 @@ static int zebra_vxlan_readd_remote_rmac(zebra_l3vni_t *zl3vni,
return 0;
}
+/**************************** SYNC MAC handling *****************************/
+/* if the mac has been added because of a mac-route from the peer
+ * or if it is being referenced by a neigh added by the
+ * peer we cannot let it age out i.e. we set the static bit
+ * in the dataplane
+ */
+static inline bool zebra_vxlan_mac_is_static(zebra_mac_t *mac)
+{
+ return ((mac->flags & ZEBRA_MAC_ALL_PEER_FLAGS) ||
+ mac->sync_neigh_cnt);
+}
+
+/* mac needs to be locally active or active on an ES peer */
+static inline bool zebra_vxlan_mac_is_ready_for_bgp(uint32_t flags)
+{
+ return (flags & ZEBRA_MAC_LOCAL) &&
+ (!(flags & ZEBRA_MAC_LOCAL_INACTIVE) ||
+ (flags & ZEBRA_MAC_ES_PEER_ACTIVE));
+}
+
+/* program sync mac flags in the dataplane */
+void zebra_vxlan_sync_mac_dp_install(zebra_mac_t *mac, bool set_inactive,
+ bool force_clear_static, const char *caller)
+{
+ char macbuf[ETHER_ADDR_STRLEN];
+ struct interface *ifp;
+ bool sticky;
+ bool set_static;
+ zebra_vni_t *zvni = mac->zvni;
+ vlanid_t vid;
+ struct zebra_if *zif;
+ struct interface *br_ifp;
+
+ /* get the access vlan from the vxlan_device */
+ zebra_vxlan_mac_get_access_info(mac,
+ &ifp, &vid);
+
+ if (!ifp) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("%s: dp-install sync-mac vni %u mac %s es %s 0x%x %sskipped, no access-port",
+ caller,
+ zvni->vni,
+ prefix_mac2str(&mac->macaddr, macbuf,
+ sizeof(macbuf)),
+ mac->es ?
+ mac->es->esi_str : "-",
+ mac->flags,
+ set_inactive ? "inactive " : "");
+ return;
+ }
+
+ zif = ifp->info;
+ br_ifp = zif->brslave_info.br_if;
+ if (!br_ifp) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("%s: dp-install sync-mac vni %u mac %s es %s 0x%x %sskipped, no br",
+ caller,
+ zvni->vni,
+ prefix_mac2str(&mac->macaddr, macbuf,
+ sizeof(macbuf)),
+ mac->es ?
+ mac->es->esi_str : "-",
+ mac->flags,
+ set_inactive ? "inactive " : "");
+ return;
+ }
+
+ sticky = !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY);
+ if (force_clear_static)
+ set_static = false;
+ else
+ set_static = zebra_vxlan_mac_is_static(mac);
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("dp-install sync-mac vni %u mac %s es %s 0x%x %s%s",
+ zvni->vni,
+ prefix_mac2str(&mac->macaddr, macbuf,
+ sizeof(macbuf)),
+ mac->es ?
+ mac->es->esi_str : "-", mac->flags,
+ set_static ? "static " : "",
+ set_inactive ? "inactive " : "");
+
+ dplane_local_mac_add(ifp, br_ifp, vid, &mac->macaddr, sticky,
+ set_static, set_inactive);
+
+}
+
+static void zebra_vxlan_mac_send_add_del_to_client(zebra_mac_t *mac,
+ bool old_bgp_ready, bool new_bgp_ready)
+{
+ if (new_bgp_ready)
+ zvni_mac_send_add_to_client(mac->zvni->vni,
+ &mac->macaddr, mac->flags,
+ mac->loc_seq, mac->es);
+ else if (old_bgp_ready)
+ zvni_mac_send_del_to_client(mac->zvni->vni,
+ &mac->macaddr, mac->flags,
+ true /* force */);
+}
+
+/* MAC hold timer is used to age out peer-active flag.
+ *
+ * During this wait time we expect the dataplane component or an
+ * external neighmgr daemon to probe existing hosts to independently
+ * establish their presence on the ES.
+ */
+static int zebra_vxlan_mac_hold_exp_cb(struct thread *t)
+{
+ zebra_mac_t *mac;
+ bool old_bgp_ready;
+ bool new_bgp_ready;
+ bool old_static;
+ bool new_static;
+ char macbuf[ETHER_ADDR_STRLEN];
+
+ mac = THREAD_ARG(t);
+ /* the purpose of the hold timer is to age out the peer-active
+ * flag
+ */
+ if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+ return 0;
+
+ old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ old_static = zebra_vxlan_mac_is_static(mac);
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE);
+ new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ new_static = zebra_vxlan_mac_is_static(mac);
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-mac vni %u mac %s es %s 0x%x hold expired",
+ mac->zvni->vni,
+ prefix_mac2str(&mac->macaddr, macbuf,
+ sizeof(macbuf)),
+ mac->es ?
+ mac->es->esi_str : "-",
+ mac->flags);
+
+ /* re-program the local mac in the dataplane if the mac is no
+ * longer static
+ */
+ if (old_static != new_static)
+ zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */,
+ false /* force_clear_static */, __func__);
+
+ /* inform bgp if needed */
+ if (old_bgp_ready != new_bgp_ready)
+ zebra_vxlan_mac_send_add_del_to_client(mac,
+ old_bgp_ready, new_bgp_ready);
+
+ return 0;
+}
+
+static inline void zebra_vxlan_mac_start_hold_timer(zebra_mac_t *mac)
+{
+ char macbuf[ETHER_ADDR_STRLEN];
+
+ if (mac->hold_timer)
+ return;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-mac vni %u mac %s es %s 0x%x hold started",
+ mac->zvni->vni,
+ prefix_mac2str(&mac->macaddr, macbuf,
+ sizeof(macbuf)),
+ mac->es ?
+ mac->es->esi_str : "-",
+ mac->flags);
+ thread_add_timer(zrouter.master,
+ zebra_vxlan_mac_hold_exp_cb,
+ mac, zmh_info->mac_hold_time,
+ &mac->hold_timer);
+}
+
+static inline void zebra_vxlan_mac_stop_hold_timer(zebra_mac_t *mac)
+{
+ char macbuf[ETHER_ADDR_STRLEN];
+
+ if (!mac->hold_timer)
+ return;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-mac vni %u mac %s es %s 0x%x hold stopped",
+ mac->zvni->vni,
+ prefix_mac2str(&mac->macaddr, macbuf,
+ sizeof(macbuf)),
+ mac->es ?
+ mac->es->esi_str : "-",
+ mac->flags);
+ THREAD_OFF(mac->hold_timer);
+}
+
+static inline void zebra_vxlan_mac_clear_sync_info(zebra_mac_t *mac)
+{
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_PEER_FLAGS);
+ zebra_vxlan_mac_stop_hold_timer(mac);
+}
+
+static void zebra_vxlan_sync_mac_del(zebra_mac_t *mac)
+{
+ char macbuf[ETHER_ADDR_STRLEN];
+ bool old_static;
+ bool new_static;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-mac del vni %u mac %s es %s seq %d f 0x%x",
+ mac->zvni->vni,
+ prefix_mac2str(&mac->macaddr,
+ macbuf, sizeof(macbuf)),
+ mac->es ? mac->es->esi_str : "-",
+ mac->loc_seq,
+ mac->flags);
+ old_static = zebra_vxlan_mac_is_static(mac);
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY);
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE))
+ zebra_vxlan_mac_start_hold_timer(mac);
+ new_static = zebra_vxlan_mac_is_static(mac);
+
+ if (old_static != new_static)
+ /* program the local mac in the kernel */
+ zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */,
+ false /* force_clear_static */, __func__);
+}
+
+static inline bool zebra_vxlan_mac_is_bgp_seq_ok(zebra_vni_t *zvni,
+ zebra_mac_t *mac, uint32_t seq, uint16_t ipa_len,
+ struct ipaddr *ipaddr)
+{
+ char macbuf[ETHER_ADDR_STRLEN];
+ char ipbuf[INET6_ADDRSTRLEN];
+ uint32_t tmp_seq;
+
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL))
+ tmp_seq = mac->loc_seq;
+ else
+ tmp_seq = mac->rem_seq;
+
+ if (seq < tmp_seq) {
+ /* if the mac was never advertised to bgp we must accept
+ * whatever sequence number bgp sends
+ * XXX - check with Vivek
+ */
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) &&
+ !zebra_vxlan_mac_is_ready_for_bgp(mac->flags)) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-macip accept vni %u mac %s%s%s lower seq %u f 0x%x",
+ zvni->vni,
+ prefix_mac2str(&mac->macaddr,
+ macbuf, sizeof(macbuf)),
+ ipa_len ? " IP " : "",
+ ipa_len ?
+ ipaddr2str(ipaddr,
+ ipbuf, sizeof(ipbuf)) : "",
+ tmp_seq, mac->flags);
+ return true;
+ }
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-macip ignore vni %u mac %s%s%s as existing has higher seq %u f 0x%x",
+ zvni->vni,
+ prefix_mac2str(&mac->macaddr,
+ macbuf, sizeof(macbuf)),
+ ipa_len ? " IP " : "",
+ ipa_len ?
+ ipaddr2str(ipaddr,
+ ipbuf, sizeof(ipbuf)) : "",
+ tmp_seq, mac->flags);
+ return false;
+ }
+
+ return true;
+}
+
+/* sync-path that is active on an ES peer */
+static zebra_mac_t *zebra_vxlan_proc_sync_mac_update(zebra_vni_t *zvni,
+ struct ethaddr *macaddr, uint16_t ipa_len,
+ struct ipaddr *ipaddr, uint8_t flags,
+ uint32_t seq, esi_t *esi,
+ struct sync_mac_ip_ctx *ctx)
+{
+ zebra_mac_t *mac;
+ bool inform_bgp = false;
+ bool inform_dataplane = false;
+ bool seq_change = false;
+ bool es_change = false;
+ uint32_t tmp_seq;
+ char macbuf[ETHER_ADDR_STRLEN];
+ char ipbuf[INET6_ADDRSTRLEN];
+ bool old_local = false;
+ bool old_bgp_ready;
+ bool new_bgp_ready;
+
+ mac = zvni_mac_lookup(zvni, macaddr);
+ if (!mac) {
+ /* if it is a new local path we need to inform both
+ * the control protocol and the data-plane
+ */
+ inform_bgp = true;
+ inform_dataplane = true;
+ ctx->mac_created = true;
+ ctx->mac_inactive = true;
+
+ /* create the MAC and associate it with the dest ES */
+ mac = zvni_mac_add(zvni, macaddr);
+ zebra_evpn_es_mac_ref(mac, esi);
+
+ /* local mac activated by an ES peer */
+ SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL);
+ /* if mac-only route setup peer flags */
+ if (!ipa_len) {
+ if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT))
+ SET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_PROXY);
+ else
+ SET_FLAG(mac->flags, ZEBRA_MAC_ES_PEER_ACTIVE);
+ }
+ SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE);
+ old_bgp_ready = false;
+ new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ } else {
+ uint32_t old_flags;
+ uint32_t new_flags;
+ bool old_static;
+ bool new_static;
+ bool sticky;
+ bool remote_gw;
+
+ old_flags = mac->flags;
+ sticky = !!CHECK_FLAG(old_flags, ZEBRA_MAC_STICKY);
+ remote_gw = !!CHECK_FLAG(old_flags, ZEBRA_MAC_REMOTE_DEF_GW);
+ if (sticky || remote_gw) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+ zlog_debug("Ignore sync-macip vni %u mac %s%s%s%s%s",
+ zvni->vni,
+ prefix_mac2str(macaddr,
+ macbuf, sizeof(macbuf)),
+ ipa_len ? " IP " : "",
+ ipa_len ?
+ ipaddr2str(ipaddr, ipbuf,
+ sizeof(ipbuf)) : "",
+ sticky ? " sticky" : "",
+ remote_gw ? " remote_gw" : "");
+ ctx->ignore_macip = true;
+ return NULL;
+ }
+ if (!zebra_vxlan_mac_is_bgp_seq_ok(zvni, mac, seq,
+ ipa_len, ipaddr)) {
+ ctx->ignore_macip = true;
+ return NULL;
+ }
+
+ old_local = !!CHECK_FLAG(old_flags, ZEBRA_MAC_LOCAL);
+ old_static = zebra_vxlan_mac_is_static(mac);
+
+ /* re-build the mac flags */
+ new_flags = 0;
+ SET_FLAG(new_flags, ZEBRA_MAC_LOCAL);
+ /* retain old local activity flag */
+ if (old_flags & ZEBRA_MAC_LOCAL) {
+ new_flags |= (old_flags & ZEBRA_MAC_LOCAL_INACTIVE);
+ } else {
+ new_flags |= ZEBRA_MAC_LOCAL_INACTIVE;
+ ctx->mac_inactive = true;
+ }
+ if (ipa_len) {
+ /* if mac-ip route do NOT update the peer flags
+ * i.e. retain the old peer flags as is
+ */
+ new_flags |= (old_flags & ZEBRA_MAC_ALL_PEER_FLAGS);
+ } else {
+ /* if mac-only route update peer flags */
+ if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) {
+ SET_FLAG(new_flags, ZEBRA_MAC_ES_PEER_PROXY);
+ /* if the mac was peer-active previously we
+ * need to keep the flag and start the
+ * holdtimer on it. the peer-active flag is
+ * cleared on holdtimer expiry.
+ */
+ if (CHECK_FLAG(old_flags,
+ ZEBRA_MAC_ES_PEER_ACTIVE)) {
+ SET_FLAG(new_flags,
+ ZEBRA_MAC_ES_PEER_ACTIVE);
+ zebra_vxlan_mac_start_hold_timer(mac);
+ }
+ } else {
+ SET_FLAG(new_flags, ZEBRA_MAC_ES_PEER_ACTIVE);
+ /* stop hold timer if a peer has verified
+ * reachability
+ */
+ zebra_vxlan_mac_stop_hold_timer(mac);
+ }
+ }
+ mac->rem_seq = 0;
+ memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
+ mac->flags = new_flags;
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC &&
+ (old_flags != new_flags))
+ zlog_debug("sync-mac vni %u mac %s old_f 0x%x new_f 0x%x",
+ zvni->vni,
+ prefix_mac2str(macaddr,
+ macbuf, sizeof(macbuf)),
+ old_flags, mac->flags);
+
+ /* update es */
+ es_change = zebra_evpn_es_mac_ref(mac, esi);
+ /* if mac dest change - inform both sides */
+ if (es_change) {
+ inform_bgp = true;
+ inform_dataplane = true;
+ ctx->mac_inactive = true;
+ }
+ /* if peer-flag is being set notify dataplane that the
+ * entry must not be expired because of local inactivity
+ */
+ new_static = zebra_vxlan_mac_is_static(mac);
+ if (old_static != new_static)
+ inform_dataplane = true;
+
+ old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(old_flags);
+ new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ if (old_bgp_ready != new_bgp_ready)
+ inform_bgp = true;
+ }
+
+
+ /* update sequence number; if that results in a new local sequence
+ * inform bgp
+ */
+ tmp_seq = MAX(mac->loc_seq, seq);
+ if (tmp_seq != mac->loc_seq) {
+ mac->loc_seq = tmp_seq;
+ seq_change = true;
+ inform_bgp = true;
+ }
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-mac %s vni %u mac %s es %s seq %d f 0x%x%s%s",
+ ctx->mac_created ?
+ "created" : "updated",
+ zvni->vni,
+ prefix_mac2str(macaddr,
+ macbuf, sizeof(macbuf)),
+ mac->es ? mac->es->esi_str : "-",
+ mac->loc_seq, mac->flags,
+ inform_bgp ? " inform_bgp" : "",
+ inform_dataplane ? " inform_dp" : "");
+
+ if (inform_bgp)
+ zebra_vxlan_mac_send_add_del_to_client(mac,
+ old_bgp_ready, new_bgp_ready);
+
+ /* neighs using the mac may need to be re-sent to
+ * bgp with updated info
+ */
+ if (seq_change || es_change || !old_local)
+ zvni_process_neigh_on_local_mac_change(zvni, mac,
+ seq_change, es_change);
+
+ if (inform_dataplane) {
+ if (ipa_len)
+ /* if the mac is being created as part of a MAC-IP
+ * route, wait for the neigh to be updated or
+ * created before programming the mac
+ */
+ ctx->mac_dp_update_deferred = true;
+ else
+ /* program the local mac in the kernel. when the ES
+ * changes we need to force the dataplane to reset
+ * the activity as we are yet to establish activity
+ * locally
+ */
+ zebra_vxlan_sync_mac_dp_install(mac,
+ ctx->mac_inactive,
+ false /* force_clear_static */,
+ __func__);
+ }
+
+ return mac;
+}
+
+/**************************** SYNC neigh handling **************************/
+static inline bool zebra_vxlan_neigh_is_static(zebra_neigh_t *neigh)
+{
+ return !!(neigh->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS);
+}
+
+static inline bool zebra_vxlan_neigh_is_ready_for_bgp(zebra_neigh_t *n)
+{
+ bool mac_ready;
+ bool neigh_ready;
+
+ mac_ready = !!(n->mac->flags & ZEBRA_MAC_LOCAL);
+ neigh_ready = ((n->flags & ZEBRA_NEIGH_LOCAL) &&
+ IS_ZEBRA_NEIGH_ACTIVE(n) &&
+ (!(n->flags & ZEBRA_NEIGH_LOCAL_INACTIVE) ||
+ (n->flags & ZEBRA_NEIGH_ES_PEER_ACTIVE))) ?
+ true : false;
+
+ return mac_ready && neigh_ready;
+}
+
+/* Install a sync neigh in the dataplane via dplane_local_neigh_add().
+ *
+ * n                  - neigh to program
+ * set_inactive       - request the inactive attribute; it is forced to
+ *                      true when the neigh has ZEBRA_NEIGH_LOCAL_INACTIVE
+ * force_clear_static - when true the static attribute is not requested
+ *                      regardless of the neigh's peer flags
+ * caller             - name of the calling function, used in debug logs
+ *
+ * Nothing is programmed if the interface for n->ifindex cannot be
+ * found in the default namespace.
+ */
+static void zebra_vxlan_sync_neigh_dp_install(zebra_neigh_t *n,
+		bool set_inactive, bool force_clear_static, const char *caller)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	struct interface *ifp;
+	bool set_router;
+	bool set_static;
+
+	ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT),
+			n->ifindex);
+	if (!ifp) {
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("%s: dp-install sync-neigh vni %u ip %s mac %s if %d f 0x%x skipped",
+				caller, n->zvni->vni,
+				ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+				prefix_mac2str(&n->emac, macbuf,
+					sizeof(macbuf)),
+				n->ifindex, n->flags);
+		return;
+	}
+
+	/* static is derived from the peer flags unless the caller asked
+	 * for it to be cleared
+	 */
+	set_static = !force_clear_static && zebra_vxlan_neigh_is_static(n);
+	set_router = !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
+
+	/* XXX - this will change post integration with the new kernel */
+	set_inactive = set_inactive ||
+		CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE);
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("%s: dp-install sync-neigh vni %u ip %s mac %s if %s(%d) f 0x%x%s%s%s",
+			caller, n->zvni->vni,
+			ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+			prefix_mac2str(&n->emac, macbuf,
+				sizeof(macbuf)),
+			ifp->name, n->ifindex, n->flags,
+			set_router ? " router":"",
+			set_static ? " static":"",
+			set_inactive ? " inactive":"");
+
+	dplane_local_neigh_add(ifp, &n->ip, &n->emac,
+			set_router, set_static, set_inactive);
+}
+
+/* Tell the client about a neigh based on its bgp-ready transition.
+ *
+ * An add is sent whenever the neigh is currently bgp-ready; a forced
+ * delete is sent when it was bgp-ready before but no longer is. Nothing
+ * is sent if it was not ready before and is not ready now.
+ */
+static void zebra_vxlan_neigh_send_add_del_to_client(zebra_neigh_t *n,
+		bool old_bgp_ready, bool new_bgp_ready)
+{
+	if (new_bgp_ready) {
+		zvni_neigh_send_add_to_client(n->zvni->vni, &n->ip,
+				&n->emac, n->mac, n->flags, n->loc_seq);
+		return;
+	}
+
+	if (!old_bgp_ready)
+		return;
+
+	zvni_neigh_send_del_to_client(n->zvni->vni, &n->ip,
+			&n->emac, n->flags, n->state, true /*force*/);
+}
+
+/* Handle a change in the static state of a neigh.
+ *
+ * When old_n_static differs from new_n_static:
+ *  - the neigh is re-installed in the dataplane unless defer_n_dp is set;
+ *  - the sync-neigh reference count on the linked MAC is adjusted;
+ *  - the MAC is re-installed in the dataplane if its static state
+ *    changed as a result, unless defer_mac_dp is set.
+ *
+ * Nothing is done if the static state did not change. If the neigh is
+ * not linked to a MAC only the neigh dataplane update is performed.
+ * caller is the name of the calling function and is only used for the
+ * debug log.
+ */
+static void zebra_vxlan_sync_neigh_static_chg(zebra_neigh_t *n,
+		bool old_n_static, bool new_n_static,
+		bool defer_n_dp, bool defer_mac_dp,
+		const char *caller)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	zebra_mac_t *mac = n->mac;
+	bool old_mac_static;
+	bool new_mac_static;
+
+	if (old_n_static == new_n_static)
+		return;
+
+	/* the caller may ask for the neigh dataplane update to be
+	 * deferred if the neigh is still in the middle of updates
+	 */
+	if (!defer_n_dp)
+		zebra_vxlan_sync_neigh_dp_install(n, false /* set_inactive */,
+				false /* force_clear_static */, __func__);
+
+	if (!mac)
+		return;
+
+	/* adjust the sync reference count kept on the mac; the count is
+	 * never decremented below zero
+	 */
+	old_mac_static = zebra_vxlan_mac_is_static(mac);
+	if (new_n_static)
+		++mac->sync_neigh_cnt;
+	else if (old_n_static && mac->sync_neigh_cnt)
+		--mac->sync_neigh_cnt;
+	new_mac_static = zebra_vxlan_mac_is_static(mac);
+
+	/* push the mac to the dataplane if its static state flipped */
+	if (!defer_mac_dp && (old_mac_static != new_mac_static))
+		zebra_vxlan_sync_mac_dp_install(mac,
+				false /* set_inactive */,
+				false /* force_clear_static */,
+				__func__);
+
+	if (!IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		return;
+
+	zlog_debug("sync-neigh ref-chg vni %u ip %s mac %s f 0x%x %d%s%s%s%s by %s",
+		n->zvni->vni,
+		ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+		prefix_mac2str(&n->emac, macbuf,
+			sizeof(macbuf)),
+		n->flags, mac->sync_neigh_cnt,
+		old_n_static ? " old_n_static" : "",
+		new_n_static ? " new_n_static" : "",
+		old_mac_static ? " old_mac_static" : "",
+		new_mac_static ? " new_mac_static" : "",
+		caller);
+}
+
+/* Neigh hold timer expiry callback; ages out the peer-active flag.
+ *
+ * During the hold time we expect the dataplane component or an
+ * external neighmgr daemon to probe existing hosts to independently
+ * establish their presence on the ES.
+ *
+ * On expiry ZEBRA_NEIGH_ES_PEER_ACTIVE is cleared (if still set), the
+ * static-state change is propagated and bgp is informed if the
+ * bgp-ready state of the neigh changed. Always returns 0.
+ */
+static int zebra_vxlan_neigh_hold_exp_cb(struct thread *t)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	zebra_neigh_t *n = THREAD_ARG(t);
+	bool was_bgp_ready;
+	bool is_bgp_ready;
+	bool was_static;
+	bool is_static;
+
+	/* nothing to age out if peer-active is already gone */
+	if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE))
+		return 0;
+
+	/* snapshot state, drop the flag, then re-evaluate */
+	was_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n);
+	was_static = zebra_vxlan_neigh_is_static(n);
+	UNSET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE);
+	is_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n);
+	is_static = zebra_vxlan_neigh_is_static(n);
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x hold expired",
+			n->zvni->vni,
+			ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+			prefix_mac2str(&n->emac, macbuf,
+				sizeof(macbuf)),
+			n->flags);
+
+	/* re-program the local neigh in the dataplane if the neigh is no
+	 * longer static
+	 */
+	if (was_static != is_static)
+		zebra_vxlan_sync_neigh_static_chg(n, was_static,
+				is_static, false /*defer_n_dp*/,
+				false /*defer_mac_dp*/, __func__);
+
+	/* inform bgp if needed */
+	if (was_bgp_ready != is_bgp_ready)
+		zebra_vxlan_neigh_send_add_del_to_client(n,
+				was_bgp_ready, is_bgp_ready);
+
+	return 0;
+}
+
+/* Start the hold timer on a neigh.
+ *
+ * No-op if n->hold_timer is already set. Otherwise a timer of
+ * zmh_info->neigh_hold_time is scheduled on zrouter.master with
+ * zebra_vxlan_neigh_hold_exp_cb as the callback and the neigh as its
+ * argument.
+ */
+static inline void zebra_vxlan_neigh_start_hold_timer(zebra_neigh_t *n)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+
+	if (!n->hold_timer) {
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x hold start",
+				n->zvni->vni,
+				ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+				prefix_mac2str(&n->emac, macbuf,
+					sizeof(macbuf)),
+				n->flags);
+
+		thread_add_timer(zrouter.master,
+				zebra_vxlan_neigh_hold_exp_cb,
+				n, zmh_info->neigh_hold_time,
+				&n->hold_timer);
+	}
+}
+
+/* Stop the hold timer on a neigh.
+ *
+ * No-op if n->hold_timer is not set; otherwise the timer is turned off
+ * with THREAD_OFF.
+ */
+static inline void zebra_vxlan_neigh_stop_hold_timer(zebra_neigh_t *n)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+
+	if (n->hold_timer) {
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x hold stop",
+				n->zvni->vni,
+				ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+				prefix_mac2str(&n->emac, macbuf,
+					sizeof(macbuf)),
+				n->flags);
+
+		THREAD_OFF(n->hold_timer);
+	}
+}
+
+/* Drop all peer flags from a neigh and stop its hold timer.
+ *
+ * If any of ZEBRA_NEIGH_ALL_PEER_FLAGS was set the flags are cleared
+ * and the static-state change is propagated with the neigh dataplane
+ * update deferred (the MAC dataplane update is not deferred). The hold
+ * timer is stopped in all cases.
+ *
+ * Returns true if the static state of the neigh changed, i.e. the
+ * caller may need to re-install the neigh in the dataplane.
+ */
+static inline bool zebra_vxlan_neigh_clear_sync_info(zebra_neigh_t *n)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	bool static_changed = false;
+
+	if (n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS) {
+		bool was_static;
+		bool is_static;
+
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("sync-neigh vni %u ip %s mac %s 0x%x clear",
+				n->zvni->vni,
+				ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+				prefix_mac2str(&n->emac, macbuf,
+					sizeof(macbuf)),
+				n->flags);
+
+		was_static = zebra_vxlan_neigh_is_static(n);
+		UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_PEER_FLAGS);
+		is_static = zebra_vxlan_neigh_is_static(n);
+
+		static_changed = (was_static != is_static);
+		if (static_changed)
+			zebra_vxlan_sync_neigh_static_chg(n, was_static,
+					is_static, true /*defer_n_dp*/,
+					false /*defer_mac_dp*/, __func__);
+	}
+
+	zebra_vxlan_neigh_stop_hold_timer(n);
+
+	/* if the neigh static flag changed inform that a dp
+	 * re-install maybe needed
+	 */
+	return static_changed;
+}
+
+/* Unlink a neigh from the MAC it currently references.
+ *
+ * n->mac is always reset to NULL. If a MAC was linked:
+ *  - when the neigh carries peer flags and the MAC's sync-neigh count
+ *    is non-zero, the count is decremented and, if that changed the
+ *    MAC's static state and send_mac_update is set, the MAC is
+ *    re-installed in the dataplane;
+ *  - the neigh is removed from the MAC's neigh list and
+ *    zvni_deref_ip2mac() is called on the MAC.
+ */
+static void zebra_vxlan_local_neigh_deref_mac(zebra_neigh_t *n,
+		bool send_mac_update)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	zebra_vni_t *zvni = n->zvni;
+	zebra_mac_t *old_mac = n->mac;
+	bool was_static;
+	bool is_static;
+
+	n->mac = NULL;
+	if (!old_mac)
+		return;
+
+	if (old_mac->sync_neigh_cnt &&
+	    (n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS)) {
+		was_static = zebra_vxlan_mac_is_static(old_mac);
+		--old_mac->sync_neigh_cnt;
+		is_static = zebra_vxlan_mac_is_static(old_mac);
+
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("sync-neigh deref mac vni %u ip %s mac %s ref %d",
+				zvni->vni,
+				ipaddr2str(&n->ip, ipbuf,
+					sizeof(ipbuf)),
+				prefix_mac2str(&n->emac, macbuf,
+					sizeof(macbuf)),
+				old_mac->sync_neigh_cnt);
+
+		/* program the local mac in the kernel */
+		if (send_mac_update && (was_static != is_static))
+			zebra_vxlan_sync_mac_dp_install(old_mac,
+					false /* set_inactive */,
+					false /* force_clear_static */,
+					__func__);
+	}
+
+	listnode_delete(old_mac->neigh_list, n);
+	zvni_deref_ip2mac(zvni, old_mac);
+}
+
+/* Link a neigh to a MAC.
+ *
+ * macaddr is copied into n->emac and n->mac is set to mac (which may
+ * be NULL, in which case nothing else is done). Otherwise the neigh is
+ * added to the MAC's neigh list and, if the neigh carries peer flags,
+ * the MAC's sync-neigh count is incremented. If that changed the MAC's
+ * static state and send_mac_update is set, the MAC is re-installed in
+ * the dataplane.
+ */
+static void zebra_vxlan_local_neigh_ref_mac(zebra_neigh_t *n,
+		struct ethaddr *macaddr, zebra_mac_t *mac,
+		bool send_mac_update)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	bool was_static;
+	bool is_static;
+
+	n->mac = mac;
+	memcpy(&n->emac, macaddr, ETH_ALEN);
+
+	/* Link to new MAC */
+	if (!mac)
+		return;
+
+	listnode_add_sort(mac->neigh_list, n);
+
+	/* only neighs with peer flags hold a sync reference on the mac */
+	if (!(n->flags & ZEBRA_NEIGH_ALL_PEER_FLAGS))
+		return;
+
+	was_static = zebra_vxlan_mac_is_static(mac);
+	++mac->sync_neigh_cnt;
+	is_static = zebra_vxlan_mac_is_static(mac);
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("sync-neigh ref mac vni %u ip %s mac %s ref %d",
+			n->zvni->vni,
+			ipaddr2str(&n->ip, ipbuf,
+				sizeof(ipbuf)),
+			prefix_mac2str(&n->emac, macbuf,
+				sizeof(macbuf)),
+			mac->sync_neigh_cnt);
+
+	/* program the local mac in the kernel */
+	if (send_mac_update && (was_static != is_static))
+		zebra_vxlan_sync_mac_dp_install(mac,
+				false /*set_inactive*/,
+				false /*force_clear_static*/,
+				__func__);
+}
+
+/* Check whether a sequence number received from bgp is acceptable for
+ * an existing neigh.
+ *
+ * seq is compared against n->loc_seq if the neigh is local, else
+ * against n->rem_seq. Returns true if seq is not lower than the
+ * existing value, or if it is lower but the neigh is local and not
+ * ready for bgp. Returns false otherwise. macaddr is only used for the
+ * debug logs.
+ */
+static inline bool zebra_vxlan_neigh_is_bgp_seq_ok(zebra_vni_t *zvni,
+		zebra_neigh_t *n, struct ethaddr *macaddr, uint32_t seq)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	bool is_local = !!CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL);
+	uint32_t tmp_seq = is_local ? n->loc_seq : n->rem_seq;
+
+	if (seq >= tmp_seq)
+		return true;
+
+	/* if the neigh was never advertised to bgp we must accept
+	 * whatever sequence number bgp sends
+	 * XXX - check with Vivek
+	 */
+	if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) &&
+	    !zebra_vxlan_neigh_is_ready_for_bgp(n)) {
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+			zlog_debug("sync-macip accept vni %u mac %s IP %s lower seq %u f 0x%x",
+				zvni->vni,
+				prefix_mac2str(macaddr,
+					macbuf, sizeof(macbuf)),
+				ipaddr2str(&n->ip,
+					ipbuf, sizeof(ipbuf)),
+				tmp_seq, n->flags);
+		return true;
+	}
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("sync-macip ignore vni %u mac %s IP %s as existing has higher seq %u f 0x%x",
+			zvni->vni,
+			prefix_mac2str(macaddr,
+				macbuf, sizeof(macbuf)),
+			ipaddr2str(&n->ip,
+				ipbuf, sizeof(ipbuf)),
+			tmp_seq, n->flags);
+
+	return false;
+}
+
+/* Process the removal of sync info for a neigh.
+ *
+ * ZEBRA_NEIGH_ES_PEER_PROXY is cleared. If the neigh is still flagged
+ * ZEBRA_NEIGH_ES_PEER_ACTIVE the hold timer is started so that the flag
+ * can be aged out. Any resulting change in the neigh's static state is
+ * propagated without deferring the dataplane updates.
+ */
+static void zebra_vxlan_sync_neigh_del(zebra_neigh_t *n)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	bool was_static;
+	bool is_static;
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("sync-neigh del vni %u ip %s mac %s f 0x%x",
+			n->zvni->vni,
+			ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+			prefix_mac2str(&n->emac, macbuf,
+				sizeof(macbuf)),
+			n->flags);
+
+	was_static = zebra_vxlan_neigh_is_static(n);
+
+	UNSET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY);
+	if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE))
+		zebra_vxlan_neigh_start_hold_timer(n);
+
+	is_static = zebra_vxlan_neigh_is_static(n);
+	if (was_static == is_static)
+		return;
+
+	zebra_vxlan_sync_neigh_static_chg(n, was_static,
+			is_static, false /*defer-dp*/,
+			false /*defer_mac_dp*/, __func__);
+}
+
+/* Create or update a neigh from a sync MAC-IP update.
+ *
+ * zvni    - VNI the neigh belongs to
+ * n       - existing neigh for ipaddr, or NULL to create a new one
+ * ipa_len - not referenced by this function
+ * ipaddr  - neigh IP; only used when a new neigh is created
+ * flags   - ZEBRA_MACIP_TYPE_* flags received with the update
+ *           (ROUTER_FLAG and PROXY_ADVERT are examined here)
+ * seq     - sequence number received with the update
+ * esi     - not referenced by this function
+ * ctx     - sync context; ctx->mac is the MAC the neigh is linked to,
+ *           ctx->mac_dp_update_deferred and ctx->mac_inactive control
+ *           the MAC dataplane install done from here
+ *
+ * The neigh is marked local and its peer-proxy/peer-active flags,
+ * router flag, sequence number, MAC linkage and ifindex are brought in
+ * line with the update. The dataplane and bgp are informed when the
+ * corresponding state changed. Returns the created or updated neigh.
+ */
+static zebra_neigh_t *zebra_vxlan_proc_sync_neigh_update(zebra_vni_t *zvni,
+		zebra_neigh_t *n, uint16_t ipa_len,
+		struct ipaddr *ipaddr, uint8_t flags, uint32_t seq,
+		esi_t *esi, struct sync_mac_ip_ctx *ctx)
+{
+	struct interface *ifp = NULL;
+	bool is_router;
+	zebra_mac_t *mac = ctx->mac;
+	uint32_t tmp_seq;
+	bool old_router = false;
+	bool old_bgp_ready = false;
+	bool new_bgp_ready;
+	bool inform_dataplane = false;
+	bool inform_bgp = false;
+	bool old_mac_static;
+	bool new_mac_static;
+	bool set_dp_inactive = false;
+	struct zebra_if *zif;
+	char macbuf[ETHER_ADDR_STRLEN];
+	char ipbuf[INET6_ADDRSTRLEN];
+	bool created;
+	ifindex_t ifindex = 0;
+
+	/* locate l3-svi */
+	zif = zvni->vxlan_if->info;
+	if (zif) {
+		struct zebra_l2info_vxlan *vxl;
+
+		vxl = &zif->l2info.vxl;
+		ifp = zvni_map_to_svi(vxl->access_vlan,
+				zif->brslave_info.br_if);
+		if (ifp)
+			ifindex = ifp->ifindex;
+	}
+
+	is_router = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG);
+	old_mac_static = zebra_vxlan_mac_is_static(mac);
+
+	if (!n) {
+		uint32_t n_flags = 0;
+
+		/* New neighbor - create */
+		SET_FLAG(n_flags, ZEBRA_NEIGH_LOCAL);
+		if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT))
+			SET_FLAG(n_flags, ZEBRA_NEIGH_ES_PEER_PROXY);
+		else
+			SET_FLAG(n_flags, ZEBRA_NEIGH_ES_PEER_ACTIVE);
+		SET_FLAG(n_flags, ZEBRA_NEIGH_LOCAL_INACTIVE);
+
+		n = zvni_neigh_add(zvni, ipaddr, &mac->macaddr, mac,
+				n_flags);
+		n->ifindex = ifindex;
+		ZEBRA_NEIGH_SET_ACTIVE(n);
+
+		created = true;
+		inform_dataplane = true;
+		inform_bgp = true;
+		set_dp_inactive = true;
+	} else {
+		bool mac_change;
+		uint32_t old_flags = n->flags;
+		bool old_n_static;
+		bool new_n_static;
+
+		created = false;
+		old_n_static = zebra_vxlan_neigh_is_static(n);
+		old_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n);
+		old_router = !!CHECK_FLAG(n->flags,
+				ZEBRA_NEIGH_ROUTER_FLAG);
+
+		mac_change = !!memcmp(&n->emac, &mac->macaddr, ETH_ALEN);
+
+		/* deref and clear old info */
+		if (mac_change) {
+			if (old_bgp_ready) {
+				zvni_neigh_send_del_to_client(zvni->vni, &n->ip,
+						&n->emac, n->flags, n->state,
+						false /*force*/);
+				old_bgp_ready = false;
+			}
+			if (n->mac)
+				zebra_vxlan_local_neigh_deref_mac(n,
+						false /*send_mac_update*/);
+		}
+		/* clear old fwd info */
+		n->rem_seq = 0;
+		n->r_vtep_ip.s_addr = 0;
+
+		/* setup new flags */
+		n->flags = 0;
+		SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL);
+		/* retain activity flag if the neigh was
+		 * previously local
+		 */
+		if (old_flags & ZEBRA_NEIGH_LOCAL) {
+			n->flags |= (old_flags & ZEBRA_NEIGH_LOCAL_INACTIVE);
+		} else {
+			inform_dataplane = true;
+			set_dp_inactive = true;
+			n->flags |= ZEBRA_NEIGH_LOCAL_INACTIVE;
+		}
+
+		/* set the peer flag that matches the update; this is the
+		 * only place the peer-proxy/peer-active flags are rebuilt
+		 */
+		if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_PROXY_ADVERT)) {
+			SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_PROXY);
+			/* if the neigh was peer-active previously we
+			 * need to keep the flag and start the
+			 * holdtimer on it. the peer-active flag is
+			 * cleared on holdtimer expiry.
+			 */
+			if (CHECK_FLAG(old_flags,
+						ZEBRA_NEIGH_ES_PEER_ACTIVE)) {
+				SET_FLAG(n->flags,
+						ZEBRA_NEIGH_ES_PEER_ACTIVE);
+				zebra_vxlan_neigh_start_hold_timer(n);
+			}
+		} else {
+			SET_FLAG(n->flags, ZEBRA_NEIGH_ES_PEER_ACTIVE);
+			/* stop hold timer if a peer has verified
+			 * reachability
+			 */
+			zebra_vxlan_neigh_stop_hold_timer(n);
+		}
+		ZEBRA_NEIGH_SET_ACTIVE(n);
+
+		if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH &&
+		    (old_flags != n->flags))
+			zlog_debug("sync-neigh vni %u ip %s mac %s old_f 0x%x new_f 0x%x",
+				n->zvni->vni,
+				ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+				prefix_mac2str(&n->emac, macbuf,
+					sizeof(macbuf)),
+				old_flags, n->flags);
+
+		new_n_static = zebra_vxlan_neigh_is_static(n);
+		if (mac_change) {
+			set_dp_inactive = true;
+			n->flags |= ZEBRA_NEIGH_LOCAL_INACTIVE;
+			inform_dataplane = true;
+			zebra_vxlan_local_neigh_ref_mac(n, &mac->macaddr,
+					mac, false /*send_mac_update*/);
+		} else if (old_n_static != new_n_static) {
+			inform_dataplane = true;
+			/* if static flags have changed without a mac change
+			 * we need to create the correct sync-refs against
+			 * the existing mac
+			 */
+			zebra_vxlan_sync_neigh_static_chg(n,
+					old_n_static, new_n_static,
+					true /*defer_dp*/, true /*defer_mac_dp*/,
+					__func__);
+		}
+
+		/* Update the forwarding info. */
+		if (n->ifindex != ifindex) {
+			n->ifindex = ifindex;
+			inform_dataplane = true;
+		}
+	}
+
+	/* update the neigh seq. we don't bother with the mac seq as
+	 * sync_mac_update already took care of that
+	 */
+	tmp_seq = MAX(n->loc_seq, seq);
+	if (tmp_seq != n->loc_seq) {
+		n->loc_seq = tmp_seq;
+		inform_bgp = true;
+	}
+
+	/* Mark Router flag (R-bit) */
+	if (is_router)
+		SET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
+	else
+		UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG);
+
+	if (old_router != is_router)
+		inform_dataplane = true;
+
+	new_bgp_ready = zebra_vxlan_neigh_is_ready_for_bgp(n);
+	if (old_bgp_ready != new_bgp_ready)
+		inform_bgp = true;
+
+	/* install the mac now if its static state changed or if its
+	 * dataplane update was deferred until the neigh was processed
+	 */
+	new_mac_static = zebra_vxlan_mac_is_static(mac);
+	if ((old_mac_static != new_mac_static) ||
+	    ctx->mac_dp_update_deferred)
+		zebra_vxlan_sync_mac_dp_install(mac,
+				ctx->mac_inactive,
+				false /* force_clear_static */,
+				__func__);
+
+	if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+		zlog_debug("sync-neigh %s vni %u ip %s mac %s if %s(%d) seq %d f 0x%x%s%s",
+			created ?
+			"created" : "updated",
+			n->zvni->vni,
+			ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)),
+			prefix_mac2str(&n->emac, macbuf,
+				sizeof(macbuf)),
+			ifp ? ifp->name : "", ifindex,
+			n->loc_seq, n->flags,
+			inform_bgp ? " inform_bgp" : "",
+			inform_dataplane ? " inform_dp" : "");
+
+	if (inform_dataplane)
+		zebra_vxlan_sync_neigh_dp_install(n, set_dp_inactive,
+				false /* force_clear_static */, __func__);
+
+	if (inform_bgp)
+		zebra_vxlan_neigh_send_add_del_to_client(n,
+				old_bgp_ready, new_bgp_ready);
+
+	return n;
+}
+
+/* Process a sync MAC or MAC-IP add.
+ *
+ * Updates flagged sticky or remote-gw are ignored. When an IP is
+ * present (ipa_len != 0) and a neigh already exists for it, the update
+ * is dropped if its sequence number is not acceptable. Otherwise the
+ * MAC is processed first and, unless the MAC processing asked for the
+ * update to be ignored, returned no MAC, or there is no IP, the neigh
+ * is processed next.
+ */
+static void zebra_vxlan_process_sync_macip_add(zebra_vni_t *zvni,
+		struct ethaddr *macaddr,
+		uint16_t ipa_len,
+		struct ipaddr *ipaddr,
+		uint8_t flags,
+		uint32_t seq,
+		esi_t *esi)
+{
+	char ipbuf[INET6_ADDRSTRLEN];
+	char macbuf[ETHER_ADDR_STRLEN];
+	struct sync_mac_ip_ctx ctx;
+	zebra_neigh_t *n;
+	bool sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY);
+	bool remote_gw = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW);
+
+	/* if sticky or remote-gw ignore updates from the peer */
+	if (sticky || remote_gw) {
+		if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH ||
+		    IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+			zlog_debug("Ignore sync-macip vni %u mac %s%s%s%s%s",
+				zvni->vni,
+				prefix_mac2str(macaddr, macbuf, sizeof(macbuf)),
+				ipa_len ? " IP " : "",
+				ipa_len ?
+				ipaddr2str(ipaddr, ipbuf, sizeof(ipbuf)) : "",
+				sticky ? " sticky" : "",
+				remote_gw ? " remote_gw" : "");
+		return;
+	}
+
+	/* the neigh lookup and sequence check are done before the mac is
+	 * touched
+	 */
+	n = ipa_len ? zvni_neigh_lookup(zvni, ipaddr) : NULL;
+	if (n && !zebra_vxlan_neigh_is_bgp_seq_ok(zvni, n, macaddr, seq))
+		return;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.mac = zebra_vxlan_proc_sync_mac_update(zvni, macaddr, ipa_len,
+			ipaddr, flags, seq, esi, &ctx);
+	if (ctx.ignore_macip || !ctx.mac)
+		return;
+
+	/* mac-only update; there is no neigh to process */
+	if (!ipa_len)
+		return;
+
+	zebra_vxlan_proc_sync_neigh_update(zvni, n, ipa_len,
+			ipaddr, flags, seq, esi, &ctx);
+}
+
+/************************** remote mac-ip handling **************************/
/* Process a remote MACIP add from BGP. */
static void process_remote_macip_add(vni_t vni,
struct ethaddr *macaddr,
@@ -5639,7 +7078,8 @@ static void process_remote_macip_add(vni_t vni,
struct ipaddr *ipaddr,
uint8_t flags,
uint32_t seq,
- struct in_addr vtep_ip)
+ struct in_addr vtep_ip,
+ esi_t *esi)
{
zebra_vni_t *zvni;
zebra_vtep_t *zvtep;
@@ -5657,6 +7097,8 @@ static void process_remote_macip_add(vni_t vni,
bool is_router;
bool do_dad = false;
bool is_dup_detect = false;
+ esi_t *old_esi;
+ bool old_static = false;
/* Locate VNI hash entry - expected to exist. */
zvni = zvni_lookup(vni);
@@ -5677,22 +7119,36 @@ static void process_remote_macip_add(vni_t vni,
return;
}
+ /* Type-2 routes from another PE can be interpreted as remote or
+ * SYNC based on the destination ES -
+ * SYNC - if ES is local
+ * REMOTE - if ES is not local
+ */
+ if (flags & ZEBRA_MACIP_TYPE_SYNC_PATH) {
+ zebra_vxlan_process_sync_macip_add(zvni, macaddr, ipa_len,
+ ipaddr, flags, seq, esi);
+ return;
+ }
+
/* The remote VTEP specified should normally exist, but it is
* possible that when peering comes up, peer may advertise MACIP
* routes before advertising type-3 routes.
*/
- zvtep = zvni_vtep_find(zvni, &vtep_ip);
- if (!zvtep) {
- zvtep = zvni_vtep_add(zvni, &vtep_ip, VXLAN_FLOOD_DISABLED);
+ if (vtep_ip.s_addr) {
+ zvtep = zvni_vtep_find(zvni, &vtep_ip);
if (!zvtep) {
- flog_err(
- EC_ZEBRA_VTEP_ADD_FAILED,
- "Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD",
- vni, zvni);
- return;
- }
+ zvtep = zvni_vtep_add(zvni, &vtep_ip,
+ VXLAN_FLOOD_DISABLED);
+ if (!zvtep) {
+ flog_err(
+ EC_ZEBRA_VTEP_ADD_FAILED,
+ "Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD",
+ vni, zvni);
+ return;
+ }
- zvni_vtep_install(zvni, zvtep);
+ zvni_vtep_install(zvni, zvtep);
+ }
}
sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY);
@@ -5715,10 +7171,12 @@ static void process_remote_macip_add(vni_t vni,
return;
}
- zvrf = zebra_vrf_get_evpn();
+ zvrf = vrf_info_lookup(zvni->vxlan_if->vrf_id);
if (!zvrf)
return;
+ old_esi = (mac && mac->es) ? &mac->es->esi : zero_esi;
+
/* check if the remote MAC is unknown or has a change.
* If so, that needs to be updated first. Note that client could
* install MAC and MACIP separately or just install the latter.
@@ -5728,6 +7186,7 @@ static void process_remote_macip_add(vni_t vni,
|| sticky != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY)
|| remote_gw != !!CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE_DEF_GW)
|| !IPV4_ADDR_SAME(&mac->fwd_info.r_vtep_ip, &vtep_ip)
+ || memcmp(old_esi, esi, sizeof(esi_t))
|| seq != mac->rem_seq)
update_mac = 1;
@@ -5743,10 +7202,14 @@ static void process_remote_macip_add(vni_t vni,
return;
}
+ zebra_evpn_es_mac_ref(mac, esi);
+
/* Is this MAC created for a MACIP? */
if (ipa_len)
SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
} else {
+ zebra_evpn_es_mac_ref(mac, esi);
+
/* When host moves but changes its (MAC,IP)
* binding, BGP may install a MACIP entry that
* corresponds to "older" location of the host
@@ -5793,11 +7256,25 @@ static void process_remote_macip_add(vni_t vni,
do_dad = true;
/* Remove local MAC from BGP. */
- if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL))
- zvni_mac_send_del_to_client(zvni->vni, macaddr);
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
+ /* force drop the sync flags */
+ old_static = zebra_vxlan_mac_is_static(mac);
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("sync-mac->remote vni %u mac %s es %s seq %d f 0x%x",
+ zvni->vni,
+ prefix_mac2str(macaddr,
+ buf, sizeof(buf)),
+ mac->es ?
+ mac->es->esi_str : "-",
+ mac->loc_seq,
+ mac->flags);
+ zebra_vxlan_mac_clear_sync_info(mac);
+ zvni_mac_send_del_to_client(zvni->vni, macaddr,
+ mac->flags, false /* force */);
+ }
/* Set "auto" and "remote" forwarding info. */
- UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL);
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS);
memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
SET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);
mac->fwd_info.r_vtep_ip = vtep_ip;
@@ -5820,7 +7297,7 @@ static void process_remote_macip_add(vni_t vni,
if (!is_dup_detect) {
zvni_process_neigh_on_remote_mac_add(zvni, mac);
/* Install the entry. */
- zvni_mac_install(zvni, mac);
+ zvni_rem_mac_install(zvni, mac, old_static);
}
}
@@ -5835,6 +7312,7 @@ static void process_remote_macip_add(vni_t vni,
/* Reset flag */
do_dad = false;
+ old_static = false;
/* Check if the remote neighbor itself is unknown or has a
* change. If so, create or update and then install the entry.
@@ -5850,7 +7328,7 @@ static void process_remote_macip_add(vni_t vni,
if (update_neigh) {
if (!n) {
- n = zvni_neigh_add(zvni, ipaddr, macaddr);
+ n = zvni_neigh_add(zvni, ipaddr, macaddr, mac, 0);
if (!n) {
zlog_warn(
"Failed to add Neigh %s MAC %s VNI %u Remote VTEP %s",
@@ -5892,22 +7370,31 @@ static void process_remote_macip_add(vni_t vni,
tmp_seq);
return;
}
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) {
+ old_static = zebra_vxlan_neigh_is_static(n);
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+ zlog_debug("sync->remote neigh vni %u ip %s mac %s seq %d f0x%x",
+ n->zvni->vni,
+ ipaddr2str(&n->ip, buf1,
+ sizeof(buf1)),
+ prefix_mac2str(&n->emac, buf,
+ sizeof(buf)),
+ seq, n->flags);
+ zebra_vxlan_neigh_clear_sync_info(n);
+ if (IS_ZEBRA_NEIGH_ACTIVE(n))
+ zvni_mac_send_del_to_client(zvni->vni,
+ macaddr, mac->flags,
+ false /*force*/);
+ }
if (memcmp(&n->emac, macaddr, sizeof(*macaddr)) != 0) {
- /* MAC change, send a delete for old
- * neigh if learnt locally.
- */
- if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) &&
- IS_ZEBRA_NEIGH_ACTIVE(n))
- zvni_neigh_send_del_to_client(
- zvni->vni, &n->ip,
- &n->emac, 0, n->state);
-
/* update neigh list for macs */
old_mac = zvni_mac_lookup(zvni, &n->emac);
if (old_mac) {
listnode_delete(old_mac->neigh_list, n);
+ n->mac = NULL;
zvni_deref_ip2mac(zvni, old_mac);
}
+ n->mac = mac;
listnode_add_sort(mac->neigh_list, n);
memcpy(&n->emac, macaddr, ETH_ALEN);
@@ -5933,7 +7420,7 @@ static void process_remote_macip_add(vni_t vni,
}
/* Set "remote" forwarding info. */
- UNSET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL);
+ UNSET_FLAG(n->flags, ZEBRA_NEIGH_ALL_LOCAL_FLAGS);
n->r_vtep_ip = vtep_ip;
SET_FLAG(n->flags, ZEBRA_NEIGH_REMOTE);
@@ -5963,7 +7450,7 @@ static void process_remote_macip_add(vni_t vni,
false);
/* Install the entry. */
if (!is_dup_detect)
- zvni_neigh_install(zvni, n);
+ zvni_rem_neigh_install(zvni, n, old_static);
}
zvni_probe_neigh_on_mac_add(zvni, mac);
@@ -5972,6 +7459,32 @@ static void process_remote_macip_add(vni_t vni,
n->rem_seq = seq;
}
+/* Handle the deletion of a remote MAC.
+ *
+ * The neighs on the MAC are processed for the remote MAC delete and
+ * the MAC's remote sequence number is reset. If no remote neighs
+ * reference the MAC it is uninstalled, its ES reference is dropped and
+ * ZEBRA_MAC_REMOTE is cleared. Finally the MAC entry is deleted if its
+ * neigh list is empty; otherwise it is kept and flagged ZEBRA_MAC_AUTO.
+ */
+static void zebra_vxlan_rem_mac_del(zebra_vni_t *zvni,
+		zebra_mac_t *mac)
+{
+	zvni_process_neigh_on_remote_mac_del(zvni, mac);
+
+	/* the remote sequence number in the auto mac entry
+	 * needs to be reset to 0 as the mac entry may have
+	 * been removed on all VTEPs (including
+	 * the originating one)
+	 */
+	mac->rem_seq = 0;
+
+	/* If all remote neighbors referencing a remote MAC
+	 * go away, we need to uninstall the MAC.
+	 */
+	if (!remote_neigh_count(mac)) {
+		zvni_rem_mac_uninstall(zvni, mac);
+		zebra_evpn_es_mac_deref_entry(mac);
+		UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);
+	}
+
+	/* keep the entry around as an auto mac while neighs still
+	 * reference it
+	 */
+	if (!list_isempty(mac->neigh_list)) {
+		SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
+		return;
+	}
+
+	zvni_mac_del(zvni, mac);
+}
+
/* Process a remote MACIP delete from BGP. */
static void process_remote_macip_del(vni_t vni,
struct ethaddr *macaddr,
@@ -6013,11 +7526,6 @@ static void process_remote_macip_del(vni_t vni,
zns = zebra_ns_lookup(NS_DEFAULT);
vxl = &zif->l2info.vxl;
- /* It is possible remote vtep del request is processed prior to
- * remote macip route delete. remote_vtep_del does not clean up
- * the macip route delete. Explicite withdraw of the macip route
- * is expected to recieve. This handler removes the remote route.
- */
mac = zvni_mac_lookup(zvni, macaddr);
if (ipa_len)
n = zvni_neigh_lookup(zvni, ipaddr);
@@ -6076,11 +7584,14 @@ static void process_remote_macip_del(vni_t vni,
* "old" neighbor (as these are two different MACIP routes).
* Do the delete only if the MAC matches.
*/
- if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)
- && (memcmp(n->emac.octet, macaddr->octet, ETH_ALEN) == 0)) {
- zvni_neigh_uninstall(zvni, n);
- zvni_neigh_del(zvni, n);
- zvni_deref_ip2mac(zvni, mac);
+ if (!memcmp(n->emac.octet, macaddr->octet, ETH_ALEN)) {
+ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) {
+ zebra_vxlan_sync_neigh_del(n);
+ } else if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) {
+ zvni_neigh_uninstall(zvni, n);
+ zvni_neigh_del(zvni, n);
+ zvni_deref_ip2mac(zvni, mac);
+ }
}
} else {
/* DAD: when MAC is freeze state as remote learn event,
@@ -6103,27 +7614,11 @@ static void process_remote_macip_del(vni_t vni,
macaddr, vxl->access_vlan);
}
- if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
- zvni_process_neigh_on_remote_mac_del(zvni, mac);
- /*
- * the remote sequence number in the auto mac entry
- * needs to be reset to 0 as the mac entry may have
- * been removed on all VTEPs (including
- * the originating one)
- */
- mac->rem_seq = 0;
-
- /* If all remote neighbors referencing a remote MAC
- * go away, we need to uninstall the MAC.
- */
- if (remote_neigh_count(mac) == 0) {
- zvni_mac_uninstall(zvni, mac);
- UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);
- }
- if (list_isempty(mac->neigh_list))
- zvni_mac_del(zvni, mac);
- else
- SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
+ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
+ if (!ipa_len)
+ zebra_vxlan_sync_mac_del(mac);
+ } else if (CHECK_FLAG(mac->flags, ZEBRA_NEIGH_REMOTE)) {
+ zebra_vxlan_rem_mac_del(zvni, mac);
}
}
}
@@ -6559,8 +8054,7 @@ void zebra_vxlan_print_neigh_vni(struct vty *vty, struct zebra_vrf *zvrf,
vty_out(vty,
"Number of ARPs (local and remote) known for this VNI: %u\n",
num_neigh);
- vty_out(vty, "%*s %-6s %-8s %-17s %-21s %s\n", -wctx.addr_width,
- "IP", "Type", "State", "MAC", "Remote VTEP", "Seq #'s");
+ zvni_print_neigh_hdr(vty, &wctx);
} else
json_object_int_add(json, "numArpNd", num_neigh);
@@ -6770,9 +8264,9 @@ void zebra_vxlan_print_neigh_vni_dad(struct vty *vty,
vty_out(vty,
"Number of ARPs (local and remote) known for this VNI: %u\n",
num_neigh);
- vty_out(vty, "%*s %-6s %-8s %-17s %-21s\n",
+ vty_out(vty, "%*s %-6s %-8s %-17s %-30s\n",
-wctx.addr_width, "IP", "Type",
- "State", "MAC", "Remote VTEP");
+ "State", "MAC", "Remote ES/VTEP");
} else
json_object_int_add(json, "numArpNd", num_neigh);
@@ -6825,8 +8319,11 @@ void zebra_vxlan_print_macs_vni(struct vty *vty, struct zebra_vrf *zvrf,
vty_out(vty,
"Number of MACs (local and remote) known for this VNI: %u\n",
num_macs);
- vty_out(vty, "%-17s %-6s %-21s %-5s %s\n", "MAC", "Type",
- "Intf/Remote VTEP", "VLAN", "Seq #'s");
+ vty_out(vty,
+ "Flags: N=sync-neighs, I=local-inactive, P=peer-active, X=peer-proxy\n");
+ vty_out(vty, "%-17s %-6s %-5s %-30s %-5s %s\n", "MAC",
+ "Type", "Flags", "Intf/Remote ES/VTEP",
+ "VLAN", "Seq #'s");
} else
json_object_int_add(json, "numMacs", num_macs);
@@ -7018,8 +8515,8 @@ void zebra_vxlan_print_macs_vni_dad(struct vty *vty,
vty_out(vty,
"Number of MACs (local and remote) known for this VNI: %u\n",
num_macs);
- vty_out(vty, "%-17s %-6s %-21s %-5s\n", "MAC", "Type",
- "Intf/Remote VTEP", "VLAN");
+ vty_out(vty, "%-17s %-6s %-5s %-30s %-5s\n", "MAC", "Type",
+ "Flags", "Intf/Remote ES/VTEP", "VLAN");
} else
json_object_int_add(json, "numMacs", num_macs);
@@ -7076,7 +8573,8 @@ int zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, vni_t vni,
if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL))
ZEBRA_NEIGH_SET_INACTIVE(nbr);
else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE))
- zvni_neigh_install(zvni, nbr);
+ zvni_rem_neigh_install(zvni, nbr,
+ false /*was_static*/);
}
UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE);
@@ -7102,17 +8600,18 @@ int zebra_vxlan_clear_dup_detect_vni_mac(struct zebra_vrf *zvrf, vni_t vni,
if (zvni_mac_send_add_to_client(zvni->vni,
&mac->macaddr,
mac->flags,
- mac->loc_seq))
+ mac->loc_seq, mac->es))
return 0;
/* Process all neighbors associated with this MAC. */
- zvni_process_neigh_on_local_mac_change(zvni, mac, 0);
+ zvni_process_neigh_on_local_mac_change(zvni, mac, 0,
+ 0 /*es_change*/);
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
zvni_process_neigh_on_remote_mac_add(zvni, mac);
/* Install the entry. */
- zvni_mac_install(zvni, mac);
+ zvni_rem_mac_install(zvni, mac, false /* was_static */);
}
return 0;
@@ -7172,10 +8671,10 @@ int zebra_vxlan_clear_dup_detect_vni_ip(struct zebra_vrf *zvrf, vni_t vni,
if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) {
zvni_neigh_send_add_to_client(zvni->vni, ip,
- &nbr->emac,
+ &nbr->emac, nbr->mac,
nbr->flags, nbr->loc_seq);
} else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) {
- zvni_neigh_install(zvni, nbr);
+ zvni_rem_neigh_install(zvni, nbr, false /*was_static*/);
}
return 0;
@@ -7222,17 +8721,18 @@ static void zvni_clear_dup_mac_hash(struct hash_bucket *bucket, void *ctxt)
/* Inform to BGP */
if (zvni_mac_send_add_to_client(zvni->vni,
&mac->macaddr,
- mac->flags, mac->loc_seq))
+ mac->flags, mac->loc_seq, mac->es))
return;
/* Process all neighbors associated with this MAC. */
- zvni_process_neigh_on_local_mac_change(zvni, mac, 0);
+ zvni_process_neigh_on_local_mac_change(zvni, mac, 0,
+ 0 /*es_change*/);
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
zvni_process_neigh_on_remote_mac_add(zvni, mac);
/* Install the entry. */
- zvni_mac_install(zvni, mac);
+ zvni_rem_mac_install(zvni, mac, false /* was_static */);
}
}
@@ -7267,10 +8767,10 @@ static void zvni_clear_dup_neigh_hash(struct hash_bucket *bucket, void *ctxt)
if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) {
zvni_neigh_send_add_to_client(zvni->vni, &nbr->ip,
- &nbr->emac,
+ &nbr->emac, nbr->mac,
nbr->flags, nbr->loc_seq);
} else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) {
- zvni_neigh_install(zvni, nbr);
+ zvni_rem_neigh_install(zvni, nbr, false /*was_static*/);
}
}
@@ -7666,6 +9166,8 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp,
zebra_mac_t *zmac = NULL;
zebra_l3vni_t *zl3vni = NULL;
struct zebra_vrf *zvrf;
+ bool old_bgp_ready;
+ bool new_bgp_ready;
/* check if this is a remote neigh entry corresponding to remote
* next-hop
@@ -7720,7 +9222,36 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp,
* deleted it, it needs to be re-installed as Quagga is the owner.
*/
if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_REMOTE)) {
- zvni_neigh_install(zvni, n);
+ zvni_rem_neigh_install(zvni, n, false /*was_static*/);
+ return 0;
+ }
+
+ /* if this is a sync entry it cannot be dropped; re-install it in
+ * the dataplane
+ */
+ old_bgp_ready =
+ zebra_vxlan_neigh_is_ready_for_bgp(n);
+ if (zebra_vxlan_neigh_is_static(n)) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
+ zlog_debug("re-add sync neigh vni %u ip %s mac %s 0x%x",
+ n->zvni->vni,
+ ipaddr2str(&n->ip, buf, sizeof(buf)),
+ prefix_mac2str(&n->emac, buf2,
+ sizeof(buf2)),
+ n->flags);
+
+ if (!CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE))
+ SET_FLAG(n->flags, ZEBRA_NEIGH_LOCAL_INACTIVE);
+ /* inform-bgp about change in local-activity if any */
+ new_bgp_ready =
+ zebra_vxlan_neigh_is_ready_for_bgp(n);
+ zebra_vxlan_neigh_send_add_del_to_client(n,
+ old_bgp_ready, new_bgp_ready);
+
+ /* re-install the entry in the kernel */
+ zebra_vxlan_sync_neigh_dp_install(n, false /* set_inactive */,
+ false /* force_clear_static */, __func__);
+
return 0;
}
@@ -7740,7 +9271,9 @@ int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp,
ZEBRA_NEIGH_SET_INACTIVE(n);
/* Remove neighbor from BGP. */
- zvni_neigh_send_del_to_client(zvni->vni, &n->ip, &n->emac, 0, n->state);
+ zvni_neigh_send_del_to_client(zvni->vni, &n->ip,
+ &n->emac, n->flags, n->state,
+ false /* force */);
/* Delete this neighbor entry. */
zvni_neigh_del(zvni, n);
@@ -7765,7 +9298,8 @@ int zebra_vxlan_handle_kernel_neigh_update(struct interface *ifp,
struct ethaddr *macaddr,
uint16_t state,
bool is_ext,
- bool is_router)
+ bool is_router,
+ bool local_inactive, bool dp_static)
{
char buf[ETHER_ADDR_STRLEN];
char buf2[INET6_ADDRSTRLEN];
@@ -7786,19 +9320,20 @@ int zebra_vxlan_handle_kernel_neigh_update(struct interface *ifp,
if (!zvni)
return 0;
- if (IS_ZEBRA_DEBUG_VXLAN)
+ if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH)
zlog_debug(
- "Add/Update neighbor %s MAC %s intf %s(%u) state 0x%x %s %s-> L2-VNI %u",
+ "Add/Update neighbor %s MAC %s intf %s(%u) state 0x%x %s%s%s-> L2-VNI %u",
ipaddr2str(ip, buf2, sizeof(buf2)),
prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name,
ifp->ifindex, state, is_ext ? "ext-learned " : "",
is_router ? "router " : "",
+ local_inactive ? "local_inactive " : "",
zvni->vni);
/* Is this about a local neighbor or a remote one? */
if (!is_ext)
return zvni_local_neigh_update(zvni, ifp, ip, macaddr,
- is_router);
+ is_router, local_inactive, dp_static);
return zvni_remote_neigh_update(zvni, ifp, ip, macaddr, state);
}
@@ -7807,7 +9342,7 @@ static int32_t
zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni,
struct ethaddr *macaddr, uint16_t *ipa_len,
struct ipaddr *ip, struct in_addr *vtep_ip,
- uint8_t *flags, uint32_t *seq)
+ uint8_t *flags, uint32_t *seq, esi_t *esi)
{
uint16_t l = 0;
@@ -7845,6 +9380,8 @@ zebra_vxlan_remote_macip_helper(bool add, struct stream *s, vni_t *vni,
STREAM_GETC(s, *flags);
STREAM_GETL(s, *seq);
l += 5;
+ STREAM_GET(esi, s, sizeof(esi_t));
+ l += sizeof(esi_t);
}
return l;
@@ -7876,7 +9413,7 @@ void zebra_vxlan_remote_macip_del(ZAPI_HANDLER_ARGS)
while (l < hdr->length) {
int res_length = zebra_vxlan_remote_macip_helper(
false, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip, NULL,
- NULL);
+ NULL, NULL);
if (res_length == -1)
goto stream_failure;
@@ -7917,6 +9454,8 @@ void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS)
uint32_t seq;
char buf[ETHER_ADDR_STRLEN];
char buf1[INET6_ADDRSTRLEN];
+ esi_t esi;
+ char esi_buf[ESI_STR_LEN];
memset(&macaddr, 0, sizeof(struct ethaddr));
memset(&ip, 0, sizeof(struct ipaddr));
@@ -7932,25 +9471,32 @@ void zebra_vxlan_remote_macip_add(ZAPI_HANDLER_ARGS)
while (l < hdr->length) {
int res_length = zebra_vxlan_remote_macip_helper(
true, s, &vni, &macaddr, &ipa_len, &ip, &vtep_ip,
- &flags, &seq);
+ &flags, &seq, &esi);
if (res_length == -1)
goto stream_failure;
l += res_length;
- if (IS_ZEBRA_DEBUG_VXLAN)
+ if (IS_ZEBRA_DEBUG_VXLAN) {
+ if (memcmp(&esi, zero_esi, sizeof(esi_t)))
+ esi_to_str(&esi, esi_buf, sizeof(esi_buf));
+ else
+ strlcpy(esi_buf, "-", ESI_STR_LEN);
zlog_debug(
- "Recv MACIP ADD VNI %u MAC %s%s%s flags 0x%x seq %u VTEP %s from %s",
+ "Recv %sMACIP ADD VNI %u MAC %s%s%s flags 0x%x seq %u VTEP %s ESI %s from %s",
+ (flags & ZEBRA_MACIP_TYPE_SYNC_PATH) ?
+ "sync-" : "",
vni,
prefix_mac2str(&macaddr, buf, sizeof(buf)),
ipa_len ? " IP " : "",
ipa_len ?
ipaddr2str(&ip, buf1, sizeof(buf1)) : "",
- flags, seq, inet_ntoa(vtep_ip),
+ flags, seq, inet_ntoa(vtep_ip), esi_buf,
zebra_route_string(client->proto));
+ }
process_remote_macip_add(vni, &macaddr, ipa_len, &ip,
- flags, seq, vtep_ip);
+ flags, seq, vtep_ip, &esi);
}
stream_failure:
@@ -8049,7 +9595,8 @@ int zebra_vxlan_check_del_local_mac(struct interface *ifp,
ifp->ifindex, vni, mac->flags);
/* Remove MAC from BGP. */
- zvni_mac_send_del_to_client(zvni->vni, macaddr);
+ zvni_mac_send_del_to_client(zvni->vni, macaddr,
+ mac->flags, false /* force */);
/*
* If there are no neigh associated with the mac delete the mac
@@ -8058,7 +9605,7 @@ int zebra_vxlan_check_del_local_mac(struct interface *ifp,
if (!listcount(mac->neigh_list)) {
zvni_mac_del(zvni, mac);
} else {
- UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL);
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS);
UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY);
SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
}
@@ -8116,7 +9663,7 @@ int zebra_vxlan_check_readd_remote_mac(struct interface *ifp,
prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name,
ifp->ifindex, vni);
- zvni_mac_install(zvni, mac);
+ zvni_rem_mac_install(zvni, mac, false /* was_static */);
return 0;
}
@@ -8129,6 +9676,8 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if,
zebra_vni_t *zvni;
zebra_mac_t *mac;
char buf[ETHER_ADDR_STRLEN];
+ bool old_bgp_ready;
+ bool new_bgp_ready;
/* We are interested in MACs only on ports or (port, VLAN) that
* map to a VNI.
@@ -8158,11 +9707,46 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if,
ifp->ifindex, vid, zvni->vni, mac->loc_seq,
mac->flags, listcount(mac->neigh_list));
+ old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ if (zebra_vxlan_mac_is_static(mac)) {
+ /* this is a synced entry and can only be removed when the
+ * es-peers stop advertising it.
+ */
+ memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
+
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("re-add sync-mac vni %u mac %s es %s seq %d f 0x%x",
+ zvni->vni,
+ prefix_mac2str(macaddr,
+ buf, sizeof(buf)),
+ mac->es ? mac->es->esi_str : "-",
+ mac->loc_seq,
+ mac->flags);
+
+ /* inform-bgp about change in local-activity if any */
+ if (!CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE)) {
+ SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE);
+ new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ zebra_vxlan_mac_send_add_del_to_client(mac,
+ old_bgp_ready, new_bgp_ready);
+ }
+
+ /* re-install the entry in the kernel */
+ zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */,
+ false /* force_clear_static */,
+ __func__);
+
+ return 0;
+ }
+
/* Update all the neigh entries associated with this mac */
zvni_process_neigh_on_local_mac_del(zvni, mac);
/* Remove MAC from BGP. */
- zvni_mac_send_del_to_client(zvni->vni, macaddr);
+ zvni_mac_send_del_to_client(zvni->vni, macaddr,
+ mac->flags, false /* force */);
+
+ zebra_evpn_es_mac_deref_entry(mac);
/*
* If there are no neigh associated with the mac delete the mac
@@ -8171,7 +9755,7 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if,
if (!listcount(mac->neigh_list)) {
zvni_mac_del(zvni, mac);
} else {
- UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL);
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_ALL_LOCAL_FLAGS);
UNSET_FLAG(mac->flags, ZEBRA_MAC_STICKY);
SET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
}
@@ -8179,13 +9763,36 @@ int zebra_vxlan_local_mac_del(struct interface *ifp, struct interface *br_if,
return 0;
}
+/* update local forwarding info. return true if a dest-ES change
+ * is detected
+ */
+static bool zebra_vxlan_local_mac_update_fwd_info(zebra_mac_t *mac,
+ struct interface *ifp, vlanid_t vid)
+{
+ struct zebra_if *zif = ifp->info;
+ bool es_change;
+
+ memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
+
+ es_change = zebra_evpn_es_mac_ref_entry(mac, zif->es_info.es);
+
+ if (!mac->es) {
+ /* if es is set fwd_info is not relevant and is left zeroed */
+ mac->fwd_info.local.ifindex = ifp->ifindex;
+ mac->fwd_info.local.vid = vid;
+ }
+
+ return es_change;
+}
+
/*
* Handle local MAC add (on a port or VLAN corresponding to this VNI).
*/
int zebra_vxlan_local_mac_add_update(struct interface *ifp,
struct interface *br_if,
struct ethaddr *macaddr, vlanid_t vid,
- bool sticky)
+ bool sticky, bool local_inactive,
+ bool dp_static)
{
zebra_vni_t *zvni;
zebra_mac_t *mac;
@@ -8196,11 +9803,13 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
bool upd_neigh = false;
bool is_dup_detect = false;
struct in_addr vtep_ip = {.s_addr = 0};
- ns_id_t local_ns_id = NS_DEFAULT;
-
- zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id);
- if (zvrf && zvrf->zns)
- local_ns_id = zvrf->zns->ns_id;
+ bool es_change = false;
+ bool new_bgp_ready;
+ /* assume inactive if not present or if not local */
+ bool old_local_inactive = true;
+ bool old_bgp_ready = false;
+ bool inform_dataplane = false;
+ bool new_static = false;
/* We are interested in MACs only on ports or (port, VLAN) that
* map to a VNI.
@@ -8224,22 +9833,24 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
return -1;
}
- zvrf = zebra_vrf_get_evpn();
+ zvrf = vrf_info_lookup(zvni->vxlan_if->vrf_id);
if (!zvrf) {
if (IS_ZEBRA_DEBUG_VXLAN)
- zlog_debug(" No Evpn Global Vrf found");
+ zlog_debug(" No Vrf found for vrf_id: %d",
+ zvni->vxlan_if->vrf_id);
return -1;
}
/* Check if we need to create or update or it is a NO-OP. */
mac = zvni_mac_lookup(zvni, macaddr);
if (!mac) {
- if (IS_ZEBRA_DEBUG_VXLAN)
+ if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
zlog_debug(
- "ADD %sMAC %s intf %s(%u) VID %u -> VNI %u",
+ "ADD %sMAC %s intf %s(%u) VID %u -> VNI %u%s",
sticky ? "sticky " : "",
prefix_mac2str(macaddr, buf, sizeof(buf)),
- ifp->name, ifp->ifindex, vid, zvni->vni);
+ ifp->name, ifp->ifindex, vid, zvni->vni,
+ local_inactive ? " local-inactive" : "");
mac = zvni_mac_add(zvni, macaddr);
if (!mac) {
@@ -8251,23 +9862,33 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
return -1;
}
SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL);
- mac->fwd_info.local.ifindex = ifp->ifindex;
- mac->fwd_info.local.ns_id = local_ns_id;
- mac->fwd_info.local.vid = vid;
+ es_change = zebra_vxlan_local_mac_update_fwd_info(mac,
+ ifp, vid);
if (sticky)
SET_FLAG(mac->flags, ZEBRA_MAC_STICKY);
inform_client = true;
-
} else {
- if (IS_ZEBRA_DEBUG_VXLAN)
+ if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_MAC)
zlog_debug(
- "UPD %sMAC %s intf %s(%u) VID %u -> VNI %u curFlags 0x%x",
+ "UPD %sMAC %s intf %s(%u) VID %u -> VNI %u %scurFlags 0x%x",
sticky ? "sticky " : "",
prefix_mac2str(macaddr, buf, sizeof(buf)),
ifp->name, ifp->ifindex, vid, zvni->vni,
+ local_inactive ? "local-inactive " : "",
mac->flags);
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
+ struct interface *old_ifp;
+ vlanid_t old_vid;
+ bool old_static;
+
+ zebra_vxlan_mac_get_access_info(mac,
+ &old_ifp, &old_vid);
+ old_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(
+ mac->flags);
+ old_local_inactive = !!(mac->flags &
+ ZEBRA_MAC_LOCAL_INACTIVE);
+ old_static = zebra_vxlan_mac_is_static(mac);
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_STICKY))
mac_sticky = true;
@@ -8276,17 +9897,20 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
* BGP, note it.
*/
if (mac_sticky == sticky
- && mac->fwd_info.local.ifindex == ifp->ifindex
- && mac->fwd_info.local.ns_id == local_ns_id
- && mac->fwd_info.local.vid == vid) {
+ && old_ifp == ifp
+ && old_vid == vid
+ && old_local_inactive == local_inactive
+ && dp_static == old_static) {
if (IS_ZEBRA_DEBUG_VXLAN)
zlog_debug(
- " Add/Update %sMAC %s intf %s(%u) VID %u -> VNI %u, entry exists and has not changed ",
+ " Add/Update %sMAC %s intf %s(%u) VID %u -> VNI %u%s, entry exists and has not changed ",
sticky ? "sticky " : "",
prefix_mac2str(macaddr, buf,
sizeof(buf)),
ifp->name, ifp->ifindex, vid,
- zvni->vni);
+ zvni->vni,
+ local_inactive ?
+ " local_inactive" : "");
return 0;
}
if (mac_sticky != sticky) {
@@ -8299,11 +9923,31 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
inform_client = true;
}
- memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
- mac->fwd_info.local.ifindex = ifp->ifindex;
- mac->fwd_info.local.ns_id = local_ns_id;
- mac->fwd_info.local.vid = vid;
-
+ es_change = zebra_vxlan_local_mac_update_fwd_info(mac,
+ ifp, vid);
+ /* If an es_change is detected we need to advertise
+ * the route with a sequence that is one
+ * greater. This is needed to indicate a mac-move
+ * to the ES peers
+ */
+ if (es_change) {
+ mac->loc_seq = mac->loc_seq + 1;
+ /* force drop the peer/sync info as it is
+ * simply no longer relevant
+ */
+ if (CHECK_FLAG(mac->flags,
+ ZEBRA_MAC_ALL_PEER_FLAGS)) {
+ zebra_vxlan_mac_clear_sync_info(mac);
+ new_static =
+ zebra_vxlan_mac_is_static(mac);
+ /* if we clear peer-flags we
+ * also need to notify the dataplane
+ * to drop the static flag
+ */
+ if (old_static != new_static)
+ inform_dataplane = true;
+ }
+ }
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) ||
CHECK_FLAG(mac->flags, ZEBRA_MAC_AUTO)) {
bool do_dad = false;
@@ -8337,10 +9981,8 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
UNSET_FLAG(mac->flags, ZEBRA_MAC_REMOTE);
UNSET_FLAG(mac->flags, ZEBRA_MAC_AUTO);
SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL);
- memset(&mac->fwd_info, 0, sizeof(mac->fwd_info));
- mac->fwd_info.local.ifindex = ifp->ifindex;
- mac->fwd_info.local.ns_id = local_ns_id;
- mac->fwd_info.local.vid = vid;
+ es_change = zebra_vxlan_local_mac_update_fwd_info(mac,
+ ifp, vid);
if (sticky)
SET_FLAG(mac->flags, ZEBRA_MAC_STICKY);
else
@@ -8363,16 +10005,57 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp,
}
}
- /* Inform BGP if required. */
- if (inform_client) {
- if (zvni_mac_send_add_to_client(zvni->vni, macaddr,
- mac->flags, mac->loc_seq))
- return -1;
+ /* if the dataplane thinks the entry is sync but it is
+ * not sync in zebra we need to re-install to fixup
+ */
+ if (dp_static) {
+ new_static = zebra_vxlan_mac_is_static(mac);
+ if (!new_static)
+ inform_dataplane = true;
}
+ if (local_inactive)
+ SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE);
+ else
+ UNSET_FLAG(mac->flags, ZEBRA_MAC_LOCAL_INACTIVE);
+
+ new_bgp_ready = zebra_vxlan_mac_is_ready_for_bgp(mac->flags);
+ /* if local-activity has changed we need to update bgp
+ * even if bgp already knows about the mac
+ */
+ if ((old_local_inactive != local_inactive) ||
+ (new_bgp_ready != old_bgp_ready)) {
+ if (IS_ZEBRA_DEBUG_EVPN_MH_MAC)
+ zlog_debug("local mac vni %u mac %s es %s seq %d f 0x%x%s",
+ zvni->vni,
+ prefix_mac2str(macaddr,
+ buf, sizeof(buf)),
+ mac->es ? mac->es->esi_str : "",
+ mac->loc_seq,
+ mac->flags,
+ local_inactive ?
+ " local-inactive" : "");
+ inform_client = true;
+ }
+
+ if (es_change) {
+ inform_client = true;
+ upd_neigh = true;
+ }
+
+ /* Inform dataplane if required. */
+ if (inform_dataplane)
+ zebra_vxlan_sync_mac_dp_install(mac, false /* set_inactive */,
+ false /* force_clear_static */, __func__);
+
+ /* Inform BGP if required. */
+ if (inform_client)
+ zebra_vxlan_mac_send_add_del_to_client(mac,
+ old_bgp_ready, new_bgp_ready);
+
/* Process all neighbors associated with this MAC, if required. */
if (upd_neigh)
- zvni_process_neigh_on_local_mac_change(zvni, mac, 0);
+ zvni_process_neigh_on_local_mac_change(zvni, mac, 0, es_change);
return 0;
}
@@ -8892,7 +10575,7 @@ int zebra_vxlan_if_down(struct interface *ifp)
assert(zvni->vxlan_if == ifp);
/* Delete this VNI from BGP. */
- zvni_send_del_to_client(zvni->vni);
+ zvni_send_del_to_client(zvni);
/* Free up all neighbors and MACs, if any. */
zvni_neigh_del_all(zvni, 1, 0, DEL_ALL_NEIGH);
@@ -9033,9 +10716,8 @@ int zebra_vxlan_if_del(struct interface *ifp)
zl3vni = zl3vni_from_vrf(zvni->vrf_id);
if (zl3vni)
listnode_delete(zl3vni->l2vnis, zvni);
-
/* Delete VNI from BGP. */
- zvni_send_del_to_client(zvni->vni);
+ zvni_send_del_to_client(zvni);
/* Free up all neighbors and MAC, if any. */
zvni_neigh_del_all(zvni, 0, 0, DEL_ALL_NEIGH);
@@ -9154,7 +10836,7 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags)
&& (zif->brslave_info.bridge_ifindex == IFINDEX_INTERNAL)) {
/* Delete from client, remove all remote VTEPs */
/* Also, free up all MACs and neighbors. */
- zvni_send_del_to_client(zvni->vni);
+ zvni_send_del_to_client(zvni);
zvni_neigh_del_all(zvni, 1, 0, DEL_ALL_NEIGH);
zvni_mac_del_all(zvni, 1, 0, DEL_ALL_MAC);
zvni_vtep_del_all(zvni, 1);
@@ -9177,9 +10859,12 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags)
zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp);
zvni->local_vtep_ip = vxl->vtep_ip;
zvni->mcast_grp = vxl->mcast_grp;
+ /* on local vtep-ip check if ES orig-ip
+ * needs to be updated
+ */
+ zebra_evpn_es_set_base_vni(zvni);
}
- zvni->vxlan_if = ifp;
-
+ zvni_vxlan_if_set(zvni, ifp, true /* set */);
/* Take further actions needed.
* Note that if we are here, there is a change of interest.
*/
@@ -9290,8 +10975,12 @@ int zebra_vxlan_if_add(struct interface *ifp)
zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp);
zvni->local_vtep_ip = vxl->vtep_ip;
zvni->mcast_grp = vxl->mcast_grp;
+ /* on local vtep-ip check if ES orig-ip
+ * needs to be updated
+ */
+ zebra_evpn_es_set_base_vni(zvni);
}
- zvni->vxlan_if = ifp;
+ zvni_vxlan_if_set(zvni, ifp, true /* set */);
vlan_if = zvni_map_to_svi(vxl->access_vlan,
zif->brslave_info.br_if);
if (vlan_if) {
@@ -9811,25 +11500,6 @@ stream_failure:
return;
}
-static int macfdb_read_ns(struct ns *ns,
- void *_in_param __attribute__((unused)),
- void **out_param __attribute__((unused)))
-{
- struct zebra_ns *zns = ns->info;
-
- macfdb_read(zns);
- return NS_WALK_CONTINUE;
-}
-
-static int neigh_read_ns(struct ns *ns,
- void *_in_param __attribute__((unused)),
- void **out_param __attribute__((unused)))
-{
- struct zebra_ns *zns = ns->info;
-
- neigh_read(zns);
- return NS_WALK_CONTINUE;
-}
/*
* Handle message from client to learn (or stop learning) about VNIs and MACs.
@@ -9871,6 +11541,9 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS)
/* Note BUM handling */
zvrf->vxlan_flood_ctrl = flood_ctrl;
+ /* Replay all ESs */
+ zebra_evpn_es_send_all_to_client(true /* add */);
+
/* Build VNI hash table and inform BGP. */
zvni_build_hash_table();
@@ -9879,16 +11552,19 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS)
NULL);
/* Read the MAC FDB */
- ns_walk_func(macfdb_read_ns, NULL, NULL);
+ macfdb_read(zvrf->zns);
/* Read neighbors */
- ns_walk_func(neigh_read_ns, NULL, NULL);
+ neigh_read(zvrf->zns);
} else {
/* Cleanup VTEPs for all VNIs - uninstall from
* kernel and free entries.
*/
hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf);
+ /* Delete all ESs in BGP */
+ zebra_evpn_es_send_all_to_client(false /* add */);
+
/* cleanup all l3vnis */
hash_iterate(zrouter.l3vni_table, zl3vni_cleanup_all, NULL);
@@ -9917,10 +11593,15 @@ void zebra_vxlan_init_tables(struct zebra_vrf *zvrf)
/* Cleanup VNI info, but don't free the table. */
void zebra_vxlan_cleanup_tables(struct zebra_vrf *zvrf)
{
+ struct zebra_vrf *evpn_zvrf = zebra_vrf_get_evpn();
+
if (!zvrf)
return;
hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf);
hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL);
+
+ if (zvrf == evpn_zvrf)
+ zebra_evpn_es_cleanup();
}
/* Close all VNI handling */
@@ -9938,12 +11619,14 @@ void zebra_vxlan_init(void)
zrouter.l3vni_table = hash_create(l3vni_hash_keymake, l3vni_hash_cmp,
"Zebra VRF L3 VNI table");
zrouter.evpn_vrf = NULL;
+ zebra_evpn_mh_init();
}
/* free l3vni table */
void zebra_vxlan_disable(void)
{
hash_free(zrouter.l3vni_table);
+ zebra_evpn_mh_terminate();
}
/* get the l3vni svi ifindex */
@@ -10000,9 +11683,9 @@ static int zebra_vxlan_dad_ip_auto_recovery_exp(struct thread *t)
/* Send to BGP */
if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL)) {
zvni_neigh_send_add_to_client(zvni->vni, &nbr->ip, &nbr->emac,
- nbr->flags, nbr->loc_seq);
+ nbr->mac, nbr->flags, nbr->loc_seq);
} else if (!!CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE)) {
- zvni_neigh_install(zvni, nbr);
+ zvni_rem_neigh_install(zvni, nbr, false /*was_static*/);
}
return 0;
@@ -10045,7 +11728,8 @@ static int zebra_vxlan_dad_mac_auto_recovery_exp(struct thread *t)
if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_LOCAL))
ZEBRA_NEIGH_SET_INACTIVE(nbr);
else if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_REMOTE))
- zvni_neigh_install(zvni, nbr);
+ zvni_rem_neigh_install(zvni, nbr,
+ false /*was_static*/);
}
UNSET_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE);
@@ -10064,17 +11748,18 @@ static int zebra_vxlan_dad_mac_auto_recovery_exp(struct thread *t)
if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) {
/* Inform to BGP */
if (zvni_mac_send_add_to_client(zvni->vni, &mac->macaddr,
- mac->flags, mac->loc_seq))
+ mac->flags, mac->loc_seq, mac->es))
return -1;
/* Process all neighbors associated with this MAC. */
- zvni_process_neigh_on_local_mac_change(zvni, mac, 0);
+ zvni_process_neigh_on_local_mac_change(zvni, mac, 0,
+ 0 /*es_change*/);
} else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE)) {
zvni_process_neigh_on_remote_mac_add(zvni, mac);
/* Install the entry. */
- zvni_mac_install(zvni, mac);
+ zvni_rem_mac_install(zvni, mac, false /* was_static */);
}
return 0;
@@ -10344,7 +12029,7 @@ static void zvni_send_mac_hash_entry_to_client(struct hash_bucket *bucket,
if (CHECK_FLAG(zmac->flags, ZEBRA_MAC_LOCAL))
zvni_mac_send_add_to_client(wctx->zvni->vni, &zmac->macaddr,
- zmac->flags, zmac->loc_seq);
+ zmac->flags, zmac->loc_seq, zmac->es);
}
/* Iterator to Notify Local MACs of a L2VNI */
@@ -10380,7 +12065,7 @@ static void zvni_send_neigh_hash_entry_to_client(struct hash_bucket *bucket,
return;
zvni_neigh_send_add_to_client(wctx->zvni->vni, &zn->ip,
- &zn->emac, zn->flags,
+ &zn->emac, zn->mac, zn->flags,
zn->loc_seq);
}
}
diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h
index 064dda6cd0..9c8af9d1fc 100644
--- a/zebra/zebra_vxlan.h
+++ b/zebra/zebra_vxlan.h
@@ -165,14 +165,15 @@ extern int zebra_vxlan_svi_down(struct interface *ifp,
extern int zebra_vxlan_handle_kernel_neigh_update(
struct interface *ifp, struct interface *link_if, struct ipaddr *ip,
struct ethaddr *macaddr, uint16_t state, bool is_ext,
- bool is_router);
+ bool is_router, bool local_inactive, bool dp_static);
extern int zebra_vxlan_handle_kernel_neigh_del(struct interface *ifp,
struct interface *link_if,
struct ipaddr *ip);
extern int zebra_vxlan_local_mac_add_update(struct interface *ifp,
struct interface *br_if,
struct ethaddr *mac, vlanid_t vid,
- bool sticky);
+ bool sticky, bool local_inactive,
+ bool dp_static);
extern int zebra_vxlan_local_mac_del(struct interface *ifp,
struct interface *br_if,
struct ethaddr *mac, vlanid_t vid);
@@ -217,6 +218,7 @@ extern void zebra_vxlan_handle_result(struct zebra_dplane_ctx *ctx);
extern void zebra_evpn_init(void);
extern void zebra_vxlan_macvlan_up(struct interface *ifp);
extern void zebra_vxlan_macvlan_down(struct interface *ifp);
+extern int vni_list_cmp(void *p1, void *p2);
#ifdef __cplusplus
}
diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h
index e4b06054b2..e2eae56873 100644
--- a/zebra/zebra_vxlan_private.h
+++ b/zebra/zebra_vxlan_private.h
@@ -62,6 +62,9 @@ struct zebra_vtep_t_ {
struct zebra_vtep_t_ *prev;
};
+RB_HEAD(zebra_es_evi_rb_head, zebra_evpn_es_evi);
+RB_PROTOTYPE(zebra_es_evi_rb_head, zebra_evpn_es_evi, rb_node,
+ zebra_es_evi_rb_cmp);
/*
* VNI hash table
@@ -73,6 +76,10 @@ struct zebra_vni_t_ {
/* VNI - key */
vni_t vni;
+ /* ES flags */
+ uint32_t flags;
+#define ZVNI_READY_FOR_BGP (1 << 0) /* ready to be sent to BGP */
+
/* Flag for advertising gw macip */
uint8_t advertise_gw_macip;
@@ -102,6 +109,12 @@ struct zebra_vni_t_ {
/* List of local or remote neighbors (MAC+IP) */
struct hash *neigh_table;
+
+ /* RB tree of ES-EVIs */
+ struct zebra_es_evi_rb_head es_evi_rb_tree;
+
+ /* List of local ESs */
+ struct list *local_es_evi_list;
};
/* L3 VNI hash table */
@@ -302,6 +315,23 @@ struct zebra_mac_t_ {
#define ZEBRA_MAC_REMOTE_DEF_GW 0x40
#define ZEBRA_MAC_DUPLICATE 0x80
#define ZEBRA_MAC_FPM_SENT 0x100 /* whether or not this entry was sent. */
+/* MAC is locally active on an ethernet segment peer */
+#define ZEBRA_MAC_ES_PEER_ACTIVE 0x200
+/* MAC has been proxy-advertised by peers. This means we need to
+ * keep the entry for forwarding but cannot advertise it
+ */
+#define ZEBRA_MAC_ES_PEER_PROXY 0x400
+/* We have not been able to independently establish that the host is
+ * locally connected but one or more ES peers claim it is.
+ * We will maintain the entry for forwarding purposes and continue
+ * to advertise it as locally attached but with a "proxy" flag
+ */
+#define ZEBRA_MAC_LOCAL_INACTIVE 0x800
+
+#define ZEBRA_MAC_ALL_LOCAL_FLAGS (ZEBRA_MAC_LOCAL |\
+ ZEBRA_MAC_LOCAL_INACTIVE)
+#define ZEBRA_MAC_ALL_PEER_FLAGS (ZEBRA_MAC_ES_PEER_PROXY |\
+ ZEBRA_MAC_ES_PEER_ACTIVE)
/* back pointer to zvni */
zebra_vni_t *zvni;
@@ -310,13 +340,17 @@ struct zebra_mac_t_ {
union {
struct {
ifindex_t ifindex;
- ns_id_t ns_id;
vlanid_t vid;
} local;
struct in_addr r_vtep_ip;
} fwd_info;
+ /* Local or remote ES */
+ struct zebra_evpn_es *es;
+ /* memory used to link the mac to the es */
+ struct listnode es_listnode;
+
/* Mobility sequence numbers associated with this entry. */
uint32_t rem_seq;
uint32_t loc_seq;
@@ -335,6 +369,14 @@ struct zebra_mac_t_ {
struct timeval detect_start_time;
time_t dad_dup_detect_time;
+
+ /* used for ageing out the PEER_ACTIVE flag */
+ struct thread *hold_timer;
+
+ /* number of neigh entries (using this mac) that have
+ * ZEBRA_NEIGH_ES_PEER_ACTIVE or ZEBRA_NEIGH_ES_PEER_PROXY
+ */
+ uint32_t sync_neigh_cnt;
};
/*
@@ -366,6 +408,17 @@ struct rmac_walk_ctx {
struct json_object *json;
};
+/* temporary datastruct to pass info between the mac-update and
+ * neigh-update while handling mac-ip routes
+ */
+struct sync_mac_ip_ctx {
+ bool ignore_macip;
+ bool mac_created;
+ bool mac_inactive;
+ bool mac_dp_update_deferred;
+ zebra_mac_t *mac;
+};
+
#define IS_ZEBRA_NEIGH_ACTIVE(n) (n->state == ZEBRA_NEIGH_ACTIVE)
#define IS_ZEBRA_NEIGH_INACTIVE(n) (n->state == ZEBRA_NEIGH_INACTIVE)
@@ -392,6 +445,9 @@ struct zebra_neigh_t_ {
/* MAC address. */
struct ethaddr emac;
+ /* Back pointer to MAC. Only applicable to hosts in a L2-VNI. */
+ zebra_mac_t *mac;
+
/* Underlying interface. */
ifindex_t ifindex;
@@ -405,6 +461,18 @@ struct zebra_neigh_t_ {
#define ZEBRA_NEIGH_ROUTER_FLAG 0x10
#define ZEBRA_NEIGH_DUPLICATE 0x20
#define ZEBRA_NEIGH_SVI_IP 0x40
+/* rxed from an ES peer */
+#define ZEBRA_NEIGH_ES_PEER_ACTIVE 0x80
+/* rxed from an ES peer as a proxy advertisement */
+#define ZEBRA_NEIGH_ES_PEER_PROXY 0x100
+/* We have not been able to independently establish that the host
+ * is locally connected
+ */
+#define ZEBRA_NEIGH_LOCAL_INACTIVE 0x200
+#define ZEBRA_NEIGH_ALL_LOCAL_FLAGS (ZEBRA_NEIGH_LOCAL |\
+ ZEBRA_NEIGH_LOCAL_INACTIVE)
+#define ZEBRA_NEIGH_ALL_PEER_FLAGS (ZEBRA_NEIGH_ES_PEER_PROXY |\
+ ZEBRA_NEIGH_ES_PEER_ACTIVE)
enum zebra_neigh_state state;
@@ -432,6 +500,9 @@ struct zebra_neigh_t_ {
struct timeval detect_start_time;
time_t dad_dup_detect_time;
+
+ /* used for ageing out the PEER_ACTIVE flag */
+ struct thread *hold_timer;
};
/*
@@ -508,4 +579,8 @@ typedef struct zebra_vxlan_sg_ {
uint32_t ref_cnt;
} zebra_vxlan_sg_t;
+extern zebra_vni_t *zvni_lookup(vni_t vni);
+extern void zebra_vxlan_sync_mac_dp_install(zebra_mac_t *mac, bool set_inactive,
+ bool force_clear_static, const char *caller);
+
#endif /* _ZEBRA_VXLAN_PRIVATE_H */
diff --git a/zebra/zserv.c b/zebra/zserv.c
index 99a85fd2ce..f1b7dcc848 100644
--- a/zebra/zserv.c
+++ b/zebra/zserv.c
@@ -1075,6 +1075,12 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client)
vty_out(vty, "L3-VNI delete notifications: %u\n", client->l3vnidel_cnt);
vty_out(vty, "MAC-IP add notifications: %u\n", client->macipadd_cnt);
vty_out(vty, "MAC-IP delete notifications: %u\n", client->macipdel_cnt);
+ vty_out(vty, "ES add notifications: %u\n", client->local_es_add_cnt);
+ vty_out(vty, "ES delete notifications: %u\n", client->local_es_del_cnt);
+ vty_out(vty, "ES-EVI add notifications: %u\n",
+ client->local_es_evi_add_cnt);
+ vty_out(vty, "ES-EVI delete notifications: %u\n",
+ client->local_es_evi_del_cnt);
TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
vty_out(vty, "VRF : %s\n", vrf_id_to_name(info->vrf_id));
diff --git a/zebra/zserv.h b/zebra/zserv.h
index f2a4523818..e904460782 100644
--- a/zebra/zserv.h
+++ b/zebra/zserv.h
@@ -194,6 +194,10 @@ struct zserv {
uint32_t v6_nh_watch_rem_cnt;
uint32_t vxlan_sg_add_cnt;
uint32_t vxlan_sg_del_cnt;
+ uint32_t local_es_add_cnt;
+ uint32_t local_es_del_cnt;
+ uint32_t local_es_evi_add_cnt;
+ uint32_t local_es_evi_del_cnt;
uint32_t error_cnt;
time_t nh_reg_time;