diff options
70 files changed, 4024 insertions, 257 deletions
diff --git a/Makefile.am b/Makefile.am index 851cefc85c..34f112bf01 100644 --- a/Makefile.am +++ b/Makefile.am @@ -125,6 +125,7 @@ include doc/manpages/subdir.am include doc/developer/subdir.am include include/subdir.am include lib/subdir.am +include mlag/subdir.am include zebra/subdir.am include watchfrr/subdir.am include qpb/subdir.am diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index f3c514fb15..d9d83335d0 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -494,6 +494,39 @@ static void unmap_vni_from_rt(struct bgp *bgp, struct bgpevpn *vpn, } } +static void bgp_evpn_get_rmac_nexthop(struct bgpevpn *vpn, + struct prefix_evpn *p, + struct attr *attr, uint8_t flags) +{ + struct bgp *bgp_vrf = vpn->bgp_vrf; + + memset(&attr->rmac, 0, sizeof(struct ethaddr)); + if (!bgp_vrf) + return; + + if (p->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + return; + + /* Copy sys (pip) RMAC and PIP IP as nexthop + * in case of route is self MAC-IP, + * advertise-pip and advertise-svi-ip features + * are enabled. + * Otherwise, for all host MAC-IP route's + * copy anycast RMAC. + */ + if (CHECK_FLAG(flags, BGP_EVPN_MACIP_TYPE_SVI_IP) + && bgp_vrf->evpn_info->advertise_pip && + bgp_vrf->evpn_info->is_anycast_mac) { + /* copy sys rmac */ + memcpy(&attr->rmac, &bgp_vrf->evpn_info->pip_rmac, + ETH_ALEN); + attr->nexthop = bgp_vrf->evpn_info->pip_ip; + attr->mp_nexthop_global_in = + bgp_vrf->evpn_info->pip_ip; + } else + memcpy(&attr->rmac, &bgp_vrf->rmac, ETH_ALEN); +} + /* * Create RT extended community automatically from passed information: * of the form AS:VNI. @@ -1543,11 +1576,47 @@ static int update_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp, memset(&attr, 0, sizeof(struct attr)); bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); } - /* Set nexthop to ourselves and fill in the Router MAC. 
*/ - attr.nexthop = bgp_vrf->originator_ip; - attr.mp_nexthop_global_in = bgp_vrf->originator_ip; + + /* Advertise Primary IP (PIP) is enabled, send individual + * IP (default instance router-id) as nexthop. + * PIP is disabled or vrr interface is not present + * use anycast-IP as nexthop and anycast RMAC. + */ + if (!bgp_vrf->evpn_info->advertise_pip || + (!bgp_vrf->evpn_info->is_anycast_mac)) { + attr.nexthop = bgp_vrf->originator_ip; + attr.mp_nexthop_global_in = bgp_vrf->originator_ip; + memcpy(&attr.rmac, &bgp_vrf->rmac, ETH_ALEN); + } else { + /* copy sys rmac */ + memcpy(&attr.rmac, &bgp_vrf->evpn_info->pip_rmac, ETH_ALEN); + if (bgp_vrf->evpn_info->pip_ip.s_addr != INADDR_ANY) { + attr.nexthop = bgp_vrf->evpn_info->pip_ip; + attr.mp_nexthop_global_in = bgp_vrf->evpn_info->pip_ip; + } else if (bgp_vrf->evpn_info->pip_ip.s_addr == INADDR_ANY) + if (bgp_debug_zebra(NULL)) { + char buf1[PREFIX_STRLEN]; + + zlog_debug("VRF %s evp %s advertise-pip primary ip is not configured", + vrf_id_to_name(bgp_vrf->vrf_id), + prefix2str(evp, buf1, sizeof(buf1))); + } + } + + if (bgp_debug_zebra(NULL)) { + char buf[ETHER_ADDR_STRLEN]; + char buf1[PREFIX_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + + zlog_debug("VRF %s type-5 route evp %s RMAC %s nexthop %s", + vrf_id_to_name(bgp_vrf->vrf_id), + prefix2str(evp, buf1, sizeof(buf1)), + prefix_mac2str(&attr.rmac, buf, sizeof(buf)), + inet_ntop(AF_INET, &attr.nexthop, buf2, + INET_ADDRSTRLEN)); + } + attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - memcpy(&attr.rmac, &bgp_vrf->rmac, sizeof(struct ethaddr)); /* Setup RT and encap extended community */ build_evpn_type5_route_extcomm(bgp_vrf, &attr); @@ -1652,6 +1721,9 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, memcpy(&tmp_pi->extra->label, label, sizeof(label)); tmp_pi->extra->num_labels = num_labels; + /* Mark route as self type-2 route */ + if (flags && CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP)) + tmp_pi->extra->af_flags = BGP_EVPN_MACIP_TYPE_SVI_IP; 
bgp_path_info_add(rn, tmp_pi); } else { tmp_pi = local_pi; @@ -1795,8 +1867,29 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, } /* router mac is only needed for type-2 routes here. */ - if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) - bgpevpn_get_rmac(vpn, &attr.rmac); + if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { + uint8_t af_flags = 0; + + if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP)) + SET_FLAG(af_flags, BGP_EVPN_MACIP_TYPE_SVI_IP); + + bgp_evpn_get_rmac_nexthop(vpn, p, &attr, af_flags); + + if (bgp_debug_zebra(NULL)) { + char buf[ETHER_ADDR_STRLEN]; + char buf1[PREFIX_STRLEN]; + + zlog_debug("VRF %s vni %u type-2 route evp %s RMAC %s nexthop %s", + vpn->bgp_vrf ? + vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ", + vpn->vni, + prefix2str(p, buf1, sizeof(buf1)), + prefix_mac2str(&attr.rmac, buf, + sizeof(buf)), + inet_ntoa(attr.mp_nexthop_global_in)); + } + } + vni2label(vpn->vni, &(attr.label)); /* Include L3 VNI related RTs and RMAC for type-2 routes, if they're @@ -2071,7 +2164,8 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) attr.nexthop = vpn->originator_ip; attr.mp_nexthop_global_in = vpn->originator_ip; attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - bgpevpn_get_rmac(vpn, &attr.rmac); + bgp_evpn_get_rmac_nexthop(vpn, evp, &attr, + tmp_pi->extra->af_flags); if (evpn_route_is_sticky(bgp, rn)) attr.sticky = 1; @@ -2081,6 +2175,19 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) attr.router_flag = 1; } + if (bgp_debug_zebra(NULL)) { + char buf[ETHER_ADDR_STRLEN]; + char buf1[PREFIX_STRLEN]; + + zlog_debug("VRF %s vni %u evp %s RMAC %s nexthop %s", + vpn->bgp_vrf ? + vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ", + vpn->vni, + prefix2str(evp, buf1, sizeof(buf1)), + prefix_mac2str(&attr.rmac, buf, sizeof(buf)), + inet_ntoa(attr.mp_nexthop_global_in)); + } + /* Add L3 VNI RTs and RMAC for non IPv6 link-local if * using L3 VNI for type-2 routes also. 
*/ @@ -2268,7 +2375,7 @@ static int bgp_evpn_vni_flood_mode_get(struct bgp *bgp, * situations need the route in the per-VNI table as well as the global * table to be updated (as attributes change). */ -static int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) +int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) { int ret; struct prefix_evpn p; @@ -3527,8 +3634,14 @@ static void delete_withdraw_vrf_routes(struct bgp *bgp_vrf) * update and advertise all ipv4 and ipv6 routes in thr vrf table as type-5 * routes */ -static void update_advertise_vrf_routes(struct bgp *bgp_vrf) +void update_advertise_vrf_routes(struct bgp *bgp_vrf) { + struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ + + bgp_evpn = bgp_get_evpn(); + if (!bgp_evpn) + return; + /* update all ipv4 routes */ if (advertise_type5_routes(bgp_vrf, AFI_IP)) bgp_evpn_advertise_type5_routes(bgp_vrf, AFI_IP, SAFI_UNICAST); @@ -4586,6 +4699,9 @@ void bgp_evpn_unconfigure_export_rt_for_vrf(struct bgp *bgp_vrf, */ void bgp_evpn_handle_router_id_update(struct bgp *bgp, int withdraw) { + struct listnode *node; + struct bgp *bgp_vrf; + if (withdraw) { /* delete and withdraw all the type-5 routes @@ -4600,8 +4716,34 @@ void bgp_evpn_handle_router_id_update(struct bgp *bgp, int withdraw) (void (*)(struct hash_bucket *, void *))withdraw_router_id_vni, bgp); + + if (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) { + for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) { + if (bgp_vrf->evpn_info->advertise_pip && + (bgp_vrf->evpn_info->pip_ip_static.s_addr + == INADDR_ANY)) + bgp_vrf->evpn_info->pip_ip.s_addr + = INADDR_ANY; + } + } } else { + /* Assign new default instance router-id */ + if (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) { + for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) { + if (bgp_vrf->evpn_info->advertise_pip && + (bgp_vrf->evpn_info->pip_ip_static.s_addr + == INADDR_ANY)) { + bgp_vrf->evpn_info->pip_ip = + bgp->router_id; + /* advertise type-5 routes with + * new nexthop + */ + 
update_advertise_vrf_routes(bgp_vrf); + } + } + } + /* advertise all routes in the vrf as type-5 routes with the new * RD */ @@ -5513,9 +5655,12 @@ static void link_l2vni_hash_to_l3vni(struct hash_bucket *bucket, bgpevpn_link_to_l3vni(vpn); } -int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, struct ethaddr *rmac, +int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, + struct ethaddr *svi_rmac, + struct ethaddr *vrr_rmac, struct in_addr originator_ip, int filter, - ifindex_t svi_ifindex) + ifindex_t svi_ifindex, + bool is_anycast_mac) { struct bgp *bgp_vrf = NULL; /* bgp VRF instance */ struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ @@ -5562,10 +5707,35 @@ int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, struct ethaddr *rmac, /* associate the vrf with l3vni and related parameters */ bgp_vrf->l3vni = l3vni; - memcpy(&bgp_vrf->rmac, rmac, sizeof(struct ethaddr)); bgp_vrf->originator_ip = originator_ip; bgp_vrf->l3vni_svi_ifindex = svi_ifindex; + bgp_vrf->evpn_info->is_anycast_mac = is_anycast_mac; + /* copy anycast MAC from VRR MAC */ + memcpy(&bgp_vrf->rmac, vrr_rmac, ETH_ALEN); + /* copy sys RMAC from SVI MAC */ + memcpy(&bgp_vrf->evpn_info->pip_rmac_zebra, svi_rmac, ETH_ALEN); + /* PIP user configured mac is not present use svi mac as sys mac */ + if (is_zero_mac(&bgp_vrf->evpn_info->pip_rmac_static)) + memcpy(&bgp_vrf->evpn_info->pip_rmac, svi_rmac, ETH_ALEN); + + if (bgp_debug_zebra(NULL)) { + char buf[ETHER_ADDR_STRLEN]; + char buf1[ETHER_ADDR_STRLEN]; + char buf2[ETHER_ADDR_STRLEN]; + + zlog_debug("VRF %s vni %u pip %s RMAC %s sys RMAC %s static RMAC %s is_anycast_mac %s", + vrf_id_to_name(bgp_vrf->vrf_id), + bgp_vrf->l3vni, + bgp_vrf->evpn_info->advertise_pip ? "enable" + : "disable", + prefix_mac2str(&bgp_vrf->rmac, buf, sizeof(buf)), + prefix_mac2str(&bgp_vrf->evpn_info->pip_rmac, + buf1, sizeof(buf1)), + prefix_mac2str(&bgp_vrf->evpn_info->pip_rmac_static, + buf2, sizeof(buf2)), + is_anycast_mac ? 
"Enable" : "Disable"); + } /* set the right filter - are we using l3vni only for prefix routes? */ if (filter) SET_FLAG(bgp_vrf->vrf_flags, BGP_VRF_L3VNI_PREFIX_ROUTES_ONLY); @@ -5646,6 +5816,10 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) /* remove the Rmac from the BGP vrf */ memset(&bgp_vrf->rmac, 0, sizeof(struct ethaddr)); + memset(&bgp_vrf->evpn_info->pip_rmac_zebra, 0, ETH_ALEN); + if (is_zero_mac(&bgp_vrf->evpn_info->pip_rmac_static) && + !is_zero_mac(&bgp_vrf->evpn_info->pip_rmac)) + memset(&bgp_vrf->evpn_info->pip_rmac, 0, ETH_ALEN); /* remove default import RT or Unmap non-default import RT */ if (!list_isempty(bgp_vrf->vrf_import_rtl)) { @@ -6005,6 +6179,15 @@ void bgp_evpn_init(struct bgp *bgp) bgp->evpn_info->dad_freeze_time = 0; /* Initialize zebra vxlan */ bgp_zebra_dup_addr_detection(bgp); + /* Enable PIP feature by default for bgp vrf instance */ + if (bgp->inst_type == BGP_INSTANCE_TYPE_VRF) { + struct bgp *bgp_default; + + bgp->evpn_info->advertise_pip = true; + bgp_default = bgp_get_default(); + if (bgp_default) + bgp->evpn_info->pip_ip = bgp_default->router_id; + } } /* Default BUM handling is to do head-end replication. 
*/ diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 798c3e59bc..b030f0a33e 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -174,8 +174,9 @@ extern int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, uint8_t flags, uint32_t seq); extern int bgp_evpn_local_l3vni_add(vni_t vni, vrf_id_t vrf_id, struct ethaddr *rmac, + struct ethaddr *vrr_rmac, struct in_addr originator_ip, int filter, - ifindex_t svi_ifindex); + ifindex_t svi_ifindex, bool is_anycast_mac); extern int bgp_evpn_local_l3vni_del(vni_t vni, vrf_id_t vrf_id); extern int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni); extern int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, @@ -192,5 +193,6 @@ extern void bgp_evpn_cleanup(struct bgp *bgp); extern void bgp_evpn_init(struct bgp *bgp); extern int bgp_evpn_get_type5_prefixlen(struct prefix *pfx); extern bool bgp_evpn_is_prefix_nht_supported(struct prefix *pfx); +extern void update_advertise_vrf_routes(struct bgp *bgp_vrf); #endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index f6bde2e9fa..76cf8b2cd6 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -188,6 +188,16 @@ struct bgp_evpn_info { /* EVPN enable - advertise svi macip routes */ int advertise_svi_macip; + /* PIP feature knob */ + bool advertise_pip; + /* PIP IP (sys ip) */ + struct in_addr pip_ip; + struct in_addr pip_ip_static; + /* PIP MAC (sys MAC) */ + struct ethaddr pip_rmac; + struct ethaddr pip_rmac_static; + struct ethaddr pip_rmac_zebra; + bool is_anycast_mac; }; static inline int is_vrf_rd_configured(struct bgp *bgp_vrf) @@ -501,6 +511,16 @@ static inline int is_es_local(struct evpnes *es) return CHECK_FLAG(es->flags, EVPNES_LOCAL) ? 
1 : 0; } +static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn) +{ + struct bgp *bgp_evpn = NULL; + + bgp_evpn = bgp_get_evpn(); + + return (bgp_evpn->evpn_info->advertise_svi_macip || + vpn->advertise_svi_macip); +} + extern void bgp_evpn_install_uninstall_default_route(struct bgp *bgp_vrf, afi_t afi, safi_t safi, bool add); @@ -543,4 +563,5 @@ extern struct evpnes *bgp_evpn_es_new(struct bgp *bgp, esi_t *esi, struct ipaddr *originator_ip); extern void bgp_evpn_es_free(struct bgp *bgp, struct evpnes *es); extern bool bgp_evpn_lookup_l3vni_l2vni_table(vni_t vni); +extern int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn); #endif /* _BGP_EVPN_PRIVATE_H */ diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c index 3bc8345140..d316a28dcb 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -364,6 +364,7 @@ static void display_l3vni(struct vty *vty, struct bgp *bgp_vrf, struct ecommunity *ecom; json_object *json_import_rtl = NULL; json_object *json_export_rtl = NULL; + char buf2[ETHER_ADDR_STRLEN]; json_import_rtl = json_export_rtl = 0; @@ -382,6 +383,19 @@ static void display_l3vni(struct vty *vty, struct bgp *bgp_vrf, json_object_string_add(json, "advertiseSviMacip", "n/a"); json_object_to_json_string_ext(json, JSON_C_TO_STRING_NOSLASHESCAPE); + json_object_string_add(json, "advertisePip", + bgp_vrf->evpn_info->advertise_pip ? 
+ "Enabled" : "Disabled"); + json_object_string_add(json, "sysIP", + inet_ntop(AF_INET, + &bgp_vrf->evpn_info->pip_ip, + buf1, INET_ADDRSTRLEN)); + json_object_string_add(json, "sysMac", + prefix_mac2str(&bgp_vrf->evpn_info->pip_rmac, + buf2, sizeof(buf2))); + json_object_string_add(json, "rmac", + prefix_mac2str(&bgp_vrf->rmac, + buf2, sizeof(buf2))); } else { vty_out(vty, "VNI: %d", bgp_vrf->l3vni); vty_out(vty, " (known to the kernel)"); @@ -396,6 +410,17 @@ static void display_l3vni(struct vty *vty, struct bgp *bgp_vrf, inet_ntoa(bgp_vrf->originator_ip)); vty_out(vty, " Advertise-gw-macip : %s\n", "n/a"); vty_out(vty, " Advertise-svi-macip : %s\n", "n/a"); + vty_out(vty, " Advertise-pip: %s\n", + bgp_vrf->evpn_info->advertise_pip ? "Yes" : "No"); + vty_out(vty, " System-IP: %s\n", + inet_ntop(AF_INET, &bgp_vrf->evpn_info->pip_ip, + buf1, INET_ADDRSTRLEN)); + vty_out(vty, " System-MAC: %s\n", + prefix_mac2str(&bgp_vrf->evpn_info->pip_rmac, + buf2, sizeof(buf2))); + vty_out(vty, " Router-MAC: %s\n", + prefix_mac2str(&bgp_vrf->rmac, + buf2, sizeof(buf2))); } if (!json) @@ -3650,6 +3675,139 @@ DEFUN (no_bgp_evpn_advertise_type5, return CMD_SUCCESS; } +DEFPY (bgp_evpn_advertise_pip_ip_mac, + bgp_evpn_advertise_pip_ip_mac_cmd, + "[no$no] advertise-pip [ip <A.B.C.D> [mac <X:X:X:X:X:X|X:X:X:X:X:X/M>]]", + NO_STR + "evpn system primary IP\n" + IP_STR + "ip address\n" + MAC_STR MAC_STR MAC_STR) +{ + struct bgp *bgp_vrf = VTY_GET_CONTEXT(bgp); /* bgp vrf instance */ + struct bgp *bgp_evpn = NULL; + + if (EVPN_ENABLED(bgp_vrf)) { + vty_out(vty, + "This command is supported under L3VNI BGP EVPN VRF\n"); + return CMD_WARNING_CONFIG_FAILED; + } + bgp_evpn = bgp_get_evpn(); + + if (!no) { + /* pip is already enabled */ + if (argc == 1 && bgp_vrf->evpn_info->advertise_pip) + return CMD_SUCCESS; + + bgp_vrf->evpn_info->advertise_pip = true; + if (ip.s_addr != INADDR_ANY) { + /* Already configured with same IP */ + if (IPV4_ADDR_SAME(&ip, + &bgp_vrf->evpn_info->pip_ip_static)) + 
return CMD_SUCCESS; + + bgp_vrf->evpn_info->pip_ip_static = ip; + bgp_vrf->evpn_info->pip_ip = ip; + } else { + bgp_vrf->evpn_info->pip_ip_static.s_addr + = INADDR_ANY; + /* default instance router-id assignemt */ + if (bgp_evpn) + bgp_vrf->evpn_info->pip_ip = + bgp_evpn->router_id; + } + /* parse sys mac */ + if (!is_zero_mac(&mac->eth_addr)) { + /* Already configured with same MAC */ + if (memcmp(&bgp_vrf->evpn_info->pip_rmac_static, + &mac->eth_addr, ETH_ALEN) == 0) + return CMD_SUCCESS; + + memcpy(&bgp_vrf->evpn_info->pip_rmac_static, + &mac->eth_addr, ETH_ALEN); + memcpy(&bgp_vrf->evpn_info->pip_rmac, + &bgp_vrf->evpn_info->pip_rmac_static, + ETH_ALEN); + } else { + /* Copy zebra sys mac */ + if (!is_zero_mac(&bgp_vrf->evpn_info->pip_rmac_zebra)) + memcpy(&bgp_vrf->evpn_info->pip_rmac, + &bgp_vrf->evpn_info->pip_rmac_zebra, + ETH_ALEN); + } + } else { + if (argc == 2) { + if (!bgp_vrf->evpn_info->advertise_pip) + return CMD_SUCCESS; + /* Disable PIP feature */ + bgp_vrf->evpn_info->advertise_pip = false; + /* copy anycast mac */ + memcpy(&bgp_vrf->evpn_info->pip_rmac, + &bgp_vrf->rmac, ETH_ALEN); + } else { + /* remove MAC-IP option retain PIP knob. */ + if ((ip.s_addr != INADDR_ANY) && + !IPV4_ADDR_SAME(&ip, + &bgp_vrf->evpn_info->pip_ip_static)) { + vty_out(vty, + "%% BGP EVPN PIP IP does not match\n"); + return CMD_WARNING_CONFIG_FAILED; + } + + if (!is_zero_mac(&mac->eth_addr) && + memcmp(&bgp_vrf->evpn_info->pip_rmac_static, + &mac->eth_addr, ETH_ALEN) != 0) { + vty_out(vty, + "%% BGP EVPN PIP MAC does not match\n"); + return CMD_WARNING_CONFIG_FAILED; + } + /* pip_rmac can carry vrr_rmac reset only if it matches + * with static value. 
+ */ + if (memcmp(&bgp_vrf->evpn_info->pip_rmac, + &bgp_vrf->evpn_info->pip_rmac_static, + ETH_ALEN) == 0) { + /* Copy zebra sys mac */ + if (!is_zero_mac( + &bgp_vrf->evpn_info->pip_rmac_zebra)) + memcpy(&bgp_vrf->evpn_info->pip_rmac, + &bgp_vrf->evpn_info->pip_rmac_zebra, + ETH_ALEN); + else { + /* copy anycast mac */ + memcpy(&bgp_vrf->evpn_info->pip_rmac, + &bgp_vrf->rmac, ETH_ALEN); + } + } + } + /* reset user configured sys MAC */ + memset(&bgp_vrf->evpn_info->pip_rmac_static, 0, ETH_ALEN); + /* reset user configured sys IP */ + bgp_vrf->evpn_info->pip_ip_static.s_addr = INADDR_ANY; + /* Assign default PIP IP (bgp instance router-id) */ + if (bgp_evpn) + bgp_vrf->evpn_info->pip_ip = bgp_evpn->router_id; + else + bgp_vrf->evpn_info->pip_ip.s_addr = INADDR_ANY; + } + + if (is_evpn_enabled()) { + struct listnode *node = NULL; + struct bgpevpn *vpn = NULL; + + update_advertise_vrf_routes(bgp_vrf); + + /* Update (svi) type-2 routes */ + for (ALL_LIST_ELEMENTS_RO(bgp_vrf->l2vnis, node, vpn)) { + if (!bgp_evpn_is_svi_macip_enabled(vpn)) + continue; + update_routes_for_vni(bgp_evpn, vpn); + } + } + + return CMD_SUCCESS; +} + /* * Display VNI information - for all or a specific VNI */ @@ -5383,6 +5541,7 @@ void bgp_config_write_evpn_info(struct vty *vty, struct bgp *bgp, afi_t afi, safi_t safi) { char buf1[RD_ADDRSTRLEN]; + char buf2[INET6_ADDRSTRLEN]; if (bgp->vnihash) { struct list *vnilist = hash_to_list(bgp->vnihash); @@ -5457,6 +5616,25 @@ void bgp_config_write_evpn_info(struct vty *vty, struct bgp *bgp, afi_t afi, BGP_L2VPN_EVPN_DEFAULT_ORIGINATE_IPV6)) vty_out(vty, " default-originate ipv6\n"); + if (bgp->inst_type == BGP_INSTANCE_TYPE_VRF) { + if (!bgp->evpn_info->advertise_pip) + vty_out(vty, " no advertise-pip\n"); + if (bgp->evpn_info->advertise_pip) { + if (bgp->evpn_info->pip_ip_static.s_addr != INADDR_ANY) + vty_out(vty, " advertise-pip ip %s", + inet_ntop(AF_INET, + &bgp->evpn_info->pip_ip_static, + buf2, INET_ADDRSTRLEN)); + if 
(!is_zero_mac(&(bgp->evpn_info->pip_rmac_static))) { + char buf[ETHER_ADDR_STRLEN]; + + vty_out(vty, " mac %s", + prefix_mac2str(&bgp->evpn_info->pip_rmac, + buf, sizeof(buf))); + } + vty_out(vty, "\n"); + } + } if (CHECK_FLAG(bgp->vrf_flags, BGP_VRF_RD_CFGD)) vty_out(vty, " rd %s\n", prefix_rd2str(&bgp->vrf_prd, buf1, sizeof(buf1))); @@ -5527,6 +5705,7 @@ void bgp_ethernetvpn_init(void) install_element(BGP_EVPN_NODE, &dup_addr_detection_auto_recovery_cmd); install_element(BGP_EVPN_NODE, &no_dup_addr_detection_cmd); install_element(BGP_EVPN_NODE, &bgp_evpn_flood_control_cmd); + install_element(BGP_EVPN_NODE, &bgp_evpn_advertise_pip_ip_mac_cmd); /* test commands */ install_element(BGP_EVPN_NODE, &test_adv_evpn_type4_route_cmd); diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 2e5b2e115c..1e58838144 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -101,7 +101,7 @@ static int bgp_peer_reg_with_nht(struct peer *peer) { int connected = 0; - if (peer->sort == BGP_PEER_EBGP && peer->ttl == 1 + if (peer->sort == BGP_PEER_EBGP && peer->ttl == BGP_DEFAULT_TTL && !CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) && !bgp_flag_check(peer->bgp, BGP_FLAG_DISABLE_NH_CONNECTED_CHK)) connected = 1; diff --git a/bgpd/bgp_label.h b/bgpd/bgp_label.h index 89bc9aabb0..523671468b 100644 --- a/bgpd/bgp_label.h +++ b/bgpd/bgp_label.h @@ -58,7 +58,7 @@ static inline int bgp_is_withdraw_label(mpls_label_t *label) /* The check on pkt[2] for 0x00 or 0x02 is in case bgp_set_valid_label() * was called on the withdraw label */ - if ((pkt[0] == 0x80) && (pkt[1] == 0x00) + if (((pkt[0] == 0x80) || (pkt[0] == 0x00)) && (pkt[1] == 0x00) && ((pkt[2] == 0x00) || (pkt[2] == 0x02))) return 1; return 0; diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index fb2eb10dd9..f1bd4c77d1 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -3454,7 +3454,8 @@ int bgp_update(struct peer *peer, struct prefix *p, uint32_t addpath_id, && (safi == SAFI_UNICAST || safi == SAFI_LABELED_UNICAST)) || 
(safi == SAFI_EVPN && bgp_evpn_is_prefix_nht_supported(p))) { - if (peer->sort == BGP_PEER_EBGP && peer->ttl == 1 + if (safi != SAFI_EVPN && peer->sort == BGP_PEER_EBGP + && peer->ttl == BGP_DEFAULT_TTL && !CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) && !bgp_flag_check( @@ -3596,7 +3597,8 @@ int bgp_update(struct peer *peer, struct prefix *p, uint32_t addpath_id, if (((afi == AFI_IP || afi == AFI_IP6) && (safi == SAFI_UNICAST || safi == SAFI_LABELED_UNICAST)) || (safi == SAFI_EVPN && bgp_evpn_is_prefix_nht_supported(p))) { - if (peer->sort == BGP_PEER_EBGP && peer->ttl == 1 + if (safi != SAFI_EVPN && peer->sort == BGP_PEER_EBGP + && peer->ttl == BGP_DEFAULT_TTL && !CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) && !bgp_flag_check(bgp, BGP_FLAG_DISABLE_NH_CONNECTED_CHK)) @@ -4481,7 +4483,7 @@ int bgp_nlri_parse_ip(struct peer *peer, struct attr *attr, if (addpath_encoded) { /* When packet overflow occurs return immediately. */ - if (pnt + BGP_ADDPATH_ID_LEN > lim) + if (pnt + BGP_ADDPATH_ID_LEN >= lim) return BGP_NLRI_PARSE_ERROR_PACKET_OVERFLOW; addpath_id = ntohl(*((uint32_t *)pnt)); @@ -12183,6 +12185,12 @@ DEFUN (bgp_damp_set, max = 4 * half; } + /* + * These can't be 0 but our SA doesn't understand the + * way our cli is constructed + */ + assert(reuse); + assert(half); if (suppress < reuse) { vty_out(vty, "Suppress value cannot be less than reuse value \n"); diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index a710873ea7..b9f3f3f762 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -114,6 +114,10 @@ struct bgp_path_info_extra { mpls_label_t label[BGP_MAX_LABELS]; uint32_t num_labels; + /* af specific flags */ + uint16_t af_flags; +#define BGP_EVPN_MACIP_TYPE_SVI_IP (1 << 0) + #if ENABLE_BGP_VNC union { diff --git a/bgpd/bgp_vpn.c b/bgpd/bgp_vpn.c index f922d066c3..b67b0c322e 100644 --- a/bgpd/bgp_vpn.c +++ b/bgpd/bgp_vpn.c @@ -85,9 +85,13 @@ int show_adj_route_vpn(struct vty *vty, struct peer *peer, if (table == NULL) 
continue; - + /* + * Initialize variables for each RD + * All prefixes under an RD is aggregated within "json_routes" + */ rd_header = 1; memset(rd_str, 0, sizeof(rd_str)); + json_routes = NULL; for (rm = bgp_table_top(table); rm; rm = bgp_route_next(rm)) { struct bgp_adj_out *adj = NULL; @@ -223,7 +227,7 @@ int show_adj_route_vpn(struct vty *vty, struct peer *peer, output_count++; } - if (use_json) + if (use_json && json_routes) json_object_object_add(json_adv, rd_str, json_routes); } diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 3116e7cad0..57d481eafa 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -10896,7 +10896,7 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json, json_object_int_add(json_neigh, "externalBgpNbrMaxHopsAway", p->gtsm_hops); - else if (p->ttl > 1) + else if (p->ttl > BGP_DEFAULT_TTL) json_object_int_add(json_neigh, "externalBgpNbrMaxHopsAway", p->ttl); @@ -10905,7 +10905,7 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json, vty_out(vty, " External BGP neighbor may be up to %d hops away.\n", p->gtsm_hops); - else if (p->ttl > 1) + else if (p->ttl > BGP_DEFAULT_TTL) vty_out(vty, " External BGP neighbor may be up to %d hops away.\n", p->ttl); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index d0a732b153..e886733ced 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -278,12 +278,14 @@ static int bgp_ifp_down(struct interface *ifp) * 1-hop BFD * tracked (directly connected) IBGP peers. 
*/ - if ((peer->ttl != 1) && (peer->gtsm_hops != 1) + if ((peer->ttl != BGP_DEFAULT_TTL) + && (peer->gtsm_hops != 1) && (!peer->bfd_info || bgp_bfd_is_peer_multihop(peer))) #else /* Take down directly connected EBGP peers */ - if ((peer->ttl != 1) && (peer->gtsm_hops != 1)) + if ((peer->ttl != BGP_DEFAULT_TTL) + && (peer->gtsm_hops != 1)) #endif continue; @@ -448,7 +450,8 @@ static int bgp_interface_vrf_update(ZAPI_CALLBACK_ARGS) /* Fast external-failover */ if (!CHECK_FLAG(bgp->flags, BGP_FLAG_NO_FAST_EXT_FAILOVER)) { for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) { - if ((peer->ttl != 1) && (peer->gtsm_hops != 1)) + if ((peer->ttl != BGP_DEFAULT_TTL) + && (peer->gtsm_hops != 1)) continue; if (ifp == peer->nexthop.ifp) @@ -1222,7 +1225,7 @@ void bgp_zebra_announce(struct bgp_node *rn, struct prefix *p, SET_FLAG(api.flags, ZEBRA_FLAG_ALLOW_RECURSION); } - if ((peer->sort == BGP_PEER_EBGP && peer->ttl != 1) + if ((peer->sort == BGP_PEER_EBGP && peer->ttl != BGP_DEFAULT_TTL) || CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) || bgp_flag_check(bgp, BGP_FLAG_DISABLE_NH_CONNECTED_CHK)) @@ -2469,30 +2472,37 @@ static int bgp_zebra_process_local_l3vni(ZAPI_CALLBACK_ARGS) int filter = 0; char buf[ETHER_ADDR_STRLEN]; vni_t l3vni = 0; - struct ethaddr rmac; + struct ethaddr svi_rmac, vrr_rmac = {.octet = {0} }; struct in_addr originator_ip; struct stream *s; ifindex_t svi_ifindex; + bool is_anycast_mac = false; + char buf1[ETHER_ADDR_STRLEN]; - memset(&rmac, 0, sizeof(struct ethaddr)); + memset(&svi_rmac, 0, sizeof(struct ethaddr)); memset(&originator_ip, 0, sizeof(struct in_addr)); s = zclient->ibuf; l3vni = stream_getl(s); if (cmd == ZEBRA_L3VNI_ADD) { - stream_get(&rmac, s, sizeof(struct ethaddr)); + stream_get(&svi_rmac, s, sizeof(struct ethaddr)); originator_ip.s_addr = stream_get_ipv4(s); stream_get(&filter, s, sizeof(int)); svi_ifindex = stream_getl(s); + stream_get(&vrr_rmac, s, sizeof(struct ethaddr)); + is_anycast_mac = stream_getl(s); if 
(BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("Rx L3-VNI ADD VRF %s VNI %u RMAC %s filter %s svi-if %u", + zlog_debug("Rx L3-VNI ADD VRF %s VNI %u RMAC svi-mac %s vrr-mac %s filter %s svi-if %u", vrf_id_to_name(vrf_id), l3vni, - prefix_mac2str(&rmac, buf, sizeof(buf)), + prefix_mac2str(&svi_rmac, buf, sizeof(buf)), + prefix_mac2str(&vrr_rmac, buf1, + sizeof(buf1)), filter ? "prefix-routes-only" : "none", svi_ifindex); - bgp_evpn_local_l3vni_add(l3vni, vrf_id, &rmac, originator_ip, - filter, svi_ifindex); + bgp_evpn_local_l3vni_add(l3vni, vrf_id, &svi_rmac, &vrr_rmac, + originator_ip, filter, svi_ifindex, + is_anycast_mac); } else { if (BGP_DEBUG(zebra, ZEBRA)) zlog_debug("Rx L3-VNI DEL VRF %s VNI %u", diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index d4e60b4093..a74923c302 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -6557,16 +6557,17 @@ int is_ebgp_multihop_configured(struct peer *peer) if (CHECK_FLAG(peer->sflags, PEER_STATUS_GROUP)) { group = peer->group; if ((peer_sort(peer) != BGP_PEER_IBGP) - && (group->conf->ttl != 1)) + && (group->conf->ttl != BGP_DEFAULT_TTL)) return 1; for (ALL_LIST_ELEMENTS(group->peer, node, nnode, peer1)) { if ((peer_sort(peer1) != BGP_PEER_IBGP) - && (peer1->ttl != 1)) + && (peer1->ttl != BGP_DEFAULT_TTL)) return 1; } } else { - if ((peer_sort(peer) != BGP_PEER_IBGP) && (peer->ttl != 1)) + if ((peer_sort(peer) != BGP_PEER_IBGP) + && (peer->ttl != BGP_DEFAULT_TTL)) return 1; } return 0; @@ -7109,7 +7110,7 @@ static void bgp_config_write_peer_global(struct vty *vty, struct bgp *bgp, vty_out(vty, " neighbor %s passive\n", addr); /* ebgp-multihop */ - if (peer->sort != BGP_PEER_IBGP && peer->ttl != 1 + if (peer->sort != BGP_PEER_IBGP && peer->ttl != BGP_DEFAULT_TTL && !(peer->gtsm_hops != 0 && peer->ttl == MAXTTL)) { if (!peer_group_active(peer) || g_peer->ttl != peer->ttl) { vty_out(vty, " neighbor %s ebgp-multihop %d\n", addr, diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst index 4b3113cf3b..0741b1d619 100644 --- a/doc/user/bgp.rst +++ 
b/doc/user/bgp.rst @@ -2056,6 +2056,61 @@ address-family: the VPN RIB as intermediary. +.. _bgp-evpn: + +Ethernet Virtual Network - EVPN +------------------------------- + +.. _bgp-evpn-advertise-pip: + +EVPN advertise-PIP +^^^^^^^^^^^^^^^^^^ + +In an EVPN symmetric routing MLAG deployment, all EVPN routes are advertised +with anycast-IP as next-hop IP and anycast MAC as the Router MAC (RMAC - in +BGP EVPN Extended-Community). +EVPN picks up the next-hop IP from the VxLAN interface's local tunnel IP and +the RMAC is obtained from the MAC of the L3VNI's SVI interface. +Note: Next-hop IP is used for EVPN routes whether symmetric routing is +deployed or not but the RMAC is only relevant for symmetric routing scenario. + +Current behavior is not ideal for Prefix (type-5) and self (type-2) +routes. This is because the traffic from remote VTEPs is routed sub-optimally +if it lands on the system where the route does not belong. + +The advertise-pip feature advertises Prefix (type-5) and self (type-2) +routes with system's individual (primary) IP as the next-hop and individual +(system) MAC as Router-MAC (RMAC), while leaving the behavior unchanged for +other EVPN routes. + +To support this feature there needs to be the ability for a +(system-MAC, system-IP) pair to co-exist with an (anycast-MAC, anycast-IP) pair, with the +ability to terminate VxLAN-encapsulated packets received for either pair on +the same L3VNI (i.e. the associated VLAN). This capability is needed per tenant +VRF instance. + +To derive the system-MAC and the anycast MAC, there needs to be a +separate/additional MAC-VLAN interface corresponding to L3VNI's SVI. +The SVI interface's MAC address can be interpreted as system-MAC +and MAC-VLAN interface's MAC as anycast MAC. + +To derive system-IP and anycast-IP, the default BGP instance's router-id is used +as system-IP and the VxLAN interface's local tunnel IP as the anycast-IP. 
+ +User has an option to configure the system-IP and/or system-MAC value if the +auto derived value is not preferred. + +Note: By default, advertise-pip feature is enabled and user has an option to +disable the feature via configuration CLI. Once the feature is disabled under +bgp vrf instance or MAC-VLAN interface is not configured, all the routes follow +the same behavior of using same next-hop and RMAC values. + +.. index:: [no] advertise-pip [ip <addr> [mac <addr>]] +.. clicmd:: [no] advertise-pip [ip <addr> [mac <addr>]] + +Enables or disables advertise-pip feature, and optionally specifies system-IP and/or system-MAC +parameters. + .. _bgp-cisco-compatibility: Cisco Compatibility diff --git a/doc/user/ipv6.rst b/doc/user/ipv6.rst index cc8fd18fee..f3f064b850 100644 --- a/doc/user/ipv6.rst +++ b/doc/user/ipv6.rst @@ -77,6 +77,20 @@ Router Advertisement Default: ``600000`` .. index:: + single: ipv6 nd ra-fast-retrans + single: no ipv6 nd ra-fast-retrans +.. clicmd:: [no] ipv6 nd ra-fast-retrans + + RFC4861 states that consecutive RA packets should be sent no more + frequently than three seconds apart. FRR by default allows faster + transmissions of RA packets in order to speed convergence and + neighbor establishment, particularly for unnumbered peering. By + turning off ipv6 nd ra-fast-retrans, the implementation is + compliant with the RFC at the cost of slower convergence + and neighbor establishment. + Default: enabled + +.. index:: single: ipv6 nd ra-lifetime (0-9000) single: no ipv6 nd ra-lifetime [(0-9000)] .. clicmd:: [no] ipv6 nd ra-lifetime [(0-9000)] diff --git a/doc/user/pbr.rst b/doc/user/pbr.rst index fab4343f50..68e460748c 100644 --- a/doc/user/pbr.rst +++ b/doc/user/pbr.rst @@ -107,6 +107,14 @@ end destination. Use this individual nexthop as the place to forward packets when the match commands have matched a packet. +.. clicmd:: set vrf unchanged|NAME + + If unchanged is set, the rule will use the vrf table the interface is in + as its lookup.
If NAME is specified, the rule will use that vrf table as + its lookup. + + Not supported with NETNS VRF backend. + .. _pbr-policy: PBR Policy diff --git a/eigrpd/eigrp_dump.c b/eigrpd/eigrp_dump.c index 583db6622d..7278b002d8 100644 --- a/eigrpd/eigrp_dump.c +++ b/eigrpd/eigrp_dump.c @@ -144,55 +144,10 @@ void eigrp_header_dump(struct eigrp_header *eigrph) const char *eigrp_if_name_string(struct eigrp_interface *ei) { - static char buf[EIGRP_IF_STRING_MAXLEN] = ""; - - if (!ei) - return "inactive"; - - snprintf(buf, EIGRP_IF_STRING_MAXLEN, "%s", ei->ifp->name); - return buf; -} - -const char *eigrp_topology_ip_string(struct eigrp_prefix_entry *tn) -{ - static char buf[EIGRP_IF_STRING_MAXLEN] = ""; - uint32_t ifaddr; - - ifaddr = ntohl(tn->destination->u.prefix4.s_addr); - snprintf(buf, EIGRP_IF_STRING_MAXLEN, "%u.%u.%u.%u", - (ifaddr >> 24) & 0xff, (ifaddr >> 16) & 0xff, - (ifaddr >> 8) & 0xff, ifaddr & 0xff); - return buf; -} - - -const char *eigrp_if_ip_string(struct eigrp_interface *ei) -{ - static char buf[EIGRP_IF_STRING_MAXLEN] = ""; - uint32_t ifaddr; - if (!ei) return "inactive"; - ifaddr = ntohl(ei->address.u.prefix4.s_addr); - snprintf(buf, EIGRP_IF_STRING_MAXLEN, "%u.%u.%u.%u", - (ifaddr >> 24) & 0xff, (ifaddr >> 16) & 0xff, - (ifaddr >> 8) & 0xff, ifaddr & 0xff); - - return buf; -} - -const char *eigrp_neigh_ip_string(struct eigrp_neighbor *nbr) -{ - static char buf[EIGRP_IF_STRING_MAXLEN] = ""; - uint32_t ifaddr; - - ifaddr = ntohl(nbr->src.s_addr); - snprintf(buf, EIGRP_IF_STRING_MAXLEN, "%u.%u.%u.%u", - (ifaddr >> 24) & 0xff, (ifaddr >> 16) & 0xff, - (ifaddr >> 8) & 0xff, ifaddr & 0xff); - - return buf; + return ei->ifp->name; } void show_ip_eigrp_interface_header(struct vty *vty, struct eigrp *eigrp) @@ -209,7 +164,7 @@ void show_ip_eigrp_interface_header(struct vty *vty, struct eigrp *eigrp) void show_ip_eigrp_interface_sub(struct vty *vty, struct eigrp *eigrp, struct eigrp_interface *ei) { - vty_out(vty, "%-11s ", eigrp_if_name_string(ei)); + 
vty_out(vty, "%-11s ", IF_NAME(ei)); vty_out(vty, "%-11u", ei->params.bandwidth); vty_out(vty, "%-11u", ei->params.delay); vty_out(vty, "%-7u", ei->nbrs->count); @@ -250,7 +205,7 @@ void show_ip_eigrp_neighbor_sub(struct vty *vty, struct eigrp_neighbor *nbr, { vty_out(vty, "%-3u %-17s %-21s", 0, eigrp_neigh_ip_string(nbr), - eigrp_if_name_string(nbr->ei)); + IF_NAME(nbr->ei)); if (nbr->t_holddown) vty_out(vty, "%-7lu", thread_timer_remain_second(nbr->t_holddown)); @@ -313,11 +268,11 @@ void show_ip_eigrp_nexthop_entry(struct vty *vty, struct eigrp *eigrp, if (te->adv_router == eigrp->neighbor_self) vty_out(vty, "%-7s%s, %s\n", " ", "via Connected", - eigrp_if_name_string(te->ei)); + IF_NAME(te->ei)); else { vty_out(vty, "%-7s%s%s (%u/%u), %s\n", " ", "via ", inet_ntoa(te->adv_router->src), te->distance, - te->reported_distance, eigrp_if_name_string(te->ei)); + te->reported_distance, IF_NAME(te->ei)); } } diff --git a/eigrpd/eigrp_dump.h b/eigrpd/eigrp_dump.h index 34b55ab419..f141f3cbc6 100644 --- a/eigrpd/eigrp_dump.h +++ b/eigrpd/eigrp_dump.h @@ -138,9 +138,21 @@ extern unsigned long term_debug_eigrp_zebra; /* Prototypes. */ extern const char *eigrp_if_name_string(struct eigrp_interface *); -extern const char *eigrp_if_ip_string(struct eigrp_interface *); -extern const char *eigrp_neigh_ip_string(struct eigrp_neighbor *); -extern const char *eigrp_topology_ip_string(struct eigrp_prefix_entry *); +static inline const char +*eigrp_topology_ip_string(struct eigrp_prefix_entry *tn) +{ + return inet_ntoa(tn->destination->u.prefix4); +} + +static inline const char *eigrp_if_ip_string(struct eigrp_interface *ei) +{ + return ei ? 
inet_ntoa(ei->address.u.prefix4) : "inactive"; +} + +static inline const char *eigrp_neigh_ip_string(struct eigrp_neighbor *nbr) +{ + return inet_ntoa(nbr->src); +} extern void eigrp_ip_header_dump(struct ip *); extern void eigrp_header_dump(struct eigrp_header *); @@ -1677,3 +1677,13 @@ const struct frr_yang_module_info frr_interface_info = { }, } }; + +#if defined(__GNUC__) && ((__GNUC__ - 0) < 5) && !defined(__clang__) +/* gcc versions before 5.x miscalculate the size for structs with variable + * length arrays (they just count it as size 0) + * + * NB: the "." below means "current position", i.e. this line must be + * immediately after the frr_interface_info variable! + */ +__asm__(".size\tfrr_interface_info, .-frr_interface_info\n"); +#endif diff --git a/lib/mlag.c b/lib/mlag.c index acdc662924..1daf290725 100644 --- a/lib/mlag.c +++ b/lib/mlag.c @@ -39,3 +39,129 @@ char *mlag_role2str(enum mlag_role role, char *buf, size_t size) return buf; } + +char *mlag_lib_msgid_to_str(enum mlag_msg_type msg_type, char *buf, size_t size) +{ + switch (msg_type) { + case MLAG_REGISTER: + snprintf(buf, size, "Register"); + break; + case MLAG_DEREGISTER: + snprintf(buf, size, "De-Register"); + break; + case MLAG_MROUTE_ADD: + snprintf(buf, size, "Mroute add"); + break; + case MLAG_MROUTE_DEL: + snprintf(buf, size, "Mroute del"); + break; + case MLAG_DUMP: + snprintf(buf, size, "Mlag Replay"); + break; + case MLAG_MROUTE_ADD_BULK: + snprintf(buf, size, "Mroute Add Batch"); + break; + case MLAG_MROUTE_DEL_BULK: + snprintf(buf, size, "Mroute Del Batch"); + break; + case MLAG_STATUS_UPDATE: + snprintf(buf, size, "Mlag Status"); + break; + case MLAG_VXLAN_UPDATE: + snprintf(buf, size, "Mlag vxlan update"); + break; + case MLAG_PEER_FRR_STATUS: + snprintf(buf, size, "Mlag Peer FRR Status"); + break; + default: + snprintf(buf, size, "Unknown %d", msg_type); + break; + } + return buf; +} + + +int mlag_lib_decode_mlag_hdr(struct stream *s, struct mlag_msg *msg) +{ + if (s == NULL || 
msg == NULL) + return -1; + + STREAM_GETL(s, msg->msg_type); + STREAM_GETW(s, msg->data_len); + STREAM_GETW(s, msg->msg_cnt); + return 0; +stream_failure: + return -1; +} + +int mlag_lib_decode_mroute_add(struct stream *s, struct mlag_mroute_add *msg) +{ + if (s == NULL || msg == NULL) + return -1; + + STREAM_GET(msg->vrf_name, s, VRF_NAMSIZ); + STREAM_GETL(s, msg->source_ip); + STREAM_GETL(s, msg->group_ip); + STREAM_GETL(s, msg->cost_to_rp); + STREAM_GETL(s, msg->owner_id); + STREAM_GETC(s, msg->am_i_dr); + STREAM_GETC(s, msg->am_i_dual_active); + STREAM_GETL(s, msg->vrf_id); + STREAM_GET(msg->intf_name, s, INTERFACE_NAMSIZ); + return 0; +stream_failure: + return -1; +} + +int mlag_lib_decode_mroute_del(struct stream *s, struct mlag_mroute_del *msg) +{ + if (s == NULL || msg == NULL) + return -1; + + STREAM_GET(msg->vrf_name, s, VRF_NAMSIZ); + STREAM_GETL(s, msg->source_ip); + STREAM_GETL(s, msg->group_ip); + STREAM_GETL(s, msg->owner_id); + STREAM_GETL(s, msg->vrf_id); + STREAM_GET(msg->intf_name, s, INTERFACE_NAMSIZ); + return 0; +stream_failure: + return -1; +} + +int mlag_lib_decode_mlag_status(struct stream *s, struct mlag_status *msg) +{ + if (s == NULL || msg == NULL) + return -1; + + STREAM_GET(msg->peerlink_rif, s, INTERFACE_NAMSIZ); + STREAM_GETL(s, msg->my_role); + STREAM_GETL(s, msg->peer_state); + return 0; +stream_failure: + return -1; +} + +int mlag_lib_decode_vxlan_update(struct stream *s, struct mlag_vxlan *msg) +{ + if (s == NULL || msg == NULL) + return -1; + + STREAM_GETL(s, msg->anycast_ip); + STREAM_GETL(s, msg->local_ip); + return 0; + +stream_failure: + return -1; +} + +int mlag_lib_decode_frr_status(struct stream *s, struct mlag_frr_status *msg) +{ + if (s == NULL || msg == NULL) + return -1; + + STREAM_GETL(s, msg->frr_state); + return 0; +stream_failure: + return -1; +} diff --git a/lib/mlag.h b/lib/mlag.h index 2b904d44f4..c531fb5b68 100644 --- a/lib/mlag.h +++ b/lib/mlag.h @@ -26,14 +26,116 @@ extern "C" { #endif +#include "lib/if.h" 
+#include "lib/vrf.h" +#include "lib/stream.h" + +#define MLAG_MSG_NULL_PAYLOAD 0 +#define MLAG_MSG_NO_BATCH 1 +#define MLAG_BUF_LIMIT 2048 + enum mlag_role { MLAG_ROLE_NONE, MLAG_ROLE_PRIMARY, MLAG_ROLE_SECONDARY }; -extern char *mlag_role2str(enum mlag_role role, char *buf, size_t size); +enum mlag_state { + MLAG_STATE_DOWN, + MLAG_STATE_RUNNING, +}; + +enum mlag_frr_state { + MLAG_FRR_STATE_NONE, + MLAG_FRR_STATE_DOWN, + MLAG_FRR_STATE_UP, +}; + +enum mlag_owner { + MLAG_OWNER_NONE, + MLAG_OWNER_INTERFACE, + MLAG_OWNER_VXLAN, +}; + +/* + * This message definition should match mlag.proto + * Because message registration is based on this + */ +enum mlag_msg_type { + MLAG_MSG_NONE = 0, + MLAG_REGISTER = 1, + MLAG_DEREGISTER = 2, + MLAG_STATUS_UPDATE = 3, + MLAG_MROUTE_ADD = 4, + MLAG_MROUTE_DEL = 5, + MLAG_DUMP = 6, + MLAG_MROUTE_ADD_BULK = 7, + MLAG_MROUTE_DEL_BULK = 8, + MLAG_PIM_CFG_DUMP = 10, + MLAG_VXLAN_UPDATE = 11, + MLAG_PEER_FRR_STATUS = 12, +}; + +struct mlag_frr_status { + enum mlag_frr_state frr_state; +}; +struct mlag_status { + char peerlink_rif[INTERFACE_NAMSIZ]; + enum mlag_role my_role; + enum mlag_state peer_state; +}; + +#define MLAG_ROLE_STRSIZE 16 + +struct mlag_vxlan { + uint32_t anycast_ip; + uint32_t local_ip; +}; + +struct mlag_mroute_add { + char vrf_name[VRF_NAMSIZ]; + uint32_t source_ip; + uint32_t group_ip; + uint32_t cost_to_rp; + enum mlag_owner owner_id; + bool am_i_dr; + bool am_i_dual_active; + vrf_id_t vrf_id; + char intf_name[INTERFACE_NAMSIZ]; +}; + +struct mlag_mroute_del { + char vrf_name[VRF_NAMSIZ]; + uint32_t source_ip; + uint32_t group_ip; + enum mlag_owner owner_id; + vrf_id_t vrf_id; + char intf_name[INTERFACE_NAMSIZ]; +}; + +struct mlag_msg { + enum mlag_msg_type msg_type; + uint16_t data_len; + uint16_t msg_cnt; + uint8_t data[0]; +} __attribute__((packed)); + + +extern char *mlag_role2str(enum mlag_role role, char *buf, size_t size); +extern char *mlag_lib_msgid_to_str(enum mlag_msg_type msg_type, char *buf, + size_t 
size); +extern int mlag_lib_decode_mlag_hdr(struct stream *s, struct mlag_msg *msg); +extern int mlag_lib_decode_mroute_add(struct stream *s, + struct mlag_mroute_add *msg); +extern int mlag_lib_decode_mroute_del(struct stream *s, + struct mlag_mroute_del *msg); +extern int mlag_lib_decode_mlag_status(struct stream *s, + struct mlag_status *msg); +extern int mlag_lib_decode_vxlan_update(struct stream *s, + struct mlag_vxlan *msg); +extern int mlag_lib_decode_frr_status(struct stream *s, + struct mlag_frr_status *msg); #ifdef __cplusplus } #endif diff --git a/lib/zclient.c b/lib/zclient.c index a135d18744..7a62e408ea 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -2721,6 +2721,57 @@ stream_failure: return; } +void zclient_send_mlag_register(struct zclient *client, uint32_t bit_map) +{ + struct stream *s; + + s = client->obuf; + stream_reset(s); + + zclient_create_header(s, ZEBRA_MLAG_CLIENT_REGISTER, VRF_DEFAULT); + stream_putl(s, bit_map); + + stream_putw_at(s, 0, stream_get_endp(s)); + zclient_send_message(client); +} + +void zclient_send_mlag_deregister(struct zclient *client) +{ + zebra_message_send(client, ZEBRA_MLAG_CLIENT_UNREGISTER, VRF_DEFAULT); +} + +void zclient_send_mlag_data(struct zclient *client, struct stream *client_s) +{ + struct stream *s; + + s = client->obuf; + stream_reset(s); + + zclient_create_header(s, ZEBRA_MLAG_FORWARD_MSG, VRF_DEFAULT); + stream_put(s, client_s->data, client_s->endp); + + stream_putw_at(s, 0, stream_get_endp(s)); + zclient_send_message(client); +} + +static void zclient_mlag_process_up(ZAPI_CALLBACK_ARGS) +{ + if (zclient->mlag_process_up) + (*zclient->mlag_process_up)(); +} + +static void zclient_mlag_process_down(ZAPI_CALLBACK_ARGS) +{ + if (zclient->mlag_process_down) + (*zclient->mlag_process_down)(); +} + +static void zclient_mlag_handle_msg(ZAPI_CALLBACK_ARGS) +{ + if (zclient->mlag_handle_msg) + (*zclient->mlag_handle_msg)(zclient->ibuf, length); +} + /* Zebra client message read function. 
*/ static int zclient_read(struct thread *thread) { @@ -3015,6 +3066,15 @@ static int zclient_read(struct thread *thread) (*zclient->vxlan_sg_del)(command, zclient, length, vrf_id); break; + case ZEBRA_MLAG_PROCESS_UP: + zclient_mlag_process_up(command, zclient, length, vrf_id); + break; + case ZEBRA_MLAG_PROCESS_DOWN: + zclient_mlag_process_down(command, zclient, length, vrf_id); + break; + case ZEBRA_MLAG_FORWARD_MSG: + zclient_mlag_handle_msg(command, zclient, length, vrf_id); + break; default: break; } diff --git a/lib/zclient.h b/lib/zclient.h index 2131d4d47a..7adb294a31 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -178,6 +178,11 @@ typedef enum { ZEBRA_VXLAN_SG_ADD, ZEBRA_VXLAN_SG_DEL, ZEBRA_VXLAN_SG_REPLAY, + ZEBRA_MLAG_PROCESS_UP, + ZEBRA_MLAG_PROCESS_DOWN, + ZEBRA_MLAG_CLIENT_REGISTER, + ZEBRA_MLAG_CLIENT_UNREGISTER, + ZEBRA_MLAG_FORWARD_MSG, } zebra_message_types_t; struct redist_proto { @@ -272,6 +277,9 @@ struct zclient { int (*iptable_notify_owner)(ZAPI_CALLBACK_ARGS); int (*vxlan_sg_add)(ZAPI_CALLBACK_ARGS); int (*vxlan_sg_del)(ZAPI_CALLBACK_ARGS); + int (*mlag_process_up)(void); + int (*mlag_process_down)(void); + int (*mlag_handle_msg)(struct stream *msg, int len); }; /* Zebra API message flag. 
*/ @@ -482,6 +490,7 @@ enum zapi_iptable_notify_owner { #define ZEBRA_MACIP_TYPE_GW 0x02 /* gateway (SVI) mac*/ #define ZEBRA_MACIP_TYPE_ROUTER_FLAG 0x04 /* Router Flag - proxy NA */ #define ZEBRA_MACIP_TYPE_OVERRIDE_FLAG 0x08 /* Override Flag */ +#define ZEBRA_MACIP_TYPE_SVI_IP 0x10 /* SVI MAC-IP */ enum zebra_neigh_state { ZEBRA_NEIGH_INACTIVE = 0, ZEBRA_NEIGH_ACTIVE = 1 }; @@ -694,5 +703,11 @@ static inline void zapi_route_set_blackhole(struct zapi_route *api, SET_FLAG(api->message, ZAPI_MESSAGE_NEXTHOP); }; +extern void zclient_send_mlag_register(struct zclient *client, + uint32_t bit_map); +extern void zclient_send_mlag_deregister(struct zclient *client); + +extern void zclient_send_mlag_data(struct zclient *client, + struct stream *client_s); #endif /* _ZEBRA_ZCLIENT_H */ diff --git a/mlag/mlag.proto b/mlag/mlag.proto new file mode 100644 index 0000000000..1e302151f8 --- /dev/null +++ b/mlag/mlag.proto @@ -0,0 +1,186 @@ +// See README.txt for information and build instructions. +// +// Note: START and END tags are used in comments to define sections used in +// tutorials. They are not part of the syntax for Protocol Buffers. +// +// To get an in-depth walkthrough of this file and the related examples, see: +// https://developers.google.com/protocol-buffers/docs/tutorials + +// [START declaration] +syntax = "proto3"; +//package tutorial; + +/* + * This Contains the Message structures used for PIM MLAG Active-Active support. + * Mainly there were two types of messages + * + * 1. Messages sent from PIM (Node-1) to PIM (Node-2) + * 2. Messages sent from CLAG to PIM (status Messages) + * + * ProtoBuf supports maximum 32 fields, so to make it more generic message + * encoding is like below. 
+ * __________________________________________ + * | | | + * | Header | bytes | + * ___________________________________________ + * + * + * Header carries Information about + * 1) what Message it is carrying + * 2) Bytes carries the actual payload encoded with protobuf + * + * + * Limitations + *============= + * Since message-type is 32-bit, there were no real limitations on number of + * messages Infra can support, but each message can carry only 32 fields. + * + */ + + +// [START messages] +message ZebraMlag_Header { + enum MessageType { + ZEBRA_MLAG_NONE = 0; //Invalid message-type + ZEBRA_MLAG_REGISTER = 1; + ZEBRA_MLAG_DEREGISTER = 2; + ZEBRA_MLAG_STATUS_UPDATE = 3; + ZEBRA_MLAG_MROUTE_ADD = 4; + ZEBRA_MLAG_MROUTE_DEL = 5; + ZEBRA_MLAG_DUMP = 6; + ZEBRA_MLAG_MROUTE_ADD_BULK = 7; + ZEBRA_MLAG_MROUTE_DEL_BULK = 8; + ZEBRA_MLAG_PIM_CFG_DUMP = 10; + ZEBRA_MLAG_VXLAN_UPDATE = 11; + ZEBRA_MLAG_ZEBRA_STATUS_UPDATE = 12; + } + + /* + * tells what type of message this payload carries + */ + MessageType type = 1; + + /* + * Length of payload + */ + uint32 len = 2; + + /* + * Actual Encoded payload + */ + bytes data = 3; +} + + +/* + * ZEBRA_MLAG_REGISTER & ZEBRA_MLAG_DEREGISTER + * + * After the MLAGD is up, First Zebra has to register to send any data, + * otherwise MLAGD will not accept any data from the client. 
+ * De-register will be used for the Data cleanup at MLAGD + * These are NULL payload message currently + */ + +/* + * ZEBRA_MLAG_STATUS_UPDATE + * + * This message will be posted by CLAGD(an external control plane manager + * which monitors CLAG failures) to inform peerlink/CLAG Failure + * to zebra, after the failure Notification Node with primary role will + * forward the Traffic and Node with standby will drop the traffic + */ + +message ZebraMlagStatusUpdate { + enum ClagState { + CLAG_STATE_DOWN = 0; + CLAG_STATE_RUNNING = 1; + } + + enum ClagRole { + CLAG_ROLE_NONE = 0; + CLAG_ROLE_PRIMAY = 1; + CLAG_ROLE_SECONDARY = 2; + } + + string peerlink = 1; + ClagRole my_role = 2; + ClagState peer_state = 3; +} + +/* + * ZEBRA_MLAG_VXLAN_UPDATE + * + * This message will be posted by CLAGD(an external control plane Manager + * which is responsible for MCLAG) to inform zebra obout anycast/local + * ip updates. + */ +message ZebraMlagVxlanUpdate { + uint32 anycast_ip = 1; + uint32 local_ip = 2; +} + +/* + * ZebraMlagZebraStatusUpdate + * + * This message will be posted by CLAGD to advertise FRR state + * Change Information to peer + */ + +message ZebraMlagZebraStatusUpdate{ + enum FrrState { + FRR_STATE_NONE = 0; + FRR_STATE_DOWN = 1; + FRR_STATE_UP = 2; + } + + FrrState peer_frrstate = 1; +} + +/* + * ZEBRA_MLAG_MROUTE_ADD & ZEBRA_MLAG_MROUTE_DEL + * + * These messages will be sent from PIM (Node-1) to PIM (Node-2) to perform + * DF Election for each Mcast flow. Elected DF will forward the traffic + * towards the host and loser will keep the OIL as empty, so that only single + * copy will be sent to host + * This message will be posted with any change in the params. + * + * ZEBRA_MLAG_MROUTE_DEL is mainly to delete the record at MLAGD when the + * mcast flow is deleted. 
+ * key for the MLAGD lookup is (vrf_id, source_ip & group_ip) + */ + +message ZebraMlagMrouteAdd { + string vrf_name = 1; + uint32 source_ip = 2; + uint32 group_ip = 3; + /* + * This is the IGP Cost to reach Configured RP in case of (*,G) or + * Cost to the source in case of (S,G) entry + */ + uint32 cost_to_rp = 4; + uint32 owner_id = 5; + bool am_i_DR = 6; + bool am_i_Dual_active = 7; + uint32 vrf_id = 8; + string intf_name = 9; +} + +message ZebraMlagMrouteDel { + string vrf_name = 1; + uint32 source_ip = 2; + uint32 group_ip = 3; + uint32 owner_id = 4; + uint32 vrf_id = 5; + string intf_name = 6; +} + +message ZebraMlagMrouteAddBulk { + repeated ZebraMlagMrouteAdd mroute_add = 1; +} + +message ZebraMlagMrouteDelBulk { + repeated ZebraMlagMrouteDel mroute_del = 1; +} + +// [END messages] diff --git a/mlag/subdir.am b/mlag/subdir.am new file mode 100644 index 0000000000..9fab662860 --- /dev/null +++ b/mlag/subdir.am @@ -0,0 +1,19 @@ +if HAVE_PROTOBUF +lib_LTLIBRARIES += mlag/libmlag_pb.la +endif + +mlag_libmlag_pb_la_LDFLAGS = -version-info 0:0:0 +mlag_libmlag_pb_la_CPPFLAGS = $(AM_CPPFLAGS) $(PROTOBUF_C_CFLAGS) +mlag_libmlag_pb_la_SOURCES = \ + # end + +nodist_mlag_libmlag_pb_la_SOURCES = \ + mlag/mlag.pb-c.c \ + # end + +CLEANFILES += \ + mlag/mlag.pb-c.c \ + mlag/mlag.pb-c.h \ + # end + +EXTRA_DIST += mlag/mlag.proto diff --git a/ospfd/ospf_apiserver.c b/ospfd/ospf_apiserver.c index d6f1fba28b..bd703bc89d 100644 --- a/ospfd/ospf_apiserver.c +++ b/ospfd/ospf_apiserver.c @@ -2330,9 +2330,7 @@ void ospf_apiserver_clients_notify_nsm_change(struct ospf_neighbor *nbr) assert(nbr); - if (nbr->oi) { - ifaddr = nbr->oi->address->u.prefix4; - } + ifaddr = nbr->oi->address->u.prefix4; nbraddr = nbr->address.u.prefix4; diff --git a/ospfd/ospf_flood.c b/ospfd/ospf_flood.c index 381fb6820f..c29b464cab 100644 --- a/ospfd/ospf_flood.c +++ b/ospfd/ospf_flood.c @@ -328,8 +328,7 @@ int ospf_flood(struct ospf *ospf, struct ospf_neighbor *nbr, 
ospf_ls_retransmit_delete_nbr_as(ospf, current); break; default: - ospf_ls_retransmit_delete_nbr_area(nbr->oi->area, - current); + ospf_ls_retransmit_delete_nbr_area(oi->area, current); break; } } @@ -345,7 +344,7 @@ int ospf_flood(struct ospf *ospf, struct ospf_neighbor *nbr, procedure cannot overwrite the newly installed LSA until MinLSArrival seconds have elapsed. */ - if (!(new = ospf_lsa_install(ospf, nbr->oi, new))) + if (!(new = ospf_lsa_install(ospf, oi, new))) return -1; /* unknown LSA type or any other error condition */ /* Acknowledge the receipt of the LSA by sending a Link State diff --git a/ospfd/ospf_neighbor.c b/ospfd/ospf_neighbor.c index a9247dd0ec..46dfc505ef 100644 --- a/ospfd/ospf_neighbor.c +++ b/ospfd/ospf_neighbor.c @@ -141,6 +141,8 @@ void ospf_nbr_free(struct ospf_neighbor *nbr) thread_cancel_event(master, nbr); ospf_bfd_info_free(&nbr->bfd_info); + + nbr->oi = NULL; XFREE(MTYPE_OSPF_NEIGHBOR, nbr); } @@ -446,7 +448,7 @@ static struct ospf_neighbor *ospf_nbr_add(struct ospf_interface *oi, nbr->crypt_seqnum = ospfh->u.crypt.crypt_seqnum; if (IS_DEBUG_OSPF_EVENT) - zlog_debug("NSM[%s:%s]: start", IF_NAME(nbr->oi), + zlog_debug("NSM[%s:%s]: start", IF_NAME(oi), inet_ntoa(nbr->router_id)); return nbr; diff --git a/ospfd/ospf_nsm.c b/ospfd/ospf_nsm.c index 110738802c..9f6be3cbc7 100644 --- a/ospfd/ospf_nsm.c +++ b/ospfd/ospf_nsm.c @@ -224,7 +224,7 @@ static int ospf_db_summary_add(struct ospf_neighbor *nbr, struct ospf_lsa *lsa) case OSPF_OPAQUE_LINK_LSA: /* Exclude type-9 LSAs that does not have the same "oi" with * "nbr". 
*/ - if (nbr->oi && ospf_if_exists(lsa->oi) != nbr->oi) + if (ospf_if_exists(lsa->oi) != nbr->oi) return 0; break; case OSPF_OPAQUE_AREA_LSA: diff --git a/ospfd/ospf_packet.c b/ospfd/ospf_packet.c index 8634589b11..80ffc3f361 100644 --- a/ospfd/ospf_packet.c +++ b/ospfd/ospf_packet.c @@ -3746,8 +3746,6 @@ int ospf_hello_reply_timer(struct thread *thread) nbr = THREAD_ARG(thread); nbr->t_hello_reply = NULL; - assert(nbr->oi); - if (IS_DEBUG_OSPF(nsm, NSM_TIMERS)) zlog_debug("NSM[%s:%s]: Timer (hello-reply timer expire)", IF_NAME(nbr->oi), inet_ntoa(nbr->router_id)); @@ -4335,7 +4333,7 @@ void ospf_proactively_arp(struct ospf_neighbor *nbr) char ping_nbr[OSPF_PING_NBR_STR_MAX]; int ret; - if (!nbr || !nbr->oi || !nbr->oi->ifp) + if (!nbr) return; snprintf(ping_nbr, sizeof(ping_nbr), diff --git a/ospfd/ospf_snmp.c b/ospfd/ospf_snmp.c index c26545344a..da3bc6f581 100644 --- a/ospfd/ospf_snmp.c +++ b/ospfd/ospf_snmp.c @@ -2257,8 +2257,6 @@ static uint8_t *ospfNbrEntry(struct variable *v, oid *name, size_t *length, if (!nbr) return NULL; oi = nbr->oi; - if (!oi) - return NULL; /* Return the current value of the variable */ switch (v->magic) { diff --git a/pbrd/pbr_main.c b/pbrd/pbr_main.c index bb92703ae4..faa3de42f2 100644 --- a/pbrd/pbr_main.c +++ b/pbrd/pbr_main.c @@ -48,6 +48,7 @@ #include "pbr_zebra.h" #include "pbr_vty.h" #include "pbr_debug.h" +#include "pbr_vrf.h" zebra_capabilities_t _caps_p[] = { ZCAP_NET_RAW, ZCAP_BIND, ZCAP_NET_ADMIN, @@ -153,7 +154,6 @@ int main(int argc, char **argv, char **envp) pbr_debug_init(); - vrf_init(NULL, NULL, NULL, NULL, NULL); nexthop_group_init(pbr_nhgroup_add_cb, pbr_nhgroup_add_nexthop_cb, pbr_nhgroup_del_nexthop_cb, @@ -169,6 +169,7 @@ int main(int argc, char **argv, char **envp) if_zapi_callbacks(pbr_ifp_create, pbr_ifp_up, pbr_ifp_down, pbr_ifp_destroy); pbr_zebra_init(); + pbr_vrf_init(); pbr_vty_init(); frr_config_fork(); diff --git a/pbrd/pbr_map.c b/pbrd/pbr_map.c index 1a8461c6c1..4df0c790b1 100644 --- 
a/pbrd/pbr_map.c +++ b/pbrd/pbr_map.c @@ -35,6 +35,7 @@ #include "pbr_zebra.h" #include "pbr_memory.h" #include "pbr_debug.h" +#include "pbr_vrf.h" DEFINE_MTYPE_STATIC(PBRD, PBR_MAP, "PBR Map") DEFINE_MTYPE_STATIC(PBRD, PBR_MAP_SEQNO, "PBR Map Sequence") @@ -42,6 +43,7 @@ DEFINE_MTYPE_STATIC(PBRD, PBR_MAP_INTERFACE, "PBR Map Interface") static uint32_t pbr_map_sequence_unique; +static bool pbr_map_check_valid_internal(struct pbr_map *pbrm); static inline int pbr_map_compare(const struct pbr_map *pbrmap1, const struct pbr_map *pbrmap2); @@ -98,9 +100,55 @@ static void pbr_map_interface_list_delete(struct pbr_map_interface *pmi) } } +static bool pbr_map_interface_is_valid(const struct pbr_map_interface *pmi) +{ + /* Don't install rules without a real ifindex on the incoming interface. + * + * This can happen when we have config for an interface that does not + * exist or when an interface is changing vrfs. + */ + if (pmi->ifp && pmi->ifp->ifindex != IFINDEX_INTERNAL) + return true; + + return false; +} + +static void pbr_map_pbrms_update_common(struct pbr_map_sequence *pbrms, + bool install) +{ + struct pbr_map *pbrm; + struct listnode *node; + struct pbr_map_interface *pmi; + + pbrm = pbrms->parent; + + if (pbrms->nhs_installed && pbrm->incoming->count) { + for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, node, pmi)) { + if (!pmi->ifp) + continue; + + if (install && !pbr_map_interface_is_valid(pmi)) + continue; + + pbr_send_pbr_map(pbrms, pmi, install); + } + } +} + +static void pbr_map_pbrms_install(struct pbr_map_sequence *pbrms) +{ + pbr_map_pbrms_update_common(pbrms, true); +} + +static void pbr_map_pbrms_uninstall(struct pbr_map_sequence *pbrms) +{ + pbr_map_pbrms_update_common(pbrms, false); +} + static const char *pbr_map_reason_str[] = { "Invalid NH-group", "Invalid NH", "No Nexthops", - "Both NH and NH-Group", "Invalid Src or Dst", "Deleting Sequence", + "Both NH and NH-Group", "Invalid Src or Dst", "Invalid VRF", + "Deleting Sequence", }; void 
pbr_map_reason_string(unsigned int reason, char *buf, int size) @@ -168,6 +216,93 @@ void pbr_map_add_interface(struct pbr_map *pbrm, struct interface *ifp_add) pbr_map_install(pbrm); } +static int +pbr_map_policy_interface_update_common(const struct interface *ifp, + struct pbr_interface **pbr_ifp, + struct pbr_map **pbrm) +{ + if (!ifp->info) { + DEBUGD(&pbr_dbg_map, "%s: %s has no pbr_interface info", + __func__, ifp->name); + return -1; + } + + *pbr_ifp = ifp->info; + + *pbrm = pbrm_find((*pbr_ifp)->mapname); + + if (!*pbrm) { + DEBUGD(&pbr_dbg_map, "%s: applied PBR-MAP(%s) does not exist?", + __func__, (*pbr_ifp)->mapname); + return -1; + } + + return 0; +} + +void pbr_map_policy_interface_update(const struct interface *ifp, bool state_up) +{ + struct pbr_interface *pbr_ifp; + struct pbr_map_sequence *pbrms; + struct pbr_map *pbrm; + struct listnode *node, *inode; + struct pbr_map_interface *pmi; + + if (pbr_map_policy_interface_update_common(ifp, &pbr_ifp, &pbrm)) + return; + + DEBUGD(&pbr_dbg_map, "%s: %s %s rules on interface %s", __func__, + pbr_ifp->mapname, (state_up ? "installing" : "removing"), + ifp->name); + + /* + * Walk the list and install/remove maps on the interface. 
+ */ + for (ALL_LIST_ELEMENTS_RO(pbrm->seqnumbers, node, pbrms)) + for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, inode, pmi)) + if (pmi->ifp == ifp && pbr_map_interface_is_valid(pmi)) + pbr_send_pbr_map(pbrms, pmi, state_up); +} + +static void pbrms_vrf_update(struct pbr_map_sequence *pbrms, + const struct pbr_vrf *pbr_vrf) +{ + const char *vrf_name = pbr_vrf_name(pbr_vrf); + + if (pbrms->vrf_lookup + && (strncmp(vrf_name, pbrms->vrf_name, sizeof(pbrms->vrf_name)) + == 0)) { + DEBUGD(&pbr_dbg_map, "\tSeq %u uses vrf %s (%u), updating map", + pbrms->seqno, vrf_name, pbr_vrf_id(pbr_vrf)); + + pbr_map_check(pbrms); + } +} + +/* Vrf enabled/disabled */ +void pbr_map_vrf_update(const struct pbr_vrf *pbr_vrf) +{ + struct pbr_map *pbrm; + struct pbr_map_sequence *pbrms; + struct listnode *node; + + if (!pbr_vrf) + return; + + bool enabled = pbr_vrf_is_enabled(pbr_vrf); + + DEBUGD(&pbr_dbg_map, "%s: %s (%u) %s, updating pbr maps", __func__, + pbr_vrf_name(pbr_vrf), pbr_vrf_id(pbr_vrf), + enabled ? "enabled" : "disabled"); + + RB_FOREACH (pbrm, pbr_map_entry_head, &pbr_maps) { + DEBUGD(&pbr_dbg_map, "%s: Looking at %s", __PRETTY_FUNCTION__, + pbrm->name); + for (ALL_LIST_ELEMENTS_RO(pbrm->seqnumbers, node, pbrms)) + pbrms_vrf_update(pbrms, pbr_vrf); + } +} + void pbr_map_write_interfaces(struct vty *vty, struct interface *ifp) { struct pbr_interface *pbr_ifp = ifp->info; @@ -210,16 +345,11 @@ extern void pbr_map_delete(struct pbr_map_sequence *pbrms) } } -void pbr_map_delete_nexthop_group(struct pbr_map_sequence *pbrms) +static void pbr_map_delete_common(struct pbr_map_sequence *pbrms) { struct pbr_map *pbrm = pbrms->parent; - struct listnode *node; - struct pbr_map_interface *pmi; - if (pbrm->valid && pbrms->nhs_installed && pbrm->incoming->count) { - for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, node, pmi)) - pbr_send_pbr_map(pbrms, pmi, false); - } + pbr_map_pbrms_uninstall(pbrms); pbrm->valid = false; pbrms->nhs_installed = false; @@ -227,6 +357,16 @@ void 
pbr_map_delete_nexthop_group(struct pbr_map_sequence *pbrms) pbrms->nhgrp_name = NULL; } +void pbr_map_delete_nexthops(struct pbr_map_sequence *pbrms) +{ + pbr_map_delete_common(pbrms); +} + +void pbr_map_delete_vrf(struct pbr_map_sequence *pbrms) +{ + pbr_map_delete_common(pbrms); +} + struct pbr_map_sequence *pbrms_lookup_unique(uint32_t unique, ifindex_t ifindex, struct pbr_map_interface **ppmi) { @@ -318,6 +458,7 @@ struct pbr_map_sequence *pbrms_get(const char *name, uint32_t seqno) pbrms->reason = PBR_MAP_INVALID_EMPTY | PBR_MAP_INVALID_NO_NEXTHOPS; + pbrms->vrf_name[0] = '\0'; QOBJ_REG(pbrms, pbr_map_sequence); listnode_add_sort(pbrm->seqnumbers, pbrms); @@ -329,12 +470,36 @@ struct pbr_map_sequence *pbrms_get(const char *name, uint32_t seqno) static void pbr_map_sequence_check_nexthops_valid(struct pbr_map_sequence *pbrms) { + /* Check if any are present first */ + if (!pbrms->vrf_unchanged && !pbrms->vrf_lookup && !pbrms->nhg + && !pbrms->nhgrp_name) { + pbrms->reason |= PBR_MAP_INVALID_NO_NEXTHOPS; + return; + } + + /* + * Check validness of vrf. 
+ */ + + /* This one can be considered always valid */ + if (pbrms->vrf_unchanged) + pbrms->nhs_installed = true; + + if (pbrms->vrf_lookup) { + struct pbr_vrf *pbr_vrf = + pbr_vrf_lookup_by_name(pbrms->vrf_name); + + if (pbr_vrf && pbr_vrf_is_valid(pbr_vrf)) + pbrms->nhs_installed = true; + else + pbrms->reason |= PBR_MAP_INVALID_VRF; + } + /* * Check validness of the nexthop or nexthop-group */ - if (!pbrms->nhg && !pbrms->nhgrp_name) - pbrms->reason |= PBR_MAP_INVALID_NO_NEXTHOPS; + /* Only nexthop or nexthop group allowed */ if (pbrms->nhg && pbrms->nhgrp_name) pbrms->reason |= PBR_MAP_INVALID_BOTH_NHANDGRP; @@ -458,11 +623,13 @@ void pbr_map_policy_install(const char *name) __PRETTY_FUNCTION__, name, pbrms->seqno, pbrm->valid, pbrms->nhs_installed); - if (pbrm->valid && pbrms->nhs_installed && pbrm->incoming->count) { - DEBUGD(&pbr_dbg_map, "\tInstalling %s %u", - pbrm->name, pbrms->seqno); + if (pbrm->valid && pbrms->nhs_installed + && pbrm->incoming->count) { + DEBUGD(&pbr_dbg_map, "\tInstalling %s %u", pbrm->name, + pbrms->seqno); for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, inode, pmi)) - pbr_send_pbr_map(pbrms, pmi, true); + if (pbr_map_interface_is_valid(pmi)) + pbr_send_pbr_map(pbrms, pmi, true); } } } @@ -525,8 +692,6 @@ void pbr_map_check_nh_group_change(const char *nh_group) void pbr_map_check(struct pbr_map_sequence *pbrms) { struct pbr_map *pbrm; - struct listnode *inode; - struct pbr_map_interface *pmi; bool install; pbrm = pbrms->parent; @@ -551,23 +716,22 @@ void pbr_map_check(struct pbr_map_sequence *pbrms) pbrms->seqno, pbrms->reason); } - for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, inode, pmi)) { - pbr_send_pbr_map(pbrms, pmi, install); - } + if (install) + pbr_map_pbrms_install(pbrms); + else + pbr_map_pbrms_uninstall(pbrms); } void pbr_map_install(struct pbr_map *pbrm) { - struct listnode *node, *inode; struct pbr_map_sequence *pbrms; - struct pbr_map_interface *pmi; + struct listnode *node; if (!pbrm->incoming->count) return; for 
(ALL_LIST_ELEMENTS_RO(pbrm->seqnumbers, node, pbrms)) - for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, inode, pmi)) - pbr_send_pbr_map(pbrms, pmi, true); + pbr_map_pbrms_install(pbrms); } void pbr_map_init(void) diff --git a/pbrd/pbr_map.h b/pbrd/pbr_map.h index 112acfe44e..8bd22cbf2a 100644 --- a/pbrd/pbr_map.h +++ b/pbrd/pbr_map.h @@ -22,6 +22,8 @@ #include <bitfield.h> +#include "pbr_vrf.h" + struct pbr_map { /* * RB Tree of the pbr_maps @@ -95,6 +97,21 @@ struct pbr_map_sequence { unsigned char family; /* + * Use interface's vrf. + */ + bool vrf_unchanged; + + /* + * The vrf to lookup in was directly configured. + */ + bool vrf_lookup; + + /* + * VRF to lookup. + */ + char vrf_name[VRF_NAMSIZ + 1]; + + /* * The nexthop group we auto create * for when the user specifies a individual * nexthop @@ -122,12 +139,13 @@ struct pbr_map_sequence { * A reason of 0 means we think the pbr_map_sequence is good to go * We can accumuluate multiple failure states */ -#define PBR_MAP_VALID_SEQUENCE_NUMBER 0 -#define PBR_MAP_INVALID_NEXTHOP_GROUP (1 << 0) -#define PBR_MAP_INVALID_NEXTHOP (1 << 1) -#define PBR_MAP_INVALID_NO_NEXTHOPS (1 << 2) -#define PBR_MAP_INVALID_BOTH_NHANDGRP (1 << 3) -#define PBR_MAP_INVALID_EMPTY (1 << 4) +#define PBR_MAP_VALID_SEQUENCE_NUMBER 0 +#define PBR_MAP_INVALID_NEXTHOP_GROUP (1 << 0) +#define PBR_MAP_INVALID_NEXTHOP (1 << 1) +#define PBR_MAP_INVALID_NO_NEXTHOPS (1 << 2) +#define PBR_MAP_INVALID_BOTH_NHANDGRP (1 << 3) +#define PBR_MAP_INVALID_EMPTY (1 << 4) +#define PBR_MAP_INVALID_VRF (1 << 5) uint64_t reason; QOBJ_FIELDS @@ -144,12 +162,21 @@ pbrms_lookup_unique(uint32_t unique, ifindex_t ifindex, extern struct pbr_map *pbrm_find(const char *name); extern void pbr_map_delete(struct pbr_map_sequence *pbrms); -extern void pbr_map_delete_nexthop_group(struct pbr_map_sequence *pbrms); +extern void pbr_map_delete_nexthops(struct pbr_map_sequence *pbrms); +extern void pbr_map_delete_vrf(struct pbr_map_sequence *pbrms); extern void 
pbr_map_add_interface(struct pbr_map *pbrm, struct interface *ifp); extern void pbr_map_interface_delete(struct pbr_map *pbrm, struct interface *ifp); + +/* Update maps installed on interface */ +extern void pbr_map_policy_interface_update(const struct interface *ifp, + bool state_up); + extern void pbr_map_final_interface_deletion(struct pbr_map *pbrm, struct pbr_map_interface *pmi); + +extern void pbr_map_vrf_update(const struct pbr_vrf *pbr_vrf); + extern void pbr_map_write_interfaces(struct vty *vty, struct interface *ifp); extern void pbr_map_init(void); diff --git a/pbrd/pbr_nht.c b/pbrd/pbr_nht.c index 7ccd14d1f1..5ab714e617 100644 --- a/pbrd/pbr_nht.c +++ b/pbrd/pbr_nht.c @@ -548,20 +548,10 @@ void pbr_nht_delete_individual_nexthop(struct pbr_map_sequence *pbrms) struct pbr_nexthop_group_cache find; struct pbr_nexthop_cache *pnhc; struct pbr_nexthop_cache lup; - struct pbr_map *pbrm = pbrms->parent; - struct listnode *node; - struct pbr_map_interface *pmi; struct nexthop *nh; enum nexthop_types_t nh_type = 0; - if (pbrm->valid && pbrms->nhs_installed && pbrm->incoming->count) { - for (ALL_LIST_ELEMENTS_RO(pbrm->incoming, node, pmi)) - pbr_send_pbr_map(pbrms, pmi, false); - } - - pbrm->valid = false; - pbrms->nhs_installed = false; - pbrms->reason |= PBR_MAP_INVALID_NO_NEXTHOPS; + pbr_map_delete_nexthops(pbrms); memset(&find, 0, sizeof(find)); snprintf(find.name, sizeof(find.name), "%s", pbrms->internal_nhg_name); diff --git a/pbrd/pbr_vrf.c b/pbrd/pbr_vrf.c new file mode 100644 index 0000000000..d5a2bd0fef --- /dev/null +++ b/pbrd/pbr_vrf.c @@ -0,0 +1,137 @@ +/* + * PBR - vrf code + * Copyright (C) 2019 Cumulus Networks, Inc. + * Stephen Worley + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <zebra.h> + +#include "vrf.h" + +#include "pbr_vrf.h" +#include "pbr_memory.h" +#include "pbr_map.h" +#include "pbr_debug.h" + +DEFINE_MTYPE_STATIC(PBRD, PBR_MAP_VRF, "PBR Map VRF") + +static struct pbr_vrf *pbr_vrf_alloc(void) +{ + struct pbr_vrf *pbr_vrf; + + pbr_vrf = XCALLOC(MTYPE_PBR_MAP_VRF, sizeof(struct pbr_vrf)); + + return pbr_vrf; +} + +static void pbr_vrf_free(struct pbr_vrf *pbr_vrf) +{ + XFREE(MTYPE_PBR_MAP_VRF, pbr_vrf); +} + +static int pbr_vrf_new(struct vrf *vrf) +{ + struct pbr_vrf *pbr_vrf; + + DEBUGD(&pbr_dbg_event, "%s: %u (%s)", __func__, vrf->vrf_id, vrf->name); + + pbr_vrf = pbr_vrf_alloc(); + vrf->info = pbr_vrf; + pbr_vrf->vrf = vrf; + + return 0; +} + +static int pbr_vrf_enable(struct vrf *vrf) +{ + DEBUGD(&pbr_dbg_event, "%s: %u (%s)", __func__, vrf->vrf_id, vrf->name); + + pbr_map_vrf_update(vrf->info); + + return 0; +} + +static int pbr_vrf_disable(struct vrf *vrf) +{ + DEBUGD(&pbr_dbg_event, "%s: %u (%s)", __func__, vrf->vrf_id, vrf->name); + + pbr_map_vrf_update(vrf->info); + + return 0; +} + +static int pbr_vrf_delete(struct vrf *vrf) +{ + DEBUGD(&pbr_dbg_event, "%s: %u (%s)", __func__, vrf->vrf_id, vrf->name); + + /* + * Make sure vrf is always marked disabled first so we handle + * pbr rules using it. 
+ */ + assert(!vrf_is_enabled(vrf)); + + pbr_vrf_free(vrf->info); + vrf->info = NULL; + + return 0; +} + +struct pbr_vrf *pbr_vrf_lookup_by_id(vrf_id_t vrf_id) +{ + struct vrf *vrf; + + vrf = vrf_lookup_by_id(vrf_id); + if (vrf) + return ((struct pbr_vrf *)vrf->info); + + return NULL; +} + +struct pbr_vrf *pbr_vrf_lookup_by_name(const char *name) +{ + struct vrf *vrf; + + if (!name) + name = VRF_DEFAULT_NAME; + + vrf = vrf_lookup_by_name(name); + if (vrf) + return ((struct pbr_vrf *)vrf->info); + + return NULL; +} + +bool pbr_vrf_is_enabled(const struct pbr_vrf *pbr_vrf) +{ + return vrf_is_enabled(pbr_vrf->vrf) ? true : false; +} + +bool pbr_vrf_is_valid(const struct pbr_vrf *pbr_vrf) +{ + if (vrf_is_backend_netns()) + return false; + + if (!pbr_vrf->vrf) + return false; + + return pbr_vrf_is_enabled(pbr_vrf); +} + +void pbr_vrf_init(void) +{ + vrf_init(pbr_vrf_new, pbr_vrf_enable, pbr_vrf_disable, pbr_vrf_delete, + NULL); +} diff --git a/pbrd/pbr_vrf.h b/pbrd/pbr_vrf.h new file mode 100644 index 0000000000..c9448762eb --- /dev/null +++ b/pbrd/pbr_vrf.h @@ -0,0 +1,43 @@ +/* + * VRF library for PBR + * Copyright (C) 2019 Cumulus Networks, Inc. + * Stephen Worley + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __PBR_VRF_H__ +#define __PBR_VRF_H__ + +struct pbr_vrf { + struct vrf *vrf; +}; + +static inline const char *pbr_vrf_name(const struct pbr_vrf *pbr_vrf) +{ + return pbr_vrf->vrf->name; +} + +static inline vrf_id_t pbr_vrf_id(const struct pbr_vrf *pbr_vrf) +{ + return pbr_vrf->vrf->vrf_id; +} + +extern struct pbr_vrf *pbr_vrf_lookup_by_id(vrf_id_t vrf_id); +extern struct pbr_vrf *pbr_vrf_lookup_by_name(const char *name); +extern bool pbr_vrf_is_valid(const struct pbr_vrf *pbr_vrf); +extern bool pbr_vrf_is_enabled(const struct pbr_vrf *pbr_vrf); + +extern void pbr_vrf_init(void); +#endif diff --git a/pbrd/pbr_vty.c b/pbrd/pbr_vty.c index e0fd147b0e..bc4aa947a9 100644 --- a/pbrd/pbr_vty.c +++ b/pbrd/pbr_vty.c @@ -193,14 +193,17 @@ DEFPY(pbr_map_match_mark, pbr_map_match_mark_cmd, pbr_map_check(pbrms); return CMD_SUCCESS; - } +} + +#define SET_VRF_EXISTS_STR \ + "A `set vrf XX` command already exists, please remove that first\n" DEFPY(pbr_map_nexthop_group, pbr_map_nexthop_group_cmd, - "[no] set nexthop-group NHGNAME$name", - NO_STR - "Set for the PBR-MAP\n" - "nexthop-group to use\n" - "The name of the nexthop-group\n") + "[no] set nexthop-group NHGNAME$name", + NO_STR + "Set for the PBR-MAP\n" + "nexthop-group to use\n" + "The name of the nexthop-group\n") { struct pbr_map_sequence *pbrms = VTY_GET_CONTEXT(pbr_map_sequence); struct nexthop_group_cmd *nhgc; @@ -211,16 +214,22 @@ DEFPY(pbr_map_nexthop_group, pbr_map_nexthop_group_cmd, return CMD_WARNING_CONFIG_FAILED; } + if (pbrms->vrf_lookup || pbrms->vrf_unchanged) { + vty_out(vty, SET_VRF_EXISTS_STR); + return CMD_WARNING_CONFIG_FAILED; + } + nhgc = nhgc_find(name); if (!nhgc) { vty_out(vty, "Specified nexthop-group %s does not exist\n", name); - vty_out(vty, "PBR-MAP 
will not be applied until it is created\n"); + vty_out(vty, + "PBR-MAP will not be applied until it is created\n"); } if (no) { if (pbrms->nhgrp_name && strcmp(name, pbrms->nhgrp_name) == 0) - pbr_map_delete_nexthop_group(pbrms); + pbr_map_delete_nexthops(pbrms); else { vty_out(vty, "Nexthop Group specified: %s does not exist to remove", @@ -272,6 +281,11 @@ DEFPY(pbr_map_nexthop, pbr_map_nexthop_cmd, return CMD_WARNING_CONFIG_FAILED; } + if (pbrms->vrf_lookup || pbrms->vrf_unchanged) { + vty_out(vty, SET_VRF_EXISTS_STR); + return CMD_WARNING_CONFIG_FAILED; + } + if (vrf_name) vrf = vrf_lookup_by_name(vrf_name); else @@ -372,6 +386,61 @@ DEFPY(pbr_map_nexthop, pbr_map_nexthop_cmd, return CMD_SUCCESS; } +DEFPY(pbr_map_vrf, pbr_map_vrf_cmd, + "[no] set vrf <NAME$vrf_name|unchanged>", + NO_STR + "Set for the PBR-MAP\n" + "Specify the VRF for this map\n" + "The VRF Name\n" + "Use the interface's VRF for lookup\n") +{ + struct pbr_map_sequence *pbrms = VTY_GET_CONTEXT(pbr_map_sequence); + int ret = CMD_SUCCESS; + + if (no) { + pbr_map_delete_vrf(pbrms); + + /* Reset all data */ + pbrms->nhs_installed = false; + pbrms->vrf_name[0] = '\0'; + pbrms->vrf_lookup = false; + pbrms->vrf_unchanged = false; + + goto done; + } + + if (pbrms->nhgrp_name || pbrms->nhg) { + vty_out(vty, + "A `set nexthop/nexthop-group XX` command already exits, please remove that first\n"); + ret = CMD_WARNING_CONFIG_FAILED; + goto done; + } + + if (pbrms->vrf_lookup || pbrms->vrf_unchanged) { + vty_out(vty, SET_VRF_EXISTS_STR); + ret = CMD_WARNING_CONFIG_FAILED; + goto done; + } + + if (vrf_name) { + if (!pbr_vrf_lookup_by_name(vrf_name)) { + vty_out(vty, "Specified: %s is non-existent\n", + vrf_name); + ret = CMD_WARNING_CONFIG_FAILED; + goto done; + } + + pbrms->vrf_lookup = true; + strlcpy(pbrms->vrf_name, vrf_name, sizeof(pbrms->vrf_name)); + } else + pbrms->vrf_unchanged = true; + + pbr_map_check(pbrms); + +done: + return ret; +} + DEFPY (pbr_policy, pbr_policy_cmd, "[no] pbr-policy 
PBRMAP$mapname", @@ -500,6 +569,12 @@ DEFPY (show_pbr_map, pbrms->internal_nhg_name), pbr_nht_get_table( pbrms->internal_nhg_name)); + } else if (pbrms->vrf_unchanged) { + vty_out(vty, + "\tVRF Unchanged (use interface vrf)\n"); + } else if (pbrms->vrf_lookup) { + vty_out(vty, "\tVRF Lookup: %s\n", + pbrms->vrf_name); } else { vty_out(vty, "\tNexthop-Group: Unknown Installed: 0(0)\n"); @@ -662,6 +737,12 @@ static int pbr_vty_map_config_write_sequence(struct vty *vty, if (pbrms->mark) vty_out(vty, " match mark %u\n", pbrms->mark); + if (pbrms->vrf_unchanged) + vty_out(vty, " set vrf unchanged\n"); + + if (pbrms->vrf_lookup) + vty_out(vty, " set vrf %s\n", pbrms->vrf_name); + if (pbrms->nhgrp_name) vty_out(vty, " set nexthop-group %s\n", pbrms->nhgrp_name); @@ -737,6 +818,7 @@ void pbr_vty_init(void) install_element(PBRMAP_NODE, &pbr_map_match_mark_cmd); install_element(PBRMAP_NODE, &pbr_map_nexthop_group_cmd); install_element(PBRMAP_NODE, &pbr_map_nexthop_cmd); + install_element(PBRMAP_NODE, &pbr_map_vrf_cmd); install_element(VIEW_NODE, &show_pbr_cmd); install_element(VIEW_NODE, &show_pbr_map_cmd); install_element(VIEW_NODE, &show_pbr_interface_cmd); diff --git a/pbrd/pbr_zebra.c b/pbrd/pbr_zebra.c index 719374e3b9..b0a689a7e4 100644 --- a/pbrd/pbr_zebra.c +++ b/pbrd/pbr_zebra.c @@ -39,6 +39,7 @@ #include "pbr_memory.h" #include "pbr_zebra.h" #include "pbr_debug.h" +#include "pbr_vrf.h" DEFINE_MTYPE_STATIC(PBRD, PBR_INTERFACE, "PBR Interface") @@ -67,8 +68,11 @@ int pbr_ifp_create(struct interface *ifp) if (!ifp->info) pbr_if_new(ifp); + /* Update nexthops tracked from a `set nexthop` command */ pbr_nht_nexthop_interface_update(ifp); + pbr_map_policy_interface_update(ifp, true); + return 0; } @@ -77,6 +81,8 @@ int pbr_ifp_destroy(struct interface *ifp) DEBUGD(&pbr_dbg_zebra, "%s: %s", __PRETTY_FUNCTION__, ifp->name); + pbr_map_policy_interface_update(ifp, false); + return 0; } @@ -133,6 +139,29 @@ int pbr_ifp_down(struct interface *ifp) return 0; } +static int 
interface_vrf_update(ZAPI_CALLBACK_ARGS) +{ + struct interface *ifp; + vrf_id_t new_vrf_id; + + ifp = zebra_interface_vrf_update_read(zclient->ibuf, vrf_id, + &new_vrf_id); + + if (!ifp) { + DEBUGD(&pbr_dbg_zebra, "%s: VRF change interface not found", + __func__); + + return 0; + } + + DEBUGD(&pbr_dbg_zebra, "%s: %s VRF change %u -> %u", __func__, + ifp->name, vrf_id, new_vrf_id); + + if_update_to_new_vrf(ifp, new_vrf_id); + + return 0; +} + static int route_notify_owner(ZAPI_CALLBACK_ARGS) { struct prefix p; @@ -421,6 +450,7 @@ void pbr_zebra_init(void) zclient->zebra_connected = zebra_connected; zclient->interface_address_add = interface_address_add; zclient->interface_address_delete = interface_address_delete; + zclient->interface_vrf_update = interface_vrf_update; zclient->route_notify_owner = route_notify_owner; zclient->rule_notify_owner = rule_notify_owner; zclient->nexthop_update = pbr_zebra_nexthop_update; @@ -483,6 +513,26 @@ static void pbr_encode_pbr_map_sequence_prefix(struct stream *s, stream_put(s, &p->u.prefix, prefix_blen(p)); } +static void +pbr_encode_pbr_map_sequence_vrf(struct stream *s, + const struct pbr_map_sequence *pbrms, + const struct interface *ifp) +{ + struct pbr_vrf *pbr_vrf; + + if (pbrms->vrf_unchanged) + pbr_vrf = pbr_vrf_lookup_by_id(ifp->vrf_id); + else + pbr_vrf = pbr_vrf_lookup_by_name(pbrms->vrf_name); + + if (!pbr_vrf) { + DEBUGD(&pbr_dbg_zebra, "%s: VRF not found", __func__); + return; + } + + stream_putl(s, pbr_vrf->vrf->data.l.table_id); +} + static void pbr_encode_pbr_map_sequence(struct stream *s, struct pbr_map_sequence *pbrms, struct interface *ifp) @@ -501,7 +551,10 @@ static void pbr_encode_pbr_map_sequence(struct stream *s, pbr_encode_pbr_map_sequence_prefix(s, pbrms->dst, family); stream_putw(s, 0); /* dst port */ stream_putl(s, pbrms->mark); - if (pbrms->nhgrp_name) + + if (pbrms->vrf_unchanged || pbrms->vrf_lookup) + pbr_encode_pbr_map_sequence_vrf(s, pbrms, ifp); + else if (pbrms->nhgrp_name) stream_putl(s, 
pbr_nht_get_table(pbrms->nhgrp_name)); else if (pbrms->nhg) stream_putl(s, pbr_nht_get_table(pbrms->internal_nhg_name)); diff --git a/pbrd/subdir.am b/pbrd/subdir.am index 0f2e7ad8bd..41d0e5a0b8 100644 --- a/pbrd/subdir.am +++ b/pbrd/subdir.am @@ -20,6 +20,7 @@ pbrd_libpbr_a_SOURCES = \ pbrd/pbr_memory.c \ pbrd/pbr_nht.c \ pbrd/pbr_debug.c \ + pbrd/pbr_vrf.c \ # end noinst_HEADERS += \ @@ -29,6 +30,7 @@ noinst_HEADERS += \ pbrd/pbr_vty.h \ pbrd/pbr_zebra.h \ pbrd/pbr_debug.h \ + pbrd/pbr_vrf.h \ # end pbrd/pbr_vty_clippy.c: $(CLIPPY_DEPS) diff --git a/pimd/pim_cmd.c b/pimd/pim_cmd.c index e7e0573968..ca86017f10 100644 --- a/pimd/pim_cmd.c +++ b/pimd/pim_cmd.c @@ -61,6 +61,7 @@ #include "pim_nht.h" #include "pim_bfd.h" #include "pim_vxlan.h" +#include "pim_mlag.h" #include "bfd.h" #include "pim_bsm.h" @@ -7460,9 +7461,9 @@ DEFPY_HIDDEN (interface_ip_pim_activeactive, pim_ifp = ifp->info; if (no) - pim_ifp->activeactive = false; + pim_if_unconfigure_mlag_dualactive(pim_ifp); else - pim_ifp->activeactive = true; + pim_if_configure_mlag_dualactive(pim_ifp); return CMD_SUCCESS; } @@ -8380,6 +8381,20 @@ DEFUN (no_debug_pim_zebra, return CMD_SUCCESS; } +DEFUN(debug_pim_mlag, debug_pim_mlag_cmd, "debug pim mlag", + DEBUG_STR DEBUG_PIM_STR DEBUG_PIM_MLAG_STR) +{ + PIM_DO_DEBUG_MLAG; + return CMD_SUCCESS; +} + +DEFUN(no_debug_pim_mlag, no_debug_pim_mlag_cmd, "no debug pim mlag", + NO_STR DEBUG_STR DEBUG_PIM_STR DEBUG_PIM_MLAG_STR) +{ + PIM_DONT_DEBUG_MLAG; + return CMD_SUCCESS; +} + DEFUN (debug_pim_vxlan, debug_pim_vxlan_cmd, "debug pim vxlan", @@ -10406,6 +10421,8 @@ void pim_cmd_init(void) install_element(ENABLE_NODE, &no_debug_ssmpingd_cmd); install_element(ENABLE_NODE, &debug_pim_zebra_cmd); install_element(ENABLE_NODE, &no_debug_pim_zebra_cmd); + install_element(ENABLE_NODE, &debug_pim_mlag_cmd); + install_element(ENABLE_NODE, &no_debug_pim_mlag_cmd); install_element(ENABLE_NODE, &debug_pim_vxlan_cmd); install_element(ENABLE_NODE, &no_debug_pim_vxlan_cmd); 
install_element(ENABLE_NODE, &debug_msdp_cmd); diff --git a/pimd/pim_cmd.h b/pimd/pim_cmd.h index f5bb316a7a..89a4e6e699 100644 --- a/pimd/pim_cmd.h +++ b/pimd/pim_cmd.h @@ -54,6 +54,7 @@ #define DEBUG_PIM_PACKETDUMP_RECV_STR "Dump received packets\n" #define DEBUG_PIM_TRACE_STR "PIM internal daemon activity\n" #define DEBUG_PIM_ZEBRA_STR "ZEBRA protocol activity\n" +#define DEBUG_PIM_MLAG_STR "PIM Mlag activity\n" #define DEBUG_PIM_VXLAN_STR "PIM VxLAN events\n" #define DEBUG_SSMPINGD_STR "ssmpingd activity\n" #define CLEAR_IP_IGMP_STR "IGMP clear commands\n" diff --git a/pimd/pim_instance.c b/pimd/pim_instance.c index 6848d2dabb..955ad14b01 100644 --- a/pimd/pim_instance.c +++ b/pimd/pim_instance.c @@ -34,6 +34,7 @@ #include "pim_ssmpingd.h" #include "pim_vty.h" #include "pim_bsm.h" +#include "pim_mlag.h" static void pim_instance_terminate(struct pim_instance *pim) { @@ -47,6 +48,8 @@ static void pim_instance_terminate(struct pim_instance *pim) if (pim->static_routes) list_delete(&pim->static_routes); + pim_instance_mlag_terminate(pim); + pim_upstream_terminate(pim); pim_rp_free(pim); @@ -115,6 +118,8 @@ static struct pim_instance *pim_instance_init(struct vrf *vrf) pim_upstream_init(pim); + pim_instance_mlag_init(pim); + pim->last_route_change_time = -1; return pim; } diff --git a/pimd/pim_instance.h b/pimd/pim_instance.h index 06d41c4b53..dd3ac8fcb0 100644 --- a/pimd/pim_instance.h +++ b/pimd/pim_instance.h @@ -64,6 +64,17 @@ struct pim_router { vrf_id_t vrf_id; enum mlag_role role; + uint32_t pim_mlag_intf_cnt; + /* if true we have registered with MLAG */ + bool mlag_process_register; + /* if true local MLAG process reported that it is connected + * with the peer MLAG process + */ + bool connected_to_mlag; + /* Holds the client data(unencoded) that need to be pushed to MCLAGD*/ + struct stream_fifo *mlag_fifo; + struct stream *mlag_stream; + struct thread *zpthread_mlag_write; }; /* Per VRF PIM DB */ @@ -122,6 +133,9 @@ struct pim_instance { bool ecmp_enable; 
bool ecmp_rebalance_enable; + /* No. of Dual active I/fs in pim_instance */ + uint32_t inst_mlag_intf_cnt; + /* Bsm related */ struct bsm_scope global_scope; uint64_t bsm_rcvd; diff --git a/pimd/pim_main.c b/pimd/pim_main.c index 6a7dbe769f..4090ce7f93 100644 --- a/pimd/pim_main.c +++ b/pimd/pim_main.c @@ -47,6 +47,7 @@ #include "pim_msdp.h" #include "pim_iface.h" #include "pim_bfd.h" +#include "pim_mlag.h" #include "pim_errors.h" extern struct host host; @@ -131,6 +132,7 @@ int main(int argc, char **argv, char **envp) pim_ifp_down, pim_ifp_destroy); pim_zebra_init(); pim_bfd_init(); + pim_mlag_init(); frr_config_fork(); diff --git a/pimd/pim_mlag.c b/pimd/pim_mlag.c new file mode 100644 index 0000000000..f60c18204b --- /dev/null +++ b/pimd/pim_mlag.c @@ -0,0 +1,347 @@ +/* + * This is an implementation of PIM MLAG Functionality + * + * Module name: PIM MLAG + * + * Author: sathesh Kumar karra <sathk@cumulusnetworks.com> + * + * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <zebra.h> + +#include "pimd.h" +#include "pim_mlag.h" + +extern struct zclient *zclient; + + +/********************API to process PIM MLAG Data ************************/ + +static void pim_mlag_process_mlagd_state_change(struct mlag_status msg) +{ + char buf[MLAG_ROLE_STRSIZE]; + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: msg dump: my_role: %s, peer_state: %s", + __func__, + mlag_role2str(msg.my_role, buf, sizeof(buf)), + (msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING" + : "DOWN")); +} + +static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg) +{ + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: msg dump: peer_frr_state: %s", __func__, + (msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN")); +} + +static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg) +{ +} + +static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg) +{ + if (PIM_DEBUG_MLAG) { + zlog_debug( + "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x cost: %u", + __func__, msg.vrf_name, msg.source_ip, msg.group_ip, + msg.cost_to_rp); + zlog_debug( + "owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s", + msg.owner_id, msg.am_i_dr, msg.am_i_dual_active, + msg.vrf_id, msg.intf_name); + } +} + +static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg) +{ + if (PIM_DEBUG_MLAG) { + zlog_debug( + "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x ", + __func__, msg.vrf_name, msg.source_ip, msg.group_ip); + zlog_debug("owner_id: %d, vrf_id: 0x%x intf_name: %s", + msg.owner_id, msg.vrf_id, msg.intf_name); + } +} + + +int pim_zebra_mlag_handle_msg(struct stream *s, int len) +{ + struct mlag_msg mlag_msg; + char buf[ZLOG_FILTER_LENGTH_MAX]; + int rc = 0; + + rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg); + if 
(rc) + return (rc); + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Received msg type: %s length: %d, bulk_cnt: %d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + mlag_msg.data_len, mlag_msg.msg_cnt); + + switch (mlag_msg.msg_type) { + case MLAG_STATUS_UPDATE: { + struct mlag_status msg; + + rc = mlag_lib_decode_mlag_status(s, &msg); + if (rc) + return (rc); + pim_mlag_process_mlagd_state_change(msg); + } break; + case MLAG_PEER_FRR_STATUS: { + struct mlag_frr_status msg; + + rc = mlag_lib_decode_frr_status(s, &msg); + if (rc) + return (rc); + pim_mlag_process_peer_frr_state_change(msg); + } break; + case MLAG_VXLAN_UPDATE: { + struct mlag_vxlan msg; + + rc = mlag_lib_decode_vxlan_update(s, &msg); + if (rc) + return rc; + pim_mlag_process_vxlan_update(&msg); + } break; + case MLAG_MROUTE_ADD: { + struct mlag_mroute_add msg; + + rc = mlag_lib_decode_mroute_add(s, &msg); + if (rc) + return (rc); + pim_mlag_process_mroute_add(msg); + } break; + case MLAG_MROUTE_DEL: { + struct mlag_mroute_del msg; + + rc = mlag_lib_decode_mroute_del(s, &msg); + if (rc) + return (rc); + pim_mlag_process_mroute_del(msg); + } break; + case MLAG_MROUTE_ADD_BULK: { + struct mlag_mroute_add msg; + int i; + + for (i = 0; i < mlag_msg.msg_cnt; i++) { + + rc = mlag_lib_decode_mroute_add(s, &msg); + if (rc) + return (rc); + pim_mlag_process_mroute_add(msg); + } + } break; + case MLAG_MROUTE_DEL_BULK: { + struct mlag_mroute_del msg; + int i; + + for (i = 0; i < mlag_msg.msg_cnt; i++) { + + rc = mlag_lib_decode_mroute_del(s, &msg); + if (rc) + return (rc); + pim_mlag_process_mroute_del(msg); + } + } break; + default: + break; + } + return 0; +} + +/****************End of PIM Mesasge processing handler********************/ + +int pim_zebra_mlag_process_up(void) +{ + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Received Process-Up from Mlag", __func__); + + return 0; +} + +int pim_zebra_mlag_process_down(void) +{ + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Received Process-Down from 
Mlag", __func__); + + return 0; +} + +static int pim_mlag_register_handler(struct thread *thread) +{ + uint32_t bit_mask = 0; + + if (!zclient) + return -1; + + SET_FLAG(bit_mask, (1 << MLAG_STATUS_UPDATE)); + SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD)); + SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL)); + SET_FLAG(bit_mask, (1 << MLAG_DUMP)); + SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD_BULK)); + SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL_BULK)); + SET_FLAG(bit_mask, (1 << MLAG_PIM_CFG_DUMP)); + SET_FLAG(bit_mask, (1 << MLAG_VXLAN_UPDATE)); + SET_FLAG(bit_mask, (1 << MLAG_PEER_FRR_STATUS)); + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Posting Client Register to MLAG mask: 0x%x", + __func__, bit_mask); + + zclient_send_mlag_register(zclient, bit_mask); + return 0; +} + +void pim_mlag_register(void) +{ + if (router->mlag_process_register) + return; + + router->mlag_process_register = true; + + thread_add_event(router->master, pim_mlag_register_handler, NULL, 0, + NULL); +} + +static int pim_mlag_deregister_handler(struct thread *thread) +{ + if (!zclient) + return -1; + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Posting Client De-Register to MLAG from PIM", + __func__); + router->connected_to_mlag = false; + zclient_send_mlag_deregister(zclient); + return 0; +} + +void pim_mlag_deregister(void) +{ + /* if somebody still interested in the MLAG channel skip de-reg */ + if (router->pim_mlag_intf_cnt) + return; + + /* not registered; nothing do */ + if (!router->mlag_process_register) + return; + + router->mlag_process_register = false; + + thread_add_event(router->master, pim_mlag_deregister_handler, NULL, 0, + NULL); +} + +void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp) +{ + if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == true) + return; + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Configuring active-active on Interface: %s", + __func__, "NULL"); + + pim_ifp->activeactive = true; + if (pim_ifp->pim) + pim_ifp->pim->inst_mlag_intf_cnt++; + + 
router->pim_mlag_intf_cnt++; + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: Total MLAG configured Interfaces on router: %d, Inst: %d", + __func__, router->pim_mlag_intf_cnt, + pim_ifp->pim->inst_mlag_intf_cnt); + + if (router->pim_mlag_intf_cnt == 1) { + /* + * atleast one Interface is configured for MLAG, send register + * to Zebra for receiving MLAG Updates + */ + pim_mlag_register(); + } +} + +void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp) +{ + if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == false) + return; + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: UnConfiguring active-active on Interface: %s", + __func__, "NULL"); + + pim_ifp->activeactive = false; + pim_ifp->pim->inst_mlag_intf_cnt--; + + router->pim_mlag_intf_cnt--; + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: Total MLAG configured Interfaces on router: %d, Inst: %d", + __func__, router->pim_mlag_intf_cnt, + pim_ifp->pim->inst_mlag_intf_cnt); + + if (router->pim_mlag_intf_cnt == 0) { + /* + * all the Interfaces are MLAG un-configured, post MLAG + * De-register to Zebra + */ + pim_mlag_deregister(); + } +} + + +void pim_instance_mlag_init(struct pim_instance *pim) +{ + if (!pim) + return; + + pim->inst_mlag_intf_cnt = 0; +} + + +void pim_instance_mlag_terminate(struct pim_instance *pim) +{ + struct interface *ifp; + + if (!pim) + return; + + FOR_ALL_INTERFACES (pim->vrf, ifp) { + struct pim_interface *pim_ifp = ifp->info; + + if (!pim_ifp || pim_ifp->activeactive == false) + continue; + + pim_if_unconfigure_mlag_dualactive(pim_ifp); + } + pim->inst_mlag_intf_cnt = 0; +} + +void pim_mlag_init(void) +{ + router->pim_mlag_intf_cnt = 0; + router->connected_to_mlag = false; + router->mlag_fifo = stream_fifo_new(); + router->zpthread_mlag_write = NULL; + router->mlag_stream = stream_new(MLAG_BUF_LIMIT); +} diff --git a/pimd/pim_mlag.h b/pimd/pim_mlag.h new file mode 100644 index 0000000000..e86fdae78f --- /dev/null +++ b/pimd/pim_mlag.h @@ -0,0 +1,40 @@ +/* + * This is an implementation 
of PIM MLAG Functionality + * + * Module name: PIM MLAG + * + * Author: sathesh Kumar karra <sathk@cumulusnetworks.com> + * + * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __PIM_MLAG_H__ +#define __PIM_MLAG_H__ + +#include "mlag.h" +#include "pim_iface.h" + +extern void pim_mlag_init(void); +extern void pim_instance_mlag_init(struct pim_instance *pim); +extern void pim_instance_mlag_terminate(struct pim_instance *pim); +extern void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp); +extern void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp); +extern void pim_mlag_register(void); +extern void pim_mlag_deregister(void); +extern int pim_zebra_mlag_process_up(void); +extern int pim_zebra_mlag_process_down(void); +extern int pim_zebra_mlag_handle_msg(struct stream *msg, int len); +#endif diff --git a/pimd/pim_zebra.c b/pimd/pim_zebra.c index b297615435..b999188a9b 100644 --- a/pimd/pim_zebra.c +++ b/pimd/pim_zebra.c @@ -46,11 +46,12 @@ #include "pim_nht.h" #include "pim_ssm.h" #include "pim_vxlan.h" +#include "pim_mlag.h" #undef PIM_DEBUG_IFADDR_DUMP #define PIM_DEBUG_IFADDR_DUMP -static struct zclient *zclient = NULL; +struct zclient *zclient; /* Router-id update 
message from zebra. */ @@ -587,6 +588,9 @@ void pim_zebra_init(void) zclient->nexthop_update = pim_parse_nexthop_update; zclient->vxlan_sg_add = pim_zebra_vxlan_sg_proc; zclient->vxlan_sg_del = pim_zebra_vxlan_sg_proc; + zclient->mlag_process_up = pim_zebra_mlag_process_up; + zclient->mlag_process_down = pim_zebra_mlag_process_down; + zclient->mlag_handle_msg = pim_zebra_mlag_handle_msg; zclient_init(zclient, ZEBRA_ROUTE_PIM, 0, &pimd_privs); if (PIM_DEBUG_PIM_TRACE) { diff --git a/pimd/pimd.h b/pimd/pimd.h index 3b83d3b6c7..70d2766220 100644 --- a/pimd/pimd.h +++ b/pimd/pimd.h @@ -115,6 +115,7 @@ #define PIM_MASK_MTRACE (1 << 25) #define PIM_MASK_VXLAN (1 << 26) #define PIM_MASK_BSM_PROC (1 << 27) +#define PIM_MASK_MLAG (1 << 28) /* Remember 32 bits!!! */ /* PIM error codes */ @@ -171,6 +172,7 @@ extern uint8_t qpim_ecmp_rebalance_enable; #define PIM_DEBUG_IGMP_TRACE_DETAIL \ (router->debugs & (PIM_MASK_IGMP_TRACE_DETAIL | PIM_MASK_IGMP_TRACE)) #define PIM_DEBUG_ZEBRA (router->debugs & PIM_MASK_ZEBRA) +#define PIM_DEBUG_MLAG (router->debugs & PIM_MASK_MLAG) #define PIM_DEBUG_SSMPINGD (router->debugs & PIM_MASK_SSMPINGD) #define PIM_DEBUG_MROUTE (router->debugs & PIM_MASK_MROUTE) #define PIM_DEBUG_MROUTE_DETAIL \ @@ -217,6 +219,7 @@ extern uint8_t qpim_ecmp_rebalance_enable; #define PIM_DO_DEBUG_IGMP_TRACE_DETAIL \ (router->debugs |= PIM_MASK_IGMP_TRACE_DETAIL) #define PIM_DO_DEBUG_ZEBRA (router->debugs |= PIM_MASK_ZEBRA) +#define PIM_DO_DEBUG_MLAG (router->debugs |= PIM_MASK_MLAG) #define PIM_DO_DEBUG_SSMPINGD (router->debugs |= PIM_MASK_SSMPINGD) #define PIM_DO_DEBUG_MROUTE (router->debugs |= PIM_MASK_MROUTE) #define PIM_DO_DEBUG_MROUTE_DETAIL (router->debugs |= PIM_MASK_MROUTE_DETAIL) @@ -248,6 +251,7 @@ extern uint8_t qpim_ecmp_rebalance_enable; #define PIM_DONT_DEBUG_IGMP_TRACE_DETAIL \ (router->debugs &= ~PIM_MASK_IGMP_TRACE_DETAIL) #define PIM_DONT_DEBUG_ZEBRA (router->debugs &= ~PIM_MASK_ZEBRA) +#define PIM_DONT_DEBUG_MLAG (router->debugs &= ~PIM_MASK_MLAG) 
#define PIM_DONT_DEBUG_SSMPINGD (router->debugs &= ~PIM_MASK_SSMPINGD) #define PIM_DONT_DEBUG_MROUTE (router->debugs &= ~PIM_MASK_MROUTE) #define PIM_DONT_DEBUG_MROUTE_DETAIL (router->debugs &= ~PIM_MASK_MROUTE_DETAIL) diff --git a/pimd/subdir.am b/pimd/subdir.am index 240b62804f..5407e566a5 100644 --- a/pimd/subdir.am +++ b/pimd/subdir.am @@ -62,6 +62,7 @@ pimd_libpim_a_SOURCES = \ pimd/pim_zebra.c \ pimd/pim_zlookup.c \ pimd/pim_vxlan.c \ + pimd/pim_mlag.c \ pimd/pimd.c \ # end @@ -114,6 +115,7 @@ noinst_HEADERS += \ pimd/pim_zebra.h \ pimd/pim_zlookup.h \ pimd/pim_vxlan.h \ + pimd/pim_mlag.h \ pimd/pim_vxlan_instance.h \ pimd/pimd.h \ pimd/mtracebis_netlink.h \ diff --git a/zebra/interface.c b/zebra/interface.c index eea80652e5..20b05dfb32 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -161,6 +161,7 @@ static int if_zebra_new_hook(struct interface *ifp) rtadv->HomeAgentLifetime = -1; /* derive from AdvDefaultLifetime */ rtadv->AdvIntervalOption = 0; + rtadv->UseFastRexmit = true; rtadv->DefaultPreference = RTADV_PREF_MEDIUM; rtadv->AdvPrefixList = list_new(); @@ -1037,7 +1038,8 @@ void if_up(struct interface *ifp) #if defined(HAVE_RTADV) /* Enable fast tx of RA if enabled && RA interval is not in msecs */ if (zif->rtadv.AdvSendAdvertisements - && (zif->rtadv.MaxRtrAdvInterval >= 1000)) { + && (zif->rtadv.MaxRtrAdvInterval >= 1000) + && zif->rtadv.UseFastRexmit) { zif->rtadv.inFastRexmit = 1; zif->rtadv.NumFastReXmitsRemain = RTADV_NUM_FAST_REXMITS; } @@ -1060,7 +1062,9 @@ void if_up(struct interface *ifp) zif->link_ifindex); if (link_if) zebra_vxlan_svi_up(ifp, link_if); - } + } else if (IS_ZEBRA_IF_MACVLAN(ifp)) + zebra_vxlan_macvlan_up(ifp); + } /* Interface goes down. We have to manage different behavior of based @@ -1092,7 +1096,8 @@ void if_down(struct interface *ifp) zif->link_ifindex); if (link_if) zebra_vxlan_svi_down(ifp, link_if); - } + } else if (IS_ZEBRA_IF_MACVLAN(ifp)) + zebra_vxlan_macvlan_down(ifp); /* Notify to the protocol daemons. 
*/ diff --git a/zebra/interface.h b/zebra/interface.h index 78ccbae623..b7e90a0c31 100644 --- a/zebra/interface.h +++ b/zebra/interface.h @@ -189,6 +189,13 @@ struct rtadvconf { */ struct list *AdvDNSSLList; + /* + * rfc4861 states RAs must be sent at least 3 seconds apart. + * We allow faster retransmits to speed up convergence but can + * turn that capability off to meet the rfc if needed. + */ + bool UseFastRexmit; /* True if fast rexmits are enabled */ + uint8_t inFastRexmit; /* True if we're rexmits faster than usual */ /* Track if RA was configured by BGP or by the Operator or both */ diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c index 73b3dd0b40..6909bcb137 100644 --- a/zebra/rt_socket.c +++ b/zebra/rt_socket.c @@ -178,7 +178,7 @@ static int kernel_rtm(int cmd, const struct prefix *p, case NEXTHOP_TYPE_BLACKHOLE: bh_type = nexthop->bh_type; switch (p->family) { - case AFI_IP: { + case AF_INET: { struct in_addr loopback; loopback.s_addr = htonl(INADDR_LOOPBACK); sin_gate.sin.sin_addr = loopback; @@ -189,7 +189,8 @@ static int kernel_rtm(int cmd, const struct prefix *p, gate = true; } break; - case AFI_IP6: + case AF_INET6: + zlog_warn("v6 blackhole routes have not been programmed yet"); break; } } @@ -230,13 +231,13 @@ static int kernel_rtm(int cmd, const struct prefix *p, __func__, prefix_buf); } else { switch (p->family) { - case AFI_IP: + case AF_INET: inet_ntop(AF_INET, &sin_gate.sin.sin_addr, gate_buf, sizeof(gate_buf)); break; - case AFI_IP6: + case AF_INET6: inet_ntop(AF_INET6, &sin_gate.sin6.sin6_addr, gate_buf, sizeof(gate_buf)); diff --git a/zebra/rtadv.c b/zebra/rtadv.c index 4903455a2b..f51c199f6b 100644 --- a/zebra/rtadv.c +++ b/zebra/rtadv.c @@ -495,7 +495,8 @@ static int rtadv_timer(struct thread *thread) zif = ifp->info; if (zif->rtadv.AdvSendAdvertisements) { - if (zif->rtadv.inFastRexmit) { + if (zif->rtadv.inFastRexmit + && zif->rtadv.UseFastRexmit) { /* We assume we fast rexmit every sec so * no * additional vars */ @@ -535,9 +536,28 
@@ static int rtadv_timer(struct thread *thread)
 static void rtadv_process_solicit(struct interface *ifp)
 {
 	struct zebra_vrf *zvrf = vrf_info_lookup(ifp->vrf_id);
+	struct zebra_if *zif;

 	assert(zvrf);
-	rtadv_send_packet(rtadv_get_socket(zvrf), ifp);
+	zif = ifp->info;
+
+	/*
+	 * If FastRetransmit is enabled, send the RA immediately.
+	 * If not enabled but it has been more than MIN_DELAY_BETWEEN_RAS
+	 * (3 seconds) since the last RA was sent, send it now and reset
+	 * the timer to start at the max (configured) again.
+	 * If not enabled and it is less than 3 seconds since the last
+	 * RA packet was sent, set the timer for 3 seconds so the next
+	 * one will be sent with a minimum of 3 seconds between RAs.
+	 * RFC4861 sec 6.2.6
+	 *
+	 * NOTE(review): "time since the last RA" is inferred from
+	 * AdvIntervalTimer <= MaxRtrAdvInterval - MIN_DELAY_BETWEEN_RAS.
+	 * The else branch overwrites AdvIntervalTimer with
+	 * MIN_DELAY_BETWEEN_RAS, so a second solicit arriving shortly
+	 * afterwards appears to pass this test even though less than
+	 * 3 seconds have elapsed - confirm this is intended.
+	 * NOTE(review): if MaxRtrAdvInterval can be configured below
+	 * MIN_DELAY_BETWEEN_RAS the right-hand side goes negative -
+	 * presumably both fields are signed; verify against rtadvconf.
+	 */
+	if ((zif->rtadv.UseFastRexmit)
+	    || (zif->rtadv.AdvIntervalTimer <=
+		(zif->rtadv.MaxRtrAdvInterval - MIN_DELAY_BETWEEN_RAS))) {
+		rtadv_send_packet(rtadv_get_socket(zvrf), ifp);
+		zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval;
+	} else
+		zif->rtadv.AdvIntervalTimer = MIN_DELAY_BETWEEN_RAS;
 }

 /*
@@ -904,9 +924,12 @@ static void ipv6_nd_suppress_ra_set(struct interface *ifp,
 			zif->rtadv.AdvIntervalTimer = 0;
 			zvrf->rtadv.adv_if_count++;

-			if (zif->rtadv.MaxRtrAdvInterval >= 1000) {
-				/* Enable Fast RA only when RA interval is in
-				 * secs */
+			if ((zif->rtadv.MaxRtrAdvInterval >= 1000)
+			    && zif->rtadv.UseFastRexmit) {
+				/*
+				 * Enable Fast RA only when RA interval is in
+				 * secs and Fast RA retransmit is enabled
+				 */
 				zif->rtadv.inFastRexmit = 1;
 				zif->rtadv.NumFastReXmitsRemain =
 					RTADV_NUM_FAST_REXMITS;
@@ -996,6 +1019,51 @@ void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS)
 	zebra_interface_radv_set(client, hdr, msg, zvrf, 1);
 }

+/*
+ * "ipv6 nd ra-fast-retrans": set UseFastRexmit on the interface.
+ * Rejected on loopback and VRF-loopback interfaces.
+ */
+DEFUN (ipv6_nd_ra_fast_retrans,
+	ipv6_nd_ra_fast_retrans_cmd,
+	"ipv6 nd ra-fast-retrans",
+	"Interface IPv6 config commands\n"
+	"Neighbor discovery\n"
+	"Fast retransmit of RA packets\n")
+{
+	VTY_DECLVAR_CONTEXT(interface, ifp);
+	struct zebra_if *zif = ifp->info;
+
+	if (if_is_loopback(ifp)
+	    || CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK)) {
+		vty_out(vty,
+			"Cannot configure IPv6 Router Advertisements on this interface\n");
+		return CMD_WARNING_CONFIG_FAILED;
+	}
+
+	zif->rtadv.UseFastRexmit = true;
+
+	return CMD_SUCCESS;
+}
+
+/*
+ * "no ipv6 nd ra-fast-retrans": clear UseFastRexmit on the interface.
+ * Rejected on loopback and VRF-loopback interfaces.
+ */
+DEFUN (no_ipv6_nd_ra_fast_retrans,
+	no_ipv6_nd_ra_fast_retrans_cmd,
+	"no ipv6 nd ra-fast-retrans",
+	NO_STR
+	"Interface IPv6 config commands\n"
+	"Neighbor discovery\n"
+	"Fast retransmit of RA packets\n")
+{
+	VTY_DECLVAR_CONTEXT(interface, ifp);
+	struct zebra_if *zif = ifp->info;
+
+	if (if_is_loopback(ifp)
+	    || CHECK_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK)) {
+		vty_out(vty,
+			"Cannot configure IPv6 Router Advertisements on this interface\n");
+		return CMD_WARNING_CONFIG_FAILED;
+	}
+
+	zif->rtadv.UseFastRexmit = false;
+
+	return CMD_SUCCESS;
+}
+
 DEFUN (ipv6_nd_suppress_ra,
 	ipv6_nd_suppress_ra_cmd,
 	"ipv6 nd suppress-ra",
@@ -1954,6 +2022,10 @@ static int nd_dump_vty(struct vty *vty, struct interface *ifp)
 				" ND router advertisements are sent every "
 				"%d seconds\n",
 				interval / 1000);
+		if (!rtadv->UseFastRexmit)
+			vty_out(vty,
+				" ND router advertisements do not use fast retransmit\n");
+
 		if (rtadv->AdvDefaultLifetime != -1)
 			vty_out(vty,
 				" ND router advertisements live for %d seconds\n",
@@ -2025,6 +2097,9 @@ static int rtadv_config_write(struct vty *vty, struct interface *ifp)
 	if (zif->rtadv.AdvIntervalOption)
 		vty_out(vty, " ipv6 nd adv-interval-option\n");

+	if (!zif->rtadv.UseFastRexmit)
+		vty_out(vty, " no ipv6 nd ra-fast-retrans\n");
+
 	if (zif->rtadv.AdvDefaultLifetime != -1)
 		vty_out(vty, " ipv6 nd ra-lifetime %d\n",
 			zif->rtadv.AdvDefaultLifetime);
@@ -2173,6 +2248,8 @@ void rtadv_cmd_init(void)
 	hook_register(zebra_if_extra_info, nd_dump_vty);
 	hook_register(zebra_if_config_wr, rtadv_config_write);

+	install_element(INTERFACE_NODE, &ipv6_nd_ra_fast_retrans_cmd);
+	install_element(INTERFACE_NODE, &no_ipv6_nd_ra_fast_retrans_cmd);
 	install_element(INTERFACE_NODE, &ipv6_nd_suppress_ra_cmd);
install_element(INTERFACE_NODE, &no_ipv6_nd_suppress_ra_cmd); install_element(INTERFACE_NODE, &ipv6_nd_ra_interval_cmd); diff --git a/zebra/rtadv.h b/zebra/rtadv.h index d692ef2417..f9bd2b1d39 100644 --- a/zebra/rtadv.h +++ b/zebra/rtadv.h @@ -59,6 +59,11 @@ struct rtadv_prefix { #endif }; +/* RFC4861 minimum delay between RAs */ +#ifndef MIN_DELAY_BETWEEN_RAS +#define MIN_DELAY_BETWEEN_RAS 3000 +#endif + /* RFC4584 Extension to Sockets API for Mobile IPv6 */ #ifndef ND_OPT_ADV_INTERVAL diff --git a/zebra/subdir.am b/zebra/subdir.am index 28847ce09b..d0f32d6a14 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -38,6 +38,9 @@ man8 += $(MANBUILD)/zebra.8 endif zebra_zebra_LDADD = lib/libfrr.la $(LIBCAP) +if HAVE_PROTOBUF +zebra_zebra_LDADD += mlag/libmlag_pb.la $(PROTOBUF_C_LIBS) +endif zebra_zebra_SOURCES = \ zebra/connected.c \ zebra/debug.c \ @@ -66,6 +69,7 @@ zebra_zebra_SOURCES = \ zebra/rule_netlink.c \ zebra/rule_socket.c \ zebra/zebra_mlag.c \ + zebra/zebra_mlag_private.c \ zebra/zebra_l2.c \ zebra/zebra_memory.c \ zebra/zebra_dplane.c \ @@ -130,6 +134,7 @@ noinst_HEADERS += \ zebra/rtadv.h \ zebra/rule_netlink.h \ zebra/zebra_mlag.h \ + zebra/zebra_mlag_private.h \ zebra/zebra_fpm_private.h \ zebra/zebra_l2.h \ zebra/zebra_dplane.h \ diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index d6ade783cf..e809d2ad3d 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -2556,6 +2556,9 @@ void (*zserv_handlers[])(ZAPI_HANDLER_ARGS) = { [ZEBRA_IPTABLE_DELETE] = zread_iptable, [ZEBRA_VXLAN_FLOOD_CONTROL] = zebra_vxlan_flood_control, [ZEBRA_VXLAN_SG_REPLAY] = zebra_vxlan_sg_replay, + [ZEBRA_MLAG_CLIENT_REGISTER] = zebra_mlag_client_register, + [ZEBRA_MLAG_CLIENT_UNREGISTER] = zebra_mlag_client_unregister, + [ZEBRA_MLAG_FORWARD_MSG] = zebra_mlag_forward_client_msg, }; #if defined(HANDLE_ZAPI_FUZZING) diff --git a/zebra/zebra_mlag.c b/zebra/zebra_mlag.c index 5012cc2a49..1a911e429f 100644 --- a/zebra/zebra_mlag.c +++ b/zebra/zebra_mlag.c @@ -23,9 +23,13 @@ 
#include "command.h"
 #include "hook.h"
+#include "frr_pthread.h"
+#include "mlag.h"

 #include "zebra/zebra_mlag.h"
+#include "zebra/zebra_mlag_private.h"
 #include "zebra/zebra_router.h"
+#include "zebra/zebra_memory.h"
 #include "zebra/zapi_msg.h"
 #include "zebra/debug.h"

@@ -33,6 +37,543 @@
 #include "zebra/zebra_mlag_clippy.c"
 #endif

+#define ZEBRA_MLAG_METADATA_LEN 4
+#define ZEBRA_MLAG_MSG_BCAST 0xFFFFFFFF
+
+uint8_t mlag_wr_buffer[ZEBRA_MLAG_BUF_LIMIT];
+uint8_t mlag_rd_buffer[ZEBRA_MLAG_BUF_LIMIT];
+uint32_t mlag_rd_buf_offset;
+
+static bool test_mlag_in_progress;
+
+static int zebra_mlag_signal_write_thread(void);
+static int zebra_mlag_terminate_pthread(struct thread *event);
+static int zebra_mlag_post_data_from_main_thread(struct thread *thread);
+static void zebra_mlag_publish_process_state(struct zserv *client,
+					     zebra_message_types_t msg_type);
+
+/**********************MLAG Interaction***************************************/
+
+/*
+ * Queue a header-only MLAG_REGISTER message on the MLAG FIFO and wake
+ * the write thread.
+ * Per the design notes, MLAGD ignores everything until it has seen
+ * the registration.
+ */
+void zebra_mlag_send_register(void)
+{
+	struct stream *reg_msg = stream_new(sizeof(struct mlag_msg));
+
+	/* message type, null-payload marker, no-batch marker */
+	stream_putl(reg_msg, MLAG_REGISTER);
+	stream_putw(reg_msg, MLAG_MSG_NULL_PAYLOAD);
+	stream_putw(reg_msg, MLAG_MSG_NO_BATCH);
+
+	stream_fifo_push_safe(zrouter.mlag_info.mlag_fifo, reg_msg);
+	zebra_mlag_signal_write_thread();
+
+	if (!IS_ZEBRA_DEBUG_MLAG)
+		return;
+
+	zlog_debug("%s: Enqueued MLAG Register to MLAG Thread ", __func__);
+}
+
+/*
+ * Queue a header-only MLAG_DEREGISTER message on the MLAG FIFO and wake
+ * the write thread.
+ * Per the design notes, MLAGD stops processing messages once it has
+ * seen the de-registration.
+ */
+void zebra_mlag_send_deregister(void)
+{
+	struct stream *dereg_msg = stream_new(sizeof(struct mlag_msg));
+
+	/* message type, null-payload marker, no-batch marker */
+	stream_putl(dereg_msg, MLAG_DEREGISTER);
+	stream_putw(dereg_msg, MLAG_MSG_NULL_PAYLOAD);
+	stream_putw(dereg_msg, MLAG_MSG_NO_BATCH);
+
+	stream_fifo_push_safe(zrouter.mlag_info.mlag_fifo, dereg_msg);
+	zebra_mlag_signal_write_thread();
+
+	if (!IS_ZEBRA_DEBUG_MLAG)
+		return;
+
+	zlog_debug("%s: Enqueued MLAG De-Register to MLAG Thread ", __func__);
+}
+
+/*
+ * Handle a buffer received from MLAG.
+ * The buffer is decoded with protobuf into a zebra stream; the decoded
+ * stream, prefixed with the 4-byte message type, is then handed to the
+ * main thread, which publishes it to the subscribed clients.
+ */
+void zebra_mlag_process_mlag_data(uint8_t *data, uint32_t len)
+{
+	struct stream *decoded = stream_new(ZEBRA_MAX_PACKET_SIZ);
+	struct stream *fwd = NULL;
+	int msg_type = zebra_mlag_protobuf_decode_message(decoded, data, len);
+
+	if (msg_type <= 0) {
+		/* decode failed: nothing to forward */
+		stream_free(decoded);
+		zlog_err("%s: failed to process mlag data-%d, %u", __func__,
+			 msg_type, len);
+		return;
+	}
+
+	/* ZEBRA_MLAG_METADATA_LEN extra bytes carry the message type */
+	fwd = stream_new(stream_get_endp(decoded) + ZEBRA_MLAG_METADATA_LEN);
+	stream_putl(fwd, msg_type);
+	stream_put(fwd, decoded->data, stream_get_endp(decoded));
+	thread_add_event(zrouter.master, zebra_mlag_post_data_from_main_thread,
+			 fwd, 0, NULL);
+	stream_free(decoded);
+}
+
+/**********************End of MLAG Interaction********************************/
+
+/************************MLAG Thread Processing*******************************/
+
+/*
+ * after posting every 'ZEBRA_MLAG_POST_LIMIT' packets, MLAG Thread will be
+ * yielded to give CPU for other threads
+ */
+#define ZEBRA_MLAG_POST_LIMIT 100
+
+/*
+ * This thread reads the clients data from the Global queue and encodes with
+ * protobuf and pass on to the MLAG socket.
+ */
+static int zebra_mlag_client_msg_handler(struct thread *event)
+{
+	struct stream *s;
+	uint32_t wr_count = 0;
+	uint32_t msg_type = 0;
+	uint32_t max_count = 0;
+	int len = 0;
+
+	wr_count = stream_fifo_count_safe(zrouter.mlag_info.mlag_fifo);
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug(":%s: Processing MLAG write, %u messages in queue",
+			   __func__, wr_count);
+
+	/* Never drain more than ZEBRA_MLAG_POST_LIMIT messages per run */
+	max_count = MIN(wr_count, ZEBRA_MLAG_POST_LIMIT);
+
+	for (wr_count = 0; wr_count < max_count; wr_count++) {
+		s = stream_fifo_pop_safe(zrouter.mlag_info.mlag_fifo);
+		if (!s) {
+			zlog_debug(":%s: Got a NULL Messages, some thing wrong",
+				   __func__);
+			break;
+		}
+
+		/*
+		 * Encode the data now; the encoder also reports the
+		 * message type it found in the stream.
+		 */
+		len = zebra_mlag_protobuf_encode_client_data(s, &msg_type);
+
+		/*
+		 * write to MCLAGD (only when encoding produced data)
+		 */
+		if (len > 0) {
+			zebra_mlag_private_write_data(mlag_wr_buffer, len);
+
+			/*
+			 * If message type is De-register, send a signal to main
+			 * thread, so that necessary cleanup will be done by
+			 * main thread.
+			 */
+			if (msg_type == MLAG_DEREGISTER) {
+				thread_add_event(zrouter.master,
+						 zebra_mlag_terminate_pthread,
+						 NULL, 0, NULL);
+			}
+		}
+
+		stream_free(s);
+	}
+
+	/* wr_count is uint32_t, so it is printed with %u */
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug(":%s: Posted %u messages to MLAGD", __func__,
+			   wr_count);
+	/*
+	 * Currently there is only message write task is enqueued to this
+	 * thread, yielding was added for future purpose, so that this thread
+	 * can server other tasks also and in case FIFO is empty, this task will
+	 * be schedule when main thread adds some messages
+	 */
+	if (wr_count >= ZEBRA_MLAG_POST_LIMIT)
+		zebra_mlag_signal_write_thread();
+	return 0;
+}
+
+/*
+ * API to handle the process state.
+ * In case of Down, Zebra keep monitoring the MLAG state.
+ * all the state Notifications will be published to clients
+ */
+void zebra_mlag_handle_process_state(enum zebra_mlag_state state)
+{
+	switch (state) {
+	case MLAG_UP:
+		/* record the connection, tell the clients, then register */
+		zrouter.mlag_info.connected = true;
+		zebra_mlag_publish_process_state(NULL, ZEBRA_MLAG_PROCESS_UP);
+		zebra_mlag_send_register();
+		break;
+	case MLAG_DOWN:
+		/* record the loss, tell the clients, then keep monitoring */
+		zrouter.mlag_info.connected = false;
+		zebra_mlag_publish_process_state(NULL, ZEBRA_MLAG_PROCESS_DOWN);
+		zebra_mlag_private_monitor_state();
+		break;
+	default:
+		/* any other state is ignored */
+		break;
+	}
+}
+
+/***********************End of MLAG Thread processing*************************/
+
+/*************************Multi-entrant APIs**********************************/
+
+/*
+ * Tell the Zebra MLAG write pthread that work is available.
+ * Callers:
+ *   1) the main thread, when a client posts an MLAG message
+ *   2) the MLAG thread itself, when it yields
+ * No locking is taken here; the original author's note is that
+ * enqueueing a thread task is protected internally.
+ */
+static int zebra_mlag_signal_write_thread(void)
+{
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug(":%s: Scheduling MLAG write", __func__);
+
+	/*
+	 * "zrouter.mlag_info.th_master" is written by the main thread
+	 * only during Zebra init or after the MLAG thread has been
+	 * destroyed, so it is read here without a lock.
+	 */
+	thread_add_event(zrouter.mlag_info.th_master,
+			 zebra_mlag_client_msg_handler, NULL, 0,
+			 &zrouter.mlag_info.t_write);
+
+	return 0;
+}
+
+/*
+ * API will be used to publish the MLAG state to interested clients
+ * In case client is passed, state is posted only for that client,
+ * otherwise to all interested clients
+ * this api can be called from two threads.
+ * 1) from main thread: when client is passed
+ * 2) from MLAG Thread: when client is NULL
+ *
+ * In second case, to avoid global data access data will be post to Main
+ * thread, so that actual posting to clients will happen from Main thread.
+ */
+static void zebra_mlag_publish_process_state(struct zserv *client,
+					     zebra_message_types_t msg_type)
+{
+	struct stream *notif;
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("%s: Publishing MLAG process state:%s to %s Client",
+			   __func__,
+			   (msg_type == ZEBRA_MLAG_PROCESS_UP) ? "UP" : "DOWN",
+			   (client) ? "one" : "all");
+
+	if (!client) {
+		/*
+		 * Broadcast: prefix the zebra header with the 4-byte
+		 * broadcast marker and let the main thread fan it out.
+		 */
+		notif = stream_new(ZEBRA_HEADER_SIZE
+				   + ZEBRA_MLAG_METADATA_LEN);
+		stream_putl(notif, ZEBRA_MLAG_MSG_BCAST);
+		zclient_create_header(notif, msg_type, VRF_DEFAULT);
+		thread_add_event(zrouter.master,
+				 zebra_mlag_post_data_from_main_thread, notif,
+				 0, NULL);
+		return;
+	}
+
+	/* Single client: a bare zebra header is sent directly */
+	notif = stream_new(ZEBRA_HEADER_SIZE);
+	zclient_create_header(notif, msg_type, VRF_DEFAULT);
+	zserv_send_message(client, notif);
+}
+
+/**************************End of Multi-entrant Apis**************************/
+
+/***********************Zebra Main thread processing**************************/
+
+/*
+ * To avoid data corruption, messages will be post to clients only from
+ * main thread, because for that access was needed for clients list.
+ * so instead of forcing the locks, messages will be posted from main thread.
+ */
+static int zebra_mlag_post_data_from_main_thread(struct thread *thread)
+{
+	struct stream *s = THREAD_ARG(thread);
+	struct stream *zebra_s = NULL;
+	struct listnode *node;
+	struct zserv *client;
+	uint32_t msg_type = 0;
+	uint32_t msg_len = 0;
+
+	/*
+	 * 's' layout: 4-byte message type (or ZEBRA_MLAG_MSG_BCAST)
+	 * followed by the zebra message to hand to the clients.
+	 * This function owns 's' and frees it on every path.
+	 */
+	if (!s)
+		return -1;
+
+	STREAM_GETL(s, msg_type);
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug(
+			"%s: Posting MLAG data for msg_type:0x%x to interested cleints",
+			__func__, msg_type);
+
+	msg_len = s->endp - ZEBRA_MLAG_METADATA_LEN;
+	for (ALL_LIST_ELEMENTS_RO(zrouter.client_list, node, client)) {
+		if (client->mlag_updates_interested != true)
+			continue;
+
+		/*
+		 * Non-broadcast messages go only to clients whose mask
+		 * has the bit for this message type. A type that does
+		 * not fit in the mask cannot be subscribed to, and
+		 * shifting by the mask width or more is undefined.
+		 */
+		if (msg_type != ZEBRA_MLAG_MSG_BCAST
+		    && (msg_type >= sizeof(client->mlag_reg_mask1) * 8
+			|| !CHECK_FLAG(client->mlag_reg_mask1,
+				       (1U << msg_type))))
+			continue;
+
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"%s: Posting MLAG data of length-%d to client:%d ",
+				__func__, msg_len, client->proto);
+
+		/*
+		 * Copy the payload by offset instead of reading it with
+		 * STREAM_GET: a read advances the getp of 's', so only
+		 * the first interested client would get the message and
+		 * every later one would hit stream_failure.
+		 */
+		zebra_s = stream_new(msg_len);
+		stream_put(zebra_s, s->data + ZEBRA_MLAG_METADATA_LEN,
+			   msg_len);
+		stream_putw_at(zebra_s, 0, msg_len);
+
+		/*
+		 * This stream will be enqueued to client_obuf, it will
+		 * be freed after posting to client socket.
+		 */
+		zserv_send_message(client, zebra_s);
+		zebra_s = NULL;
+	}
+
+	stream_free(s);
+	return 0;
+stream_failure:
+	/* Only STREAM_GETL jumps here, before any copy is allocated */
+	stream_free(s);
+	if (zebra_s)
+		stream_free(zebra_s);
+	return 0;
+}
+
+/*
+ * Start the MLAG Thread, this will be used to write client data on to
+ * MLAG Process and to read the data from MLAG and post to cleints.
+ * when all clients are un-registered, this Thread will be
+ * suspended.
+ */
+static void zebra_mlag_spawn_pthread(void)
+{
+	/* Start MLAG write pthread */
+
+	struct frr_pthread_attr pattr = {.start =
+						 frr_pthread_attr_default.start,
+					 .stop = frr_pthread_attr_default.stop};
+
+	zrouter.mlag_info.zebra_pth_mlag =
+		frr_pthread_new(&pattr, "Zebra MLAG thread", "Zebra MLAG");
+
+	zrouter.mlag_info.th_master = zrouter.mlag_info.zebra_pth_mlag->master;
+
+
+	/* Enqueue an initial event to the Newly spawn MLAG pthread */
+	zebra_mlag_signal_write_thread();
+
+	frr_pthread_run(zrouter.mlag_info.zebra_pth_mlag, NULL);
+}
+
+/*
+ * all clients are un-registered for MLAG Updates, terminate the
+ * MLAG write thread
+ *
+ * Runs on the main thread; it is queued by the MLAG thread each time a
+ * De-register message has been written. Does nothing when a client has
+ * registered again in the meantime, or when the pthread is already gone.
+ */
+static int zebra_mlag_terminate_pthread(struct thread *event)
+{
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("Zebra MLAG write thread terminate called");
+
+	if (zrouter.mlag_info.clients_interested_cnt) {
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"Zebra MLAG: still some clients are interested");
+		return 0;
+	}
+
+	/*
+	 * One terminate event is queued per De-register written, so a
+	 * second event can arrive after the pthread was destroyed.
+	 */
+	if (zrouter.mlag_info.zebra_pth_mlag == NULL)
+		return 0;
+
+	frr_pthread_stop(zrouter.mlag_info.zebra_pth_mlag, NULL);
+
+	/* Destroy pthread */
+	frr_pthread_destroy(zrouter.mlag_info.zebra_pth_mlag);
+	zrouter.mlag_info.zebra_pth_mlag = NULL;
+	zrouter.mlag_info.th_master = NULL;
+	zrouter.mlag_info.t_read = NULL;
+	zrouter.mlag_info.t_write = NULL;
+
+	/*
+	 * Send Notification to clean private data
+	 */
+	zebra_mlag_private_cleanup_data();
+	return 0;
+}
+
+/*
+ * API to register zebra client for MLAG Updates
+ *
+ * Reads a 32-bit registration mask from the message. The first
+ * interested client also starts the MLAG pthread (if not already
+ * running) and opens the channel to MLAG.
+ */
+void zebra_mlag_client_register(ZAPI_HANDLER_ARGS)
+{
+	struct stream *s;
+	uint32_t reg_mask = 0;
+	int rc = 0;
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("Received MLAG Registration from client-proto:%d",
+			   client->proto);
+
+
+	/* Get input stream. */
+	s = msg;
+
+	/* Get data. */
+	STREAM_GETL(s, reg_mask);
+
+	if (client->mlag_updates_interested == true) {
+
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"Client is registered, existing mask: 0x%x, new mask: 0x%x",
+				client->mlag_reg_mask1, reg_mask);
+		if (client->mlag_reg_mask1 != reg_mask)
+			client->mlag_reg_mask1 = reg_mask;
+		/*
+		 * Client might missed MLAG-UP Notification, post-it again
+		 *
+		 * NOTE(review): this is sent even when
+		 * zrouter.mlag_info.connected is false, unlike the
+		 * first-registration path below - confirm intended.
+		 */
+		zebra_mlag_publish_process_state(client, ZEBRA_MLAG_PROCESS_UP);
+		return;
+	}
+
+
+	client->mlag_updates_interested = true;
+	client->mlag_reg_mask1 = reg_mask;
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("Registering for MLAG Updates with mask: 0x%x, ",
+			   client->mlag_reg_mask1);
+
+	zrouter.mlag_info.clients_interested_cnt++;
+
+	if (zrouter.mlag_info.clients_interested_cnt == 1) {
+		/*
+		 * First-client for MLAG Updates,open the communication channel
+		 * with MLAG
+		 */
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"First client, opening the channel with MLAG");
+
+		/*
+		 * The pthread from an earlier registration may still be
+		 * alive (its terminate event has not run yet); reuse it
+		 * rather than overwriting the handle with a second
+		 * thread, as the "test zebra mlag" command also does.
+		 */
+		if (zrouter.mlag_info.zebra_pth_mlag == NULL)
+			zebra_mlag_spawn_pthread();
+		rc = zebra_mlag_private_open_channel();
+		if (rc < 0) {
+			/*
+			 * For some reason, zebra not able to open the
+			 * comm-channel with MLAG, so post MLAG-DOWN to client.
+			 * later when the channel is open, zebra will send
+			 * MLAG-UP
+			 */
+			if (IS_ZEBRA_DEBUG_MLAG)
+				zlog_debug(
+					"Fail to open channel with MLAG,rc:%d, post Proto-down",
+					rc);
+			zebra_mlag_publish_process_state(
+				client, ZEBRA_MLAG_PROCESS_DOWN);
+		}
+	}
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("Client Registered successfully for MLAG Updates");
+
+	if (zrouter.mlag_info.connected == true)
+		zebra_mlag_publish_process_state(client, ZEBRA_MLAG_PROCESS_UP);
+stream_failure:
+	return;
+}
+
+/*
+ * API to un-register for MLAG Updates
+ *
+ * Clears the client's interest and mask; when the last interested
+ * client leaves, the channel to MLAG is closed.
+ */
+void zebra_mlag_client_unregister(ZAPI_HANDLER_ARGS)
+{
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("Received MLAG De-Registration from client-proto:%d",
+			   client->proto);
+
+	if (client->mlag_updates_interested == false)
+		/* Unexpected */
+		return;
+
+	client->mlag_updates_interested = false;
+	client->mlag_reg_mask1 = 0;
+	zrouter.mlag_info.clients_interested_cnt--;
+
+	if (zrouter.mlag_info.clients_interested_cnt == 0) {
+		/*
+		 * No-client is interested for MLAG Updates,close the
+		 * communication channel with MLAG
+		 */
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug("Last client for MLAG, close the channel ");
+
+		/*
+		 * Clean up flow:
+		 * =============
+		 * 1) main thread calls socket close which posts De-register
+		 * to MLAG write thread
+		 * 2) after MLAG write thread posts De-register it sends a
+		 * signal back to main thread to do the thread cleanup
+		 * this was mainly to make sure De-register is posted to MCLAGD.
+		 */
+		zebra_mlag_private_close_channel();
+	}
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug(
+			"Client De-Registered successfully for MLAG Updates");
+}
+
+/*
+ * Does following things.
+ * 1) allocates a new local stream, copies the client data into it and
+ * enqueues it to the MLAG Thread
+ * 2) MLAG Thread after dequeuing, encodes the client data using protobuf
+ * and writes it on to MLAG
+ */
+void zebra_mlag_forward_client_msg(ZAPI_HANDLER_ARGS)
+{
+	struct stream *in = msg;
+	struct stream *payload;
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("Received Client MLAG Data from client-proto:%d",
+			   client->proto);
+
+	/*
+	 * The client message is | Zebra Header + MLAG Data |.
+	 * Only the part from the current read position onwards (the
+	 * MLAG data) is copied and queued.
+	 */
+	payload = stream_new(in->endp);
+	stream_put(payload, in->data + in->getp, STREAM_READABLE(in));
+
+	stream_fifo_push_safe(zrouter.mlag_info.mlag_fifo, payload);
+	zebra_mlag_signal_write_thread();
+
+	if (!IS_ZEBRA_DEBUG_MLAG)
+		return;
+
+	zlog_debug("%s: Enqueued Client:%d data to MLAG Thread ", __func__,
+		   client->proto);
+}
+
+/***********************End of Zebra Main thread processing*************/
+
 enum mlag_role zebra_mlag_get_role(void)
 {
 	return zrouter.mlag_info.role;
@@ -45,7 +586,7 @@ DEFUN_HIDDEN (show_mlag,
 	      ZEBRA_STR
 	      "The mlag role on this machine\n")
 {
-	char buf[80];
+	char buf[MLAG_ROLE_STRSIZE];

 	vty_out(vty, "MLag is configured to: %s\n",
 		mlag_role2str(zrouter.mlag_info.role, buf, sizeof(buf)));
@@ -53,18 +594,17 @@ DEFUN_HIDDEN (show_mlag,
 	return CMD_SUCCESS;
 }

-DEFPY_HIDDEN (test_mlag,
-	      test_mlag_cmd,
-	      "test zebra mlag <none$none|primary$primary|secondary$secondary>",
-	      "Test code\n"
-	      ZEBRA_STR
-	      "Modify the Mlag state\n"
-	      "Mlag is not setup on the machine\n"
-	      "Mlag is setup to be primary\n"
-	      "Mlag is setup to be the secondary\n")
+DEFPY_HIDDEN(test_mlag, test_mlag_cmd,
+	     "test zebra mlag <none$none|primary$primary|secondary$secondary>",
+	     "Test code\n"
+	     ZEBRA_STR
+	     "Modify the Mlag state\n"
+	     "Mlag is not setup on the machine\n"
+	     "Mlag is setup to be primary\n"
+	     "Mlag is setup to be the secondary\n")
 {
 	enum mlag_role orig = zrouter.mlag_info.role;
-	char
buf1[80], buf2[80]; + char buf1[MLAG_ROLE_STRSIZE], buf2[MLAG_ROLE_STRSIZE]; if (none) zrouter.mlag_info.role = MLAG_ROLE_NONE; @@ -78,8 +618,25 @@ DEFPY_HIDDEN (test_mlag, mlag_role2str(orig, buf1, sizeof(buf1)), mlag_role2str(orig, buf2, sizeof(buf2))); - if (orig != zrouter.mlag_info.role) + if (orig != zrouter.mlag_info.role) { zsend_capabilities_all_clients(); + if (zrouter.mlag_info.role != MLAG_ROLE_NONE) { + if (zrouter.mlag_info.clients_interested_cnt == 0 + && test_mlag_in_progress == false) { + if (zrouter.mlag_info.zebra_pth_mlag == NULL) + zebra_mlag_spawn_pthread(); + zrouter.mlag_info.clients_interested_cnt++; + test_mlag_in_progress = true; + zebra_mlag_private_open_channel(); + } + } else { + if (test_mlag_in_progress == true) { + test_mlag_in_progress = false; + zrouter.mlag_info.clients_interested_cnt--; + zebra_mlag_private_close_channel(); + } + } + } return CMD_SUCCESS; } @@ -88,8 +645,539 @@ void zebra_mlag_init(void) { install_element(VIEW_NODE, &show_mlag_cmd); install_element(ENABLE_NODE, &test_mlag_cmd); + + /* + * Intialiaze the MLAG Global variables + * write thread will be created during actual registration with MCLAG + */ + zrouter.mlag_info.clients_interested_cnt = 0; + zrouter.mlag_info.connected = false; + zrouter.mlag_info.timer_running = false; + zrouter.mlag_info.mlag_fifo = stream_fifo_new(); + zrouter.mlag_info.zebra_pth_mlag = NULL; + zrouter.mlag_info.th_master = NULL; + zrouter.mlag_info.t_read = NULL; + zrouter.mlag_info.t_write = NULL; + test_mlag_in_progress = false; + zebra_mlag_reset_read_buffer(); } void zebra_mlag_terminate(void) { } + + +/* + * + * ProtoBuf Encoding APIs + */ + +#ifdef HAVE_PROTOBUF + +DEFINE_MTYPE_STATIC(ZEBRA, MLAG_PBUF, "ZEBRA MLAG PROTOBUF") + +int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) +{ + ZebraMlagHeader hdr = ZEBRA_MLAG__HEADER__INIT; + struct mlag_msg mlag_msg; + uint8_t tmp_buf[ZEBRA_MLAG_BUF_LIMIT]; + int len = 0; + int n_len = 0; + int rc = 0; + char 
buf[ZLOG_FILTER_LENGTH_MAX]; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Entering..", __func__); + + rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg); + if (rc) + return rc; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: Mlag ProtoBuf encoding of message:%s, len:%d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + mlag_msg.data_len); + *msg_type = mlag_msg.msg_type; + switch (mlag_msg.msg_type) { + case MLAG_MROUTE_ADD: { + struct mlag_mroute_add msg; + ZebraMlagMrouteAdd pay_load = ZEBRA_MLAG_MROUTE_ADD__INIT; + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_add(s, &msg); + if (rc) + return rc; + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load.vrf_name = XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.vrf_name, msg.vrf_name, vrf_name_len); + pay_load.source_ip = msg.source_ip; + pay_load.group_ip = msg.group_ip; + pay_load.cost_to_rp = msg.cost_to_rp; + pay_load.owner_id = msg.owner_id; + pay_load.am_i_dr = msg.am_i_dr; + pay_load.am_i_dual_active = msg.am_i_dual_active; + pay_load.vrf_id = msg.vrf_id; + + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load.intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.intf_name, msg.intf_name, + vrf_name_len); + } + + len = zebra_mlag_mroute_add__pack(&pay_load, tmp_buf); + XFREE(MTYPE_MLAG_PBUF, pay_load.vrf_name); + if (msg.owner_id == MLAG_OWNER_INTERFACE) + XFREE(MTYPE_MLAG_PBUF, pay_load.intf_name); + } break; + case MLAG_MROUTE_DEL: { + struct mlag_mroute_del msg; + ZebraMlagMrouteDel pay_load = ZEBRA_MLAG_MROUTE_DEL__INIT; + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_del(s, &msg); + if (rc) + return rc; + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load.vrf_name = XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.vrf_name, msg.vrf_name, vrf_name_len); + pay_load.source_ip = msg.source_ip; + pay_load.group_ip = msg.group_ip; + pay_load.owner_id = msg.owner_id; + 
pay_load.vrf_id = msg.vrf_id; + + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load.intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load.intf_name, msg.intf_name, + vrf_name_len); + } + + len = zebra_mlag_mroute_del__pack(&pay_load, tmp_buf); + XFREE(MTYPE_MLAG_PBUF, pay_load.vrf_name); + if (msg.owner_id == MLAG_OWNER_INTERFACE) + XFREE(MTYPE_MLAG_PBUF, pay_load.intf_name); + } break; + case MLAG_MROUTE_ADD_BULK: { + struct mlag_mroute_add msg; + ZebraMlagMrouteAddBulk Bulk_msg = + ZEBRA_MLAG_MROUTE_ADD_BULK__INIT; + ZebraMlagMrouteAdd **pay_load = NULL; + int i; + bool cleanup = false; + + Bulk_msg.n_mroute_add = mlag_msg.msg_cnt; + pay_load = XMALLOC(MTYPE_MLAG_PBUF, sizeof(ZebraMlagMrouteAdd *) + * mlag_msg.msg_cnt); + + for (i = 0; i < mlag_msg.msg_cnt; i++) { + + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_add(s, &msg); + if (rc) { + cleanup = true; + break; + } + pay_load[i] = XMALLOC(MTYPE_MLAG_PBUF, + sizeof(ZebraMlagMrouteAdd)); + zebra_mlag_mroute_add__init(pay_load[i]); + + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load[i]->vrf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + strlcpy(pay_load[i]->vrf_name, msg.vrf_name, + vrf_name_len); + pay_load[i]->source_ip = msg.source_ip; + pay_load[i]->group_ip = msg.group_ip; + pay_load[i]->cost_to_rp = msg.cost_to_rp; + pay_load[i]->owner_id = msg.owner_id; + pay_load[i]->am_i_dr = msg.am_i_dr; + pay_load[i]->am_i_dual_active = msg.am_i_dual_active; + pay_load[i]->vrf_id = msg.vrf_id; + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load[i]->intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + + strlcpy(pay_load[i]->intf_name, msg.intf_name, + vrf_name_len); + } + } + if (cleanup == false) { + Bulk_msg.mroute_add = pay_load; + len = zebra_mlag_mroute_add_bulk__pack(&Bulk_msg, + tmp_buf); + } + + for (i = 0; i < mlag_msg.msg_cnt; i++) { + if (pay_load[i]->vrf_name) + 
XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); + if (pay_load[i]->owner_id == MLAG_OWNER_INTERFACE + && pay_load[i]->intf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->intf_name); + if (pay_load[i]) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]); + } + XFREE(MTYPE_MLAG_PBUF, pay_load); + if (cleanup == true) + return -1; + } break; + case MLAG_MROUTE_DEL_BULK: { + struct mlag_mroute_del msg; + ZebraMlagMrouteDelBulk Bulk_msg = + ZEBRA_MLAG_MROUTE_DEL_BULK__INIT; + ZebraMlagMrouteDel **pay_load = NULL; + int i; + bool cleanup = false; + + Bulk_msg.n_mroute_del = mlag_msg.msg_cnt; + pay_load = XMALLOC(MTYPE_MLAG_PBUF, sizeof(ZebraMlagMrouteDel *) + * mlag_msg.msg_cnt); + + for (i = 0; i < mlag_msg.msg_cnt; i++) { + + uint32_t vrf_name_len = 0; + + rc = mlag_lib_decode_mroute_del(s, &msg); + if (rc) { + cleanup = true; + break; + } + + pay_load[i] = XMALLOC(MTYPE_MLAG_PBUF, + sizeof(ZebraMlagMrouteDel)); + zebra_mlag_mroute_del__init(pay_load[i]); + + vrf_name_len = strlen(msg.vrf_name) + 1; + pay_load[i]->vrf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + + strlcpy(pay_load[i]->vrf_name, msg.vrf_name, + vrf_name_len); + pay_load[i]->source_ip = msg.source_ip; + pay_load[i]->group_ip = msg.group_ip; + pay_load[i]->owner_id = msg.owner_id; + pay_load[i]->vrf_id = msg.vrf_id; + if (msg.owner_id == MLAG_OWNER_INTERFACE) { + vrf_name_len = strlen(msg.intf_name) + 1; + pay_load[i]->intf_name = + XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); + + strlcpy(pay_load[i]->intf_name, msg.intf_name, + vrf_name_len); + } + } + if (!cleanup) { + Bulk_msg.mroute_del = pay_load; + len = zebra_mlag_mroute_del_bulk__pack(&Bulk_msg, + tmp_buf); + } + + for (i = 0; i < mlag_msg.msg_cnt; i++) { + if (pay_load[i]->vrf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); + if (pay_load[i]->owner_id == MLAG_OWNER_INTERFACE + && pay_load[i]->intf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->intf_name); + if (pay_load[i]) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]); + } + XFREE(MTYPE_MLAG_PBUF, pay_load); + if 
(cleanup) + return -1; + } break; + default: + break; + } + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug("%s: length of Mlag ProtoBuf encoded message:%s, %d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + len); + hdr.type = (ZebraMlagHeader__MessageType)mlag_msg.msg_type; + if (len != 0) { + hdr.data.len = len; + hdr.data.data = XMALLOC(MTYPE_MLAG_PBUF, len); + memcpy(hdr.data.data, tmp_buf, len); + } + + /* + * ProtoBuf Infra will not support to demarc the pointers whem multiple + * messages are posted inside a single Buffer. + * 2 -solutions exist to solve this + * 1. add Unenoced length at the beginning of every message, this will + * be used to point to next message in the buffer + * 2. another solution is defining all messages insides another message + * But this will permit only 32 messages. this can be extended with + * multiple levels. + * for simplicity we are going with solution-1. + */ + len = zebra_mlag__header__pack(&hdr, + (mlag_wr_buffer + ZEBRA_MLAG_LEN_SIZE)); + n_len = htonl(len); + memcpy(mlag_wr_buffer, &n_len, ZEBRA_MLAG_LEN_SIZE); + len += ZEBRA_MLAG_LEN_SIZE; + + if (IS_ZEBRA_DEBUG_MLAG) + zlog_debug( + "%s: length of Mlag ProtoBuf message:%s with Header %d", + __func__, + mlag_lib_msgid_to_str(mlag_msg.msg_type, buf, + sizeof(buf)), + len); + if (hdr.data.data) + XFREE(MTYPE_MLAG_PBUF, hdr.data.data); + + return len; +}
+
+/*
+ * Append a string to the stream as a fixed-width field of exactly 'size'
+ * bytes: at most size - 1 characters of 'str', followed by zero padding.
+ *
+ * The strings handed in here come out of an unpacked protobuf message and
+ * are only as long as their contents, so copying a fixed 'size' bytes
+ * straight from them would read past the end of the allocation.
+ * A NULL 'str' produces an all-zero field.
+ */
+static void zebra_mlag_put_fixed_str(struct stream *s, const char *str,
+				     size_t size)
+{
+	size_t len;
+
+	if (size == 0)
+		return;
+
+	len = str ? strnlen(str, size - 1) : 0;
+	if (len)
+		stream_put(s, str, len);
+
+	/*
+	 * Same NULL-source stream_put() call that was already used here to
+	 * emit an empty interface name; it supplies the padding and the
+	 * terminating NUL.
+	 */
+	stream_put(s, NULL, size - len);
+}
+
+/*
+ * Decode one ProtoBuf message received from MLAGD and re-encode it into 's'
+ * in the internal (non-ProtoBuf) MLAG message layout.
+ *
+ * s    - stream that receives the zclient header, the message type, the
+ *        payload length, the batch count and the payload fields
+ * data - ProtoBuf encoded ZebraMlagHeader
+ * len  - number of bytes in 'data'
+ *
+ * Returns the message type taken from the ProtoBuf header, or -1 when the
+ * header or its payload cannot be unpacked. On a payload unpack failure the
+ * zclient header and message type have already been written to 's'.
+ */
+int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data,
+				       uint32_t len)
+{
+	uint32_t msg_type;
+	ZebraMlagHeader *hdr;
+	char buf[80];
+
+	hdr = zebra_mlag__header__unpack(NULL, len, data);
+	if (hdr == NULL)
+		return -1;
+
+	/*
+	 * ADD The MLAG Header
+	 */
+	zclient_create_header(s, ZEBRA_MLAG_FORWARD_MSG, VRF_DEFAULT);
+
+	msg_type = hdr->type;
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("%s: Mlag ProtoBuf decoding of message:%s", __func__,
+			   mlag_lib_msgid_to_str(msg_type, buf, sizeof(buf)));
+
+	/*
+	 * Internal MLAG Message-types & MLAG.proto message types should
+	 * always match, otherwise there can be decoding errors
+	 * To avoid exposing clients with Protobuf flags, using internal
+	 * message-types
+	 */
+	stream_putl(s, hdr->type);
+
+	if (hdr->data.len == 0) {
+		/* NULL Payload */
+		stream_putw(s, MLAG_MSG_NULL_PAYLOAD);
+		/* No Batching */
+		stream_putw(s, MLAG_MSG_NO_BATCH);
+	} else {
+		switch (msg_type) {
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_STATUS_UPDATE: {
+			ZebraMlagStatusUpdate *msg = NULL;
+
+			msg = zebra_mlag_status_update__unpack(
+				NULL, hdr->data.len, hdr->data.data);
+			if (msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, sizeof(struct mlag_status));
+			/* No Batching */
+			stream_putw(s, MLAG_MSG_NO_BATCH);
+			/* Actual Data */
+			zebra_mlag_put_fixed_str(s, msg->peerlink,
+						 INTERFACE_NAMSIZ);
+			stream_putl(s, msg->my_role);
+			stream_putl(s, msg->peer_state);
+			zebra_mlag_status_update__free_unpacked(msg, NULL);
+		} break;
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_VXLAN_UPDATE: {
+			ZebraMlagVxlanUpdate *msg = NULL;
+
+			msg = zebra_mlag_vxlan_update__unpack(
+				NULL, hdr->data.len, hdr->data.data);
+			if (msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, sizeof(struct mlag_vxlan));
+			/* No Batching */
+			stream_putw(s, MLAG_MSG_NO_BATCH);
+			/* Actual Data */
+			stream_putl(s, msg->anycast_ip);
+			stream_putl(s, msg->local_ip);
+			zebra_mlag_vxlan_update__free_unpacked(msg, NULL);
+		} break;
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_ADD: {
+			ZebraMlagMrouteAdd *msg = NULL;
+
+			msg = zebra_mlag_mroute_add__unpack(NULL, hdr->data.len,
+							    hdr->data.data);
+			if (msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, sizeof(struct mlag_mroute_add));
+			/* No Batching */
+			stream_putw(s, MLAG_MSG_NO_BATCH);
+			/* Actual Data */
+			zebra_mlag_put_fixed_str(s, msg->vrf_name, VRF_NAMSIZ);
+
+			stream_putl(s, msg->source_ip);
+			stream_putl(s, msg->group_ip);
+			stream_putl(s, msg->cost_to_rp);
+			stream_putl(s, msg->owner_id);
+			stream_putc(s, msg->am_i_dr);
+			stream_putc(s, msg->am_i_dual_active);
+			stream_putl(s, msg->vrf_id);
+			if (msg->owner_id == MLAG_OWNER_INTERFACE)
+				zebra_mlag_put_fixed_str(s, msg->intf_name,
+							 INTERFACE_NAMSIZ);
+			else
+				stream_put(s, NULL, INTERFACE_NAMSIZ);
+			zebra_mlag_mroute_add__free_unpacked(msg, NULL);
+		} break;
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_DEL: {
+			ZebraMlagMrouteDel *msg = NULL;
+
+			msg = zebra_mlag_mroute_del__unpack(NULL, hdr->data.len,
+							    hdr->data.data);
+			if (msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, sizeof(struct mlag_mroute_del));
+			/* No Batching */
+			stream_putw(s, MLAG_MSG_NO_BATCH);
+			/* Actual Data */
+			zebra_mlag_put_fixed_str(s, msg->vrf_name, VRF_NAMSIZ);
+
+			/*
+			 * Same field order as the bulk-delete case below:
+			 * group_ip is written once, directly followed by
+			 * owner_id.
+			 */
+			stream_putl(s, msg->source_ip);
+			stream_putl(s, msg->group_ip);
+			stream_putl(s, msg->owner_id);
+			stream_putl(s, msg->vrf_id);
+			if (msg->owner_id == MLAG_OWNER_INTERFACE)
+				zebra_mlag_put_fixed_str(s, msg->intf_name,
+							 INTERFACE_NAMSIZ);
+			else
+				stream_put(s, NULL, INTERFACE_NAMSIZ);
+			zebra_mlag_mroute_del__free_unpacked(msg, NULL);
+		} break;
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_ADD_BULK: {
+			ZebraMlagMrouteAddBulk *Bulk_msg = NULL;
+			ZebraMlagMrouteAdd *msg = NULL;
+			size_t i;
+
+			Bulk_msg = zebra_mlag_mroute_add_bulk__unpack(
+				NULL, hdr->data.len, hdr->data.data);
+			if (Bulk_msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, (Bulk_msg->n_mroute_add
+					* sizeof(struct mlag_mroute_add)));
+			/* No. of msgs in Batch */
+			stream_putw(s, Bulk_msg->n_mroute_add);
+
+			/* Actual Data */
+			for (i = 0; i < Bulk_msg->n_mroute_add; i++) {
+
+				msg = Bulk_msg->mroute_add[i];
+
+				zebra_mlag_put_fixed_str(s, msg->vrf_name,
+							 VRF_NAMSIZ);
+				stream_putl(s, msg->source_ip);
+				stream_putl(s, msg->group_ip);
+				stream_putl(s, msg->cost_to_rp);
+				stream_putl(s, msg->owner_id);
+				stream_putc(s, msg->am_i_dr);
+				stream_putc(s, msg->am_i_dual_active);
+				stream_putl(s, msg->vrf_id);
+				if (msg->owner_id == MLAG_OWNER_INTERFACE)
+					zebra_mlag_put_fixed_str(
+						s, msg->intf_name,
+						INTERFACE_NAMSIZ);
+				else
+					stream_put(s, NULL, INTERFACE_NAMSIZ);
+			}
+			zebra_mlag_mroute_add_bulk__free_unpacked(Bulk_msg,
+								  NULL);
+		} break;
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_MROUTE_DEL_BULK: {
+			ZebraMlagMrouteDelBulk *Bulk_msg = NULL;
+			ZebraMlagMrouteDel *msg = NULL;
+			size_t i;
+
+			Bulk_msg = zebra_mlag_mroute_del_bulk__unpack(
+				NULL, hdr->data.len, hdr->data.data);
+			if (Bulk_msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, (Bulk_msg->n_mroute_del
+					* sizeof(struct mlag_mroute_del)));
+			/* No. of msgs in Batch */
+			stream_putw(s, Bulk_msg->n_mroute_del);
+
+			/* Actual Data */
+			for (i = 0; i < Bulk_msg->n_mroute_del; i++) {
+
+				msg = Bulk_msg->mroute_del[i];
+
+				zebra_mlag_put_fixed_str(s, msg->vrf_name,
+							 VRF_NAMSIZ);
+				stream_putl(s, msg->source_ip);
+				stream_putl(s, msg->group_ip);
+				stream_putl(s, msg->owner_id);
+				stream_putl(s, msg->vrf_id);
+				if (msg->owner_id == MLAG_OWNER_INTERFACE)
+					zebra_mlag_put_fixed_str(
+						s, msg->intf_name,
+						INTERFACE_NAMSIZ);
+				else
+					stream_put(s, NULL, INTERFACE_NAMSIZ);
+			}
+			zebra_mlag_mroute_del_bulk__free_unpacked(Bulk_msg,
+								  NULL);
+		} break;
+		case ZEBRA_MLAG__HEADER__MESSAGE_TYPE__ZEBRA_MLAG_ZEBRA_STATUS_UPDATE: {
+			ZebraMlagZebraStatusUpdate *msg = NULL;
+
+			msg = zebra_mlag_zebra_status_update__unpack(
+				NULL, hdr->data.len, hdr->data.data);
+			if (msg == NULL) {
+				zebra_mlag__header__free_unpacked(hdr, NULL);
+				return -1;
+			}
+			/* Payload len */
+			stream_putw(s, sizeof(struct mlag_frr_status));
+			/* No Batching */
+			stream_putw(s, MLAG_MSG_NO_BATCH);
+			/* Actual Data */
+			stream_putl(s, msg->peer_frrstate);
+			zebra_mlag_zebra_status_update__free_unpacked(msg,
+								      NULL);
+		} break;
+		default:
+			break;
+		}
+	}
+	zebra_mlag__header__free_unpacked(hdr, NULL);
+	return msg_type;
+}
+
+#else
+/* Built without ProtoBuf support: nothing is encoded, 0 is returned. */
+int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type)
+{
+	return 0;
+}
+
+/* Built without ProtoBuf support: nothing is decoded, 0 is returned. */
+int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data,
+				       uint32_t len)
+{
+	return 0;
+}
+#endif
diff --git a/zebra/zebra_mlag.h b/zebra/zebra_mlag.h index 90a5a41fa4..6f7ef8319f 100644 --- a/zebra/zebra_mlag.h +++ b/zebra/zebra_mlag.h @@ -23,18 +23,45 @@ #define __ZEBRA_MLAG_H__ #include "mlag.h" +#include "zclient.h" +#include "zebra/zserv.h" -#ifdef __cplusplus -extern "C" { +#ifdef HAVE_PROTOBUF +#include "mlag/mlag.pb-c.h" #endif -void zebra_mlag_init(void); -void zebra_mlag_terminate(void); +#define ZEBRA_MLAG_BUF_LIMIT 2048 +#define ZEBRA_MLAG_LEN_SIZE 4 -enum mlag_role zebra_mlag_get_role(void); +extern uint8_t
mlag_wr_buffer[ZEBRA_MLAG_BUF_LIMIT]; +extern uint8_t mlag_rd_buffer[ZEBRA_MLAG_BUF_LIMIT]; +extern uint32_t mlag_rd_buf_offset; -#ifdef __cplusplus +static inline void zebra_mlag_reset_read_buffer(void) +{ + mlag_rd_buf_offset = 0; } -#endif +enum zebra_mlag_state { + MLAG_UP = 1, + MLAG_DOWN = 2, +}; + +void zebra_mlag_init(void); +void zebra_mlag_terminate(void); +enum mlag_role zebra_mlag_get_role(void); +void zebra_mlag_client_register(ZAPI_HANDLER_ARGS); +void zebra_mlag_client_unregister(ZAPI_HANDLER_ARGS); +void zebra_mlag_forward_client_msg(ZAPI_HANDLER_ARGS); +void zebra_mlag_send_register(void); +void zebra_mlag_send_deregister(void); +void zebra_mlag_handle_process_state(enum zebra_mlag_state state); +void zebra_mlag_process_mlag_data(uint8_t *data, uint32_t len); +/* + * ProtoBuffer Api's + */ +int zebra_mlag_protobuf_encode_client_data(struct stream *s, + uint32_t *msg_type); +int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, + uint32_t len); #endif diff --git a/zebra/zebra_mlag_private.c b/zebra/zebra_mlag_private.c new file mode 100644 index 0000000000..4df7b6dd11 --- /dev/null +++ b/zebra/zebra_mlag_private.c @@ -0,0 +1,299 @@ +/* + * This is an implementation of MLAG Functionality + * + * Module name: Zebra MLAG + * + * Author: sathesh Kumar karra <sathk@cumulusnetworks.com> + * + * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */
+#include "zebra.h"
+
+#include "hook.h"
+#include "module.h"
+#include "thread.h"
+#include "frr_pthread.h"
+#include "libfrr.h"
+#include "version.h"
+#include "network.h"
+
+#include "lib/stream.h"
+
+#include "zebra/debug.h"
+#include "zebra/zebra_router.h"
+#include "zebra/zebra_mlag.h"
+#include "zebra/zebra_mlag_private.h"
+
+#include <errno.h>
+#include <sys/un.h>
+
+
+/*
+ * This file will have platform specific apis to communicate with MCLAG.
+ *
+ */
+
+#ifdef HAVE_CUMULUS
+
+static struct thread_master *zmlag_master;
+/* -1 whenever no socket is open, so a stray close() can never hit fd 0 */
+static int mlag_socket = -1;
+
+static int zebra_mlag_connect(struct thread *thread);
+static int zebra_mlag_read(struct thread *thread);
+
+/* Close the MLAGD socket if it is open and mark it as closed. */
+static void zebra_mlag_close_socket(void)
+{
+	if (mlag_socket >= 0) {
+		close(mlag_socket);
+		mlag_socket = -1;
+	}
+}
+
+/*
+ * Write the data to MLAGD
+ *
+ * Returns whatever write() returned: the number of bytes written or -1.
+ * NOTE(review): a short write on the non-blocking socket is reported to the
+ * caller but not retried here - confirm the caller copes with that.
+ */
+int zebra_mlag_private_write_data(uint8_t *data, uint32_t len)
+{
+	int rc = 0;
+
+	if (IS_ZEBRA_DEBUG_MLAG) {
+		zlog_debug("%s: Writing %u length Data to clag", __func__, len);
+		zlog_hexdump(data, len);
+	}
+	rc = write(mlag_socket, data, len);
+	return rc;
+}
+
+/* Arm the read event for the MLAGD socket. */
+static void zebra_mlag_sched_read(void)
+{
+	thread_add_read(zmlag_master, zebra_mlag_read, NULL, mlag_socket,
+			&zrouter.mlag_info.t_read);
+}
+
+/*
+ * The connection to MLAGD is gone (EOF, read error or an unusable frame):
+ * close the socket, drop any partially received message and report
+ * MLAG_DOWN. Always returns -1 so callers can return it directly.
+ */
+static int zebra_mlag_conn_lost(void)
+{
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("MLAG connection closed socket : %d", mlag_socket);
+	zebra_mlag_close_socket();
+	zebra_mlag_reset_read_buffer();
+	zebra_mlag_handle_process_state(MLAG_DOWN);
+	return -1;
+}
+
+/*
+ * Read from the socket until mlag_rd_buffer holds 'target' bytes.
+ *
+ * Returns 1 when 'target' bytes are available, 0 when more data is needed
+ * (the read event has been re-armed), -1 when the connection was lost.
+ * The caller guarantees target <= ZEBRA_MLAG_BUF_LIMIT.
+ */
+static int zebra_mlag_fill_buffer(uint32_t target)
+{
+	size_t want = target - mlag_rd_buf_offset;
+	ssize_t data_len;
+
+	data_len = read(mlag_socket, mlag_rd_buffer + mlag_rd_buf_offset,
+			want);
+	if (data_len < 0 && (errno == EAGAIN || errno == EINTR)) {
+		/* Socket is non-blocking: nothing to read yet is not fatal */
+		zebra_mlag_sched_read();
+		return 0;
+	}
+	if (data_len <= 0)
+		return zebra_mlag_conn_lost();
+
+	mlag_rd_buf_offset += data_len;
+	if ((size_t)data_len != want) {
+		/* Try again later */
+		zebra_mlag_sched_read();
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Read event handler for the MLAGD socket.
+ *
+ * Returns 0 when a message was processed or more data is awaited, -1 when
+ * the connection was torn down.
+ */
+static int zebra_mlag_read(struct thread *thread)
+{
+	uint32_t n_msglen;
+	uint32_t h_msglen;
+	uint32_t tot_len;
+	int rc;
+
+	/*
+	 * Received message in sock_stream looks like below
+	 * | len-1 (4 Bytes) | payload-1 (len-1) |
+	 *   len-2 (4 Bytes) | payload-2 (len-2) | ..
+	 *
+	 * Idea is read one message completely, then process, until message is
+	 * read completely, keep on reading from the socket
+	 */
+	if (mlag_rd_buf_offset < ZEBRA_MLAG_LEN_SIZE) {
+		rc = zebra_mlag_fill_buffer(ZEBRA_MLAG_LEN_SIZE);
+		if (rc <= 0)
+			return rc;
+	}
+
+	/*
+	 * Get the actual packet length. Copied out rather than read through
+	 * a uint32_t pointer because mlag_rd_buffer is a plain byte array.
+	 */
+	memcpy(&n_msglen, mlag_rd_buffer, ZEBRA_MLAG_LEN_SIZE);
+	h_msglen = ntohl(n_msglen);
+
+	/*
+	 * The length comes from the peer; a frame that does not fit into
+	 * mlag_rd_buffer (ZEBRA_MLAG_BUF_LIMIT bytes) must never be read
+	 * into it. There is no way to resynchronise the byte stream, so the
+	 * connection is dropped.
+	 */
+	if (h_msglen > ZEBRA_MLAG_BUF_LIMIT - ZEBRA_MLAG_LEN_SIZE) {
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"%s: MLAG message length %u exceeds buffer limit %u",
+				__func__, h_msglen,
+				(uint32_t)ZEBRA_MLAG_BUF_LIMIT);
+		return zebra_mlag_conn_lost();
+	}
+
+	/* This will be the actual length of the packet */
+	tot_len = h_msglen + ZEBRA_MLAG_LEN_SIZE;
+
+	if (mlag_rd_buf_offset < tot_len) {
+		rc = zebra_mlag_fill_buffer(tot_len);
+		if (rc <= 0)
+			return rc;
+	}
+
+	if (IS_ZEBRA_DEBUG_MLAG) {
+		zlog_debug("Received a MLAG Message from socket: %d, len:%u ",
+			   mlag_socket, tot_len);
+		zlog_hexdump(mlag_rd_buffer, tot_len);
+	}
+
+	tot_len -= ZEBRA_MLAG_LEN_SIZE;
+
+	/* Process the packet */
+	zebra_mlag_process_mlag_data(mlag_rd_buffer + ZEBRA_MLAG_LEN_SIZE,
+				     tot_len);
+
+	/* Register read thread. */
+	zebra_mlag_reset_read_buffer();
+	zebra_mlag_sched_read();
+	return 0;
+}
+
+/*
+ * Try to connect to the MLAGD unix socket.
+ *
+ * On connect failure a retry is scheduled in 10 seconds and 0 is returned.
+ * On success the read event is armed and MLAG_UP is reported.
+ * Returns -1 only when the socket itself cannot be created.
+ */
+static int zebra_mlag_connect(struct thread *thread)
+{
+	struct sockaddr_un svr = {0};
+	struct ucred ucred = {0};
+	socklen_t len = 0;
+
+	/* Reset the Timer-running flag */
+	zrouter.mlag_info.timer_running = false;
+
+	svr.sun_family = AF_UNIX;
+#define MLAG_SOCK_NAME "/var/run/clag-zebra.socket"
+	/* Bound the copy by the destination, not by the source literal */
+	strlcpy(svr.sun_path, MLAG_SOCK_NAME, sizeof(svr.sun_path));
+
+	mlag_socket = socket(svr.sun_family, SOCK_STREAM, 0);
+	if (mlag_socket < 0)
+		return -1;
+
+	if (connect(mlag_socket, (struct sockaddr *)&svr, sizeof(svr)) == -1) {
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"Unable to connect to %s try again in 10 secs",
+				svr.sun_path);
+		zebra_mlag_close_socket();
+		zrouter.mlag_info.timer_running = true;
+		thread_add_timer(zmlag_master, zebra_mlag_connect, NULL, 10,
+				 &zrouter.mlag_info.t_read);
+		return 0;
+	}
+	len = sizeof(struct ucred);
+	ucred.pid = getpid();
+
+	set_nonblocking(mlag_socket);
+	/*
+	 * NOTE(review): unix(7) documents SO_PEERCRED as a read-only option,
+	 * so this call presumably fails and has no effect - confirm whether
+	 * it can be dropped. Its result is ignored, as before.
+	 */
+	setsockopt(mlag_socket, SOL_SOCKET, SO_PEERCRED, &ucred, len);
+
+	if (IS_ZEBRA_DEBUG_MLAG)
+		zlog_debug("%s: Connection with MLAG is established ",
+			   __func__);
+
+	thread_add_read(zmlag_master, zebra_mlag_read, NULL, mlag_socket,
+			&zrouter.mlag_info.t_read);
+	/*
+	 * Connection is established with MLAGD, post to clients
+	 */
+	zebra_mlag_handle_process_state(MLAG_UP);
+	return 0;
+}
+
+/*
+ * Currently we are doing polling later we will look for better options
+ */
+void zebra_mlag_private_monitor_state(void)
+{
+	thread_add_event(zmlag_master, zebra_mlag_connect, NULL, 0,
+			 &zrouter.mlag_info.t_read);
+}
+
+/*
+ * Start connecting to MLAGD unless a connection already exists, a retry
+ * timer is pending, or no client is interested. Always returns 0.
+ */
+int zebra_mlag_private_open_channel(void)
+{
+	zmlag_master = zrouter.mlag_info.th_master;
+
+	if (zrouter.mlag_info.connected == true) {
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug("%s: Zebra already connected to MLAGD",
+				   __func__);
+		return 0;
+	}
+
+	if (zrouter.mlag_info.timer_running == true) {
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug(
+				"%s: Connection retry is in progress for MLAGD",
+				__func__);
+		return 0;
+	}
+
+	if (zrouter.mlag_info.clients_interested_cnt) {
+		/*
+		 * Connect only if any clients are showing interest
+		 */
+		thread_add_event(zmlag_master, zebra_mlag_connect, NULL, 0,
+				 &zrouter.mlag_info.t_read);
+	}
+	return 0;
+}
+
+/*
+ * Ask MLAGD to de-register once no client is interested any more.
+ * Returns -1 when the channel was never opened or clients are still
+ * registered, 0 after the de-register has been posted.
+ */
+int zebra_mlag_private_close_channel(void)
+{
+	if (zmlag_master == NULL)
+		return -1;
+
+	if (zrouter.mlag_info.clients_interested_cnt) {
+		if (IS_ZEBRA_DEBUG_MLAG)
+			zlog_debug("%s: still %u clients are connected, skip",
+				   __func__,
+				   zrouter.mlag_info.clients_interested_cnt);
+		return -1;
+	}
+
+	/*
+	 * Post the De-register to MLAG, so that it can do necessary cleanup
+	 */
+	zebra_mlag_send_deregister();
+
+	return 0;
+}
+
+/* Forget the thread master, clear the connection flags, close the socket. */
+void zebra_mlag_private_cleanup_data(void)
+{
+	zmlag_master = NULL;
+	zrouter.mlag_info.connected = false;
+	zrouter.mlag_info.timer_running = false;
+
+	zebra_mlag_close_socket();
+}
+
+#else /*HAVE_CUMULUS */
+
+int zebra_mlag_private_write_data(uint8_t *data, uint32_t len)
+{
+	return 0;
+}
+
+void zebra_mlag_private_monitor_state(void)
+{
+}
+
+int zebra_mlag_private_open_channel(void)
+{
+	return 0;
+}
+
+int zebra_mlag_private_close_channel(void)
+{
+	return 0;
+}
+
+void zebra_mlag_private_cleanup_data(void)
+{
+}
+#endif /*HAVE_CUMULUS*/
diff --git a/zebra/zebra_mlag_private.h b/zebra/zebra_mlag_private.h new file mode 100644 index 0000000000..f7b68e9ba0 --- /dev/null +++ b/zebra/zebra_mlag_private.h @@ -0,0 +1,37 @@ +/* + * This is an implementation of MLAG Functionality + * + * Module name: Zebra MLAG + * + * Author: sathesh Kumar karra <sathk@cumulusnetworks.com> + * + * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version.
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __ZEBRA_MLAG_PRIVATE_H__ +#define __ZEBRA_MLAG_PRIVATE_H__ + + +/* + * all the platform specific API's + */ + +int zebra_mlag_private_open_channel(void); +int zebra_mlag_private_close_channel(void); +void zebra_mlag_private_monitor_state(void); +int zebra_mlag_private_write_data(uint8_t *data, uint32_t len); +void zebra_mlag_private_cleanup_data(void); +#endif diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 2d7521d8be..ba204bda61 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -1675,10 +1675,13 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re) new_active = nexthop_active_check(rn, re, nexthop); - if (new_active - && nexthop_group_active_nexthop_num(&new_grp) - >= zrouter.multipath_num) { - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + if (new_active && curr_active >= zrouter.multipath_num) { + struct nexthop *nh; + + /* Set it and its resolved nexthop as inactive. 
*/ + for (nh = nexthop; nh; nh = nh->resolved) + UNSET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE); + new_active = 0; } diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c index 364f5755d8..091f66f9eb 100644 --- a/zebra/zebra_routemap.c +++ b/zebra/zebra_routemap.c @@ -203,22 +203,22 @@ static void show_vrf_proto_rm(struct vty *vty, struct zebra_vrf *zvrf, { int i; - vty_out(vty, "Protocol : route-map\n"); - vty_out(vty, "------------------------\n"); + vty_out(vty, "Protocol : route-map\n"); + vty_out(vty, "-------------------------------------\n"); for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { if (PROTO_RM_NAME(zvrf, af_type, i)) - vty_out(vty, "%-10s : %-10s\n", zebra_route_string(i), + vty_out(vty, "%-24s : %-10s\n", zebra_route_string(i), PROTO_RM_NAME(zvrf, af_type, i)); else - vty_out(vty, "%-10s : none\n", zebra_route_string(i)); + vty_out(vty, "%-24s : none\n", zebra_route_string(i)); } if (PROTO_RM_NAME(zvrf, af_type, i)) - vty_out(vty, "%-10s : %-10s\n", "any", + vty_out(vty, "%-24s : %-10s\n", "any", PROTO_RM_NAME(zvrf, af_type, i)); else - vty_out(vty, "%-10s : none\n", "any"); + vty_out(vty, "%-24s : none\n", "any"); } static void show_vrf_nht_rm(struct vty *vty, struct zebra_vrf *zvrf, @@ -226,22 +226,22 @@ static void show_vrf_nht_rm(struct vty *vty, struct zebra_vrf *zvrf, { int i; - vty_out(vty, "Protocol : route-map\n"); - vty_out(vty, "------------------------\n"); + vty_out(vty, "Protocol : route-map\n"); + vty_out(vty, "-------------------------------------\n"); for (i = 0; i < ZEBRA_ROUTE_MAX; i++) { if (NHT_RM_NAME(zvrf, af_type, i)) - vty_out(vty, "%-10s : %-10s\n", zebra_route_string(i), + vty_out(vty, "%-24s : %-10s\n", zebra_route_string(i), NHT_RM_NAME(zvrf, af_type, i)); else - vty_out(vty, "%-10s : none\n", zebra_route_string(i)); + vty_out(vty, "%-24s : none\n", zebra_route_string(i)); } if (NHT_RM_NAME(zvrf, af_type, i)) - vty_out(vty, "%-10s : %-10s\n", "any", + vty_out(vty, "%-24s : %-10s\n", "any", NHT_RM_NAME(zvrf, af_type, i)); 
else - vty_out(vty, "%-10s : none\n", "any"); + vty_out(vty, "%-24s : none\n", "any"); } static int show_proto_rm(struct vty *vty, int af_type, const char *vrf_all, diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h index ac4c961475..d8ad8a6864 100644 --- a/zebra/zebra_router.h +++ b/zebra/zebra_router.h @@ -71,6 +71,41 @@ struct zebra_mlag_info { /* The system mac being used */ struct ethaddr mac; + /* + * Zebra will open the communication channel with MLAGD only if any + * clients are interested and it is controlled dynamically based on + * client registers & un-registers. + */ + uint32_t clients_interested_cnt; + + /* coomunication channel with MLAGD is established */ + bool connected; + + /* connection retry timer is running */ + bool timer_running; + + /* Holds the client data(unencoded) that need to be pushed to MCLAGD*/ + struct stream_fifo *mlag_fifo; + + /* + * A new Kernel thread will be created to post the data to MCLAGD. + * where as, read will be performed from the zebra main thread, because + * read involves accessing client registartion data structures. + */ + struct frr_pthread *zebra_pth_mlag; + + /* MLAG Thread context 'master' */ + struct thread_master *th_master; + + /* + * Event for Initial MLAG Connection setup & Data Read + * Read can be performed only after successful connection establishment, + * so no issues. 
+ * + */ + struct thread *t_read; + /* Event for MLAG write */ + struct thread *t_write; }; struct zebra_router { diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 12517f3135..8a7c7e359f 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -2227,7 +2227,7 @@ DEFUN (show_evpn_vni_vni, vni = strtoul(argv[3]->arg, NULL, 10); zvrf = zebra_vrf_get_evpn(); - zebra_vxlan_print_vni(vty, zvrf, vni, uj); + zebra_vxlan_print_vni(vty, zvrf, vni, uj, NULL); return CMD_SUCCESS; } diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index 3efb407fae..086b13d670 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -118,6 +118,8 @@ static int zvni_neigh_probe(zebra_vni_t *zvni, zebra_neigh_t *n); static zebra_vni_t *zvni_from_svi(struct interface *ifp, struct interface *br_if); static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if); +static struct interface *zvni_map_to_macvlan(struct interface *br_if, + struct interface *svi_if); /* l3-vni next-hop neigh related APIs */ static zebra_neigh_t *zl3vni_nh_lookup(zebra_l3vni_t *zl3vni, @@ -1814,6 +1816,8 @@ static void zl3vni_print(zebra_l3vni_t *zl3vni, void **ctx) CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY) ? 
"prefix-routes-only" : "none"); + vty_out(vty, " System MAC: %s\n", + zl3vni_sysmac2str(zl3vni, buf, sizeof(buf))); vty_out(vty, " Router MAC: %s\n", zl3vni_rmac2str(zl3vni, buf, sizeof(buf))); vty_out(vty, " L2 VNIs: "); @@ -1833,6 +1837,9 @@ static void zl3vni_print(zebra_l3vni_t *zl3vni, void **ctx) json_object_string_add(json, "state", zl3vni_state2str(zl3vni)); json_object_string_add(json, "vrf", zl3vni_vrf_name(zl3vni)); json_object_string_add( + json, "sysMac", + zl3vni_sysmac2str(zl3vni, buf, sizeof(buf))); + json_object_string_add( json, "routerMac", zl3vni_rmac2str(zl3vni, buf, sizeof(buf))); json_object_string_add( @@ -1987,6 +1994,7 @@ struct zvni_evpn_show { struct vty *vty; json_object *json; struct zebra_vrf *zvrf; + bool use_json; }; /* print a L3 VNI hash entry in detail*/ @@ -1994,20 +2002,21 @@ static void zl3vni_print_hash_detail(struct hash_bucket *bucket, void *data) { struct vty *vty = NULL; zebra_l3vni_t *zl3vni = NULL; - json_object *json = NULL; + json_object *json_array = NULL; bool use_json = false; struct zvni_evpn_show *zes = data; vty = zes->vty; - json = zes->json; - - if (json) - use_json = true; + json_array = zes->json; + use_json = zes->use_json; zl3vni = (zebra_l3vni_t *)bucket->data; - zebra_vxlan_print_vni(vty, zes->zvrf, zl3vni->vni, use_json); - vty_out(vty, "\n"); + zebra_vxlan_print_vni(vty, zes->zvrf, zl3vni->vni, + use_json, json_array); + + if (!use_json) + vty_out(vty, "\n"); } @@ -2082,20 +2091,20 @@ static void zvni_print_hash_detail(struct hash_bucket *bucket, void *data) { struct vty *vty; zebra_vni_t *zvni; - json_object *json = NULL; + json_object *json_array = NULL; bool use_json = false; struct zvni_evpn_show *zes = data; vty = zes->vty; - json = zes->json; - - if (json) - use_json = true; + json_array = zes->json; + use_json = zes->use_json; zvni = (zebra_vni_t *)bucket->data; - zebra_vxlan_print_vni(vty, zes->zvrf, zvni->vni, use_json); - vty_out(vty, "\n"); + zebra_vxlan_print_vni(vty, zes->zvrf, zvni->vni, 
use_json, json_array); + + if (!use_json) + vty_out(vty, "\n"); } /* @@ -2491,6 +2500,8 @@ static int zvni_neigh_send_add_to_client(vni_t vni, struct ipaddr *ip, /* Set router flag (R-bit) based on local neigh entry add */ if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_ROUTER_FLAG)) SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); + if (CHECK_FLAG(neigh_flags, ZEBRA_NEIGH_SVI_IP)) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP); return zvni_macip_send_msg_to_client(vni, macaddr, ip, flags, seq, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_ADD); @@ -2811,6 +2822,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, n->flags, n->loc_seq); } else if (advertise_svi_macip_enabled(zvni)) { + SET_FLAG(n->flags, ZEBRA_NEIGH_SVI_IP); if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( "SVI %s(%u) L2-VNI %u, sending SVI MAC %s IP %s add to BGP with flags 0x%x", @@ -3684,7 +3696,7 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp, * of two cases: * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN interface * linked to the bridge - * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge inteface + * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge interface * itself */ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) @@ -3735,6 +3747,52 @@ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) return found ? tmp_if : NULL; } +/* Map to MAC-VLAN interface corresponding to specified SVI interface. + */ +static struct interface *zvni_map_to_macvlan(struct interface *br_if, + struct interface *svi_if) +{ + struct zebra_ns *zns; + struct route_node *rn; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + int found = 0; + + /* Defensive check, caller expected to invoke only with valid bridge. 
*/ + if (!br_if) + return NULL; + + if (!svi_if) { + zlog_debug("svi_if is not passed."); + return NULL; + } + + /* Determine if bridge is VLAN-aware or not */ + zif = br_if->info; + assert(zif); + + /* Identify corresponding VLAN interface. */ + zns = zebra_ns_lookup(NS_DEFAULT); + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + /* Check oper status of the SVI. */ + if (!tmp_if || !if_is_operative(tmp_if)) + continue; + zif = tmp_if->info; + + if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN) + continue; + + if (zif->link == svi_if) { + found = 1; + break; + } + } + + return found ? tmp_if : NULL; +} + + /* * Install remote MAC into the forwarding plane. */ @@ -4151,6 +4209,16 @@ static void zvni_build_hash_table(void) */ zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni); + /* Associate l3vni to mac-vlan and extract VRR MAC */ + zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("create l3vni %u svi_if %s mac_vlan_if %s", + vni, zl3vni->svi_if ? zl3vni->svi_if->name + : "NIL", + zl3vni->mac_vlan_if ? 
+ zl3vni->mac_vlan_if->name : "NIL"); + if (is_l3vni_oper_up(zl3vni)) zebra_vxlan_process_l3vni_oper_up(zl3vni); @@ -5056,6 +5124,24 @@ struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni) return zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if); } +struct interface *zl3vni_map_to_mac_vlan_if(zebra_l3vni_t *zl3vni) +{ + struct zebra_if *zif = NULL; /* zebra_if for vxlan_if */ + + if (!zl3vni) + return NULL; + + if (!zl3vni->vxlan_if) + return NULL; + + zif = zl3vni->vxlan_if->info; + if (!zif) + return NULL; + + return zvni_map_to_macvlan(zif->brslave_info.br_if, zl3vni->svi_if); +} + + zebra_l3vni_t *zl3vni_from_vrf(vrf_id_t vrf_id) { struct zebra_vrf *zvrf = NULL; @@ -5139,6 +5225,19 @@ static zebra_l3vni_t *zl3vni_from_svi(struct interface *ifp, return zl3vni; } +static inline void zl3vni_get_vrr_rmac(zebra_l3vni_t *zl3vni, + struct ethaddr *rmac) +{ + if (!zl3vni) + return; + + if (!is_l3vni_oper_up(zl3vni)) + return; + + if (zl3vni->mac_vlan_if && if_is_operative(zl3vni->mac_vlan_if)) + memcpy(rmac->octet, zl3vni->mac_vlan_if->hw_addr, ETH_ALEN); +} + /* * Inform BGP about l3-vni. */ @@ -5146,35 +5245,54 @@ static int zl3vni_send_add_to_client(zebra_l3vni_t *zl3vni) { struct stream *s = NULL; struct zserv *client = NULL; - struct ethaddr rmac; + struct ethaddr svi_rmac, vrr_rmac = {.octet = {0} }; + struct zebra_vrf *zvrf; char buf[ETHER_ADDR_STRLEN]; + char buf1[ETHER_ADDR_STRLEN]; + bool is_anycast_mac = true; client = zserv_find_client(ZEBRA_ROUTE_BGP, 0); /* BGP may not be running. 
*/ if (!client) return 0; - /* get the rmac */ - memset(&rmac, 0, sizeof(struct ethaddr)); - zl3vni_get_rmac(zl3vni, &rmac); + zvrf = zebra_vrf_lookup_by_id(zl3vni->vrf_id); + assert(zvrf); + + /* get the svi and vrr rmac values */ + memset(&svi_rmac, 0, sizeof(struct ethaddr)); + zl3vni_get_svi_rmac(zl3vni, &svi_rmac); + zl3vni_get_vrr_rmac(zl3vni, &vrr_rmac); + + /* In absence of vrr mac use svi mac as anycast MAC value */ + if (is_zero_mac(&vrr_rmac)) { + memcpy(&vrr_rmac, &svi_rmac, ETH_ALEN); + is_anycast_mac = false; + } s = stream_new(ZEBRA_MAX_PACKET_SIZ); + /* The message is used for both vni add and/or update like + * vrr mac is added for l3vni SVI. + */ zclient_create_header(s, ZEBRA_L3VNI_ADD, zl3vni_vrf_id(zl3vni)); stream_putl(s, zl3vni->vni); - stream_put(s, &rmac, sizeof(struct ethaddr)); + stream_put(s, &svi_rmac, sizeof(struct ethaddr)); stream_put_in_addr(s, &zl3vni->local_vtep_ip); stream_put(s, &zl3vni->filter, sizeof(int)); stream_putl(s, zl3vni->svi_if->ifindex); + stream_put(s, &vrr_rmac, sizeof(struct ethaddr)); + stream_putl(s, is_anycast_mac); /* Write packet size. */ stream_putw_at(s, 0, stream_get_endp(s)); if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - "Send L3_VNI_ADD %u VRF %s RMAC %s local-ip %s filter %s to %s", + "Send L3_VNI_ADD %u VRF %s RMAC %s VRR %s local-ip %s filter %s to %s", zl3vni->vni, vrf_id_to_name(zl3vni_vrf_id(zl3vni)), - prefix_mac2str(&rmac, buf, sizeof(buf)), + prefix_mac2str(&svi_rmac, buf, sizeof(buf)), + prefix_mac2str(&vrr_rmac, buf1, sizeof(buf1)), inet_ntoa(zl3vni->local_vtep_ip), CHECK_FLAG(zl3vni->filter, PREFIX_ROUTES_ONLY) ? "prefix-routes-only" @@ -7169,9 +7287,14 @@ void zebra_vxlan_print_macs_vni_vtep(struct vty *vty, struct zebra_vrf *zvrf, /* * Display VNI information (VTY command handler). + * + * use_json flag indicates that output should be in JSON format. 
+ * json_array is non NULL when JSON output needs to be aggregated (by the + * caller) and then printed, otherwise, JSON evpn vni info is printed + * right away. */ void zebra_vxlan_print_vni(struct vty *vty, struct zebra_vrf *zvrf, vni_t vni, - bool use_json) + bool use_json, json_object *json_array) { json_object *json = NULL; void *args[2]; @@ -7183,6 +7306,7 @@ void zebra_vxlan_print_vni(struct vty *vty, struct zebra_vrf *zvrf, vni_t vni, if (use_json) json = json_object_new_object(); + args[0] = vty; args[1] = json; @@ -7191,21 +7315,25 @@ void zebra_vxlan_print_vni(struct vty *vty, struct zebra_vrf *zvrf, vni_t vni, zl3vni_print(zl3vni, (void *)args); } else { zvni = zvni_lookup(vni); - if (!zvni) { - if (use_json) - vty_out(vty, "{}\n"); - else - vty_out(vty, "%% VNI %u does not exist\n", vni); - return; - } - - zvni_print(zvni, (void *)args); + if (zvni) + zvni_print(zvni, (void *)args); + else if (!json) + vty_out(vty, "%% VNI %u does not exist\n", vni); } if (use_json) { - vty_out(vty, "%s\n", json_object_to_json_string_ext( - json, JSON_C_TO_STRING_PRETTY)); - json_object_free(json); + /* + * Each "json" object contains info about 1 VNI. + * When "json_array" is non-null, we aggreggate the json output + * into json_array and print it as a JSON array. 
+ */ + if (json_array) + json_object_array_add(json_array, json); + else { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } } } @@ -7362,7 +7490,7 @@ stream_failure: void zebra_vxlan_print_vnis_detail(struct vty *vty, struct zebra_vrf *zvrf, bool use_json) { - json_object *json = NULL; + json_object *json_array = NULL; struct zebra_ns *zns = NULL; struct zvni_evpn_show zes; @@ -7373,13 +7501,13 @@ void zebra_vxlan_print_vnis_detail(struct vty *vty, struct zebra_vrf *zvrf, if (!zns) return; - if (use_json) - json = json_object_new_object(); + json_array = json_object_new_array(); zes.vty = vty; - zes.json = json; + zes.json = json_array; zes.zvrf = zvrf; + zes.use_json = use_json; /* Display all L2-VNIs */ hash_iterate( @@ -7396,8 +7524,8 @@ void zebra_vxlan_print_vnis_detail(struct vty *vty, struct zebra_vrf *zvrf, if (use_json) { vty_out(vty, "%s\n", json_object_to_json_string_ext( - json, JSON_C_TO_STRING_PRETTY)); - json_object_free(json); + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); } } @@ -8452,6 +8580,78 @@ int zebra_vxlan_svi_up(struct interface *ifp, struct interface *link_if) } /* + * Handle MAC-VLAN interface going down. + * L3VNI: When MAC-VLAN interface goes down, + * find its associated SVI and update type2/type-5 routes + * with SVI as RMAC + */ +void zebra_vxlan_macvlan_down(struct interface *ifp) +{ + zebra_l3vni_t *zl3vni = NULL; + struct zebra_if *zif, *link_zif; + struct interface *link_ifp, *link_if; + + zif = ifp->info; + assert(zif); + link_ifp = zif->link; + if (!link_ifp) { + if (IS_ZEBRA_DEBUG_VXLAN) { + struct interface *ifp; + + ifp = if_lookup_by_index_all_vrf(zif->link_ifindex); + zlog_debug("macvlan %s parent link is not found. Parent index %d ifp %s", + ifp->name, zif->link_ifindex, + ifp ? 
ifp->name : " "); + } + return; + } + link_zif = link_ifp->info; + assert(link_zif); + + link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), + link_zif->link_ifindex); + + zl3vni = zl3vni_from_svi(link_ifp, link_if); + if (zl3vni) { + zl3vni->mac_vlan_if = NULL; + if (is_l3vni_oper_up(zl3vni)) + zebra_vxlan_process_l3vni_oper_up(zl3vni); + } +} + +/* + * Handle MAC-VLAN interface going up. + * L3VNI: When MAC-VLAN interface comes up, + * find its associated SVI and update type-2 routes + * with MAC-VLAN's MAC as RMAC and for type-5 routes + * use SVI's MAC as RMAC. + */ +void zebra_vxlan_macvlan_up(struct interface *ifp) +{ + zebra_l3vni_t *zl3vni = NULL; + struct zebra_if *zif, *link_zif; + struct interface *link_ifp, *link_if; + + zif = ifp->info; + assert(zif); + link_ifp = zif->link; + link_zif = link_ifp->info; + assert(link_zif); + + link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), + link_zif->link_ifindex); + zl3vni = zl3vni_from_svi(link_ifp, link_if); + if (zl3vni) { + /* associate with macvlan (VRR) interface */ + zl3vni->mac_vlan_if = ifp; + + /* process oper-up */ + if (is_l3vni_oper_up(zl3vni)) + zebra_vxlan_process_l3vni_oper_up(zl3vni); + } +} + +/* * Handle VxLAN interface down */ int zebra_vxlan_if_down(struct interface *ifp) @@ -8531,15 +8731,18 @@ int zebra_vxlan_if_up(struct interface *ifp) zl3vni = zl3vni_lookup(vni); if (zl3vni) { - - if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug("Intf %s(%u) L3-VNI %u is UP", ifp->name, - ifp->ifindex, vni); - /* we need to associate with SVI, if any, we can associate with * svi-if only after association with vxlan-intf is complete */ zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni); + zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Intf %s(%u) L3-VNI %u is UP svi_if %s mac_vlan_if %s" + , ifp->name, ifp->ifindex, vni, + zl3vni->svi_if ? zl3vni->svi_if->name : "NIL", + zl3vni->mac_vlan_if ? 
+ zl3vni->mac_vlan_if->name : "NIL"); if (is_l3vni_oper_up(zl3vni)) zebra_vxlan_process_l3vni_oper_up(zl3vni); @@ -8702,6 +8905,8 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags) zebra_vxlan_process_l3vni_oper_down(zl3vni); zl3vni->svi_if = NULL; zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni); + zl3vni->mac_vlan_if = + zl3vni_map_to_mac_vlan_if(zl3vni); zl3vni->local_vtep_ip = vxl->vtep_ip; if (is_l3vni_oper_up(zl3vni)) zebra_vxlan_process_l3vni_oper_up( @@ -8861,6 +9066,8 @@ int zebra_vxlan_if_add(struct interface *ifp) * after association with vxlan_if is complete */ zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni); + zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni); + if (is_l3vni_oper_up(zl3vni)) zebra_vxlan_process_l3vni_oper_up(zl3vni); } else { @@ -8993,6 +9200,16 @@ int zebra_vxlan_process_vrf_vni_cmd(struct zebra_vrf *zvrf, vni_t vni, */ zl3vni->svi_if = zl3vni_map_to_svi_if(zl3vni); + zl3vni->mac_vlan_if = zl3vni_map_to_mac_vlan_if(zl3vni); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("%s: l3vni %u svi_if %s mac_vlan_if %s", + __PRETTY_FUNCTION__, vni, + zl3vni->svi_if ? + zl3vni->svi_if->name : "NIL", + zl3vni->mac_vlan_if ? 
+ zl3vni->mac_vlan_if->name : "NIL"); + /* formulate l2vni list */ hash_iterate(zvrf_evpn->vni_table, zvni_add_to_l3vni_list, zl3vni); diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h index bb80ae1c9a..b551ba8dff 100644 --- a/zebra/zebra_vxlan.h +++ b/zebra/zebra_vxlan.h @@ -141,7 +141,8 @@ extern void zebra_vxlan_print_neigh_vni_dad(struct vty *vty, struct zebra_vrf *zvrf, vni_t vni, bool use_json); extern void zebra_vxlan_print_vni(struct vty *vty, struct zebra_vrf *zvrf, - vni_t vni, bool use_json); + vni_t vni, bool use_json, + json_object *json_array); extern void zebra_vxlan_print_vnis(struct vty *vty, struct zebra_vrf *zvrf, bool use_json); extern void zebra_vxlan_print_vnis_detail(struct vty *vty, @@ -217,6 +218,8 @@ extern int zebra_vxlan_clear_dup_detect_vni(struct vty *vty, extern void zebra_vxlan_handle_result(struct zebra_dplane_ctx *ctx); extern void zebra_evpn_init(void); +extern void zebra_vxlan_macvlan_up(struct interface *ifp); +extern void zebra_vxlan_macvlan_down(struct interface *ifp); #ifdef __cplusplus } diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index 8e78042646..989ea464e7 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -125,6 +125,8 @@ struct zebra_l3vni_t_ { /* SVI interface corresponding to the l3vni */ struct interface *svi_if; + struct interface *mac_vlan_if; + /* list of L2 VNIs associated with the L3 VNI */ struct list *l2vnis; @@ -167,6 +169,44 @@ static inline const char *zl3vni_rmac2str(zebra_l3vni_t *zl3vni, char *buf, ptr = buf; } + if (zl3vni->mac_vlan_if) + snprintf(ptr, (ETHER_ADDR_STRLEN), + "%02x:%02x:%02x:%02x:%02x:%02x", + (uint8_t)zl3vni->mac_vlan_if->hw_addr[0], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[1], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[2], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[3], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[4], + (uint8_t)zl3vni->mac_vlan_if->hw_addr[5]); + else if (zl3vni->svi_if) + snprintf(ptr, (ETHER_ADDR_STRLEN), + 
"%02x:%02x:%02x:%02x:%02x:%02x", + (uint8_t)zl3vni->svi_if->hw_addr[0], + (uint8_t)zl3vni->svi_if->hw_addr[1], + (uint8_t)zl3vni->svi_if->hw_addr[2], + (uint8_t)zl3vni->svi_if->hw_addr[3], + (uint8_t)zl3vni->svi_if->hw_addr[4], + (uint8_t)zl3vni->svi_if->hw_addr[5]); + else + snprintf(ptr, ETHER_ADDR_STRLEN, "None"); + + return ptr; +} + +/* get the sys mac string */ +static inline const char *zl3vni_sysmac2str(zebra_l3vni_t *zl3vni, char *buf, + int size) +{ + char *ptr; + + if (!buf) + ptr = (char *)XMALLOC(MTYPE_TMP, + ETHER_ADDR_STRLEN * sizeof(char)); + else { + assert(size >= ETHER_ADDR_STRLEN); + ptr = buf; + } + if (zl3vni->svi_if) snprintf(ptr, (ETHER_ADDR_STRLEN), "%02x:%02x:%02x:%02x:%02x:%02x", @@ -215,7 +255,8 @@ static inline vrf_id_t zl3vni_vrf_id(zebra_l3vni_t *zl3vni) return zl3vni->vrf_id; } -static inline void zl3vni_get_rmac(zebra_l3vni_t *zl3vni, struct ethaddr *rmac) +static inline void zl3vni_get_svi_rmac(zebra_l3vni_t *zl3vni, + struct ethaddr *rmac) { if (!zl3vni) return; @@ -363,6 +404,7 @@ struct zebra_neigh_t_ { #define ZEBRA_NEIGH_DEF_GW 0x08 #define ZEBRA_NEIGH_ROUTER_FLAG 0x10 #define ZEBRA_NEIGH_DUPLICATE 0x20 +#define ZEBRA_NEIGH_SVI_IP 0x40 enum zebra_neigh_state state; @@ -433,6 +475,7 @@ struct nh_walk_ctx { extern zebra_l3vni_t *zl3vni_from_vrf(vrf_id_t vrf_id); extern struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni); extern struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni); +extern struct interface *zl3vni_map_to_mac_vlan_if(zebra_l3vni_t *zl3vni); DECLARE_HOOK(zebra_rmac_update, (zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, bool delete, const char *reason), (rmac, zl3vni, delete, reason)) diff --git a/zebra/zserv.c b/zebra/zserv.c index b0991e98f8..419f30e6d3 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -914,7 +914,7 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client) zserv_command_string(last_write_cmd)); vty_out(vty, "\n"); - vty_out(vty, "Type Add Update Del \n"); + 
vty_out(vty, "Type Add Update Del \n"); vty_out(vty, "================================================== \n"); vty_out(vty, "IPv4 %-12d%-12d%-12d\n", client->v4_route_add_cnt, client->v4_route_upd8_cnt, client->v4_route_del_cnt); @@ -965,7 +965,7 @@ static void zebra_show_client_brief(struct vty *vty, struct zserv *client) last_write_time = (time_t)atomic_load_explicit(&client->last_write_time, memory_order_relaxed); - vty_out(vty, "%-8s%12s %12s%12s%8d/%-8d%8d/%-8d\n", + vty_out(vty, "%-10s%12s %12s%12s%8d/%-8d%8d/%-8d\n", zebra_route_string(client->proto), zserv_time_buf(&connect_time, cbuf, ZEBRA_TIME_BUF), zserv_time_buf(&last_read_time, rbuf, ZEBRA_TIME_BUF), @@ -1019,7 +1019,7 @@ DEFUN (show_zebra_client_summary, struct zserv *client; vty_out(vty, - "Name Connect Time Last Read Last Write IPv4 Routes IPv6 Routes \n"); + "Name Connect Time Last Read Last Write IPv4 Routes IPv6 Routes \n"); vty_out(vty, "--------------------------------------------------------------------------------\n"); diff --git a/zebra/zserv.h b/zebra/zserv.h index 708ff1e226..ccc8d92aa2 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -99,6 +99,13 @@ struct zserv { uint8_t proto; uint16_t instance; + /* + * Interested for MLAG Updates, and also stores the client + * interested message mask + */ + bool mlag_updates_interested; + uint32_t mlag_reg_mask1; + /* Statistics */ uint32_t redist_v4_add_cnt; uint32_t redist_v4_del_cnt; |
