diff options
73 files changed, 2368 insertions, 319 deletions
diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c index c5f7927c4d..7ed37319b1 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -373,14 +373,14 @@ static void display_l3vni(struct vty *vty, struct bgp *bgp_vrf, json_export_rtl = json_object_new_array(); json_object_int_add(json, "vni", bgp_vrf->l3vni); json_object_string_add(json, "type", "L3"); - json_object_string_add(json, "kernelFlag", "Yes"); + json_object_string_add(json, "inKernel", "True"); json_object_string_add( json, "rd", prefix_rd2str(&bgp_vrf->vrf_prd, buf1, RD_ADDRSTRLEN)); json_object_string_add(json, "originatorIp", inet_ntoa(bgp_vrf->originator_ip)); json_object_string_add(json, "advertiseGatewayMacip", "n/a"); - json_object_string_add(json, "advertiseSviMacip", "n/a"); + json_object_string_add(json, "advertiseSviMacIp", "n/a"); json_object_to_json_string_ext(json, JSON_C_TO_STRING_NOSLASHESCAPE); json_object_string_add(json, "advertisePip", @@ -519,8 +519,8 @@ static void display_vni(struct vty *vty, struct bgpevpn *vpn, json_object *json) json_export_rtl = json_object_new_array(); json_object_int_add(json, "vni", vpn->vni); json_object_string_add(json, "type", "L2"); - json_object_string_add(json, "kernelFlag", - is_vni_live(vpn) ? "Yes" : "No"); + json_object_string_add(json, "inKernel", + is_vni_live(vpn) ? 
"True" : "False"); json_object_string_add( json, "rd", prefix_rd2str(&vpn->prd, buf1, sizeof(buf1))); @@ -544,13 +544,13 @@ static void display_vni(struct vty *vty, struct bgpevpn *vpn, json_object *json) "Disabled"); if (!vpn->advertise_svi_macip && bgp_evpn && bgp_evpn->evpn_info->advertise_svi_macip) - json_object_string_add(json, "advertiseSviMacip", + json_object_string_add(json, "advertiseSviMacIp", "Active"); else if (vpn->advertise_svi_macip) - json_object_string_add(json, "advertiseSviMacip", + json_object_string_add(json, "advertiseSviMacIp", "Enabled"); else - json_object_string_add(json, "advertiseSviMacip", + json_object_string_add(json, "advertiseSviMacIp", "Disabled"); } else { vty_out(vty, "VNI: %d", vpn->vni); @@ -887,6 +887,22 @@ static void show_l3vni_entry(struct vty *vty, struct bgp *bgp, json_object_string_add( json_vni, "rd", prefix_rd2str(&bgp->vrf_prd, buf2, RD_ADDRSTRLEN)); + json_object_string_add(json_vni, "advertiseGatewayMacip", + "n/a"); + json_object_string_add(json_vni, "advertiseSviMacIp", "n/a"); + json_object_to_json_string_ext(json_vni, + JSON_C_TO_STRING_NOSLASHESCAPE); + json_object_string_add( + json_vni, "advertisePip", + bgp->evpn_info->advertise_pip ? 
"Enabled" : "Disabled"); + json_object_string_add(json_vni, "sysIP", + inet_ntoa(bgp->evpn_info->pip_ip)); + json_object_string_add(json_vni, "sysMAC", + prefix_mac2str(&bgp->evpn_info->pip_rmac, + buf2, sizeof(buf2))); + json_object_string_add( + json_vni, "rmac", + prefix_mac2str(&bgp->rmac, buf2, sizeof(buf2))); } else { vty_out(vty, "%-1s %-10u %-4s %-21s", buf1, bgp->l3vni, "L3", prefix_rd2str(&bgp->vrf_prd, buf2, RD_ADDRSTRLEN)); @@ -1011,10 +1027,13 @@ static void show_vni_entry(struct hash_bucket *bucket, void *args[]) char *ecom_str; struct listnode *node, *nnode; struct ecommunity *ecom; + struct bgp *bgp_evpn; vty = args[0]; json = args[1]; + bgp_evpn = bgp_get_evpn(); + if (json) { json_vni = json_object_new_object(); json_import_rtl = json_object_new_array(); @@ -1030,13 +1049,37 @@ static void show_vni_entry(struct hash_bucket *bucket, void *args[]) json_object_string_add(json_vni, "type", "L2"); json_object_string_add(json_vni, "inKernel", is_vni_live(vpn) ? "True" : "False"); - json_object_string_add(json_vni, "originatorIp", - inet_ntoa(vpn->originator_ip)); - json_object_string_add(json_vni, "originatorIp", - inet_ntoa(vpn->originator_ip)); json_object_string_add( json_vni, "rd", prefix_rd2str(&vpn->prd, buf2, sizeof(buf2))); + json_object_string_add(json_vni, "originatorIp", + inet_ntoa(vpn->originator_ip)); + json_object_string_add(json_vni, "mcastGroup", + inet_ntoa(vpn->mcast_grp)); + /* per vni knob is enabled -- Enabled + * Global knob is enabled -- Active + * default -- Disabled + */ + if (!vpn->advertise_gw_macip && bgp_evpn + && bgp_evpn->advertise_gw_macip) + json_object_string_add( + json_vni, "advertiseGatewayMacip", "Active"); + else if (vpn->advertise_gw_macip) + json_object_string_add( + json_vni, "advertiseGatewayMacip", "Enabled"); + else + json_object_string_add( + json_vni, "advertiseGatewayMacip", "Disabled"); + if (!vpn->advertise_svi_macip && bgp_evpn + && bgp_evpn->evpn_info->advertise_svi_macip) + 
json_object_string_add(json_vni, "advertiseSviMacIp", + "Active"); + else if (vpn->advertise_svi_macip) + json_object_string_add(json_vni, "advertiseSviMacIp", + "Enabled"); + else + json_object_string_add(json_vni, "advertiseSviMacIp", + "Disabled"); } else { vty_out(vty, "%-1s %-10u %-4s %-21s", buf1, vpn->vni, "L2", prefix_rd2str(&vpn->prd, buf2, RD_ADDRSTRLEN)); @@ -3901,7 +3944,7 @@ DEFUN(show_bgp_l2vpn_evpn_vni, bgp_evpn->advertise_gw_macip ? "Enabled" : "Disabled"); - json_object_string_add(json, "advertiseSviMacip", + json_object_string_add(json, "advertiseSviMacIp", bgp_evpn->evpn_info->advertise_svi_macip ? "Enabled" : "Disabled"); json_object_string_add(json, "advertiseAllVnis", diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index e0a9e3e4f0..b483d39bba 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -578,7 +578,8 @@ const char *const peer_down_str[] = {"", "Waiting for VRF to be initialized", "No AFI/SAFI activated for peer", "AS Set config change", - "Waiting for peer OPEN"}; + "Waiting for peer OPEN", + "Reached received prefix count"}; static int bgp_graceful_restart_timer_expire(struct thread *thread) { @@ -1512,6 +1513,10 @@ int bgp_start(struct peer *peer) "%s [FSM] Trying to start suppressed peer" " - this is never supposed to happen!", peer->host); + if (CHECK_FLAG(peer->flags, PEER_FLAG_SHUTDOWN)) + peer->last_reset = PEER_DOWN_USER_SHUTDOWN; + else if (CHECK_FLAG(peer->sflags, PEER_STATUS_PREFIX_OVERFLOW)) + peer->last_reset = PEER_DOWN_PFX_COUNT; return -1; } diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index 8b585704d8..8759a88444 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -203,7 +203,7 @@ int bgp_set_socket_ttl(struct peer *peer, int bgp_sock) int ret = 0; /* In case of peer is EBGP, we should set TTL for this connection. 
*/ - if (!peer->gtsm_hops && (peer_sort(peer) == BGP_PEER_EBGP)) { + if (!peer->gtsm_hops && (peer_sort_lookup(peer) == BGP_PEER_EBGP)) { ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, peer->ttl); if (ret) { flog_err( diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index 3060fe482c..55b44de6fb 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -428,9 +428,11 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); - zclient_send_interface_radv_req( - zclient, nexthop->vrf_id, ifp, true, - BGP_UNNUM_DEFAULT_RA_INTERVAL); + if (ifp) + zclient_send_interface_radv_req( + zclient, nexthop->vrf_id, ifp, + true, + BGP_UNNUM_DEFAULT_RA_INTERVAL); } /* There is at least one label-switched path */ if (nexthop->nh_label && @@ -898,8 +900,11 @@ void bgp_nht_register_enhe_capability_interfaces(struct peer *peer) return; for (nhop = bnc->nexthop; nhop; nhop = nhop->next) { - ifp = if_lookup_by_index(nhop->ifindex, - nhop->vrf_id); + ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id); + + if (!ifp) + continue; + zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, true, diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index f6ba28b26a..2e43fb25f4 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -914,8 +914,8 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, pair (newm, existm) with the cluster list length. Prefer the path with smaller cluster list length. 
*/ if (newm == existm) { - if (peer_sort(new->peer) == BGP_PEER_IBGP - && peer_sort(exist->peer) == BGP_PEER_IBGP + if (peer_sort_lookup(new->peer) == BGP_PEER_IBGP + && peer_sort_lookup(exist->peer) == BGP_PEER_IBGP && (mpath_cfg == NULL || CHECK_FLAG( mpath_cfg->ibgp_flags, diff --git a/bgpd/bgp_rpki.c b/bgpd/bgp_rpki.c index 7247210c93..2ca0c7b96d 100644 --- a/bgpd/bgp_rpki.c +++ b/bgpd/bgp_rpki.c @@ -1412,21 +1412,18 @@ DEFUN (match_rpki, ret = route_map_add_match(index, "rpki", argv[2]->arg, RMAP_EVENT_MATCH_ADDED); - if (ret) { - switch (ret) { - case RMAP_RULE_MISSING: - vty_out(vty, "%% BGP Can't find rule.\n"); - return CMD_WARNING_CONFIG_FAILED; - case RMAP_COMPILE_ERROR: - vty_out(vty, "%% BGP Argument is malformed.\n"); - return CMD_WARNING_CONFIG_FAILED; - case RMAP_COMPILE_SUCCESS: - /* - * Intentionally doing nothing here - */ - break; - } + switch (ret) { + case RMAP_RULE_MISSING: + vty_out(vty, "%% BGP Can't find rule.\n"); + return CMD_WARNING_CONFIG_FAILED; + case RMAP_COMPILE_ERROR: + vty_out(vty, "%% BGP Argument is malformed.\n"); + return CMD_WARNING_CONFIG_FAILED; + case RMAP_COMPILE_SUCCESS: + return CMD_SUCCESS; + break; } + return CMD_SUCCESS; } @@ -1445,21 +1442,18 @@ DEFUN (no_match_rpki, ret = route_map_delete_match(index, "rpki", argv[3]->arg, RMAP_EVENT_MATCH_DELETED); - if (ret) { - switch (ret) { - case RMAP_RULE_MISSING: - vty_out(vty, "%% BGP Can't find rule.\n"); - break; - case RMAP_COMPILE_ERROR: - vty_out(vty, "%% BGP Argument is malformed.\n"); - break; - case RMAP_COMPILE_SUCCESS: - /* - * Nothing to do here - */ - break; - } + switch (ret) { + case RMAP_RULE_MISSING: + vty_out(vty, "%% BGP Can't find rule.\n"); + return CMD_WARNING_CONFIG_FAILED; + break; + case RMAP_COMPILE_ERROR: + vty_out(vty, "%% BGP Argument is malformed.\n"); return CMD_WARNING_CONFIG_FAILED; + break; + case RMAP_COMPILE_SUCCESS: + return CMD_SUCCESS; + break; } return CMD_SUCCESS; diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 
f1987c58eb..f18f9ccf4b 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -8979,7 +8979,7 @@ static int bgp_show_summary(struct vty *vty, struct bgp *bgp, int afi, int safi, vty_out(vty, "EstdCnt DropCnt ResetTime Reason\n"); else vty_out(vty, - "V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd\n"); + "V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd\n"); } } @@ -9104,7 +9104,7 @@ static int bgp_show_summary(struct vty *vty, struct bgp *bgp, int afi, int safi, vty_out(vty, "%*s", max_neighbor_width - len, " "); - vty_out(vty, "4 %10u %7u %7u %8" PRIu64 " %4d %4zd %8s", + vty_out(vty, "4 %10u %9u %9u %8" PRIu64 " %4d %4zd %8s", peer->as, PEER_TOTAL_RX(peer), PEER_TOTAL_TX(peer), peer->version[afi][safi], 0, peer->obuf->count, diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 9ac3cce86f..96b307ee21 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -433,10 +433,12 @@ int bgp_confederation_id_set(struct bgp *bgp, as_t as) AS change. Just Reset EBGP sessions, not CONFED sessions. If we were not doing confederation before, reset all EBGP sessions. */ for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) { + bgp_peer_sort_t ptype = peer_sort(peer); + /* We're looking for peers who's AS is not local or part of our confederation. 
*/ if (already_confed) { - if (peer_sort(peer) == BGP_PEER_EBGP) { + if (ptype == BGP_PEER_EBGP) { peer->local_as = as; if (BGP_IS_VALID_STATE_FOR_NOTIF( peer->status)) { @@ -452,9 +454,9 @@ int bgp_confederation_id_set(struct bgp *bgp, as_t as) /* Not doign confederation before, so reset every non-local session */ - if (peer_sort(peer) != BGP_PEER_IBGP) { + if (ptype != BGP_PEER_IBGP) { /* Reset the local_as to be our EBGP one */ - if (peer_sort(peer) == BGP_PEER_EBGP) + if (ptype == BGP_PEER_EBGP) peer->local_as = as; if (BGP_IS_VALID_STATE_FOR_NOTIF( peer->status)) { @@ -967,6 +969,11 @@ bgp_peer_sort_t peer_sort(struct peer *peer) return peer->sort; } +bgp_peer_sort_t peer_sort_lookup(struct peer *peer) +{ + return peer->sort; +} + static void peer_free(struct peer *peer) { afi_t afi; @@ -1658,7 +1665,7 @@ int bgp_afi_safi_peer_exists(struct bgp *bgp, afi_t afi, safi_t safi) /* Change peer's AS number. */ void peer_as_change(struct peer *peer, as_t as, int as_specified) { - bgp_peer_sort_t type; + bgp_peer_sort_t origtype, newtype; /* Stop peer. */ if (!CHECK_FLAG(peer->sflags, PEER_STATUS_GROUP)) { @@ -1669,7 +1676,7 @@ void peer_as_change(struct peer *peer, as_t as, int as_specified) } else bgp_session_reset(peer); } - type = peer_sort(peer); + origtype = peer_sort_lookup(peer); peer->as = as; peer->as_type = as_specified; @@ -1680,21 +1687,22 @@ void peer_as_change(struct peer *peer, as_t as, int as_specified) else peer->local_as = peer->bgp->as; + newtype = peer_sort(peer); /* Advertisement-interval reset */ if (!CHECK_FLAG(peer->flags, PEER_FLAG_ROUTEADV)) { - peer->v_routeadv = (peer_sort(peer) == BGP_PEER_IBGP) + peer->v_routeadv = (newtype == BGP_PEER_IBGP) ? 
BGP_DEFAULT_IBGP_ROUTEADV : BGP_DEFAULT_EBGP_ROUTEADV; } /* TTL reset */ - if (peer_sort(peer) == BGP_PEER_IBGP) + if (newtype == BGP_PEER_IBGP) peer->ttl = MAXTTL; - else if (type == BGP_PEER_IBGP) + else if (origtype == BGP_PEER_IBGP) peer->ttl = BGP_DEFAULT_TTL; /* reflector-client reset */ - if (peer_sort(peer) != BGP_PEER_IBGP) { + if (newtype != BGP_PEER_IBGP) { UNSET_FLAG(peer->af_flags[AFI_IP][SAFI_UNICAST], PEER_FLAG_REFLECTOR_CLIENT); UNSET_FLAG(peer->af_flags[AFI_IP][SAFI_MULTICAST], @@ -1724,7 +1732,7 @@ void peer_as_change(struct peer *peer, as_t as, int as_specified) } /* local-as reset */ - if (peer_sort(peer) != BGP_PEER_EBGP) { + if (newtype != BGP_PEER_EBGP) { peer->change_local_as = 0; peer_flag_unset(peer, PEER_FLAG_LOCAL_AS); peer_flag_unset(peer, PEER_FLAG_LOCAL_AS_NO_PREPEND); @@ -2720,6 +2728,7 @@ int peer_group_bind(struct bgp *bgp, union sockunion *su, struct peer *peer, int first_member = 0; afi_t afi; safi_t safi; + bgp_peer_sort_t ptype, gtype; /* Lookup the peer. 
*/ if (!peer) @@ -2748,15 +2757,16 @@ int peer_group_bind(struct bgp *bgp, union sockunion *su, struct peer *peer, peer->sort = group->conf->sort; } - if (!group->conf->as && peer_sort(peer)) { - if (peer_sort(group->conf) != BGP_PEER_INTERNAL - && peer_sort(group->conf) != peer_sort(peer)) { + ptype = peer_sort(peer); + if (!group->conf->as && ptype != BGP_PEER_UNSPECIFIED) { + gtype = peer_sort(group->conf); + if ((gtype != BGP_PEER_INTERNAL) && (gtype != ptype)) { if (as) *as = peer->as; return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; } - if (peer_sort(group->conf) == BGP_PEER_INTERNAL) + if (gtype == BGP_PEER_INTERNAL) first_member = 1; } @@ -2788,22 +2798,22 @@ int peer_group_bind(struct bgp *bgp, union sockunion *su, struct peer *peer, } if (first_member) { + gtype = peer_sort(group->conf); /* Advertisement-interval reset */ if (!CHECK_FLAG(group->conf->flags, PEER_FLAG_ROUTEADV)) { group->conf->v_routeadv = - (peer_sort(group->conf) - == BGP_PEER_IBGP) + (gtype == BGP_PEER_IBGP) ? BGP_DEFAULT_IBGP_ROUTEADV : BGP_DEFAULT_EBGP_ROUTEADV; } /* ebgp-multihop reset */ - if (peer_sort(group->conf) == BGP_PEER_IBGP) + if (gtype == BGP_PEER_IBGP) group->conf->ttl = MAXTTL; /* local-as reset */ - if (peer_sort(group->conf) != BGP_PEER_EBGP) { + if (gtype != BGP_PEER_EBGP) { group->conf->change_local_as = 0; peer_flag_unset(group->conf, PEER_FLAG_LOCAL_AS); @@ -4125,6 +4135,7 @@ static int peer_af_flag_modify(struct peer *peer, afi_t afi, safi_t safi, struct peer *member; struct listnode *node, *nnode; struct peer_flag_action action; + bgp_peer_sort_t ptype; memset(&action, 0, sizeof(struct peer_flag_action)); size = sizeof peer_af_flag_action_list @@ -4138,18 +4149,17 @@ static int peer_af_flag_modify(struct peer *peer, afi_t afi, safi_t safi, if (!found) return BGP_ERR_INVALID_FLAG; + ptype = peer_sort(peer); /* Special check for reflector client. 
*/ - if (flag & PEER_FLAG_REFLECTOR_CLIENT - && peer_sort(peer) != BGP_PEER_IBGP) + if (flag & PEER_FLAG_REFLECTOR_CLIENT && ptype != BGP_PEER_IBGP) return BGP_ERR_NOT_INTERNAL_PEER; /* Special check for remove-private-AS. */ - if (flag & PEER_FLAG_REMOVE_PRIVATE_AS - && peer_sort(peer) == BGP_PEER_IBGP) + if (flag & PEER_FLAG_REMOVE_PRIVATE_AS && ptype == BGP_PEER_IBGP) return BGP_ERR_REMOVE_PRIVATE_AS; /* as-override is not allowed for IBGP peers */ - if (flag & PEER_FLAG_AS_OVERRIDE && peer_sort(peer) == BGP_PEER_IBGP) + if (flag & PEER_FLAG_AS_OVERRIDE && ptype == BGP_PEER_IBGP) return BGP_ERR_AS_OVERRIDE; /* Handle flag updates where desired state matches current state. */ @@ -5316,9 +5326,9 @@ int peer_local_as_set(struct peer *peer, as_t as, int no_prepend, struct bgp *bgp = peer->bgp; struct peer *member; struct listnode *node, *nnode; + bgp_peer_sort_t ptype = peer_sort(peer); - if (peer_sort(peer) != BGP_PEER_EBGP - && peer_sort(peer) != BGP_PEER_INTERNAL) + if (ptype != BGP_PEER_EBGP && ptype != BGP_PEER_INTERNAL) return BGP_ERR_LOCAL_AS_ALLOWED_ONLY_FOR_EBGP; if (bgp->as == as) diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index d47ae71582..40bf9bcd23 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -1351,6 +1351,7 @@ struct peer { #define PEER_DOWN_NOAFI_ACTIVATED 30 /* No AFI/SAFI activated for peer */ #define PEER_DOWN_AS_SETS_REJECT 31 /* Reject routes with AS_SET */ #define PEER_DOWN_WAITING_OPEN 32 /* Waiting for open to succeed */ +#define PEER_DOWN_PFX_COUNT 33 /* Reached received prefix count */ /* * Remember to update peer_down_str in bgp_fsm.c when you add * a new value to the last_reset reason @@ -1731,6 +1732,8 @@ extern struct peer *peer_unlock_with_caller(const char *, struct peer *); #define peer_lock(B) peer_lock_with_caller(__FUNCTION__, (B)) extern bgp_peer_sort_t peer_sort(struct peer *peer); +extern bgp_peer_sort_t peer_sort_lookup(struct peer *peer); + extern int peer_active(struct peer *); extern int peer_active_nego(struct peer *); extern 
void bgp_recalculate_all_bestpaths(struct bgp *bgp); diff --git a/configure.ac b/configure.ac index c8371f304e..901cac2318 100755 --- a/configure.ac +++ b/configure.ac @@ -543,7 +543,7 @@ AC_ARG_ENABLE([realms], AC_ARG_ENABLE([rtadv], AS_HELP_STRING([--disable-rtadv], [disable IPV6 router advertisement feature])) AC_ARG_ENABLE([irdp], - AS_HELP_STRING([--disable-irdp], [enable IRDP server support in zebra (default if supported)])) + AS_HELP_STRING([--disable-irdp], [disable IRDP server support in zebra (enabled by default if supported)])) AC_ARG_ENABLE([capabilities], AS_HELP_STRING([--disable-capabilities], [disable using POSIX capabilities])) AC_ARG_ENABLE([rusage], diff --git a/doc/developer/library.rst b/doc/developer/library.rst index a904a4e778..3d5c6a2a15 100644 --- a/doc/developer/library.rst +++ b/doc/developer/library.rst @@ -15,5 +15,6 @@ Library Facilities (libfrr) hooks cli modules + lua diff --git a/doc/developer/lists.rst b/doc/developer/lists.rst index 5f020060ce..853c65ddf3 100644 --- a/doc/developer/lists.rst +++ b/doc/developer/lists.rst @@ -1,3 +1,5 @@ +.. _lists: + List implementations ==================== diff --git a/doc/developer/locking.rst b/doc/developer/locking.rst index aee05aae06..d698789f9f 100644 --- a/doc/developer/locking.rst +++ b/doc/developer/locking.rst @@ -1,3 +1,5 @@ +.. _locking: + Locking ======= diff --git a/doc/developer/logging.rst b/doc/developer/logging.rst index e393fe6fba..db577c9216 100644 --- a/doc/developer/logging.rst +++ b/doc/developer/logging.rst @@ -1,3 +1,5 @@ +.. _logging: + Developer's Guide to Logging ============================ diff --git a/doc/developer/lua.rst b/doc/developer/lua.rst new file mode 100644 index 0000000000..23eb35fc58 --- /dev/null +++ b/doc/developer/lua.rst @@ -0,0 +1,65 @@ +.. _lua: + +Lua +=== + +Lua is currently experimental within FRR and has very limited +support. If you would like to compile FRR with Lua you must +follow these steps: + +1. 
Installation of Relevant Libraries + + .. code-block:: shell + + apt-get install lua5.3 liblua5-3 liblua5.3-dev + + These are the Debian libraries that are needed. There should + be equivalent RPM packages for RPM-based distributions. + +2. Compilation + + Configure needs these options + + .. code-block:: shell + + ./configure --enable-dev-build --enable-lua <all other interesting options> + + Typically you just include the two new enable lines to build with it. + +3. Using Lua + + * Copy tools/lua.scr into /etc/frr + + * Create a route-map match command + + .. code-block:: console + + ! + router bgp 55 + neighbor 10.50.11.116 remote-as external + address-family ipv4 unicast + neighbor 10.50.11.116 route-map TEST in + exit-address-family + ! + route-map TEST permit 10 + match command mooey + ! + + * In the lua.scr file make sure that you have a function named 'mooey' + + .. code-block:: console + + function mooey () + zlog_debug(string.format("afi: %d: %s %d ifdx: %d aspath: %s localpref: %d", + prefix.family, prefix.route, nexthop.metric, + nexthop.ifindex, nexthop.aspath, nexthop.localpref)) + + nexthop.metric = 33 + nexthop.localpref = 13 + return 3 + end + +4. General Comments + + Please be aware that this is extremely experimental and needs a ton of work + to get this up into a state that is usable. diff --git a/doc/developer/modules.rst b/doc/developer/modules.rst index 763d8b1b8d..02330ddfe4 100644 --- a/doc/developer/modules.rst +++ b/doc/developer/modules.rst @@ -1,3 +1,5 @@ +.. _modules: + Modules ======= diff --git a/doc/developer/static-linking.rst b/doc/developer/static-linking.rst index bc33207b38..1e45c48dc3 100644 --- a/doc/developer/static-linking.rst +++ b/doc/developer/static-linking.rst @@ -10,7 +10,7 @@ likely to be present on a given platform - libfrr and libyang. The resultant binaries should still be fairly portable. For example, here is the DSO dependency list for `bgpd` after using these steps: -.. code-block:: +.. 
code-block:: shell $ ldd bgpd linux-vdso.so.1 (0x00007ffe3a989000) @@ -56,7 +56,7 @@ usable for our purposes. So download ``libpcre`` from `SourceForge <https://sourceforge.net/projects/pcre/>`_, and build it like this: -.. code-block:: +.. code-block:: shell ./configure --with-pic make diff --git a/doc/developer/subdir.am b/doc/developer/subdir.am index 791f7679a6..538a290c34 100644 --- a/doc/developer/subdir.am +++ b/doc/developer/subdir.am @@ -34,6 +34,7 @@ dev_RSTFILES = \ doc/developer/lists.rst \ doc/developer/locking.rst \ doc/developer/logging.rst \ + doc/developer/lua.rst \ doc/developer/memtypes.rst \ doc/developer/modules.rst \ doc/developer/next-hop-tracking.rst \ diff --git a/doc/user/installation.rst b/doc/user/installation.rst index 392a2dd784..9cd1210529 100644 --- a/doc/user/installation.rst +++ b/doc/user/installation.rst @@ -144,6 +144,11 @@ options from the list below. Build watchfrr with systemd integration, this will allow FRR to communicate with systemd to tell systemd if FRR has come up properly. +.. option:: --enable-werror + + Build with all warnings converted to errors as a compile option. This + is recommended for developers only. + .. option:: --disable-pimd Turn off building of pimd. On some BSD platforms pimd will not build properly due @@ -181,6 +186,10 @@ options from the list below. Turn off bgpd's ability to use VNC. +.. option:: --disable-bgp-bmp + + Turn off BGP BMP support + .. option:: --enable-datacenter Enable system defaults to work as if in a Data Center. See defaults.h @@ -217,6 +226,11 @@ options from the list below. realm value when inserting into the Linux kernel. Then routing policy can be assigned to the realm. See the tc man page. +.. option:: --disable-irdp + + Disable IRDP server support. This is enabled by default if we have + both `struct in_pktinfo` and `struct icmphdr` available to us. + .. option:: --disable-rtadv Disable support IPV6 router advertisement in zebra. 
@@ -302,6 +316,18 @@ options from the list below. Build the Sysrepo northbound plugin. +.. option:: --enable-grpc + + Enable the gRPC northbound plugin. + +.. option:: --enable-zeromq + + Enable the ZeroMQ handler. + +.. option:: --with-libpam + + Use libpam for PAM support in vtysh. + .. option:: --enable-time-check XXX When this is enabled with a XXX value in microseconds, any thread that @@ -319,6 +345,10 @@ options from the list below. load might see improvement in behavior. Be aware that `show thread cpu` is considered a good data gathering tool from the perspective of developers. +.. option:: --enable-pcreposix + + Turn on the usage of PCRE Posix libs for regex functionality. + You may specify any combination of the above options to the configure script. By default, the executables are placed in :file:`/usr/local/sbin` and the configuration files in :file:`/usr/local/etc`. The :file:`/usr/local/` diff --git a/doc/user/pim.rst b/doc/user/pim.rst index 6bda692607..9876216736 100644 --- a/doc/user/pim.rst +++ b/doc/user/pim.rst @@ -461,6 +461,18 @@ cause great confusion. Display upstream information for S,G's and the RPF data associated with them. +.. index:: show ip pim [vrf NAME] mlag upstream [A.B.C.D [A.B.C.D]] [json] +.. clicmd:: show ip pim mlag upstream + + Display upstream entries that are synced across MLAG switches. + Optional source and group addresses may be given to filter the output. + +.. index:: show ip pim mlag summary +.. clicmd:: show ip pim mlag summary + + Display PIM MLAG (multi-chassis link aggregation) session status and + control message statistics. + .. index:: show ip pim bsr .. 
clicmd:: show ip pim bsr diff --git a/ldpd/ldp_zebra.c b/ldpd/ldp_zebra.c index 946b51e4ee..b3ccb77602 100644 --- a/ldpd/ldp_zebra.c +++ b/ldpd/ldp_zebra.c @@ -106,7 +106,7 @@ static int ldp_zebra_send_mpls_labels(int cmd, struct kroute *kr) { struct zapi_labels zl = {}; - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; if (kr->local_label < MPLS_LABEL_RESERVED_MAX || kr->remote_label == NO_LABEL) @@ -143,16 +143,14 @@ ldp_zebra_send_mpls_labels(int cmd, struct kroute *kr) znh = &zl.nexthops[0]; switch (kr->af) { case AF_INET: - znh->family = AF_INET; - znh->address.ipv4 = kr->nexthop.v4; + znh->gate.ipv4 = kr->nexthop.v4; if (kr->ifindex) znh->type = NEXTHOP_TYPE_IPV4_IFINDEX; else znh->type = NEXTHOP_TYPE_IPV4; break; case AF_INET6: - znh->family = AF_INET6; - znh->address.ipv6 = kr->nexthop.v6; + znh->gate.ipv6 = kr->nexthop.v6; if (kr->ifindex) znh->type = NEXTHOP_TYPE_IPV6_IFINDEX; else @@ -162,7 +160,8 @@ ldp_zebra_send_mpls_labels(int cmd, struct kroute *kr) break; } znh->ifindex = kr->ifindex; - znh->label = kr->remote_label; + znh->label_num = 1; + znh->labels[0] = kr->remote_label; return zebra_send_mpls_labels(zclient, cmd, &zl); } diff --git a/lib/frrstr.c b/lib/frrstr.c index 8a72a35af0..7ef5fffd12 100644 --- a/lib/frrstr.c +++ b/lib/frrstr.c @@ -25,7 +25,11 @@ #include <string.h> #include <ctype.h> #include <sys/types.h> +#ifdef HAVE_LIBPCREPOSIX +#include <pcreposix.h> +#else #include <regex.h> +#endif /* HAVE_LIBPCREPOSIX */ #include "frrstr.h" #include "memory.h" diff --git a/lib/frrstr.h b/lib/frrstr.h index 3a935c90cb..441d7b8670 100644 --- a/lib/frrstr.h +++ b/lib/frrstr.h @@ -22,7 +22,12 @@ #define _FRRSTR_H_ #include <sys/types.h> +#include <sys/types.h> +#ifdef HAVE_LIBPCREPOSIX +#include <pcreposix.h> +#else #include <regex.h> +#endif /* HAVE_LIBPCREPOSIX */ #include <stdbool.h> #include "vector.h" @@ -1092,6 +1092,11 @@ static const struct zebra_desc_table command_types[] = { DESC_ENTRY(ZEBRA_VXLAN_SG_ADD), 
DESC_ENTRY(ZEBRA_VXLAN_SG_DEL), DESC_ENTRY(ZEBRA_VXLAN_SG_REPLAY), + DESC_ENTRY(ZEBRA_MLAG_PROCESS_UP), + DESC_ENTRY(ZEBRA_MLAG_PROCESS_DOWN), + DESC_ENTRY(ZEBRA_MLAG_CLIENT_REGISTER), + DESC_ENTRY(ZEBRA_MLAG_CLIENT_UNREGISTER), + DESC_ENTRY(ZEBRA_MLAG_FORWARD_MSG), DESC_ENTRY(ZEBRA_ERROR), DESC_ENTRY(ZEBRA_CLIENT_CAPABILITIES)}; #undef DESC_ENTRY @@ -23,7 +23,12 @@ #include <lib/version.h> #include <sys/types.h> +#include <sys/types.h> +#ifdef HAVE_LIBPCREPOSIX +#include <pcreposix.h> +#else #include <regex.h> +#endif /* HAVE_LIBPCREPOSIX */ #include <stdio.h> #include "linklist.h" @@ -22,7 +22,11 @@ #define _ZEBRA_VTY_H #include <sys/types.h> +#ifdef HAVE_LIBPCREPOSIX +#include <pcreposix.h> +#else #include <regex.h> +#endif /* HAVE_LIBPCREPOSIX */ #include "thread.h" #include "log.h" diff --git a/lib/zclient.c b/lib/zclient.c index 4f2ad959dc..b0d2ea43a2 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -2602,7 +2602,7 @@ int zebra_send_mpls_labels(struct zclient *zclient, int cmd, int zapi_labels_encode(struct stream *s, int cmd, struct zapi_labels *zl) { - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; stream_reset(s); @@ -2631,20 +2631,8 @@ int zapi_labels_encode(struct stream *s, int cmd, struct zapi_labels *zl) for (int i = 0; i < zl->nexthop_num; i++) { znh = &zl->nexthops[i]; - stream_putc(s, znh->type); - stream_putw(s, znh->family); - switch (znh->family) { - case AF_INET: - stream_put_in_addr(s, &znh->address.ipv4); - break; - case AF_INET6: - stream_write(s, (uint8_t *)&znh->address.ipv6, 16); - break; - default: - break; - } - stream_putl(s, znh->ifindex); - stream_putl(s, znh->label); + if (zapi_nexthop_encode(s, znh, 0) < 0) + return -1; } /* Put length at the first point of the stream. 
*/ @@ -2655,7 +2643,7 @@ int zapi_labels_encode(struct stream *s, int cmd, struct zapi_labels *zl) int zapi_labels_decode(struct stream *s, struct zapi_labels *zl) { - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; memset(zl, 0, sizeof(*zl)); @@ -2719,21 +2707,8 @@ int zapi_labels_decode(struct stream *s, struct zapi_labels *zl) for (int i = 0; i < zl->nexthop_num; i++) { znh = &zl->nexthops[i]; - STREAM_GETC(s, znh->type); - STREAM_GETW(s, znh->family); - switch (znh->family) { - case AF_INET: - STREAM_GET(&znh->address.ipv4.s_addr, s, - IPV4_MAX_BYTELEN); - break; - case AF_INET6: - STREAM_GET(&znh->address.ipv6, s, 16); - break; - default: - break; - } - STREAM_GETL(s, znh->ifindex); - STREAM_GETL(s, znh->label); + if (zapi_nexthop_decode(s, znh, 0) < 0) + return -1; } return 0; diff --git a/lib/zclient.h b/lib/zclient.h index 9a230d3f34..e6f4c747e3 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -458,14 +458,6 @@ struct zapi_route { uint32_t tableid; }; -struct zapi_nexthop_label { - enum nexthop_types_t type; - int family; - union g_addr address; - ifindex_t ifindex; - mpls_label_t label; -}; - struct zapi_labels { uint8_t message; #define ZAPI_LABELS_FTN 0x01 @@ -476,8 +468,9 @@ struct zapi_labels { uint8_t type; unsigned short instance; } route; + uint16_t nexthop_num; - struct zapi_nexthop_label nexthops[MULTIPATH_NUM]; + struct zapi_nexthop nexthops[MULTIPATH_NUM]; }; struct zapi_pw { diff --git a/nhrpd/nhrp_vty.c b/nhrpd/nhrp_vty.c index ba8c5d4953..8438f3cb4a 100644 --- a/nhrpd/nhrp_vty.c +++ b/nhrpd/nhrp_vty.c @@ -513,13 +513,15 @@ DEFUN(if_nhrp_map, if_nhrp_map_cmd, } DEFUN(if_no_nhrp_map, if_no_nhrp_map_cmd, - "no " AFI_CMD " nhrp map <A.B.C.D|X:X::X:X>", + "no " AFI_CMD " nhrp map <A.B.C.D|X:X::X:X> [<A.B.C.D|local>]", NO_STR AFI_STR NHRP_STR "Nexthop Server configuration\n" "IPv4 protocol address\n" - "IPv6 protocol address\n") + "IPv6 protocol address\n" + "IPv4 NBMA address\n" + "Handle protocol address locally\n") { 
VTY_DECLVAR_CONTEXT(interface, ifp); afi_t afi = cmd_to_afi(argv[1]); diff --git a/ospfd/ospf_sr.c b/ospfd/ospf_sr.c index ff2039bec8..b5a54a0bc4 100644 --- a/ospfd/ospf_sr.c +++ b/ospfd/ospf_sr.c @@ -609,7 +609,7 @@ static int compute_prefix_nhlfe(struct sr_prefix *srp) static int ospf_zebra_send_mpls_labels(int cmd, struct sr_nhlfe nhlfe) { struct zapi_labels zl = {}; - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; if (IS_DEBUG_OSPF_SR) zlog_debug(" |- %s LSP %u/%u for %s/%u via %u", @@ -631,10 +631,10 @@ static int ospf_zebra_send_mpls_labels(int cmd, struct sr_nhlfe nhlfe) zl.nexthop_num = 1; znh = &zl.nexthops[0]; znh->type = NEXTHOP_TYPE_IPV4_IFINDEX; - znh->family = AF_INET; - znh->address.ipv4 = nhlfe.nexthop; + znh->gate.ipv4 = nhlfe.nexthop; znh->ifindex = nhlfe.ifindex; - znh->label = nhlfe.label_out; + znh->label_num = 1; + znh->labels[0] = nhlfe.label_out; return zebra_send_mpls_labels(zclient, cmd, &zl); } diff --git a/pimd/pim_cmd.c b/pimd/pim_cmd.c index 6508fb4453..45d479c297 100644 --- a/pimd/pim_cmd.c +++ b/pimd/pim_cmd.c @@ -166,7 +166,7 @@ static void pim_if_membership_refresh(struct interface *ifp) sg.src = src->source_addr; sg.grp = grp->group_addr; pim_ifchannel_local_membership_add(ifp, - &sg); + &sg, false /*is_vxlan*/); } } /* scan group sources */ @@ -4625,6 +4625,379 @@ DEFUN (show_ip_pim_local_membership, return CMD_SUCCESS; } +DEFUN (show_ip_pim_mlag_summary, + show_ip_pim_mlag_summary_cmd, + "show ip pim mlag summary [json]", + SHOW_STR + IP_STR + PIM_STR + "MLAG\n" + "status and stats\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + char role_buf[MLAG_ROLE_STRSIZE]; + char addr_buf[INET_ADDRSTRLEN]; + + if (uj) { + json_object *json = NULL; + json_object *json_stat = NULL; + + json = json_object_new_object(); + if (router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP) + json_object_boolean_true_add(json, "mlagConnUp"); + if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) + json_object_boolean_true_add(json, 
"mlagPeerConnUp"); + if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) + json_object_boolean_true_add(json, "mlagPeerZebraUp"); + json_object_string_add(json, "mlagRole", + mlag_role2str(router->mlag_role, + role_buf, sizeof(role_buf))); + inet_ntop(AF_INET, &router->local_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + json_object_string_add(json, "localVtepIp", addr_buf); + inet_ntop(AF_INET, &router->anycast_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + json_object_string_add(json, "anycastVtepIp", addr_buf); + json_object_string_add(json, "peerlinkRif", + router->peerlink_rif); + + json_stat = json_object_new_object(); + json_object_int_add(json_stat, "mlagConnFlaps", + router->mlag_stats.mlagd_session_downs); + json_object_int_add(json_stat, "mlagPeerConnFlaps", + router->mlag_stats.peer_session_downs); + json_object_int_add(json_stat, "mlagPeerZebraFlaps", + router->mlag_stats.peer_zebra_downs); + json_object_int_add(json_stat, "mrouteAddRx", + router->mlag_stats.msg.mroute_add_rx); + json_object_int_add(json_stat, "mrouteAddTx", + router->mlag_stats.msg.mroute_add_tx); + json_object_int_add(json_stat, "mrouteDelRx", + router->mlag_stats.msg.mroute_del_rx); + json_object_int_add(json_stat, "mrouteDelTx", + router->mlag_stats.msg.mroute_del_tx); + json_object_int_add(json_stat, "mlagStatusUpdates", + router->mlag_stats.msg.mlag_status_updates); + json_object_int_add(json_stat, "peerZebraStatusUpdates", + router->mlag_stats.msg.peer_zebra_status_updates); + json_object_int_add(json_stat, "pimStatusUpdates", + router->mlag_stats.msg.pim_status_updates); + json_object_int_add(json_stat, "vxlanUpdates", + router->mlag_stats.msg.vxlan_updates); + json_object_object_add(json, "connStats", json_stat); + + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + return CMD_SUCCESS; + } + + vty_out(vty, "MLAG daemon connection: %s\n", + (router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP) + ? 
"up" : "down"); + vty_out(vty, "MLAG peer state: %s\n", + (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) + ? "up" : "down"); + vty_out(vty, "Zebra peer state: %s\n", + (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) + ? "up" : "down"); + vty_out(vty, "MLAG role: %s\n", + mlag_role2str(router->mlag_role, role_buf, sizeof(role_buf))); + inet_ntop(AF_INET, &router->local_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + vty_out(vty, "Local VTEP IP: %s\n", addr_buf); + inet_ntop(AF_INET, &router->anycast_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + vty_out(vty, "Anycast VTEP IP: %s\n", addr_buf); + vty_out(vty, "Peerlink: %s\n", router->peerlink_rif); + vty_out(vty, "Session flaps: mlagd: %d mlag-peer: %d zebra-peer: %d\n", + router->mlag_stats.mlagd_session_downs, + router->mlag_stats.peer_session_downs, + router->mlag_stats.peer_zebra_downs); + vty_out(vty, "Message Statistics:\n"); + vty_out(vty, " mroute adds: rx: %d, tx: %d\n", + router->mlag_stats.msg.mroute_add_rx, + router->mlag_stats.msg.mroute_add_tx); + vty_out(vty, " mroute dels: rx: %d, tx: %d\n", + router->mlag_stats.msg.mroute_del_rx, + router->mlag_stats.msg.mroute_del_tx); + vty_out(vty, " peer zebra status updates: %d\n", + router->mlag_stats.msg.peer_zebra_status_updates); + vty_out(vty, " PIM status updates: %d\n", + router->mlag_stats.msg.pim_status_updates); + vty_out(vty, " VxLAN updates: %d\n", + router->mlag_stats.msg.vxlan_updates); + + return CMD_SUCCESS; +} + +static void pim_show_mlag_up_entry_detail(struct vrf *vrf, + struct vty *vty, struct pim_upstream *up, + char *src_str, char *grp_str, json_object *json) +{ + if (json) { + json_object *json_row = NULL; + json_object *own_list = NULL; + json_object *json_group = NULL; + + + json_object_object_get_ex(json, grp_str, &json_group); + if (!json_group) { + json_group = json_object_new_object(); + json_object_object_add(json, grp_str, + json_group); + } + + json_row = json_object_new_object(); + json_object_string_add(json_row, "source", src_str); + 
json_object_string_add(json_row, "group", grp_str); + + own_list = json_object_new_array(); + if (pim_up_mlag_is_local(up)) + json_object_array_add(own_list, + json_object_new_string("local")); + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)) + json_object_array_add(own_list, + json_object_new_string("peer")); + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE)) + json_object_array_add( + own_list, json_object_new_string("Interface")); + json_object_object_add(json_row, "owners", own_list); + + json_object_int_add(json_row, "localCost", + pim_up_mlag_local_cost(up)); + json_object_int_add(json_row, "peerCost", + pim_up_mlag_peer_cost(up)); + if (PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)) + json_object_boolean_false_add(json_row, "df"); + else + json_object_boolean_true_add(json_row, "df"); + json_object_object_add(json_group, src_str, json_row); + } else { + char own_str[6]; + + own_str[0] = '\0'; + if (pim_up_mlag_is_local(up)) + strcpy(own_str + strlen(own_str), "L"); + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)) + strcpy(own_str + strlen(own_str), "P"); + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE)) + strcpy(own_str + strlen(own_str), "I"); + /* XXX - fixup, print paragraph output */ + vty_out(vty, + "%-15s %-15s %-6s %-11u %-10d %2s\n", + src_str, grp_str, own_str, + pim_up_mlag_local_cost(up), + pim_up_mlag_peer_cost(up), + PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags) + ? 
"n" : "y"); + } +} + +static void pim_show_mlag_up_detail(struct vrf *vrf, + struct vty *vty, const char *src_or_group, + const char *group, bool uj) +{ + char src_str[INET_ADDRSTRLEN]; + char grp_str[INET_ADDRSTRLEN]; + struct pim_upstream *up; + struct pim_instance *pim = vrf->info; + json_object *json = NULL; + + if (uj) + json = json_object_new_object(); + else + vty_out(vty, + "Source Group Owner Local-cost Peer-cost DF\n"); + + frr_each (rb_pim_upstream, &pim->upstream_head, up) { + if (!(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_PEER) + && !(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE) + && !pim_up_mlag_is_local(up)) + continue; + + pim_inet4_dump("<src?>", up->sg.src, src_str, sizeof(src_str)); + pim_inet4_dump("<grp?>", up->sg.grp, grp_str, sizeof(grp_str)); + /* XXX: strcmps are clearly inefficient. we should do uint comps + * here instead. + */ + if (group) { + if (strcmp(src_str, src_or_group) || + strcmp(grp_str, group)) + continue; + } else { + if (strcmp(src_str, src_or_group) && + strcmp(grp_str, src_or_group)) + continue; + } + pim_show_mlag_up_entry_detail(vrf, vty, up, + src_str, grp_str, json); + } + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +static void pim_show_mlag_up_vrf(struct vrf *vrf, struct vty *vty, bool uj) +{ + json_object *json = NULL; + json_object *json_row; + struct pim_upstream *up; + char src_str[INET_ADDRSTRLEN]; + char grp_str[INET_ADDRSTRLEN]; + struct pim_instance *pim = vrf->info; + json_object *json_group = NULL; + + if (uj) { + json = json_object_new_object(); + } else { + vty_out(vty, + "Source Group Owner Local-cost Peer-cost DF\n"); + } + + frr_each (rb_pim_upstream, &pim->upstream_head, up) { + if (!(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_PEER) + && !(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE) + && !pim_up_mlag_is_local(up)) + continue; + pim_inet4_dump("<src?>", up->sg.src, src_str, sizeof(src_str)); + 
pim_inet4_dump("<grp?>", up->sg.grp, grp_str, sizeof(grp_str)); + if (uj) { + json_object *own_list = NULL; + + json_object_object_get_ex(json, grp_str, &json_group); + if (!json_group) { + json_group = json_object_new_object(); + json_object_object_add(json, grp_str, + json_group); + } + + json_row = json_object_new_object(); + json_object_string_add(json_row, "vrf", vrf->name); + json_object_string_add(json_row, "source", src_str); + json_object_string_add(json_row, "group", grp_str); + + own_list = json_object_new_array(); + if (pim_up_mlag_is_local(up)) { + + json_object_array_add(own_list, + json_object_new_string("local")); + } + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)) { + json_object_array_add(own_list, + json_object_new_string("peer")); + } + json_object_object_add(json_row, "owners", own_list); + + json_object_int_add(json_row, "localCost", + pim_up_mlag_local_cost(up)); + json_object_int_add(json_row, "peerCost", + pim_up_mlag_peer_cost(up)); + if (PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)) + json_object_boolean_false_add(json_row, "df"); + else + json_object_boolean_true_add(json_row, "df"); + json_object_object_add(json_group, src_str, json_row); + } else { + char own_str[6]; + + own_str[0] = '\0'; + if (pim_up_mlag_is_local(up)) + strcpy(own_str + strlen(own_str), "L"); + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)) + strcpy(own_str + strlen(own_str), "P"); + if (up->flags & (PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE)) + strcpy(own_str + strlen(own_str), "I"); + vty_out(vty, + "%-15s %-15s %-6s %-11u %-10u %2s\n", + src_str, grp_str, own_str, + pim_up_mlag_local_cost(up), + pim_up_mlag_peer_cost(up), + PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags) + ? 
"n" : "y"); + } + } + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +static void pim_show_mlag_help_string(struct vty *vty, bool uj) +{ + if (!uj) { + vty_out(vty, "Owner codes:\n"); + vty_out(vty, + "L: EVPN-MLAG Entry, I:PIM-MLAG Entry, " + "P: Peer Entry\n"); + } +} + + +DEFUN(show_ip_pim_mlag_up, show_ip_pim_mlag_up_cmd, + "show ip pim [vrf NAME] mlag upstream [A.B.C.D [A.B.C.D]] [json]", + SHOW_STR + IP_STR + PIM_STR + VRF_CMD_HELP_STR + "MLAG\n" + "upstream\n" + "Unicast or Multicast address\n" + "Multicast address\n" JSON_STR) +{ + const char *src_or_group = NULL; + const char *group = NULL; + int idx = 2; + struct vrf *vrf = pim_cmd_lookup_vrf(vty, argv, argc, &idx); + bool uj = use_json(argc, argv); + + if (!vrf || !vrf->info) { + vty_out(vty, "%s: VRF or Info missing\n", __func__); + return CMD_WARNING; + } + + if (uj) + argc--; + + if (argv_find(argv, argc, "A.B.C.D", &idx)) { + src_or_group = argv[idx]->arg; + if (idx + 1 < argc) + group = argv[idx + 1]->arg; + } + + pim_show_mlag_help_string(vty, uj); + + if (src_or_group || group) + pim_show_mlag_up_detail(vrf, vty, src_or_group, group, uj); + else + pim_show_mlag_up_vrf(vrf, vty, uj); + + return CMD_SUCCESS; +} + + +DEFUN(show_ip_pim_mlag_up_vrf_all, show_ip_pim_mlag_up_vrf_all_cmd, + "show ip pim vrf all mlag upstream [json]", + SHOW_STR IP_STR PIM_STR VRF_CMD_HELP_STR + "MLAG\n" + "upstream\n" JSON_STR) +{ + struct vrf *vrf; + bool uj = use_json(argc, argv); + + pim_show_mlag_help_string(vty, uj); + RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { + pim_show_mlag_up_vrf(vrf, vty, uj); + } + + return CMD_SUCCESS; +} + DEFUN (show_ip_pim_neighbor, show_ip_pim_neighbor_cmd, "show ip pim [vrf NAME] neighbor [detail|WORD] [json]", @@ -5294,7 +5667,7 @@ static void pim_cmd_show_ip_multicast_helper(struct pim_instance *pim, pim = vrf->info; vty_out(vty, "Router MLAG Role: %s\n", - mlag_role2str(router->role, 
mlag_role, sizeof(mlag_role))); + mlag_role2str(router->mlag_role, mlag_role, sizeof(mlag_role))); vty_out(vty, "Mroute socket descriptor:"); vty_out(vty, " %d(%s)\n", pim->mroute_socket, vrf->name); @@ -10259,7 +10632,7 @@ DEFUN_HIDDEN (no_ip_pim_mlag, addr.s_addr = 0; pim_vxlan_mlag_update(true/*mlag_enable*/, - false/*peer_state*/, PIM_VXLAN_MLAG_ROLE_SECONDARY, + false/*peer_state*/, MLAG_ROLE_NONE, NULL/*peerlink*/, &addr); return CMD_SUCCESS; @@ -10299,9 +10672,9 @@ DEFUN_HIDDEN (ip_pim_mlag, idx += 2; if (!strcmp(argv[idx]->arg, "primary")) { - role = PIM_VXLAN_MLAG_ROLE_PRIMARY; + role = MLAG_ROLE_PRIMARY; } else if (!strcmp(argv[idx]->arg, "secondary")) { - role = PIM_VXLAN_MLAG_ROLE_SECONDARY; + role = MLAG_ROLE_SECONDARY; } else { vty_out(vty, "unknown MLAG role %s\n", argv[idx]->arg); return CMD_WARNING; @@ -10471,6 +10844,9 @@ void pim_cmd_init(void) install_element(VIEW_NODE, &show_ip_pim_join_vrf_all_cmd); install_element(VIEW_NODE, &show_ip_pim_jp_agg_cmd); install_element(VIEW_NODE, &show_ip_pim_local_membership_cmd); + install_element(VIEW_NODE, &show_ip_pim_mlag_summary_cmd); + install_element(VIEW_NODE, &show_ip_pim_mlag_up_cmd); + install_element(VIEW_NODE, &show_ip_pim_mlag_up_vrf_all_cmd); install_element(VIEW_NODE, &show_ip_pim_neighbor_cmd); install_element(VIEW_NODE, &show_ip_pim_neighbor_vrf_all_cmd); install_element(VIEW_NODE, &show_ip_pim_rpf_cmd); @@ -10595,6 +10971,8 @@ void pim_cmd_init(void) install_element(CONFIG_NODE, &no_debug_ssmpingd_cmd); install_element(CONFIG_NODE, &debug_pim_zebra_cmd); install_element(CONFIG_NODE, &no_debug_pim_zebra_cmd); + install_element(CONFIG_NODE, &debug_pim_mlag_cmd); + install_element(CONFIG_NODE, &no_debug_pim_mlag_cmd); install_element(CONFIG_NODE, &debug_pim_vxlan_cmd); install_element(CONFIG_NODE, &no_debug_pim_vxlan_cmd); install_element(CONFIG_NODE, &debug_msdp_cmd); diff --git a/pimd/pim_iface.c b/pimd/pim_iface.c index c615540149..8cc720c535 100644 --- a/pimd/pim_iface.c +++ 
b/pimd/pim_iface.c @@ -498,6 +498,7 @@ void pim_if_addr_add(struct connected *ifc) struct pim_interface *pim_ifp; struct interface *ifp; struct in_addr ifaddr; + bool vxlan_term; zassert(ifc); @@ -635,7 +636,8 @@ void pim_if_addr_add(struct connected *ifc) address assigned, then try to create a vif_index. */ if (pim_ifp->mroute_vif_index < 0) { - pim_if_add_vif(ifp, false, false /*vxlan_term*/); + vxlan_term = pim_vxlan_is_term_dev_cfg(pim_ifp->pim, ifp); + pim_if_add_vif(ifp, false, vxlan_term); } pim_ifchannel_scan_forward_start(ifp); } @@ -730,6 +732,7 @@ void pim_if_addr_add_all(struct interface *ifp) int v4_addrs = 0; int v6_addrs = 0; struct pim_interface *pim_ifp = ifp->info; + bool vxlan_term; /* PIM/IGMP enabled ? */ @@ -768,7 +771,8 @@ void pim_if_addr_add_all(struct interface *ifp) * address assigned, then try to create a vif_index. */ if (pim_ifp->mroute_vif_index < 0) { - pim_if_add_vif(ifp, false, false /*vxlan_term*/); + vxlan_term = pim_vxlan_is_term_dev_cfg(pim_ifp->pim, ifp); + pim_if_add_vif(ifp, false, vxlan_term); } pim_ifchannel_scan_forward_start(ifp); diff --git a/pimd/pim_ifchannel.c b/pimd/pim_ifchannel.c index 22d6e6298e..2ea1f4e9a4 100644 --- a/pimd/pim_ifchannel.c +++ b/pimd/pim_ifchannel.c @@ -854,8 +854,9 @@ void pim_ifchannel_join_add(struct interface *ifp, struct in_addr neigh_addr, /* * If we are going to be a LHR, we need to note it */ - if (ch->upstream->parent && (ch->upstream->parent->flags - & PIM_UPSTREAM_FLAG_MASK_SRC_IGMP) + if (ch->upstream->parent && + (PIM_UPSTREAM_FLAG_TEST_CAN_BE_LHR( + ch->upstream->parent->flags)) && !(ch->upstream->flags & PIM_UPSTREAM_FLAG_MASK_SRC_LHR)) { pim_upstream_ref(ch->upstream, @@ -1042,11 +1043,12 @@ void pim_ifchannel_prune(struct interface *ifp, struct in_addr upstream, } int pim_ifchannel_local_membership_add(struct interface *ifp, - struct prefix_sg *sg) + struct prefix_sg *sg, bool is_vxlan) { struct pim_ifchannel *ch, *starch; struct pim_interface *pim_ifp; struct pim_instance *pim; 
+ int up_flags; /* PIM enabled on interface? */ pim_ifp = ifp->info; @@ -1080,7 +1082,9 @@ int pim_ifchannel_local_membership_add(struct interface *ifp, } } - ch = pim_ifchannel_add(ifp, sg, 0, PIM_UPSTREAM_FLAG_MASK_SRC_IGMP); + up_flags = is_vxlan ? PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM : + PIM_UPSTREAM_FLAG_MASK_SRC_IGMP; + ch = pim_ifchannel_add(ifp, sg, 0, up_flags); ifmembership_set(ch, PIM_IFMEMBERSHIP_INCLUDE); diff --git a/pimd/pim_ifchannel.h b/pimd/pim_ifchannel.h index b36c3236b0..3d5cbd8ecf 100644 --- a/pimd/pim_ifchannel.h +++ b/pimd/pim_ifchannel.h @@ -130,7 +130,7 @@ void pim_ifchannel_prune(struct interface *ifp, struct in_addr upstream, struct prefix_sg *sg, uint8_t source_flags, uint16_t holdtime); int pim_ifchannel_local_membership_add(struct interface *ifp, - struct prefix_sg *sg); + struct prefix_sg *sg, bool is_vxlan); void pim_ifchannel_local_membership_del(struct interface *ifp, struct prefix_sg *sg); diff --git a/pimd/pim_instance.h b/pimd/pim_instance.h index da0c75decb..7b1fd2e172 100644 --- a/pimd/pim_instance.h +++ b/pimd/pim_instance.h @@ -48,6 +48,46 @@ enum pim_spt_switchover { PIM_SPT_INFINITY, }; +/* stats for updates rxed from the MLAG component during the life of a + * session + */ +struct pim_mlag_msg_stats { + uint32_t mroute_add_rx; + uint32_t mroute_add_tx; + uint32_t mroute_del_rx; + uint32_t mroute_del_tx; + uint32_t mlag_status_updates; + uint32_t pim_status_updates; + uint32_t vxlan_updates; + uint32_t peer_zebra_status_updates; +}; + +struct pim_mlag_stats { + /* message stats are reset when the connection to mlagd flaps */ + struct pim_mlag_msg_stats msg; + uint32_t mlagd_session_downs; + uint32_t peer_session_downs; + uint32_t peer_zebra_downs; +}; + +enum pim_mlag_flags { + PIM_MLAGF_NONE = 0, + /* connection to the local MLAG daemon is up */ + PIM_MLAGF_LOCAL_CONN_UP = (1 << 0), + /* connection to the MLAG daemon on the peer switch is up. 
note + * that there is no direct connection between FRR and the peer MLAG + * daemon. this is just a peer-session status provided by the local + * MLAG daemon. + */ + PIM_MLAGF_PEER_CONN_UP = (1 << 1), + /* status update rxed from the local daemon */ + PIM_MLAGF_STATUS_RXED = (1 << 2), + /* initial dump of data done post peerlink flap */ + PIM_MLAGF_PEER_REPLAY_DONE = (1 << 3), + /* zebra is up on the peer */ + PIM_MLAGF_PEER_ZEBRA_UP = (1 << 4) +}; + struct pim_router { struct thread_master *master; @@ -65,7 +105,7 @@ struct pim_router { */ vrf_id_t vrf_id; - enum mlag_role role; + enum mlag_role mlag_role; uint32_t pim_mlag_intf_cnt; /* if true we have registered with MLAG */ bool mlag_process_register; @@ -77,6 +117,12 @@ struct pim_router { struct stream_fifo *mlag_fifo; struct stream *mlag_stream; struct thread *zpthread_mlag_write; + struct in_addr anycast_vtep_ip; + struct in_addr local_vtep_ip; + struct pim_mlag_stats mlag_stats; + enum pim_mlag_flags mlag_flags; + char peerlink_rif[INTERFACE_NAMSIZ]; + struct interface *peerlink_rif_p; }; /* Per VRF PIM DB */ diff --git a/pimd/pim_mlag.c b/pimd/pim_mlag.c index f60c18204b..1c2f7c563d 100644 --- a/pimd/pim_mlag.c +++ b/pimd/pim_mlag.c @@ -25,14 +25,462 @@ #include "pimd.h" #include "pim_mlag.h" +#include "pim_upstream.h" +#include "pim_vxlan.h" extern struct zclient *zclient; +#define PIM_MLAG_METADATA_LEN 4 + +/******************************* pim upstream sync **************************/ +/* Update DF role for the upstream entry and return true on role change */ +bool pim_mlag_up_df_role_update(struct pim_instance *pim, + struct pim_upstream *up, bool is_df, const char *reason) +{ + struct channel_oil *c_oil = up->channel_oil; + bool old_is_df = !PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags); + struct pim_interface *vxlan_ifp; + + if (is_df == old_is_df) { + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: Ignoring Role update for %s, since no change", + __func__, up->sg_str); + return false; + } + + if 
(PIM_DEBUG_MLAG) + zlog_debug("local MLAG mroute %s role changed to %s based on %s", + up->sg_str, is_df ? "df" : "non-df", reason); + + if (is_df) + PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(up->flags); + else + PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags); + + + /* If the DF role has changed check if ipmr-lo needs to be + * muted/un-muted. Active-Active devices and vxlan termination + * devices (ipmr-lo) are suppressed on the non-DF. + * This may leave the mroute with the empty OIL in which case the + * the forwarding entry's sole purpose is to just blackhole the flow + * headed to the switch. + */ + if (c_oil) { + vxlan_ifp = pim_vxlan_get_term_ifp(pim); + if (vxlan_ifp) + pim_channel_update_oif_mute(c_oil, vxlan_ifp); + } + + /* If DF role changed on a (*,G) termination mroute update the + * associated DF role on the inherited (S,G) entries + */ + if ((up->sg.src.s_addr == INADDR_ANY) && + PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags)) + pim_vxlan_inherit_mlag_flags(pim, up, true /* inherit */); + + return true; +} + +/* Run per-upstream entry DF election and return true on role change */ +static bool pim_mlag_up_df_role_elect(struct pim_instance *pim, + struct pim_upstream *up) +{ + bool is_df; + uint32_t peer_cost; + uint32_t local_cost; + bool rv; + + if (!pim_up_mlag_is_local(up)) + return false; + + /* We are yet to rx a status update from the local MLAG daemon so + * we will assume DF status. + */ + if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) + return pim_mlag_up_df_role_update(pim, up, + true /*is_df*/, "mlagd-down"); + + /* If not connected to peer assume DF role on the MLAG primary + * switch (and non-DF on the secondary switch. + */ + if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) { + is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false; + return pim_mlag_up_df_role_update(pim, up, + is_df, "peer-down"); + } + + /* If MLAG peer session is up but zebra is down on the peer + * assume DF role. 
+ */ + if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) + return pim_mlag_up_df_role_update(pim, up, + true /*is_df*/, "zebra-down"); + + /* If we are connected to peer switch but don't have a mroute + * from it we have to assume non-DF role to avoid duplicates. + * Note: When the peer connection comes up we wait for initial + * replay to complete before moving "strays" i.e. local-mlag-mroutes + * without a peer reference to non-df role. + */ + if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) + return pim_mlag_up_df_role_update(pim, up, + false /*is_df*/, "no-peer-mroute"); + + /* switch with the lowest RPF cost wins. if both switches have the same + * cost MLAG role is used as a tie breaker (MLAG primary wins). + */ + peer_cost = up->mlag.peer_mrib_metric; + local_cost = pim_up_mlag_local_cost(up); + if (local_cost == peer_cost) { + is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false; + rv = pim_mlag_up_df_role_update(pim, up, is_df, "equal-cost"); + } else { + is_df = (local_cost < peer_cost) ? true : false; + rv = pim_mlag_up_df_role_update(pim, up, is_df, "cost"); + } + + return rv; +} + +/* Handle upstream entry add from the peer MLAG switch - + * - if a local entry doesn't exist one is created with reference + * _MLAG_PEER + * - if a local entry exists and has a MLAG OIF DF election is run. + * the non-DF switch stop forwarding traffic to MLAG devices. + */ +static void pim_mlag_up_peer_add(struct mlag_mroute_add *msg) +{ + struct pim_upstream *up; + struct pim_instance *pim; + int flags = 0; + struct prefix_sg sg; + struct vrf *vrf; + char sg_str[PIM_SG_LEN]; + + memset(&sg, 0, sizeof(struct prefix_sg)); + sg.src.s_addr = htonl(msg->source_ip); + sg.grp.s_addr = htonl(msg->group_ip); + if (PIM_DEBUG_MLAG) + pim_str_sg_set(&sg, sg_str); + + if (PIM_DEBUG_MLAG) + zlog_debug("peer MLAG mroute add %s:%s cost %d", + msg->vrf_name, sg_str, msg->cost_to_rp); + + /* XXX - this is not correct. 
we MUST cache updates to avoid losing + * an entry because of race conditions with the peer switch. + */ + vrf = vrf_lookup_by_name(msg->vrf_name); + if (!vrf) { + if (PIM_DEBUG_MLAG) + zlog_debug("peer MLAG mroute add failed %s:%s; no vrf", + msg->vrf_name, sg_str); + return; + } + pim = vrf->info; + + up = pim_upstream_find(pim, &sg); + if (up) { + /* upstream already exists; create peer reference if it + * doesn't already exist. + */ + if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) + pim_upstream_ref(up, + PIM_UPSTREAM_FLAG_MASK_MLAG_PEER, + __PRETTY_FUNCTION__); + } else { + PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags); + up = pim_upstream_add(pim, &sg, NULL /*iif*/, flags, + __PRETTY_FUNCTION__, NULL /*if_ch*/); + + if (!up) { + if (PIM_DEBUG_MLAG) + zlog_debug("peer MLAG mroute add failed %s:%s", + vrf->name, sg_str); + return; + } + } + up->mlag.peer_mrib_metric = msg->cost_to_rp; + pim_mlag_up_df_role_elect(pim, up); +} + +/* Handle upstream entry del from the peer MLAG switch - + * - peer reference is removed. this can result in the upstream + * being deleted altogether. + * - if a local entry continues to exisy and has a MLAG OIF DF election + * is re-run (at the end of which the local entry will be the DF). 
+ */ +static void pim_mlag_up_peer_deref(struct pim_instance *pim, + struct pim_upstream *up) +{ + if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) + return; + + PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(up->flags); + up = pim_upstream_del(pim, up, __PRETTY_FUNCTION__); + if (up) + pim_mlag_up_df_role_elect(pim, up); +} +static void pim_mlag_up_peer_del(struct mlag_mroute_del *msg) +{ + struct pim_upstream *up; + struct pim_instance *pim; + struct prefix_sg sg; + struct vrf *vrf; + char sg_str[PIM_SG_LEN]; + + memset(&sg, 0, sizeof(struct prefix_sg)); + sg.src.s_addr = htonl(msg->source_ip); + sg.grp.s_addr = htonl(msg->group_ip); + if (PIM_DEBUG_MLAG) + pim_str_sg_set(&sg, sg_str); + + if (PIM_DEBUG_MLAG) + zlog_debug("peer MLAG mroute del %s:%s", msg->vrf_name, + sg_str); + + vrf = vrf_lookup_by_name(msg->vrf_name); + if (!vrf) { + if (PIM_DEBUG_MLAG) + zlog_debug("peer MLAG mroute del skipped %s:%s; no vrf", + msg->vrf_name, sg_str); + return; + } + pim = vrf->info; + + up = pim_upstream_find(pim, &sg); + if (!up) { + if (PIM_DEBUG_MLAG) + zlog_debug("peer MLAG mroute del skipped %s:%s; no up", + vrf->name, sg_str); + return; + } + + pim_mlag_up_peer_deref(pim, up); +} + +/* When we lose connection to the local MLAG daemon we can drop all peer + * references. + */ +static void pim_mlag_up_peer_del_all(void) +{ + struct list *temp = list_new(); + struct pim_upstream *up; + struct vrf *vrf; + struct pim_instance *pim; + + /* + * So why these gyrations? + * pim->upstream_head has the list of *,G and S,G + * that are in the system. The problem of course + * is that it is an ordered list: + * (*,G1) -> (S1,G1) -> (S2,G2) -> (S3, G2) -> (*,G2) -> (S1,G2) + * And the *,G1 has pointers to S1,G1 and S2,G1 + * if we delete *,G1 then we have a situation where + * S1,G1 and S2,G2 can be deleted as well. Then a + * simple ALL_LIST_ELEMENTS will have the next listnode + * pointer become invalid and we crash. 
+ * So let's grab the list of MLAG_PEER upstreams + * add a refcount put on another list and delete safely + */ + RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { + pim = vrf->info; + frr_each (rb_pim_upstream, &pim->upstream_head, up) { + if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags)) + continue; + listnode_add(temp, up); + /* + * Add a reference since we are adding to this + * list for deletion + */ + up->ref_count++; + } + + while (temp->count) { + up = listnode_head(temp); + listnode_delete(temp, up); + + pim_mlag_up_peer_deref(pim, up); + /* + * This is the deletion of the reference added + * above + */ + pim_upstream_del(pim, up, __PRETTY_FUNCTION__); + } + } + + list_delete(&temp); +} + +static int pim_mlag_signal_zpthread(void) +{ + /* XXX - This is a temporary stub; the MLAG thread code is planned for + * a separate commit + */ + return (0); +} + +/* Send upstream entry to the local MLAG daemon (which will subsequently + * send it to the peer MLAG switch). + */ +static void pim_mlag_up_local_add_send(struct pim_instance *pim, + struct pim_upstream *up) +{ + struct stream *s = NULL; + struct vrf *vrf = pim->vrf; + + if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) + return; + + s = stream_new(sizeof(struct mlag_mroute_add) + PIM_MLAG_METADATA_LEN); + if (!s) + return; + + if (PIM_DEBUG_MLAG) + zlog_debug("local MLAG mroute add %s:%s", + vrf->name, up->sg_str); + + ++router->mlag_stats.msg.mroute_add_tx; + + stream_putl(s, MLAG_MROUTE_ADD); + stream_put(s, vrf->name, VRF_NAMSIZ); + stream_putl(s, ntohl(up->sg.src.s_addr)); + stream_putl(s, ntohl(up->sg.grp.s_addr)); + + stream_putl(s, pim_up_mlag_local_cost(up)); + /* XXX - who is addding*/ + stream_putl(s, MLAG_OWNER_VXLAN); + /* XXX - am_i_DR field should be removed */ + stream_putc(s, false); + stream_putc(s, !(PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags))); + stream_putl(s, vrf->vrf_id); + /* XXX - this field is a No-op for VXLAN*/ + stream_put(s, NULL, INTERFACE_NAMSIZ); + + 
stream_fifo_push_safe(router->mlag_fifo, s); + pim_mlag_signal_zpthread(); +} + +static void pim_mlag_up_local_del_send(struct pim_instance *pim, + struct pim_upstream *up) +{ + struct stream *s = NULL; + struct vrf *vrf = pim->vrf; + + if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) + return; + + s = stream_new(sizeof(struct mlag_mroute_del) + PIM_MLAG_METADATA_LEN); + if (!s) + return; + + if (PIM_DEBUG_MLAG) + zlog_debug("local MLAG mroute del %s:%s", + vrf->name, up->sg_str); + + ++router->mlag_stats.msg.mroute_del_tx; + + stream_putl(s, MLAG_MROUTE_DEL); + stream_put(s, vrf->name, VRF_NAMSIZ); + stream_putl(s, ntohl(up->sg.src.s_addr)); + stream_putl(s, ntohl(up->sg.grp.s_addr)); + /* XXX - who is adding */ + stream_putl(s, MLAG_OWNER_VXLAN); + stream_putl(s, vrf->vrf_id); + /* XXX - this field is a No-op for VXLAN */ + stream_put(s, NULL, INTERFACE_NAMSIZ); + + /* XXX - is this the the most optimal way to do things */ + stream_fifo_push_safe(router->mlag_fifo, s); + pim_mlag_signal_zpthread(); +} + + +/* Called when a local upstream entry is created or if it's cost changes */ +void pim_mlag_up_local_add(struct pim_instance *pim, + struct pim_upstream *up) +{ + pim_mlag_up_df_role_elect(pim, up); + /* XXX - need to add some dup checks here */ + pim_mlag_up_local_add_send(pim, up); +} + +/* Called when local MLAG reference is removed from an upstream entry */ +void pim_mlag_up_local_del(struct pim_instance *pim, + struct pim_upstream *up) +{ + pim_mlag_up_df_role_elect(pim, up); + pim_mlag_up_local_del_send(pim, up); +} + +/* When connection to local MLAG daemon is established all the local + * MLAG upstream entries are replayed to it. 
+ */ +static void pim_mlag_up_local_replay(void) +{ + struct pim_upstream *up; + struct vrf *vrf; + struct pim_instance *pim; + + RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { + pim = vrf->info; + frr_each (rb_pim_upstream, &pim->upstream_head, up) { + if (pim_up_mlag_is_local(up)) + pim_mlag_up_local_add_send(pim, up); + } + } +} + +/* on local/peer mlag connection and role changes the DF status needs + * to be re-evaluated + */ +static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code) +{ + struct pim_upstream *up; + struct vrf *vrf; + struct pim_instance *pim; + + if (PIM_DEBUG_MLAG) + zlog_debug("%s re-run DF election because of %s", + __func__, reason_code); + RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { + pim = vrf->info; + frr_each (rb_pim_upstream, &pim->upstream_head, up) { + if (!pim_up_mlag_is_local(up)) + continue; + /* if role changes re-send to peer */ + if (pim_mlag_up_df_role_elect(pim, up) && + mlagd_send) + pim_mlag_up_local_add_send(pim, up); + } + } +} + +/*****************PIM Actions for MLAG state changes**********************/ + +/* notify the anycast VTEP component about state changes */ +static inline void pim_mlag_vxlan_state_update(void) +{ + bool enable = !!(router->mlag_flags & PIM_MLAGF_STATUS_RXED); + bool peer_state = !!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP); + + pim_vxlan_mlag_update(enable, peer_state, router->mlag_role, + router->peerlink_rif_p, &router->local_vtep_ip); + +} + +/**************End of PIM Actions for MLAG State changes******************/ + /********************API to process PIM MLAG Data ************************/ static void pim_mlag_process_mlagd_state_change(struct mlag_status msg) { + bool role_chg = false; + bool state_chg = false; + bool notify_vxlan = false; + struct interface *peerlink_rif_p; char buf[MLAG_ROLE_STRSIZE]; if (PIM_DEBUG_MLAG) @@ -41,6 +489,84 @@ static void pim_mlag_process_mlagd_state_change(struct mlag_status msg) mlag_role2str(msg.my_role, buf, sizeof(buf)), 
(msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING" : "DOWN")); + + if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { + if (PIM_DEBUG_MLAG) + zlog_debug("%s: msg ignored mlagd process state down", + __func__); + return; + } + ++router->mlag_stats.msg.mlag_status_updates; + + /* evaluate the changes first */ + if (router->mlag_role != msg.my_role) { + role_chg = true; + notify_vxlan = true; + router->mlag_role = msg.my_role; + } + + strcpy(router->peerlink_rif, msg.peerlink_rif); + /* XXX - handle the case where we may rx the interface name from the + * MLAG daemon before we get the interface from zebra. + */ + peerlink_rif_p = if_lookup_by_name(router->peerlink_rif, VRF_DEFAULT); + if (router->peerlink_rif_p != peerlink_rif_p) { + router->peerlink_rif_p = peerlink_rif_p; + notify_vxlan = true; + } + + if (msg.peer_state == MLAG_STATE_RUNNING) { + if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) { + state_chg = true; + notify_vxlan = true; + router->mlag_flags |= PIM_MLAGF_PEER_CONN_UP; + } + router->connected_to_mlag = true; + } else { + if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) { + ++router->mlag_stats.peer_session_downs; + state_chg = true; + notify_vxlan = true; + router->mlag_flags &= ~PIM_MLAGF_PEER_CONN_UP; + } + router->connected_to_mlag = false; + } + + /* apply the changes */ + /* when connection to mlagd comes up we hold send mroutes till we have + * rxed the status and had a chance to re-valuate DF state + */ + if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) { + router->mlag_flags |= PIM_MLAGF_STATUS_RXED; + pim_mlag_vxlan_state_update(); + /* on session up re-eval DF status */ + pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up"); + /* replay all the upstream entries to the local MLAG daemon */ + pim_mlag_up_local_replay(); + return; + } + + if (notify_vxlan) + pim_mlag_vxlan_state_update(); + + if (state_chg) { + if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) + /* when a connection goes down the primary takes over + * DF 
role for all entries + */ + pim_mlag_up_local_reeval(true /*mlagd_send*/, + "peer_down"); + else + /* XXX - when session comes up we need to wait for + * PEER_REPLAY_DONE before running re-election on + * local-mlag entries that are missing peer reference + */ + pim_mlag_up_local_reeval(true /*mlagd_send*/, + "peer_up"); + } else if (role_chg) { + /* MLAG role changed without a state change */ + pim_mlag_up_local_reeval(true /*mlagd_send*/, "role_chg"); + } } static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg) @@ -49,37 +575,116 @@ static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg) zlog_debug( "%s: msg dump: peer_frr_state: %s", __func__, (msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN")); + + if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { + if (PIM_DEBUG_MLAG) + zlog_debug("%s: msg ignored mlagd process state down", + __func__); + return; + } + ++router->mlag_stats.msg.peer_zebra_status_updates; + + /* evaluate the changes first */ + if (msg.frr_state == MLAG_FRR_STATE_UP) { + if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) { + router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP; + /* XXX - when peer zebra comes up we need to wait for + * some time to let the peer setup MDTs + * before relinquishing DF status + */ + pim_mlag_up_local_reeval(true /*mlagd_send*/, + "zebra_up"); + } + } else { + if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) { + ++router->mlag_stats.peer_zebra_downs; + router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP; + /* when a peer zebra goes down we assume DF role */ + pim_mlag_up_local_reeval(true /*mlagd_send*/, + "zebra_down"); + } + } } static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg) { + char addr_buf1[INET_ADDRSTRLEN]; + char addr_buf2[INET_ADDRSTRLEN]; + uint32_t local_ip; + + if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { + if (PIM_DEBUG_MLAG) + zlog_debug("%s: msg ignored mlagd process state down", + __func__); + return; + } + + 
++router->mlag_stats.msg.vxlan_updates; + router->anycast_vtep_ip.s_addr = htonl(msg->anycast_ip); + local_ip = htonl(msg->local_ip); + if (router->local_vtep_ip.s_addr != local_ip) { + router->local_vtep_ip.s_addr = local_ip; + pim_mlag_vxlan_state_update(); + } + + if (PIM_DEBUG_MLAG) { + inet_ntop(AF_INET, &router->local_vtep_ip, + addr_buf1, INET_ADDRSTRLEN); + inet_ntop(AF_INET, &router->anycast_vtep_ip, + addr_buf2, INET_ADDRSTRLEN); + + zlog_debug("%s: msg dump: local-ip:%s, anycast-ip:%s", + __func__, addr_buf1, addr_buf2); + } } static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg) { if (PIM_DEBUG_MLAG) { zlog_debug( - "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x cost: %u", - __func__, msg.vrf_name, msg.source_ip, msg.group_ip, - msg.cost_to_rp); + "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x cost: %u", + __func__, msg.vrf_name, msg.source_ip, + msg.group_ip, msg.cost_to_rp); zlog_debug( - "owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s", - msg.owner_id, msg.am_i_dr, msg.am_i_dual_active, - msg.vrf_id, msg.intf_name); + "owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s", + msg.owner_id, msg.am_i_dr, msg.am_i_dual_active, + msg.vrf_id, msg.intf_name); + } + + if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) { + if (PIM_DEBUG_MLAG) + zlog_debug("%s: msg ignored mlagd process state down", + __func__); + return; } + + ++router->mlag_stats.msg.mroute_add_rx; + + pim_mlag_up_peer_add(&msg); } static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg) { if (PIM_DEBUG_MLAG) { zlog_debug( - "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x ", - __func__, msg.vrf_name, msg.source_ip, msg.group_ip); + "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x ", + __func__, msg.vrf_name, msg.source_ip, + msg.group_ip); zlog_debug("owner_id: %d, vrf_id: 0x%x intf_name: %s", - msg.owner_id, msg.vrf_id, msg.intf_name); + msg.owner_id, msg.vrf_id, msg.intf_name); } -} + if (!(router->mlag_flags & 
PIM_MLAGF_LOCAL_CONN_UP)) { + if (PIM_DEBUG_MLAG) + zlog_debug("%s: msg ignored mlagd process state down", + __func__); + return; + } + + ++router->mlag_stats.msg.mroute_del_rx; + + pim_mlag_up_peer_del(&msg); +} int pim_zebra_mlag_handle_msg(struct stream *s, int len) { @@ -179,11 +784,40 @@ int pim_zebra_mlag_process_up(void) return 0; } +static void pim_mlag_param_reset(void) +{ + /* reset the cached params and stats */ + router->mlag_flags &= ~(PIM_MLAGF_STATUS_RXED | + PIM_MLAGF_LOCAL_CONN_UP | + PIM_MLAGF_PEER_CONN_UP | + PIM_MLAGF_PEER_ZEBRA_UP); + router->local_vtep_ip.s_addr = INADDR_ANY; + router->anycast_vtep_ip.s_addr = INADDR_ANY; + router->mlag_role = MLAG_ROLE_NONE; + memset(&router->mlag_stats.msg, 0, sizeof(router->mlag_stats.msg)); + router->peerlink_rif[0] = '\0'; +} + int pim_zebra_mlag_process_down(void) { if (PIM_DEBUG_MLAG) zlog_debug("%s: Received Process-Down from Mlag", __func__); + /* Local CLAG is down, reset peer data and forward the traffic if + * we are DR + */ + if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) + ++router->mlag_stats.peer_session_downs; + if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) + ++router->mlag_stats.peer_zebra_downs; + router->connected_to_mlag = false; + pim_mlag_param_reset(); + /* on mlagd session down re-eval DF status */ + pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_down"); + /* flush all peer references */ + pim_mlag_up_peer_del_all(); + /* notify the vxlan component */ + pim_mlag_vxlan_state_update(); return 0; } @@ -339,6 +973,7 @@ void pim_instance_mlag_terminate(struct pim_instance *pim) void pim_mlag_init(void) { + pim_mlag_param_reset(); router->pim_mlag_intf_cnt = 0; router->connected_to_mlag = false; router->mlag_fifo = stream_fifo_new(); diff --git a/pimd/pim_mlag.h b/pimd/pim_mlag.h index e86fdae78f..dab29cc9a2 100644 --- a/pimd/pim_mlag.h +++ b/pimd/pim_mlag.h @@ -37,4 +37,10 @@ extern void pim_mlag_deregister(void); extern int pim_zebra_mlag_process_up(void); extern int 
pim_zebra_mlag_process_down(void); extern int pim_zebra_mlag_handle_msg(struct stream *msg, int len); +extern void pim_mlag_up_local_add(struct pim_instance *pim, + struct pim_upstream *upstream); +extern void pim_mlag_up_local_del(struct pim_instance *pim, + struct pim_upstream *upstream); +extern bool pim_mlag_up_df_role_update(struct pim_instance *pim, + struct pim_upstream *up, bool is_df, const char *reason); #endif diff --git a/pimd/pim_mroute.c b/pimd/pim_mroute.c index 3459abbc19..4afd05ab76 100644 --- a/pimd/pim_mroute.c +++ b/pimd/pim_mroute.c @@ -262,7 +262,7 @@ static int pim_mroute_msg_wholepkt(int fd, struct interface *ifp, up = pim_upstream_find(pim_ifp->pim, &star); - if (up && PIM_UPSTREAM_FLAG_TEST_SRC_IGMP(up->flags)) { + if (up && PIM_UPSTREAM_FLAG_TEST_CAN_BE_LHR(up->flags)) { up = pim_upstream_add(pim_ifp->pim, &sg, ifp, PIM_UPSTREAM_FLAG_MASK_SRC_LHR, __PRETTY_FUNCTION__, NULL); diff --git a/pimd/pim_nht.c b/pimd/pim_nht.c index 5cb9492ec3..9efeeaee27 100644 --- a/pimd/pim_nht.c +++ b/pimd/pim_nht.c @@ -561,6 +561,13 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, "%s: current nexthop does not have nbr ", __PRETTY_FUNCTION__); } else { + /* update metric even if the upstream + * neighbor stays unchanged + */ + nexthop->mrib_metric_preference = + pnc->distance; + nexthop->mrib_route_metric = + pnc->metric; if (PIM_DEBUG_PIM_NHT) { char src_str[INET_ADDRSTRLEN]; pim_inet4_dump("<addr?>", diff --git a/pimd/pim_rpf.c b/pimd/pim_rpf.c index 24519adb1e..b27374e302 100644 --- a/pimd/pim_rpf.c +++ b/pimd/pim_rpf.c @@ -36,6 +36,7 @@ #include "pim_time.h" #include "pim_nht.h" #include "pim_oil.h" +#include "pim_mlag.h" static struct in_addr pim_rpf_find_rpf_addr(struct pim_upstream *up); @@ -194,6 +195,32 @@ static int nexthop_mismatch(const struct pim_nexthop *nh1, || (nh1->mrib_route_metric != nh2->mrib_route_metric); } +static void pim_rpf_cost_change(struct pim_instance *pim, + struct pim_upstream *up, uint32_t old_cost) +{ + struct 
pim_rpf *rpf = &up->rpf; + uint32_t new_cost; + + new_cost = pim_up_mlag_local_cost(up); + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: Cost_to_rp of upstream-%s changed to:%u, from:%u", + __func__, up->sg_str, new_cost, old_cost); + + if (old_cost == new_cost) + return; + + /* Cost changed, it might Impact MLAG DF election, update */ + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: Cost_to_rp of upstream-%s changed to:%u", + __func__, up->sg_str, + rpf->source_nexthop.mrib_route_metric); + + if (pim_up_mlag_is_local(up)) + pim_mlag_up_local_add(pim, up); +} + enum pim_rpf_result pim_rpf_update(struct pim_instance *pim, struct pim_upstream *up, struct pim_rpf *old, const char *caller) @@ -203,6 +230,7 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim, struct prefix nht_p; struct prefix src, grp; bool neigh_needed = true; + uint32_t saved_mrib_route_metric; if (PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) return PIM_RPF_OK; @@ -215,6 +243,7 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim, saved.source_nexthop = rpf->source_nexthop; saved.rpf_addr = rpf->rpf_addr; + saved_mrib_route_metric = pim_up_mlag_local_cost(up); if (old) { old->source_nexthop = saved.source_nexthop; old->rpf_addr = saved.rpf_addr; @@ -236,8 +265,12 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim, neigh_needed = false; pim_find_or_track_nexthop(pim, &nht_p, up, NULL, false, NULL); if (!pim_ecmp_nexthop_lookup(pim, &rpf->source_nexthop, &src, &grp, - neigh_needed)) + neigh_needed)) { + /* Route is Deleted in Zebra, reset the stored NH data */ + pim_upstream_rpf_clear(pim, up); + pim_rpf_cost_change(pim, up, saved_mrib_route_metric); return PIM_RPF_FAILURE; + } rpf->rpf_addr.family = AF_INET; rpf->rpf_addr.u.prefix4 = pim_rpf_find_rpf_addr(up); @@ -290,10 +323,18 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim, if (saved.rpf_addr.u.prefix4.s_addr != rpf->rpf_addr.u.prefix4.s_addr || saved.source_nexthop .interface != rpf->source_nexthop.interface) 
{ - + pim_rpf_cost_change(pim, up, saved_mrib_route_metric); return PIM_RPF_CHANGED; } + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s(%s): Cost_to_rp of upstream-%s changed to:%u", + __func__, caller, up->sg_str, + rpf->source_nexthop.mrib_route_metric); + + pim_rpf_cost_change(pim, up, saved_mrib_route_metric); + return PIM_RPF_OK; } diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index c899e403c8..444ab938f2 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -52,6 +52,7 @@ #include "pim_nht.h" #include "pim_ssm.h" #include "pim_vxlan.h" +#include "pim_mlag.h" static void join_timer_stop(struct pim_upstream *up); static void @@ -193,6 +194,9 @@ struct pim_upstream *pim_upstream_del(struct pim_instance *pim, zlog_debug("pim_upstream free vrf:%s %s flags 0x%x", pim->vrf->name, up->sg_str, up->flags); + if (pim_up_mlag_is_local(up)) + pim_mlag_up_local_del(pim, up); + THREAD_OFF(up->t_ka_timer); THREAD_OFF(up->t_rs_timer); THREAD_OFF(up->t_msdp_reg_timer); @@ -883,6 +887,26 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, } } + /* If (S, G) inherit the MLAG_VXLAN from the parent + * (*, G) entry. 
+ */ + if ((up->sg.src.s_addr != INADDR_ANY) && + up->parent && + PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->parent->flags) && + !PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN_ORIG(up->flags)) { + PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(up->flags); + if (PIM_DEBUG_VXLAN) + zlog_debug("upstream %s inherited mlag vxlan flag from parent", + up->sg_str); + } + + /* send the entry to the MLAG peer */ + /* XXX - duplicate send is possible here if pim_rpf_update + * successfully resolved the nexthop + */ + if (pim_up_mlag_is_local(up)) + pim_mlag_up_local_add(pim, up); + if (PIM_DEBUG_PIM_TRACE) { zlog_debug( "%s: Created Upstream %s upstream_addr %s ref count %d increment", @@ -893,6 +917,30 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, return up; } +uint32_t pim_up_mlag_local_cost(struct pim_upstream *up) +{ + if (!(pim_up_mlag_is_local(up))) + return router->infinite_assert_metric.route_metric; + + if ((up->rpf.source_nexthop.interface == + up->pim->vxlan.peerlink_rif) && + (up->rpf.source_nexthop.mrib_route_metric < + (router->infinite_assert_metric.route_metric - + PIM_UPSTREAM_MLAG_PEERLINK_PLUS_METRIC))) + return up->rpf.source_nexthop.mrib_route_metric + + PIM_UPSTREAM_MLAG_PEERLINK_PLUS_METRIC; + + return up->rpf.source_nexthop.mrib_route_metric; +} + +uint32_t pim_up_mlag_peer_cost(struct pim_upstream *up) +{ + if (!(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)) + return router->infinite_assert_metric.route_metric; + + return up->mlag.peer_mrib_metric; +} + struct pim_upstream *pim_upstream_find(struct pim_instance *pim, struct prefix_sg *sg) { @@ -916,6 +964,15 @@ struct pim_upstream *pim_upstream_find_or_add(struct prefix_sg *sg, void pim_upstream_ref(struct pim_upstream *up, int flags, const char *name) { + /* if a local MLAG reference is being created we need to send the mroute + * to the peer + */ + if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags) && + PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(flags)) { + PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(up->flags); + 
pim_mlag_up_local_add(up->pim, up); + } + /* when we go from non-FHR to FHR we need to re-eval traffic * forwarding path */ @@ -1950,8 +2007,9 @@ static void pim_upstream_sg_running(void *arg) "source reference created on kat restart %s[%s]", up->sg_str, pim->vrf->name); - pim_upstream_ref(up, PIM_UPSTREAM_FLAG_MASK_SRC_STREAM, - __PRETTY_FUNCTION__); + pim_upstream_ref(up, + PIM_UPSTREAM_FLAG_MASK_SRC_STREAM, + __PRETTY_FUNCTION__); PIM_UPSTREAM_FLAG_SET_SRC_STREAM(up->flags); pim_upstream_fhr_kat_start(up); } @@ -1974,7 +2032,7 @@ void pim_upstream_add_lhr_star_pimreg(struct pim_instance *pim) if (up->sg.src.s_addr != INADDR_ANY) continue; - if (!PIM_UPSTREAM_FLAG_TEST_SRC_IGMP(up->flags)) + if (!PIM_UPSTREAM_FLAG_TEST_CAN_BE_LHR(up->flags)) continue; pim_channel_add_oif(up->channel_oil, pim->regiface, @@ -2021,7 +2079,7 @@ void pim_upstream_remove_lhr_star_pimreg(struct pim_instance *pim, if (up->sg.src.s_addr != INADDR_ANY) continue; - if (!PIM_UPSTREAM_FLAG_TEST_SRC_IGMP(up->flags)) + if (!PIM_UPSTREAM_FLAG_TEST_CAN_BE_LHR(up->flags)) continue; if (!nlist) { diff --git a/pimd/pim_upstream.h b/pimd/pim_upstream.h index 1eb2052bb3..c717c467dc 100644 --- a/pimd/pim_upstream.h +++ b/pimd/pim_upstream.h @@ -74,6 +74,8 @@ * blackholing the traffic pulled down to the LHR. */ #define PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF (1 << 17) +/* MLAG mroute rxed from the peer MLAG switch */ +#define PIM_UPSTREAM_FLAG_MASK_MLAG_PEER (1 << 18) /* * We are creating a non-joined upstream data structure * for this S,G as that we want to have a channel oil @@ -86,6 +88,12 @@ * This flag is only relevant for (S,G) entries. */ #define PIM_UPSTREAM_FLAG_MASK_USE_RPT (1 << 20) +/* PIM Syncs upstream entries to peer Nodes via MLAG in 2 cases. + * one is to support plain PIM Redundancy and another one is to support + * PIM REdundancy. 
+ */ +#define PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE (1 << 21) + #define PIM_UPSTREAM_FLAG_ALL 0xFFFFFFFF @@ -108,8 +116,11 @@ #define PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN(flags) ((flags) & (PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG | PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM)) #define PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN) #define PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF) +#define PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_PEER) #define PIM_UPSTREAM_FLAG_TEST_SRC_NOCACHE(flags) ((flags) &PIM_UPSTREAM_FLAG_MASK_SRC_NOCACHE) #define PIM_UPSTREAM_FLAG_TEST_USE_RPT(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_USE_RPT) +#define PIM_UPSTREAM_FLAG_TEST_CAN_BE_LHR(flags) ((flags) & (PIM_UPSTREAM_FLAG_MASK_SRC_IGMP | PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM)) +#define PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(flags) ((flags)&PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE) #define PIM_UPSTREAM_FLAG_SET_DR_JOIN_DESIRED(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED) #define PIM_UPSTREAM_FLAG_SET_DR_JOIN_DESIRED_UPDATED(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED_UPDATED) @@ -129,7 +140,9 @@ #define PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) #define PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN) #define PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF) +#define PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_PEER) #define PIM_UPSTREAM_FLAG_SET_USE_RPT(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_USE_RPT) +#define PIM_UPSTREAM_FLAG_SET_MLAG_INTERFACE(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE) #define PIM_UPSTREAM_FLAG_UNSET_DR_JOIN_DESIRED(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED) #define PIM_UPSTREAM_FLAG_UNSET_DR_JOIN_DESIRED_UPDATED(flags) ((flags) &= 
~PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED_UPDATED) @@ -149,8 +162,16 @@ #define PIM_UPSTREAM_FLAG_UNSET_SRC_VXLAN_TERM(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) #define PIM_UPSTREAM_FLAG_UNSET_MLAG_VXLAN(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN) #define PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF) +#define PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_PEER) #define PIM_UPSTREAM_FLAG_UNSET_SRC_NOCACHE(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_NOCACHE) #define PIM_UPSTREAM_FLAG_UNSET_USE_RPT(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_USE_RPT) +#define PIM_UPSTREAM_FLAG_UNSET_MLAG_INTERFACE(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE) + +/* The RPF cost is incremented by 10 if the RPF interface is the peerlink-rif. + * This is used to force the MLAG switch with the lowest cost to the RPF + * to become the MLAG DF. + */ +#define PIM_UPSTREAM_MLAG_PEERLINK_PLUS_METRIC 10 enum pim_upstream_state { PIM_UPSTREAM_NOTJOINED, @@ -169,6 +190,13 @@ enum pim_upstream_sptbit { PIM_UPSTREAM_SPTBIT_TRUE }; +struct pim_up_mlag { + /* MRIB.metric(S) from the peer switch. This is used for DF election + * and switch with the lowest cost wins. 
+ */ + uint32_t peer_mrib_metric; +}; + PREDECL_RBTREE_UNIQ(rb_pim_upstream); /* Upstream (S,G) channel in Joined state @@ -218,6 +246,8 @@ struct pim_upstream { struct pim_rpf rpf; + struct pim_up_mlag mlag; + struct thread *t_join_timer; /* @@ -249,6 +279,14 @@ static inline bool pim_upstream_is_kat_running(struct pim_upstream *up) return (up->t_ka_timer != NULL); } +static inline bool pim_up_mlag_is_local(struct pim_upstream *up) +{ + /* XXX: extend this to also return true if the channel-oil has + * any AA devices + */ + return (up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN); +} + struct pim_upstream *pim_upstream_find(struct pim_instance *pim, struct prefix_sg *sg); struct pim_upstream *pim_upstream_find_or_add(struct prefix_sg *sg, @@ -259,7 +297,8 @@ struct pim_upstream *pim_upstream_add(struct pim_instance *pim, struct interface *ifp, int flags, const char *name, struct pim_ifchannel *ch); -void pim_upstream_ref(struct pim_upstream *up, int flags, const char *name); +void pim_upstream_ref(struct pim_upstream *up, + int flags, const char *name); struct pim_upstream *pim_upstream_del(struct pim_instance *pim, struct pim_upstream *up, const char *name); @@ -350,5 +389,7 @@ void pim_upstream_fill_static_iif(struct pim_upstream *up, struct interface *incoming); void pim_upstream_update_use_rpt(struct pim_upstream *up, bool update_mroute); +uint32_t pim_up_mlag_local_cost(struct pim_upstream *up); +uint32_t pim_up_mlag_peer_cost(struct pim_upstream *up); void pim_upstream_reeval_use_rpt(struct pim_instance *pim); #endif /* PIM_UPSTREAM_H */ diff --git a/pimd/pim_vty.c b/pimd/pim_vty.c index c48ec373f8..b5a5089ae7 100644 --- a/pimd/pim_vty.c +++ b/pimd/pim_vty.c @@ -242,8 +242,6 @@ int pim_global_config_write_worker(struct pim_instance *pim, struct vty *vty) } } - pim_vxlan_config_write(vty, spaces, &writes); - return writes; } diff --git a/pimd/pim_vxlan.c b/pimd/pim_vxlan.c index fc34f3f600..1de0dda9da 100644 --- a/pimd/pim_vxlan.c +++ b/pimd/pim_vxlan.c @@ 
-38,6 +38,7 @@ #include "pim_nht.h" #include "pim_zebra.h" #include "pim_vxlan.h" +#include "pim_mlag.h" /* pim-vxlan global info */ struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info; @@ -476,13 +477,14 @@ static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg *vxlan_sg) static void pim_vxlan_orig_mr_iif_update(struct hash_backet *backet, void *arg) { - struct interface *ifp = (struct interface *)arg; + struct interface *ifp; struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; struct interface *old_iif = vxlan_sg->iif; if (!pim_vxlan_is_orig_mroute(vxlan_sg)) return; + ifp = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim); if (PIM_DEBUG_VXLAN) zlog_debug("vxlan SG %s iif changed from %s to %s", vxlan_sg->sg_str, @@ -529,8 +531,15 @@ static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg *vxlan_sg) vxlan_sg->sg_str, vxlan_sg->term_oif->name); if (pim_ifchannel_local_membership_add(vxlan_sg->term_oif, - &vxlan_sg->sg)) { + &vxlan_sg->sg, true /*is_vxlan */)) { vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED; + /* update the inherited OIL */ + /* XXX - I don't see the inherited OIL updated when a local + * member is added. And that probably needs to be fixed. Till + * that happens we do a force update on the inherited OIL + * here. + */ + pim_upstream_inherited_olist(vxlan_sg->pim, vxlan_sg->up); } else { zlog_warn("vxlan SG %s term-oif %s add failed", vxlan_sg->sg_str, vxlan_sg->term_oif->name); @@ -548,6 +557,43 @@ static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg *vxlan_sg) vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED; pim_ifchannel_local_membership_del(vxlan_sg->term_oif, &vxlan_sg->sg); + /* update the inherited OIL */ + /* XXX - I don't see the inherited OIL updated when a local member + * is deleted. And that probably needs to be fixed. Till that happens + * we do a force update on the inherited OIL here. 
+ */ + pim_upstream_inherited_olist(vxlan_sg->pim, vxlan_sg->up); +} + +static void pim_vxlan_update_sg_entry_mlag(struct pim_instance *pim, + struct pim_upstream *up, bool inherit) +{ + bool is_df = true; + + if (inherit && up->parent && + PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->parent->flags) && + PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->parent->flags)) + is_df = false; + + pim_mlag_up_df_role_update(pim, up, is_df, "inherit_xg_df"); +} + +/* We run MLAG DF election only on mroutes that have the termination + * device ipmr-lo in the immediate OIL. This is only (*, G) entries at the + * moment. For (S, G) entries that (with ipmr-lo in the inherited OIL) we + * inherit the DF role from the (*, G) entry. + */ +void pim_vxlan_inherit_mlag_flags(struct pim_instance *pim, + struct pim_upstream *up, bool inherit) +{ + struct listnode *listnode; + struct pim_upstream *child; + + for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, + child)) { + pim_vxlan_update_sg_entry_mlag(pim, + child, true /* inherit */); + } } static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg) @@ -576,7 +622,11 @@ static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg) if (!up) { zlog_warn("vxlan SG %s term mroute-up add failed", vxlan_sg->sg_str); + return; } + + /* update existing SG entries with the parent's MLAG flag */ + pim_vxlan_inherit_mlag_flags(vxlan_sg->pim, up, true /*enable*/); } static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg) @@ -591,10 +641,13 @@ static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg) vxlan_sg->sg_str); vxlan_sg->up = NULL; if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) { + /* update SG entries that are inheriting from this XG entry */ + pim_vxlan_inherit_mlag_flags(vxlan_sg->pim, up, + false /*enable*/); /* clear out all the vxlan related flags */ up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM | PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN); - + pim_mlag_up_local_del(vxlan_sg->pim, up); 
pim_upstream_del(vxlan_sg->pim, up, __PRETTY_FUNCTION__); } @@ -660,6 +713,14 @@ static struct pim_vxlan_sg *pim_vxlan_sg_new(struct pim_instance *pim, vxlan_sg = hash_get(pim->vxlan.sg_hash, vxlan_sg, hash_alloc_intern); + /* we register with the MLAG daemon on the first VxLAN SG and never + * de-register during the lifetime of pimd + */ + if (pim->vxlan.sg_hash->count == 1) { + vxlan_mlag.flags |= PIM_VXLAN_MLAGF_DO_REG; + pim_mlag_register(); + } + return vxlan_sg; } @@ -717,12 +778,18 @@ void pim_vxlan_sg_del(struct pim_instance *pim, struct prefix_sg *sg) } /******************************* MLAG handling *******************************/ +bool pim_vxlan_do_mlag_reg(void) +{ + return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_DO_REG); +} + /* The peerlink sub-interface is added as an OIF to the origination-mroute. * This is done to send a copy of the multicast-vxlan encapsulated traffic * to the MLAG peer which may mroute it over the underlay if there are any * interested receivers. */ -static void pim_vxlan_sg_peerlink_update(struct hash_backet *backet, void *arg) +static void pim_vxlan_sg_peerlink_oif_update(struct hash_backet *backet, + void *arg) { struct interface *new_oif = (struct interface *)arg; struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; @@ -761,8 +828,6 @@ void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role, struct in_addr *reg_addr) { struct pim_instance *pim; - struct interface *old_oif; - struct interface *new_oif; char addr_buf[INET_ADDRSTRLEN]; struct pim_interface *pim_ifp = NULL; @@ -782,8 +847,6 @@ void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role, */ pim = pim_get_pim_instance(VRF_DEFAULT); - old_oif = pim_vxlan_orig_mr_oif_get(pim); - if (enable) vxlan_mlag.flags |= PIM_VXLAN_MLAGF_ENABLED; else @@ -804,35 +867,9 @@ void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role, pim_vxlan_set_peerlink_rif(pim, peerlink_rif); else pim_vxlan_set_peerlink_rif(pim, NULL); - - 
new_oif = pim_vxlan_orig_mr_oif_get(pim); - if (old_oif != new_oif) - hash_iterate(pim->vxlan.sg_hash, pim_vxlan_sg_peerlink_update, - new_oif); } /****************************** misc callbacks *******************************/ -void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes) -{ - char addr_buf[INET_ADDRSTRLEN]; - - if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) && - vxlan_mlag.peerlink_rif) { - - inet_ntop(AF_INET, &vxlan_mlag.reg_addr, - addr_buf, sizeof(addr_buf)); - vty_out(vty, - "%sip pim mlag %s role %s state %s addr %s\n", - spaces, - vxlan_mlag.peerlink_rif->name, - (vxlan_mlag.role == PIM_VXLAN_MLAG_ROLE_PRIMARY) ? - "primary":"secondary", - vxlan_mlag.peer_state ? "up" : "down", - addr_buf); - *writes += 1; - } -} - static void pim_vxlan_set_default_iif(struct pim_instance *pim, struct interface *ifp) { @@ -864,13 +901,72 @@ static void pim_vxlan_set_default_iif(struct pim_instance *pim, */ if (pim->vxlan.sg_hash) hash_iterate(pim->vxlan.sg_hash, - pim_vxlan_orig_mr_iif_update, ifp); + pim_vxlan_orig_mr_iif_update, NULL); +} + +static void pim_vxlan_up_cost_update(struct pim_instance *pim, + struct pim_upstream *up, + struct interface *old_peerlink_rif) +{ + if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags)) + return; + + if (up->rpf.source_nexthop.interface && + ((up->rpf.source_nexthop.interface == + pim->vxlan.peerlink_rif) || + (up->rpf.source_nexthop.interface == + old_peerlink_rif))) { + if (PIM_DEBUG_VXLAN) + zlog_debug("RPF cost adjust for %s on peerlink-rif (old: %s, new: %s) change", + up->sg_str, + old_peerlink_rif ? + old_peerlink_rif->name : "-", + pim->vxlan.peerlink_rif ? 
+ pim->vxlan.peerlink_rif->name : "-"); + pim_mlag_up_local_add(pim, up); + } +} + +static void pim_vxlan_term_mr_cost_update(struct hash_backet *backet, + void *arg) +{ + struct interface *old_peerlink_rif = (struct interface *)arg; + struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; + struct pim_upstream *up; + struct listnode *listnode; + struct pim_upstream *child; + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + return; + + /* Lookup all XG and SG entries with RPF-interface peerlink_rif */ + up = vxlan_sg->up; + if (!up) + return; + + pim_vxlan_up_cost_update(vxlan_sg->pim, up, + old_peerlink_rif); + + for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, + child)) + pim_vxlan_up_cost_update(vxlan_sg->pim, child, + old_peerlink_rif); +} + +static void pim_vxlan_sg_peerlink_rif_update(struct hash_backet *backet, + void *arg) +{ + pim_vxlan_orig_mr_iif_update(backet, NULL); + pim_vxlan_term_mr_cost_update(backet, arg); } static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim, struct interface *ifp) { struct interface *old_iif; + struct interface *new_iif; + struct interface *old_oif; + struct interface *new_oif; if (pim->vxlan.peerlink_rif == ifp) return; @@ -882,22 +978,77 @@ static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim, ifp ? ifp->name : "-"); old_iif = pim_vxlan_orig_mr_iif_get(pim); + old_oif = pim_vxlan_orig_mr_oif_get(pim); pim->vxlan.peerlink_rif = ifp; - ifp = pim_vxlan_orig_mr_iif_get(pim); - if (old_iif == ifp) + + new_iif = pim_vxlan_orig_mr_iif_get(pim); + if (old_iif != new_iif) { + if (PIM_DEBUG_VXLAN) + zlog_debug("%s: vxlan orig iif changed from %s to %s", + __PRETTY_FUNCTION__, + old_iif ? old_iif->name : "-", + new_iif ? 
new_iif->name : "-"); + + /* add/del upstream entries for the existing vxlan SG when the + * interface becomes available + */ + if (pim->vxlan.sg_hash) + hash_iterate(pim->vxlan.sg_hash, + pim_vxlan_sg_peerlink_rif_update, + old_iif); + } + + new_oif = pim_vxlan_orig_mr_oif_get(pim); + if (old_oif != new_oif) { + if (PIM_DEBUG_VXLAN) + zlog_debug("%s: vxlan orig oif changed from %s to %s", + __PRETTY_FUNCTION__, + old_oif ? old_oif->name : "-", + new_oif ? new_oif->name : "-"); + if (pim->vxlan.sg_hash) + hash_iterate(pim->vxlan.sg_hash, + pim_vxlan_sg_peerlink_oif_update, + new_oif); + } +} + +static void pim_vxlan_term_mr_oif_update(struct hash_backet *backet, void *arg) +{ + struct interface *ifp = (struct interface *)arg; + struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; + + if (pim_vxlan_is_orig_mroute(vxlan_sg)) + return; + + if (vxlan_sg->term_oif == ifp) return; if (PIM_DEBUG_VXLAN) - zlog_debug("%s: vxlan orig iif changed from %s to %s", - __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-", + zlog_debug("vxlan SG %s term oif changed from %s to %s", + vxlan_sg->sg_str, + vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-", ifp ? ifp->name : "-"); - /* add/del upstream entries for the existing vxlan SG when the - * interface becomes available - */ + pim_vxlan_term_mr_del(vxlan_sg); + vxlan_sg->term_oif = ifp; + pim_vxlan_term_mr_add(vxlan_sg); +} + +static void pim_vxlan_term_oif_update(struct pim_instance *pim, + struct interface *ifp) +{ + if (pim->vxlan.term_if == ifp) + return; + + if (PIM_DEBUG_VXLAN) + zlog_debug("vxlan term oif changed from %s to %s", + pim->vxlan.term_if ? pim->vxlan.term_if->name : "-", + ifp ? 
ifp->name : "-"); + + pim->vxlan.term_if = ifp; if (pim->vxlan.sg_hash) hash_iterate(pim->vxlan.sg_hash, - pim_vxlan_orig_mr_iif_update, ifp); + pim_vxlan_term_mr_oif_update, ifp); } void pim_vxlan_add_vif(struct interface *ifp) @@ -914,6 +1065,9 @@ void pim_vxlan_add_vif(struct interface *ifp) if (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED && (ifp == vxlan_mlag.peerlink_rif)) pim_vxlan_set_peerlink_rif(pim, ifp); + + if (pim->vxlan.term_if_cfg == ifp) + pim_vxlan_term_oif_update(pim, ifp); } void pim_vxlan_del_vif(struct interface *ifp) @@ -929,76 +1083,56 @@ void pim_vxlan_del_vif(struct interface *ifp) if (pim->vxlan.peerlink_rif == ifp) pim_vxlan_set_peerlink_rif(pim, NULL); -} -static void pim_vxlan_term_mr_oif_update(struct hash_backet *backet, void *arg) -{ - struct interface *ifp = (struct interface *)arg; - struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data; - - if (pim_vxlan_is_orig_mroute(vxlan_sg)) - return; - - if (vxlan_sg->term_oif == ifp) - return; - - if (PIM_DEBUG_VXLAN) - zlog_debug("vxlan SG %s term oif changed from %s to %s", - vxlan_sg->sg_str, - vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-", - ifp ? ifp->name : "-"); - - pim_vxlan_term_mr_del(vxlan_sg); - vxlan_sg->term_oif = ifp; - pim_vxlan_term_mr_add(vxlan_sg); + if (pim->vxlan.term_if == ifp) + pim_vxlan_term_oif_update(pim, NULL); } +/* enable pim implicitly on the termination device add */ void pim_vxlan_add_term_dev(struct pim_instance *pim, struct interface *ifp) { struct pim_interface *pim_ifp; - if (pim->vxlan.term_if == ifp) + if (pim->vxlan.term_if_cfg == ifp) return; if (PIM_DEBUG_VXLAN) - zlog_debug("vxlan term oif changed from %s to %s", - pim->vxlan.term_if ? pim->vxlan.term_if->name : "-", - ifp->name); + zlog_debug("vxlan term oif cfg changed from %s to %s", + pim->vxlan.term_if_cfg ? 
+ pim->vxlan.term_if_cfg->name : "-", + ifp->name); + + pim->vxlan.term_if_cfg = ifp; /* enable pim on the term ifp */ pim_ifp = (struct pim_interface *)ifp->info; if (pim_ifp) { PIM_IF_DO_PIM(pim_ifp->options); + /* ifp is already oper up; activate it as a term dev */ + if (pim_ifp->mroute_vif_index >= 0) + pim_vxlan_term_oif_update(pim, ifp); } else { - pim_ifp = pim_if_new(ifp, false /*igmp*/, true /*pim*/, - false /*pimreg*/, true /*vxlan_term*/); - /* ensure that pimreg existss before using the newly created + /* ensure that pimreg exists before using the newly created * vxlan termination device */ pim_if_create_pimreg(pim); + pim_ifp = pim_if_new(ifp, false /*igmp*/, true /*pim*/, + false /*pimreg*/, true /*vxlan_term*/); } - - pim->vxlan.term_if = ifp; - - if (pim->vxlan.sg_hash) - hash_iterate(pim_ifp->pim->vxlan.sg_hash, - pim_vxlan_term_mr_oif_update, ifp); } +/* disable pim implicitly, if needed, on the termination device deletion */ void pim_vxlan_del_term_dev(struct pim_instance *pim) { - struct interface *ifp = pim->vxlan.term_if; + struct interface *ifp = pim->vxlan.term_if_cfg; struct pim_interface *pim_ifp; if (PIM_DEBUG_VXLAN) - zlog_debug("vxlan term oif changed from %s to -", ifp->name); + zlog_debug("vxlan term oif cfg changed from %s to -", + ifp->name); - pim->vxlan.term_if = NULL; - - if (pim->vxlan.sg_hash) - hash_iterate(pim->vxlan.sg_hash, - pim_vxlan_term_mr_oif_update, NULL); + pim->vxlan.term_if_cfg = NULL; pim_ifp = (struct pim_interface *)ifp->info; if (pim_ifp) { @@ -1006,7 +1140,6 @@ void pim_vxlan_del_term_dev(struct pim_instance *pim) if (!PIM_IF_TEST_IGMP(pim_ifp->options)) pim_if_delete(ifp); } - } void pim_vxlan_init(struct pim_instance *pim) diff --git a/pimd/pim_vxlan.h b/pimd/pim_vxlan.h index c6507a474c..198d1c3281 100644 --- a/pimd/pim_vxlan.h +++ b/pimd/pim_vxlan.h @@ -66,17 +66,14 @@ struct pim_vxlan_sg { enum pim_vxlan_mlag_flags { PIM_VXLAN_MLAGF_NONE = 0, - PIM_VXLAN_MLAGF_ENABLED = (1 << 0) -}; - -enum 
pim_vxlan_mlag_role { - PIM_VXLAN_MLAG_ROLE_SECONDARY = 0, - PIM_VXLAN_MLAG_ROLE_PRIMARY + PIM_VXLAN_MLAGF_ENABLED = (1 << 0), + PIM_VXLAN_MLAGF_DO_REG = (1 << 1) }; struct pim_vxlan_mlag { enum pim_vxlan_mlag_flags flags; - enum pim_vxlan_mlag_role role; + /* XXX - remove this variable from here */ + int role; bool peer_state; /* routed interface setup on top of MLAG peerlink */ struct interface *peerlink_rif; @@ -122,6 +119,12 @@ static inline bool pim_vxlan_is_local_sip(struct pim_upstream *up) if_is_loopback_or_vrf(up->rpf.source_nexthop.interface); } +static inline bool pim_vxlan_is_term_dev_cfg(struct pim_instance *pim, + struct interface *ifp) +{ + return pim->vxlan.term_if_cfg == ifp; +} + extern struct pim_vxlan *pim_vxlan_p; extern struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim, struct prefix_sg *sg); @@ -141,6 +144,8 @@ extern bool pim_vxlan_get_register_src(struct pim_instance *pim, extern void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role, struct interface *peerlink_rif, struct in_addr *reg_addr); -extern void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes); +extern bool pim_vxlan_do_mlag_reg(void); +extern void pim_vxlan_inherit_mlag_flags(struct pim_instance *pim, + struct pim_upstream *up, bool inherit); #endif /* PIM_VXLAN_H */ diff --git a/pimd/pim_vxlan_instance.h b/pimd/pim_vxlan_instance.h index 3f99483fbe..5b35bcbeaa 100644 --- a/pimd/pim_vxlan_instance.h +++ b/pimd/pim_vxlan_instance.h @@ -36,6 +36,7 @@ struct pim_vxlan_instance { /* device used by the dataplane to terminate multicast encapsulated * vxlan traffic */ + struct interface *term_if_cfg; struct interface *term_if; }; diff --git a/pimd/pim_zebra.c b/pimd/pim_zebra.c index 06507b1f4c..baa6216df2 100644 --- a/pimd/pim_zebra.c +++ b/pimd/pim_zebra.c @@ -452,7 +452,7 @@ static void pim_zebra_connected(struct zclient *zclient) static void pim_zebra_capabilities(struct zclient_capabilities *cap) { - router->role = cap->role; + 
router->mlag_role = cap->role; } void pim_zebra_init(void) @@ -547,7 +547,8 @@ static void igmp_source_forward_reevaluate_one(struct pim_instance *pim, "local membership add for %s as G is now ASM", pim_str_sg_dump(&sg)); pim_ifchannel_local_membership_add( - group->group_igmp_sock->interface, &sg); + group->group_igmp_sock->interface, &sg, + false /*is_vxlan*/); } } } @@ -765,7 +766,8 @@ void igmp_source_forward_start(struct pim_instance *pim, per-interface (S,G) state. */ if (!pim_ifchannel_local_membership_add( - group->group_igmp_sock->interface, &sg)) { + group->group_igmp_sock->interface, &sg, + false /*is_vxlan*/)) { if (PIM_DEBUG_MROUTE) zlog_warn("%s: Failure to add local membership for %s", __PRETTY_FUNCTION__, pim_str_sg_dump(&sg)); diff --git a/tests/topotests/all-protocol-startup/r1/show_bgp_ipv6_summary.ref b/tests/topotests/all-protocol-startup/r1/show_bgp_ipv6_summary.ref index 85388c738d..3b140e3698 100644 --- a/tests/topotests/all-protocol-startup/r1/show_bgp_ipv6_summary.ref +++ b/tests/topotests/all-protocol-startup/r1/show_bgp_ipv6_summary.ref @@ -3,6 +3,6 @@ BGP table version 1 RIB entries 1, using XXXX bytes of memory Peers 2, using XXXX KiB of memory -Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd -fc00:0:0:8::1000 4 100 0 0 0 0 0 never Active -fc00:0:0:8::2000 4 200 0 0 0 0 0 never Active +Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd +fc00:0:0:8::1000 4 100 0 0 0 0 0 never Active +fc00:0:0:8::2000 4 200 0 0 0 0 0 never Active diff --git a/tests/topotests/all-protocol-startup/r1/show_ip_bgp_summary.ref b/tests/topotests/all-protocol-startup/r1/show_ip_bgp_summary.ref index 4f0ac1c910..7a246b1149 100644 --- a/tests/topotests/all-protocol-startup/r1/show_ip_bgp_summary.ref +++ b/tests/topotests/all-protocol-startup/r1/show_ip_bgp_summary.ref @@ -3,8 +3,8 @@ BGP table version 1 RIB entries 1, using XXXX bytes of memory Peers 4, using XXXX KiB of memory -Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ 
Up/Down State/PfxRcd -192.168.7.10 4 100 0 0 0 0 0 never Active -192.168.7.20 4 200 0 0 0 0 0 never Active -fc00:0:0:8::1000 4 100 0 0 0 0 0 never Active -fc00:0:0:8::2000 4 200 0 0 0 0 0 never Active +Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd +192.168.7.10 4 100 0 0 0 0 0 never Active +192.168.7.20 4 200 0 0 0 0 0 never Active +fc00:0:0:8::1000 4 100 0 0 0 0 0 never Active +fc00:0:0:8::2000 4 200 0 0 0 0 0 never Active diff --git a/tests/topotests/evpn-pim-1/host1/bgpd.conf b/tests/topotests/evpn-pim-1/host1/bgpd.conf new file mode 100644 index 0000000000..cdf4cb4feb --- /dev/null +++ b/tests/topotests/evpn-pim-1/host1/bgpd.conf @@ -0,0 +1 @@ +! diff --git a/tests/topotests/evpn-pim-1/host1/pimd.conf b/tests/topotests/evpn-pim-1/host1/pimd.conf new file mode 100644 index 0000000000..63a44c1333 --- /dev/null +++ b/tests/topotests/evpn-pim-1/host1/pimd.conf @@ -0,0 +1,4 @@ +int lo +! + + diff --git a/tests/topotests/evpn-pim-1/host1/zebra.conf b/tests/topotests/evpn-pim-1/host1/zebra.conf new file mode 100644 index 0000000000..45ad031017 --- /dev/null +++ b/tests/topotests/evpn-pim-1/host1/zebra.conf @@ -0,0 +1,5 @@ +int host1-eth0 + ip addr 192.168.3.4/24 + +int lo + ip addr 192.168.100.4/32 diff --git a/tests/topotests/evpn-pim-1/host2/bgpd.conf b/tests/topotests/evpn-pim-1/host2/bgpd.conf new file mode 100644 index 0000000000..cdf4cb4feb --- /dev/null +++ b/tests/topotests/evpn-pim-1/host2/bgpd.conf @@ -0,0 +1 @@ +! diff --git a/tests/topotests/evpn-pim-1/host2/pimd.conf b/tests/topotests/evpn-pim-1/host2/pimd.conf new file mode 100644 index 0000000000..63a44c1333 --- /dev/null +++ b/tests/topotests/evpn-pim-1/host2/pimd.conf @@ -0,0 +1,4 @@ +int lo +! 
+ + diff --git a/tests/topotests/evpn-pim-1/host2/zebra.conf b/tests/topotests/evpn-pim-1/host2/zebra.conf new file mode 100644 index 0000000000..bfae53017f --- /dev/null +++ b/tests/topotests/evpn-pim-1/host2/zebra.conf @@ -0,0 +1,5 @@ +int host-eth0 + ip addr 192.168.4.5/24 + +int lo + ip addr 192.168.100.5/32 diff --git a/tests/topotests/evpn-pim-1/leaf1/bgpd.conf b/tests/topotests/evpn-pim-1/leaf1/bgpd.conf new file mode 100644 index 0000000000..33d34db677 --- /dev/null +++ b/tests/topotests/evpn-pim-1/leaf1/bgpd.conf @@ -0,0 +1,9 @@ + +router bgp 65002 + neighbor 192.168.1.1 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + ! +!
\ No newline at end of file diff --git a/tests/topotests/evpn-pim-1/leaf1/pimd.conf b/tests/topotests/evpn-pim-1/leaf1/pimd.conf new file mode 100644 index 0000000000..293e252086 --- /dev/null +++ b/tests/topotests/evpn-pim-1/leaf1/pimd.conf @@ -0,0 +1,15 @@ +debug pim events +debug pim nht +debug pim zebra +ip pim rp 192.168.100.1 +! +int lo + ip pim +! +int leaf1-eth0 + ip pim +! +int leaf1-eth1 + ip pim + ip igmp + diff --git a/tests/topotests/evpn-pim-1/leaf1/zebra.conf b/tests/topotests/evpn-pim-1/leaf1/zebra.conf new file mode 100644 index 0000000000..581cc6e7be --- /dev/null +++ b/tests/topotests/evpn-pim-1/leaf1/zebra.conf @@ -0,0 +1,6 @@ +int leaf1-eth0 + ip addr 192.168.1.2/24 +int leaf1-eth1 + ip addr 192.168.3.2/24 +int lo + ip addr 192.168.100.2/32 diff --git a/tests/topotests/evpn-pim-1/leaf2/bgpd.conf b/tests/topotests/evpn-pim-1/leaf2/bgpd.conf new file mode 100644 index 0000000000..3dd9f237be --- /dev/null +++ b/tests/topotests/evpn-pim-1/leaf2/bgpd.conf @@ -0,0 +1,9 @@ + +router bgp 65003 + neighbor 192.168.2.1 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.2.1 activate + advertise-all-vni + ! +!
\ No newline at end of file diff --git a/tests/topotests/evpn-pim-1/leaf2/pimd.conf b/tests/topotests/evpn-pim-1/leaf2/pimd.conf new file mode 100644 index 0000000000..08d5a19a2a --- /dev/null +++ b/tests/topotests/evpn-pim-1/leaf2/pimd.conf @@ -0,0 +1,13 @@ +ip pim rp 192.168.100.1 +! +int lo + ip pim +! +int leaf2-eth0 + ip pim +! +int leaf2-eth1 + ip pim + ip igmp +! + diff --git a/tests/topotests/evpn-pim-1/leaf2/zebra.conf b/tests/topotests/evpn-pim-1/leaf2/zebra.conf new file mode 100644 index 0000000000..1bcf8e1ded --- /dev/null +++ b/tests/topotests/evpn-pim-1/leaf2/zebra.conf @@ -0,0 +1,6 @@ +int leaf2-eth0 + ip addr 192.168.2.3/24 +int leaf2-eth1 + ip addr 192.168.4.3/24 +int lo + ip addr 192.168.100.3/32 diff --git a/tests/topotests/evpn-pim-1/spine/bgp.summ.json b/tests/topotests/evpn-pim-1/spine/bgp.summ.json new file mode 100644 index 0000000000..faf40c8d43 --- /dev/null +++ b/tests/topotests/evpn-pim-1/spine/bgp.summ.json @@ -0,0 +1,44 @@ +{ + "routerId":"192.168.100.1", + "as":65001, + "vrfId":0, + "vrfName":"default", + "tableVersion":7, + "peerCount":2, + "peers":{ + "192.168.1.2":{ + "remoteAs":65002, + "version":4, + "tableVersion":0, + "outq":0, + "inq":0, + "prefixReceivedCount":3, + "pfxRcd":3, + "pfxSnt":7, + "state":"Established", + "connectionsEstablished":1, + "connectionsDropped":0, + "idType":"ipv4" + }, + "192.168.2.3":{ + "remoteAs":65003, + "version":4, + "tableVersion":0, + "outq":0, + "inq":0, + "prefixReceivedCount":3, + "pfxRcd":3, + "pfxSnt":7, + "state":"Established", + "connectionsEstablished":1, + "connectionsDropped":0, + "idType":"ipv4" + } + }, + "failedPeers":0, + "totalPeers":2, + "dynamicPeers":0, + "bestPath":{ + "multiPathRelax":"false" + } +} diff --git a/tests/topotests/evpn-pim-1/spine/bgpd.conf b/tests/topotests/evpn-pim-1/spine/bgpd.conf new file mode 100644 index 0000000000..9a845043e9 --- /dev/null +++ b/tests/topotests/evpn-pim-1/spine/bgpd.conf @@ -0,0 +1,10 @@ + +router bgp 65001 + neighbor 192.168.1.2 
remote-as external + neighbor 192.168.2.3 remote-as external + redistribute connected + address-family l2vpn evpn + neighbor 192.168.1.2 activate + neighbor 192.168.2.3 activate + exit-address-family +! diff --git a/tests/topotests/evpn-pim-1/spine/join-info.json b/tests/topotests/evpn-pim-1/spine/join-info.json new file mode 100644 index 0000000000..3d135fb964 --- /dev/null +++ b/tests/topotests/evpn-pim-1/spine/join-info.json @@ -0,0 +1,34 @@ +{ + "spine-eth0":{ + "name":"spine-eth0", + "state":"up", + "address":"192.168.1.1", + "flagMulticast":true, + "flagBroadcast":true, + "lanDelayEnabled":true, + "239.1.1.1":{ + "*":{ + "source":"*", + "group":"239.1.1.1", + "prune":"--:--", + "channelJoinName":"JOIN" + } + } + }, + "spine-eth1":{ + "name":"spine-eth1", + "state":"up", + "address":"192.168.2.1", + "flagMulticast":true, + "flagBroadcast":true, + "lanDelayEnabled":true, + "239.1.1.1":{ + "*":{ + "source":"*", + "group":"239.1.1.1", + "prune":"--:--", + "channelJoinName":"JOIN" + } + } + } +} diff --git a/tests/topotests/evpn-pim-1/spine/pimd.conf b/tests/topotests/evpn-pim-1/spine/pimd.conf new file mode 100644 index 0000000000..56adda5cc4 --- /dev/null +++ b/tests/topotests/evpn-pim-1/spine/pimd.conf @@ -0,0 +1,13 @@ +ip pim rp 192.168.100.1 +! +int lo + ip pim +! +int spine-eth0 + ip pim +! +int spine-eth1 + ip pim +! + + diff --git a/tests/topotests/evpn-pim-1/spine/zebra.conf b/tests/topotests/evpn-pim-1/spine/zebra.conf new file mode 100644 index 0000000000..2cb719486e --- /dev/null +++ b/tests/topotests/evpn-pim-1/spine/zebra.conf @@ -0,0 +1,8 @@ +int spine-eth0 + ip addr 192.168.1.1/24 +! +int spine-eth1 + ip addr 192.168.2.1/24 +! 
+int lo + ip addr 192.168.100.1/32 diff --git a/tests/topotests/evpn-pim-1/test_evpn_pim_topo1.py b/tests/topotests/evpn-pim-1/test_evpn_pim_topo1.py new file mode 100755 index 0000000000..dafe2e03ac --- /dev/null +++ b/tests/topotests/evpn-pim-1/test_evpn_pim_topo1.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python + +# +# test_evpn-pim_topo1.py +# +# Copyright (c) 2017 by +# Cumulus Networks, Inc. +# Donald Sharp +# +# Permission to use, copy, modify, and/or distribute this software +# for any purpose with or without fee is hereby granted, provided +# that the above copyright notice and this permission notice appear +# in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND NETDEF DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NETDEF BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# + +""" +test_evpn_pim_topo1.py: Testing evpn-pim + +""" + +import os +import re +import sys +import pytest +import json +from functools import partial + +# Save the Current Working Directory to find configuration files. +CWD = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(CWD, '../')) + +# pylint: disable=C0413 +# Import topogen and topotest helpers +from lib import topotest +from lib.topogen import Topogen, TopoRouter, get_topogen +from lib.topolog import logger + +# Required to instantiate the topology builder class. 
+from mininet.topo import Topo + +##################################################### +## +## Network Topology Definition +## +##################################################### + +class NetworkTopo(Topo): + "evpn-pim Topology 1" + + def build(self, **_opts): + "Build function" + + tgen = get_topogen(self) + + tgen.add_router('spine') + tgen.add_router('leaf1') + tgen.add_router('leaf2') + tgen.add_router('host1') + tgen.add_router('host2') + + # On main router + # First switch is for a dummy interface (for local network) + # spine-eth0 is connected to leaf1-eth0 + switch = tgen.add_switch('sw1') + switch.add_link(tgen.gears['spine']) + switch.add_link(tgen.gears['leaf1']) + + # spine-eth1 is connected to leaf2-eth0 + switch = tgen.add_switch('sw2') + switch.add_link(tgen.gears['spine']) + switch.add_link(tgen.gears['leaf2']) + + # leaf1-eth1 is connected to host1-eth0 + switch = tgen.add_switch('sw3') + switch.add_link(tgen.gears['leaf1']) + switch.add_link(tgen.gears['host1']) + + # leaf2-eth1 is connected to host2-eth0 + switch = tgen.add_switch('sw4') + switch.add_link(tgen.gears['leaf2']) + switch.add_link(tgen.gears['host2']) + + + +##################################################### +## +## Tests starting +## +##################################################### + +def setup_module(module): + "Setup topology" + tgen = Topogen(NetworkTopo, module.__name__) + tgen.start_topology() + + leaf1 = tgen.gears['leaf1'] + leaf2 = tgen.gears['leaf2'] + + leaf1.run('brctl addbr brleaf1') + leaf2.run('brctl addbr brleaf2') + leaf1.run('ip link set dev brleaf1 up') + leaf2.run('ip link set dev brleaf2 up') + leaf1.run('ip link add vxlan0 type vxlan id 42 group 239.1.1.1 dev leaf1-eth1 dstport 4789') + leaf2.run('ip link add vxlan0 type vxlan id 42 group 239.1.1.1 dev leaf2-eth1 dstport 4789') + leaf1.run('brctl addif brleaf1 vxlan0') + leaf2.run('brctl addif brleaf2 vxlan0') + leaf1.run('ip link set up dev vxlan0') + leaf2.run('ip link set up dev vxlan0') + 
#tgen.mininet_cli() + # This is a sample of configuration loading. + router_list = tgen.routers() + for rname, router in router_list.iteritems(): + router.load_config( + TopoRouter.RD_ZEBRA, + os.path.join(CWD, '{}/zebra.conf'.format(rname)) + ) + router.load_config( + TopoRouter.RD_BGP, + os.path.join(CWD, '{}/bgpd.conf'.format(rname)) + ) + router.load_config( + TopoRouter.RD_PIM, + os.path.join(CWD, '{}/pimd.conf'.format(rname)) + ) + tgen.start_router() + #tgen.mininet_cli() + +def teardown_module(_mod): + "Teardown the pytest environment" + tgen = get_topogen() + + # This function tears down the whole topology. + tgen.stop_topology() + + +def test_converge_protocols(): + "Wait for protocol convergence" + + tgen = get_topogen() + # Don't run this test if we have any failure. + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + spine = tgen.gears['spine'] + json_file = '{}/{}/bgp.summ.json'.format(CWD, spine.name) + expected = json.loads(open(json_file).read()) + + test_func = partial(topotest.router_json_cmp, + spine, 'show bgp ipv4 uni summ json', expected) + _, result = topotest.run_and_expect(test_func, None, count=125, wait=1) + assertmsg = '"{}" JSON output mismatches'.format(spine.name) + assert result is None, assertmsg + #tgen.mininet_cli() + +def test_multicast_groups_on_rp(): + "Ensure the multicast groups show up on the spine" + # This test implicitly tests the auto mcast groups + # of the created vlans and then the auto-joins that + # pim will do to the RP( spine ) + + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + spine = tgen.gears['spine'] + json_file = '{}/{}/join-info.json'.format(CWD, spine.name) + expected = json.loads(open(json_file).read()) + + test_func = partial(topotest.router_json_cmp, + spine, 'show ip pim join json', expected) + _, result = topotest.run_and_expect(test_func, None, count=30, wait=1) + assertmsg = '"{}" JSON output mismatches'.format(spine.name) + assert result is 
None, assertmsg + #tgen.mininet_cli() + +def test_shutdown_check_stderr(): + if os.environ.get('TOPOTESTS_CHECK_STDERR') is None: + pytest.skip('Skipping test for Stderr output and memory leaks') + + tgen = get_topogen() + # Don't run this test if we have any failure. + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + logger.info("Verifying unexpected STDERR output from daemons") + + router_list = tgen.routers().values() + for router in router_list: + router.stop() + + log = tgen.net[router.name].getStdErr('pimd') + if log: + logger.error('PIMd StdErr Log:' + log) + log = tgen.net[router.name].getStdErr('bgpd') + if log: + logger.error('BGPd StdErr Log:' + log) + log = tgen.net[router.name].getStdErr('zebra') + if log: + logger.error('Zebra StdErr Log:' + log) + + +if __name__ == '__main__': + args = ["-s"] + sys.argv[1:] + sys.exit(pytest.main(args)) + diff --git a/zebra/connected.c b/zebra/connected.c index 0ff474d787..0ee41afa8f 100644 --- a/zebra/connected.c +++ b/zebra/connected.c @@ -490,6 +490,10 @@ void connected_add_ipv6(struct interface *ifp, int flags, struct in6_addr *addr, p->prefixlen = prefixlen; ifc->address = (struct prefix *)p; + /* Add global ipv6 address to the RA prefix list */ + if (!IN6_IS_ADDR_LINKLOCAL(&p->prefix)) + rtadv_add_prefix(ifp->info, p); + if (dest) { p = prefix_ipv6_new(); p->family = AF_INET6; @@ -533,6 +537,10 @@ void connected_delete_ipv6(struct interface *ifp, struct in6_addr *address, memcpy(&p.u.prefix6, address, sizeof(struct in6_addr)); p.prefixlen = prefixlen; + /* Delete global ipv6 address from RA prefix list */ + if (!IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6)) + rtadv_delete_prefix(ifp->info, &p); + if (dest) { memset(&d, 0, sizeof(struct prefix)); d.family = AF_INET6; diff --git a/zebra/debug.c b/zebra/debug.c index 16b1d0057b..68f6b69305 100644 --- a/zebra/debug.c +++ b/zebra/debug.c @@ -544,6 +544,10 @@ static int config_write_debug(struct vty *vty) vty_out(vty, "debug zebra vxlan\n"); write++; } + if 
(IS_ZEBRA_DEBUG_MLAG) { + vty_out(vty, "debug zebra mlag\n"); + write++; + } if (IS_ZEBRA_DEBUG_PW) { vty_out(vty, "debug zebra pseudowires\n"); write++; @@ -633,4 +637,5 @@ void zebra_debug_init(void) install_element(CONFIG_NODE, &no_debug_zebra_rib_cmd); install_element(CONFIG_NODE, &no_debug_zebra_fpm_cmd); install_element(CONFIG_NODE, &no_debug_zebra_dplane_cmd); + install_element(CONFIG_NODE, &debug_zebra_mlag_cmd); } diff --git a/zebra/irdp_packet.c b/zebra/irdp_packet.c index f6fe6bbf1e..13dcab9294 100644 --- a/zebra/irdp_packet.c +++ b/zebra/irdp_packet.c @@ -78,6 +78,7 @@ static void parse_irdp_packet(char *p, int len, struct interface *ifp) int ip_hlen, iplen, datalen; struct zebra_if *zi; struct irdp_interface *irdp; + uint16_t saved_chksum; zi = ifp->info; if (!zi) @@ -121,8 +122,10 @@ static void parse_irdp_packet(char *p, int len, struct interface *ifp) icmp = (struct icmphdr *)(p + ip_hlen); + saved_chksum = icmp->checksum; + icmp->checksum = 0; /* check icmp checksum */ - if (in_cksum(icmp, datalen) != icmp->checksum) { + if (in_cksum(icmp, datalen) != saved_chksum) { flog_warn( EC_ZEBRA_IRDP_BAD_CHECKSUM, "IRDP: RX ICMP packet from %s. 
Bad checksum, silently ignored", @@ -315,15 +318,20 @@ void send_packet(struct interface *ifp, struct stream *s, uint32_t dst, if (setsockopt(irdp_sock, IPPROTO_IP, IP_HDRINCL, (char *)&on, sizeof(on)) < 0) - zlog_debug("sendto %s", safe_strerror(errno)); + flog_err(EC_LIB_SOCKET, + "IRDP: Cannot set IP_HDRINCLU %s(%d) on %s", + safe_strerror(errno), errno, ifp->name); if (dst == INADDR_BROADCAST) { - on = 1; - if (setsockopt(irdp_sock, SOL_SOCKET, SO_BROADCAST, (char *)&on, - sizeof(on)) + uint32_t bon = 1; + + if (setsockopt(irdp_sock, SOL_SOCKET, SO_BROADCAST, &bon, + sizeof(bon)) < 0) - zlog_debug("sendto %s", safe_strerror(errno)); + flog_err(EC_LIB_SOCKET, + "IRDP: Cannot set SO_BROADCAST %s(%d) on %s", + safe_strerror(errno), errno, ifp->name); } if (dst != INADDR_BROADCAST) @@ -354,8 +362,8 @@ void send_packet(struct interface *ifp, struct stream *s, uint32_t dst, sockopt_iphdrincl_swab_htosys(ip); - if (sendmsg(irdp_sock, msg, 0) < 0) { - zlog_debug("sendto %s", safe_strerror(errno)); - } - /* printf("TX on %s idx %d\n", ifp->name, ifp->ifindex); */ + if (sendmsg(irdp_sock, msg, 0) < 0) + flog_err(EC_LIB_SOCKET, + "IRDP: sendmsg send failure %s(%d) on %s", + safe_strerror(errno), errno, ifp->name); } diff --git a/zebra/rtadv.c b/zebra/rtadv.c index e9a97d4b15..e36af00b4e 100644 --- a/zebra/rtadv.c +++ b/zebra/rtadv.c @@ -876,18 +876,48 @@ static struct rtadv_prefix *rtadv_prefix_get(struct list *rplist, return rprefix; } +static void rtadv_prefix_set_defaults(struct rtadv_prefix *rp) +{ + rp->AdvAutonomousFlag = 1; + rp->AdvOnLinkFlag = 1; + rp->AdvRouterAddressFlag = 0; + rp->AdvPreferredLifetime = RTADV_PREFERRED_LIFETIME; + rp->AdvValidLifetime = RTADV_VALID_LIFETIME; +} + static void rtadv_prefix_set(struct zebra_if *zif, struct rtadv_prefix *rp) { struct rtadv_prefix *rprefix; rprefix = rtadv_prefix_get(zif->rtadv.AdvPrefixList, &rp->prefix); - /* Set parameters. 
*/ - rprefix->AdvValidLifetime = rp->AdvValidLifetime; - rprefix->AdvPreferredLifetime = rp->AdvPreferredLifetime; - rprefix->AdvOnLinkFlag = rp->AdvOnLinkFlag; - rprefix->AdvAutonomousFlag = rp->AdvAutonomousFlag; - rprefix->AdvRouterAddressFlag = rp->AdvRouterAddressFlag; + /* + * Set parameters based on where the prefix is created. + * If auto-created based on kernel address addition, set the + * default values. If created from a manual "ipv6 nd prefix" + * command, take the parameters from the manual command. Note + * that if the manual command exists, the default values will + * not overwrite the manual values. + */ + if (rp->AdvPrefixCreate == PREFIX_SRC_MANUAL) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_AUTO) + rprefix->AdvPrefixCreate = PREFIX_SRC_BOTH; + else + rprefix->AdvPrefixCreate = PREFIX_SRC_MANUAL; + + rprefix->AdvAutonomousFlag = rp->AdvAutonomousFlag; + rprefix->AdvOnLinkFlag = rp->AdvOnLinkFlag; + rprefix->AdvRouterAddressFlag = rp->AdvRouterAddressFlag; + rprefix->AdvPreferredLifetime = rp->AdvPreferredLifetime; + rprefix->AdvValidLifetime = rp->AdvValidLifetime; + } else if (rp->AdvPrefixCreate == PREFIX_SRC_AUTO) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_MANUAL) + rprefix->AdvPrefixCreate = PREFIX_SRC_BOTH; + else { + rprefix->AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set_defaults(rprefix); + } + } } static int rtadv_prefix_reset(struct zebra_if *zif, struct rtadv_prefix *rp) @@ -896,6 +926,27 @@ static int rtadv_prefix_reset(struct zebra_if *zif, struct rtadv_prefix *rp) rprefix = rtadv_prefix_lookup(zif->rtadv.AdvPrefixList, &rp->prefix); if (rprefix != NULL) { + + /* + * When deleting an address from the list, need to take care + * it wasn't defined both automatically via kernel + * address addition as well as manually by vtysh cli. If both, + * we don't actually delete but may change the parameters + * back to default if a manually defined entry is deleted. 
+ */ + if (rp->AdvPrefixCreate == PREFIX_SRC_MANUAL) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH) { + rprefix->AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set_defaults(rprefix); + return 1; + } + } else if (rp->AdvPrefixCreate == PREFIX_SRC_AUTO) { + if (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH) { + rprefix->AdvPrefixCreate = PREFIX_SRC_MANUAL; + return 1; + } + } + listnode_delete(zif->rtadv.AdvPrefixList, (void *)rprefix); rtadv_prefix_free(rprefix); return 1; @@ -903,6 +954,28 @@ static int rtadv_prefix_reset(struct zebra_if *zif, struct rtadv_prefix *rp) return 0; } +/* Add IPv6 prefixes learned from the kernel to the RA prefix list */ +void rtadv_add_prefix(struct zebra_if *zif, const struct prefix_ipv6 *p) +{ + struct rtadv_prefix rp; + + rp.prefix = *p; + apply_mask_ipv6(&rp.prefix); + rp.AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_set(zif, &rp); +} + +/* Delete IPv6 prefixes removed by the kernel from the RA prefix list */ +void rtadv_delete_prefix(struct zebra_if *zif, const struct prefix *p) +{ + struct rtadv_prefix rp; + + rp.prefix = *((struct prefix_ipv6 *)p); + apply_mask_ipv6(&rp.prefix); + rp.AdvPrefixCreate = PREFIX_SRC_AUTO; + rtadv_prefix_reset(zif, &rp); +} + static void ipv6_nd_suppress_ra_set(struct interface *ifp, ipv6_nd_suppress_ra_status status) { @@ -1601,6 +1674,7 @@ DEFUN (ipv6_nd_prefix, rp.AdvRouterAddressFlag = routeraddr; rp.AdvValidLifetime = RTADV_VALID_LIFETIME; rp.AdvPreferredLifetime = RTADV_PREFERRED_LIFETIME; + rp.AdvPrefixCreate = PREFIX_SRC_MANUAL; if (lifetimes) { rp.AdvValidLifetime = strmatch(lifetime, "infinite") @@ -1651,6 +1725,7 @@ DEFUN (no_ipv6_nd_prefix, return CMD_WARNING_CONFIG_FAILED; } apply_mask_ipv6(&rp.prefix); /* RFC4861 4.6.2 */ + rp.AdvPrefixCreate = PREFIX_SRC_MANUAL; ret = rtadv_prefix_reset(zebra_if, &rp); if (!ret) { @@ -2182,29 +2257,34 @@ static int rtadv_config_write(struct vty *vty, struct interface *ifp) vty_out(vty, " ipv6 nd mtu %d\n", zif->rtadv.AdvLinkMTU); for 
(ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvPrefixList, node, rprefix)) { - vty_out(vty, " ipv6 nd prefix %s", - prefix2str(&rprefix->prefix, buf, sizeof(buf))); - if ((rprefix->AdvValidLifetime != RTADV_VALID_LIFETIME) - || (rprefix->AdvPreferredLifetime - != RTADV_PREFERRED_LIFETIME)) { - if (rprefix->AdvValidLifetime == UINT32_MAX) - vty_out(vty, " infinite"); - else - vty_out(vty, " %u", rprefix->AdvValidLifetime); - if (rprefix->AdvPreferredLifetime == UINT32_MAX) - vty_out(vty, " infinite"); - else - vty_out(vty, " %u", - rprefix->AdvPreferredLifetime); + if ((rprefix->AdvPrefixCreate == PREFIX_SRC_MANUAL) + || (rprefix->AdvPrefixCreate == PREFIX_SRC_BOTH)) { + vty_out(vty, " ipv6 nd prefix %s", + prefix2str(&rprefix->prefix, buf, sizeof(buf))); + if ((rprefix->AdvValidLifetime != RTADV_VALID_LIFETIME) + || (rprefix->AdvPreferredLifetime + != RTADV_PREFERRED_LIFETIME)) { + if (rprefix->AdvValidLifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", + rprefix->AdvValidLifetime); + if (rprefix->AdvPreferredLifetime == UINT32_MAX) + vty_out(vty, " infinite"); + else + vty_out(vty, " %u", + rprefix->AdvPreferredLifetime); + } + if (!rprefix->AdvOnLinkFlag) + vty_out(vty, " off-link"); + if (!rprefix->AdvAutonomousFlag) + vty_out(vty, " no-autoconfig"); + if (rprefix->AdvRouterAddressFlag) + vty_out(vty, " router-address"); + vty_out(vty, "\n"); } - if (!rprefix->AdvOnLinkFlag) - vty_out(vty, " off-link"); - if (!rprefix->AdvAutonomousFlag) - vty_out(vty, " no-autoconfig"); - if (rprefix->AdvRouterAddressFlag) - vty_out(vty, " router-address"); - vty_out(vty, "\n"); } + for (ALL_LIST_ELEMENTS_RO(zif->rtadv.AdvRDNSSList, node, rdnss)) { char buf[INET6_ADDRSTRLEN]; diff --git a/zebra/rtadv.h b/zebra/rtadv.h index 63cec94434..64b28cbfd6 100644 --- a/zebra/rtadv.h +++ b/zebra/rtadv.h @@ -37,6 +37,9 @@ struct rtadv_prefix { /* Prefix to be advertised. */ struct prefix_ipv6 prefix; + /* The prefix was manually/automatically defined. 
*/ + int AdvPrefixCreate; + /* The value to be placed in the Valid Lifetime in the Prefix */ uint32_t AdvValidLifetime; #define RTADV_VALID_LIFETIME 2592000 @@ -133,6 +136,17 @@ struct nd_opt_dnssl { /* DNS search list option [RFC8106 5.2] */ #endif /* HAVE_RTADV */ +/* + * ipv6 nd prefixes can be manually defined, derived from the kernel interface + * configs or both. If both, manual flag/timer settings are used. + */ +enum ipv6_nd_prefix_source { + PREFIX_SRC_NONE = 0, + PREFIX_SRC_MANUAL, + PREFIX_SRC_AUTO, + PREFIX_SRC_BOTH, +}; + typedef enum { RA_ENABLE = 0, RA_SUPPRESS, @@ -145,6 +159,8 @@ extern void rtadv_stop_ra_all(void); extern void rtadv_cmd_init(void); extern void zebra_interface_radv_disable(ZAPI_HANDLER_ARGS); extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS); +extern void rtadv_add_prefix(struct zebra_if *zif, const struct prefix_ipv6 *p); +extern void rtadv_delete_prefix(struct zebra_if *zif, const struct prefix *p); #ifdef __cplusplus } diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 02b8cb9723..bed49faa54 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -1823,17 +1823,17 @@ static void zread_mpls_labels_add(ZAPI_HANDLER_ARGS) return; for (int i = 0; i < zl.nexthop_num; i++) { - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; znh = &zl.nexthops[i]; - mpls_lsp_install(zvrf, zl.type, zl.local_label, 1, &znh->label, - znh->type, &znh->address, znh->ifindex); + mpls_lsp_install(zvrf, zl.type, zl.local_label, 1, znh->labels, + znh->type, &znh->gate, znh->ifindex); if (CHECK_FLAG(zl.message, ZAPI_LABELS_FTN)) mpls_ftn_update(1, zvrf, zl.type, &zl.route.prefix, - znh->type, &znh->address, znh->ifindex, + znh->type, &znh->gate, znh->ifindex, zl.route.type, zl.route.instance, - znh->label); + znh->labels[0]); } } @@ -1866,19 +1866,20 @@ static void zread_mpls_labels_delete(ZAPI_HANDLER_ARGS) if (zl.nexthop_num > 0) { for (int i = 0; i < zl.nexthop_num; i++) { - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; 
znh = &zl.nexthops[i]; mpls_lsp_uninstall(zvrf, zl.type, zl.local_label, - znh->type, &znh->address, + znh->type, &znh->gate, znh->ifindex); if (CHECK_FLAG(zl.message, ZAPI_LABELS_FTN)) mpls_ftn_update(0, zvrf, zl.type, &zl.route.prefix, znh->type, - &znh->address, znh->ifindex, + &znh->gate, znh->ifindex, zl.route.type, - zl.route.instance, znh->label); + zl.route.instance, + znh->labels[0]); } } else { mpls_lsp_uninstall_all_vrf(zvrf, zl.type, zl.local_label); @@ -1924,17 +1925,18 @@ static void zread_mpls_labels_replace(ZAPI_HANDLER_ARGS) zl.route.type, zl.route.instance); for (int i = 0; i < zl.nexthop_num; i++) { - struct zapi_nexthop_label *znh; + struct zapi_nexthop *znh; znh = &zl.nexthops[i]; - mpls_lsp_install(zvrf, zl.type, zl.local_label, 1, &znh->label, - znh->type, &znh->address, znh->ifindex); + mpls_lsp_install(zvrf, zl.type, zl.local_label, + 1, znh->labels, znh->type, + &znh->gate, znh->ifindex); if (CHECK_FLAG(zl.message, ZAPI_LABELS_FTN)) { mpls_ftn_update(1, zvrf, zl.type, &zl.route.prefix, - znh->type, &znh->address, znh->ifindex, + znh->type, &znh->gate, znh->ifindex, zl.route.type, zl.route.instance, - znh->label); + znh->labels[0]); } } } |
