From: Anuradha Karuppiah Date: Mon, 15 Oct 2018 15:16:51 +0000 (-0700) Subject: bgpd: use IP address as tie breaker if the MM seq number is the same X-Git-Tag: frr-7.1-dev~218^2~4 X-Git-Url: https://git.puffer.fish/?a=commitdiff_plain;h=6d8c603a930f229aac931aac2f556f79f4b7b437;p=matthieu%2Ffrr.git bgpd: use IP address as tie breaker if the MM seq number is the same Same sequence number handling is specified by RFC 7432 - [ If two (or more) PEs advertise the same MAC address with the same sequence number but different Ethernet segment identifiers, a PE that receives these routes selects the route advertised by the PE with the lowest IP address as the best route. If the PE is the originator of the MAC route and it receives the same MAC address with the same sequence number that it generated, it will compare its own IP address with the IP address of the remote PE and will select the lowest IP. If its own route is not the best one, it will withdraw the route. ] To implement that specification this commit uses nexthop IP as a tie breaker between two paths of equal seq number with lower IP winning. Now if a local path already exists with the same sequence number but higher (local-VTEP) IP it is evicted (deleted and withdrawn from the peers) and the winning new remote path is installed in zebra. This is existing code and handled implicitly via evpn_route_select_install. If a local path is rxed from zebra with the same sequence as the current remote winner it is rejected (not installed in the bgp routing tables) and zebra is asked to re-install the older/remote winner. This is a race condition that can only happen if bgp's add and zebra's add cross paths. Additional handling has been added in this commit via evpn_cleanup_local_non_best_route to take care of the race condition. Ticket: CM-22674 Reviewed By: CCR-7937 Signed-off-by: Anuradha Karuppiah --- diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 945094a671..961e3b84de 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -1685,6 +1685,65 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, return route_change; } +/* + * If the local route was not selected evict it and tell zebra to re-add + * the best remote dest. + * + * Typically a local path added by zebra is expected to be selected as + * best. In which case when a remote path wins as best (later) + * evpn_route_select_install itself evicts the older-local-best path. + * + * However if bgp's add and zebra's add cross paths (race condition) it + * is possible that the local path is no longer the "older" best path. + * It is a path that was never designated as best and hence requires + * additional handling to prevent bgp from injecting and holding on to a + * non-best local path. + */ +static void evpn_cleanup_local_non_best_route(struct bgp *bgp, + struct bgpevpn *vpn, + struct bgp_node *rn, + struct bgp_path_info *local_pi, + int *route_change) +{ + struct bgp_path_info *tmp_pi; + struct bgp_path_info *curr_select = NULL; + uint8_t flags = 0; + char buf[PREFIX_STRLEN]; + + if (CHECK_FLAG(local_pi->flags, BGP_PATH_SELECTED)) { + /* local path is the winner; no additional cleanup needed */ + return; + } + + /* local path was not picked as the winner; kick it out */ + if (bgp_debug_zebra(NULL)) { + zlog_debug("evicting local evpn prefix %s as remote won", + prefix2str(&rn->p, buf, sizeof(buf))); + } + *route_change = 0; + evpn_delete_old_local_route(bgp, vpn, rn, local_pi); + bgp_path_info_reap(rn, local_pi); + + /* tell zebra to re-add the best remote path */ + for (tmp_pi = rn->info; tmp_pi; tmp_pi = tmp_pi->next) { + if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_SELECTED)) { + curr_select = tmp_pi; + break; + } + } + if (curr_select && + curr_select->type == ZEBRA_ROUTE_BGP + && curr_select->sub_type == BGP_ROUTE_IMPORTED) { + if (curr_select->attr->sticky) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + if (curr_select->attr->default_gw) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + evpn_zebra_install(bgp, vpn, (struct prefix_evpn *)&rn->p, + curr_select->attr->nexthop, flags, + mac_mobility_seqnum(curr_select->attr)); + } +} + /* * Create or update EVPN route (of type based on prefix) for specified VNI * and schedule for processing. @@ -1747,10 +1806,19 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, assert(pi); attr_new = pi->attr; + /* lock ri to prevent freeing in evpn_route_select_install */ + bgp_path_info_lock(pi); /* Perform route selection; this is just to set the flags correctly * as local route in the VNI always wins. */ evpn_route_select_install(bgp, vpn, rn); + /* + * if the new local route was not selected evict it and tell zebra + * to add the best remote dest + */ + evpn_cleanup_local_non_best_route(bgp, vpn, rn, pi, &route_change); + bgp_path_info_unlock(pi); + bgp_unlock_node(rn); /* If this is a new route or some attribute has changed, export the diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index c20b404f19..4c408ff9dd 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -453,6 +453,7 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, char exist_buf[PATH_ADDPATH_STR_BUFFER]; uint32_t new_mm_seq; uint32_t exist_mm_seq; + int nh_cmp; *paths_eq = 0; @@ -545,6 +546,28 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, exist_mm_seq); return 0; } + + /* + * if sequence numbers are the same path with the lowest IP + * wins + */ + nh_cmp = bgp_path_info_nexthop_cmp(new, exist); + if (nh_cmp < 0) { + if (debug) + zlog_debug( + "%s: %s wins over %s due to same MM seq %u and lower IP %s", + pfx_buf, new_buf, exist_buf, new_mm_seq, + inet_ntoa(new->attr->nexthop)); + return 1; + } + if (nh_cmp > 0) { + if (debug) + zlog_debug( + "%s: %s loses to %s due to same MM seq %u and higher IP %s", + pfx_buf, new_buf, exist_buf, new_mm_seq, + inet_ntoa(new->attr->nexthop)); + return 0; + } } /* 1. Weight check. */