]> git.puffer.fish Git - matthieu/frr.git/commitdiff
bgpd: use IP address as tie breaker if the MM seq number is the same
authorAnuradha Karuppiah <anuradhak@cumulusnetworks.com>
Mon, 15 Oct 2018 15:16:51 +0000 (08:16 -0700)
committerDonald Sharp <sharpd@cumulusnetworks.com>
Wed, 31 Oct 2018 10:23:32 +0000 (06:23 -0400)
Same sequence number handling is specified by RFC 7432 -
[
If two (or more) PEs advertise the same MAC address with the same
sequence number but different Ethernet segment identifiers, a PE that
receives these routes selects the route advertised by the PE with the
lowest IP address as the best route.

If the PE is the originator of the MAC route and it receives the same
MAC address with the same sequence number that it generated, it will
compare its own IP address with the IP address of the remote PE and
will select the lowest IP.  If its own route is not the best one, it
will withdraw the route.
]

To implement that specification this commit uses nexthop IP as a tie
breaker between two paths of equal seq number with lower IP winning.

Now if a local path already exists with the same sequence number but higher
(local-VTEP) IP it is evicted (deleted and withdrawn from the peers) and
the winning new remote path is installed in zebra. This is existing code
and handled implicitly via evpn_route_select_install.

If a local path is rxed from zebra with the same sequence as the
current remote winner it is rejected (not installed in the bgp
routing tables) and zebra is asked to re-install the older/remote winner.
This is a race condition that can only happen if bgp's add and zebra's add
cross paths. Additional handling has been added in this commit via
evpn_cleanup_local_non_best_route to take care of the race condition.

Ticket: CM-22674
Reviewed By: CCR-7937

Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
bgpd/bgp_evpn.c
bgpd/bgp_route.c

index 945094a671eba9ccadd8bb10ca9839d2f7c50be0..961e3b84dee535599caa2751a6bc0f0c4b833f3f 100644 (file)
@@ -1685,6 +1685,65 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
        return route_change;
 }
 
+/*
+ * If the local route was not selected evict it and tell zebra to re-add
+ * the best remote dest.
+ *
+ * Typically a local path added by zebra is expected to be selected as
+ * best. In which case when a remote path wins as best (later)
+ * evpn_route_select_install itself evicts the older-local-best path.
+ *
+ * However if bgp's add and zebra's add cross paths (race condition) it
+ * is possible that the local path is no longer the "older" best path.
+ * It is a path that was never designated as best and hence requires
+ * additional handling to prevent bgp from injecting and holding on to a
+ * non-best local path.
+ */
+static void evpn_cleanup_local_non_best_route(struct bgp *bgp,
+                                             struct bgpevpn *vpn,
+                                             struct bgp_node *rn,
+                                             struct bgp_path_info *local_pi,
+                                             int *route_change)
+{
+       struct bgp_path_info *tmp_pi;
+       struct bgp_path_info *curr_select = NULL;
+       uint8_t flags = 0;
+       char buf[PREFIX_STRLEN];
+
+       if (CHECK_FLAG(local_pi->flags, BGP_PATH_SELECTED)) {
+               /* local path is the winner; no additional cleanup needed */
+               return;
+       }
+
+       /* local path was not picked as the winner; kick it out */
+       if (bgp_debug_zebra(NULL)) {
+               zlog_debug("evicting local evpn prefix %s as remote won",
+                                       prefix2str(&rn->p, buf, sizeof(buf)));
+       }
+       *route_change = 0;
+       evpn_delete_old_local_route(bgp, vpn, rn, local_pi);
+       bgp_path_info_reap(rn, local_pi);
+
+       /* tell zebra to re-add the best remote path */
+       for (tmp_pi = rn->info; tmp_pi; tmp_pi = tmp_pi->next) {
+               if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_SELECTED)) {
+                       curr_select = tmp_pi;
+                       break;
+               }
+       }
+       if (curr_select &&
+           curr_select->type == ZEBRA_ROUTE_BGP
+           && curr_select->sub_type == BGP_ROUTE_IMPORTED) {
+               if (curr_select->attr->sticky)
+                       SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY);
+               if (curr_select->attr->default_gw)
+                       SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW);
+               evpn_zebra_install(bgp, vpn, (struct prefix_evpn *)&rn->p,
+                                  curr_select->attr->nexthop, flags,
+                                  mac_mobility_seqnum(curr_select->attr));
+       }
+}
+
 /*
  * Create or update EVPN route (of type based on prefix) for specified VNI
  * and schedule for processing.
@@ -1747,10 +1806,19 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
        assert(pi);
        attr_new = pi->attr;
 
+       /* lock ri to prevent freeing in evpn_route_select_install */
+       bgp_path_info_lock(pi);
        /* Perform route selection; this is just to set the flags correctly
         * as local route in the VNI always wins.
         */
        evpn_route_select_install(bgp, vpn, rn);
+       /*
+        * if the new local route was not selected evict it and tell zebra
+        * to add the best remote dest
+        */
+       evpn_cleanup_local_non_best_route(bgp, vpn, rn, pi, &route_change);
+       bgp_path_info_unlock(pi);
+
        bgp_unlock_node(rn);
 
        /* If this is a new route or some attribute has changed, export the
index c20b404f19530fc08e967ef346a1ffc537965cd0..4c408ff9dd5599b87f095abbfde4ab4f01742726 100644 (file)
@@ -453,6 +453,7 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
        char exist_buf[PATH_ADDPATH_STR_BUFFER];
        uint32_t new_mm_seq;
        uint32_t exist_mm_seq;
+       int nh_cmp;
 
        *paths_eq = 0;
 
@@ -545,6 +546,28 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
                                        exist_mm_seq);
                        return 0;
                }
+
+               /*
+                * if sequence numbers are the same path with the lowest IP
+                * wins
+                */
+               nh_cmp = bgp_path_info_nexthop_cmp(new, exist);
+               if (nh_cmp < 0) {
+                       if (debug)
+                               zlog_debug(
+                                       "%s: %s wins over %s due to same MM seq %u and lower IP %s",
+                                       pfx_buf, new_buf, exist_buf, new_mm_seq,
+                                       inet_ntoa(new->attr->nexthop));
+                       return 1;
+               }
+               if (nh_cmp > 0) {
+                       if (debug)
+                               zlog_debug(
+                                       "%s: %s loses to %s due to same MM seq %u and higher IP %s",
+                                       pfx_buf, new_buf, exist_buf, new_mm_seq,
+                                       inet_ntoa(new->attr->nexthop));
+                       return 0;
+               }
        }
 
        /* 1. Weight check. */