]> git.puffer.fish Git - matthieu/frr.git/commitdiff
zebra: Allow longer prefix matches for nexthops
authorDonald Sharp <sharpd@nvidia.com>
Thu, 19 Oct 2023 20:38:12 +0000 (16:38 -0400)
committerDonald Sharp <sharpd@nvidia.com>
Mon, 23 Oct 2023 12:15:11 +0000 (08:15 -0400)
Zebra currently does a shortest prefix match for
resolving nexthops for a prefix.  This is typically
an ok thing to do but fails in several specific scenarios.
If a nexthop matches to a route that is not usable, nexthop
resolution just gives up and refuses to use that particular
route.  For example if zebra currently has a covering prefix
say a 10.0.0.0/8.  And about the same time it receives a
10.1.0.0/16 ( a more specific than the /8 ) and another
route A, who's nexthop is 10.1.1.1.  Imagine the 10.1.0.0/16
is processed enough to know we want to install it and the
prefix is sent to the dataplane for installation( it is queued )
and then route A is processed, nexthop resolution will fail
and the route A will be left in limbo as uninstallable.

Let's modify the nexthop resolution code in zebra such that
if a nexthop's most specific match is unusable, continue looking
up the table till we get to the 0.0.0.0/0 route( if it's even
installed ).  If we find a usable route for the nexthop accept
it and use it.

The bgp_default_originate topology test is frequently failing
with this exact problem:

B>* 0.0.0.0/0 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21
B   1.0.1.17/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21
B>* 1.0.2.17/32 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21
C>* 1.0.3.17/32 is directly connected, lo, 00:02:00
B>* 1.0.5.17/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32
B>* 192.168.0.0/24 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21
B   192.168.1.0/24 [200/0] via 192.168.1.1 inactive, weight 1, 00:00:21
C>* 192.168.1.0/24 is directly connected, r2-r1-eth0, 00:02:00
C>* 192.168.2.0/24 is directly connected, r2-r3-eth1, 00:02:00
B>* 192.168.3.0/24 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32
B   198.51.1.1/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21
B>* 198.51.1.2/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32

Notice that the 1.0.1.17/32 route is inactive but the nexthop
192.168.0.1 is covered by both the 192.168.0.0/24 prefix( shortest match )
*and* the 0.0.0.0/0 route ( longest match ).  When looking at the logs
the 1.0.1.17/32 route was not being installed because the matching
route was not in a usable state, which is because the 192.168.0.0/24
route was in the process of being installed.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
tests/topotests/all_protocol_startup/test_all_protocol_startup.py
zebra/zebra_nhg.c

index c319477c8ae3653b5c244884ba51be59aeb95e35..ca340749feb78e720d4006a7640e9c002abd43e4 100644 (file)
@@ -599,16 +599,16 @@ def test_nexthop_groups():
     count = 0
     dups = []
     nhg_id = route_get_nhg_id("6.6.6.1/32")
-    while (len(dups) != 3) and count < 10:
+    while (len(dups) != 4) and count < 10:
         output = net["r1"].cmd('vtysh -c "show nexthop-group rib %d"' % nhg_id)
 
         dups = re.findall(r"(via 1\.1\.1\.1)", output)
-        if len(dups) != 3:
+        if len(dups) != 4:
             count += 1
             sleep(1)
 
     # Should find 3, itself is inactive
-    assert len(dups) == 3, (
+    assert len(dups) == 4, (
         "Route 6.6.6.1/32 with Nexthop Group ID=%d has wrong number of resolved nexthops"
         % nhg_id
     )
index 6517b7830bb08a7d499164b632c520c900b9deaf..19e2657f1df965b756d817c0b7e3112c9eab609b 100644 (file)
@@ -2248,20 +2248,6 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
                return 1;
        }
 
-       if (top &&
-           ((top->family == AF_INET && top->prefixlen == IPV4_MAX_BITLEN &&
-             nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) ||
-            (top->family == AF_INET6 && top->prefixlen == IPV6_MAX_BITLEN &&
-             memcmp(&nexthop->gate.ipv6, &top->u.prefix6, IPV6_MAX_BYTELEN) ==
-                     0)) &&
-           nexthop->vrf_id == vrf_id) {
-               if (IS_ZEBRA_DEBUG_RIB_DETAILED)
-                       zlog_debug(
-                               "        :%s: Attempting to install a max prefixlength route through itself",
-                               __func__);
-               return 0;
-       }
-
        /* Validation for ipv4 mapped ipv6 nexthop. */
        if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
                afi = AFI_IP;
@@ -2364,7 +2350,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
                                        zlog_debug(
                                                "        %s: Matched against ourself and prefix length is not max bit length",
                                                __func__);
-                               return 0;
+                               goto continue_up_tree;
                        }
 
                /* Pick up selected route. */
@@ -2391,20 +2377,12 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
                /* If there is no selected route or matched route is EGP, go up
                 * tree.
                 */
-               if (!match) {
-                       do {
-                               rn = rn->parent;
-                       } while (rn && rn->info == NULL);
-                       if (rn)
-                               route_lock_node(rn);
-                       continue;
-               }
 
                /* If the candidate match's type is considered "connected",
                 * we consider it first.
                 */
-               if (RIB_CONNECTED_ROUTE(match) ||
-                   (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type))) {
+               if (match && (RIB_CONNECTED_ROUTE(match) ||
+                             (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type)))) {
                        match = zebra_nhg_connected_ifindex(rn, match,
                                                            nexthop->ifindex);
 
@@ -2420,11 +2398,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
                                        zlog_debug(
                                                "%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv",
                                                __func__, nexthop, newhop);
-                               /*
-                                * NEXTHOP_TYPE_*_IFINDEX but ifindex
-                                * doesn't match what we found.
-                                */
-                               return 0;
+                               goto continue_up_tree;
                        }
 
                        /* NHRP special case: need to indicate onlink */
@@ -2437,7 +2411,7 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
                                        __func__, match, match->nhe, newhop);
 
                        return 1;
-               } else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
+               } else if (match && CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
                        struct nexthop_group *nhg;
                        struct nexthop *resolver;
                        struct backup_nh_map_s map = {};
@@ -2473,6 +2447,10 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
                                                "%s: match %p (%pNG) not installed or being Route Replaced",
                                                __func__, match, match->nhe);
 
+                               if (CHECK_FLAG(match->status,
+                                              ROUTE_ENTRY_QUEUED))
+                                       goto continue_up_tree;
+
                                goto done_with_match;
                        }
 
@@ -2541,25 +2519,37 @@ done_with_match:
                                if (pmtu)
                                        *pmtu = match->mtu;
 
-                       } else if (IS_ZEBRA_DEBUG_RIB_DETAILED)
-                               zlog_debug(
-                                       "        %s: Recursion failed to find",
-                                       __func__);
-
-                       return resolved;
-               } else {
-                       if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
-                               zlog_debug(
-                                       "        %s: Route Type %s has not turned on recursion",
-                                       __func__, zebra_route_string(type));
-                               if (type == ZEBRA_ROUTE_BGP
-                                   && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
+                       } else {
+                               if (IS_ZEBRA_DEBUG_RIB_DETAILED)
                                        zlog_debug(
-                                               "        EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
+                                               "        %s: Recursion failed to find while looking at %pRN",
+                                               __func__, rn);
+                               goto continue_up_tree;
                        }
-                       return 0;
+
+                       return 1;
+               } else if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
+                       zlog_debug(
+                               "        %s: Route Type %s has not turned on recursion %pRN failed to match",
+                               __func__, zebra_route_string(type), rn);
+                       if (type == ZEBRA_ROUTE_BGP
+                           && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
+                               zlog_debug(
+                                       "        EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
                }
+
+       continue_up_tree:
+               /*
+                * If there is no selected route or matched route is EGP, go up
+                * tree.
+                */
+               do {
+                       rn = rn->parent;
+               } while (rn && rn->info == NULL);
+               if (rn)
+                       route_lock_node(rn);
        }
+
        if (IS_ZEBRA_DEBUG_RIB_DETAILED)
                zlog_debug("        %s: Nexthop did not lookup in table",
                           __func__);