From 8d4665aabfba6dc2da854d6cb5cd439930c1ea76 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 21 Oct 2022 07:20:44 -0400 Subject: [PATCH] zebra: Fix handling of recursive routes when processing closely in time When zebra receives routes from upper level protocols it decodes the zapi message and places the routes on the metaQ for processing. Suppose we have a route A that is already installed by some routing protocol. And there is a route B that has a nexthop that will be recursively resolved through A. Imagine if a route replace operation for A is going to happen from an upper level protocol at about the same time the route B is going to be installed into zebra. If these routes are received, and decoded, at about the same time there exists a chance that the metaQ will contain both of them at the same time. If the order of installation is [ B, A ]. B will be resolved correctly through A and installed, A will be processed and re-installed into the FIB. If the nexthops have changed for A then the owner of B should be notified about the change( and B can do the correct action here and decide to withdraw or re-install ). Now imagine if the order of routes received for processing on the metaQ is [ A, B ]. A will be received, processed and sent to the dataplane for reinstall. B will then be pulled off the metaQ and fail the install since A is in a `not Installed` state. Let's loosen the restriction in nexthop resolution for B such that if the route we are dependent on is a route replace operation allow the resolution to suceed. This requires zebra to track a new route state( ROUTE_ENTRY_ROUTE_REPLACING ) that can be looked at during nexthop resolution. I believe this is ok because A is a route replace operation, which could result in this: -route install failed, in which case B should be nht'ing and will receive the nht failure and the upper level protocol should remove B. -route install succeeded, no nexthop changes. In this case allowing the resolution for B is ok, NHT will not notify the upper level protocol so no action is needed. -route install succeeded, nexthops changes. In this case allowing the resolution for B is ok, NHT will notify the upper level protocol and it can decide to reinstall B or not based upon it's own algorithm. This set of events was found by the bgp_distance_change topotest(s). Effectively the tests were looking for the bug ( A, B order in the metaQ ) as the `correct` state. When under very heavy load, the A, B ordering caused A to just be installed and fully resolved in the dataplane before B is gotten to( which is entirely possible ). Signed-off-by: Donald Sharp --- tests/topotests/lib/bgp.py | 2 +- zebra/rib.h | 7 +++++++ zebra/zebra_nhg.c | 28 +++++++++++++++++++++++++--- zebra/zebra_rib.c | 17 ++++++++++++++--- 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/tests/topotests/lib/bgp.py b/tests/topotests/lib/bgp.py index 218f2dbe2b..a09b0b8b2e 100644 --- a/tests/topotests/lib/bgp.py +++ b/tests/topotests/lib/bgp.py @@ -2797,7 +2797,7 @@ def verify_best_path_as_per_admin_distance( if route in rib_routes_json: st_found = True # Verify next_hop in rib_routes_json - if rib_routes_json[route][0]["nexthops"][0]["ip"] == _next_hop: + if [nh for nh in rib_routes_json[route][0]["nexthops"] if nh['ip'] == _next_hop]: nh_found = True else: errormsg = ( diff --git a/zebra/rib.h b/zebra/rib.h index 99f52bcd4e..166500fa5c 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -158,6 +158,13 @@ struct route_entry { * differs from the rib/normal set of nexthops. */ #define ROUTE_ENTRY_USE_FIB_NHG 0x40 +/* + * Route entries that are going to the dplane for a Route Replace + * let's note the fact that this is happening. This will + * be useful when zebra is determing if a route can be + * used for nexthops + */ +#define ROUTE_ENTRY_ROUTE_REPLACING 0x80 /* Sequence value incremented for each dataplane operation */ uint32_t dplane_sequence; diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index e50023201b..ded2bd04bb 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -2369,11 +2369,33 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, resolved = 0; - /* Only useful if installed */ - if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) { + /* + * Only useful if installed or being Route Replacing + * Why Being Route Replaced as well? + * Imagine a route A and route B( that depends on A ) + * for recursive resolution and A already exists in the + * zebra rib. If zebra receives the routes + * for resolution at aproximately the same time in the [ + * B, A ] order on the workQ. If this happens then + * normal route resolution will happen and B will be + * resolved successfully and then A will be resolved + * successfully. Now imagine the reversed order [A, B]. + * A will be resolved and then scheduled for installed + * (Thus not having the ROUTE_ENTRY_INSTALLED flag ). B + * will then get resolved and fail to be installed + * because the original below test. Let's `loosen` this + * up a tiny bit and allow the + * ROUTE_ENTRY_ROUTE_REPLACING flag ( that is set when a + * Route Replace operation is being initiated on A now ) + * to now satisfy this situation. This will allow + * either order in the workQ to work properly. + */ + if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED) && + !CHECK_FLAG(match->status, + ROUTE_ENTRY_ROUTE_REPLACING)) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( - "%s: match %p (%pNG) not installed", + "%s: match %p (%pNG) not installed or being Route Replaced", __func__, match, match->nhe); goto done_with_match; diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index fceaaaa9f0..54ef4768eb 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -292,13 +292,16 @@ static char *_dump_re_status(const struct route_entry *re, char *buf, } snprintfrr( - buf, len, "%s%s%s%s%s%s%s", + buf, len, "%s%s%s%s%s%s%s%s", CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED) ? "Removed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED) ? "Changed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED) ? "Label Changed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED) ? "Queued " : "", + CHECK_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING) + ? "Replacing" + : "", CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) ? "Installed " : "", CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED) ? "Failed " : "", @@ -713,6 +716,7 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, if (old) { SET_FLAG(old->status, ROUTE_ENTRY_QUEUED); + SET_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING); /* Free old FIB nexthop group */ UNSET_FLAG(old->status, ROUTE_ENTRY_USE_FIB_NHG); @@ -1538,6 +1542,7 @@ static void zebra_rib_fixup_system(struct route_node *rn) SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); + UNSET_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING); for (ALL_NEXTHOPS(re->nhe->nhg, nhop)) { if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE)) @@ -1995,8 +2000,12 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) } else { if (!zrouter.asic_offloaded || (CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED) || - CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED))) + CHECK_FLAG(re->flags, + ZEBRA_FLAG_OFFLOAD_FAILED))) { + UNSET_FLAG(re->status, + ROUTE_ENTRY_ROUTE_REPLACING); UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); + } } } @@ -2252,8 +2261,10 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx) } /* Ensure we clear the QUEUED flag */ - if (!zrouter.asic_offloaded) + if (!zrouter.asic_offloaded) { UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED); + UNSET_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING); + } /* Is this a notification that ... matters? We mostly care about * the route that is currently selected for installation; we may also -- 2.39.5