]> git.puffer.fish Git - matthieu/frr.git/commitdiff
zebra: GR code could potentially stop running
authorDonald Sharp <sharpd@nvidia.com>
Fri, 24 Mar 2023 18:29:26 +0000 (14:29 -0400)
committerDonald Sharp <sharpd@nvidia.com>
Wed, 29 Mar 2023 11:48:42 +0000 (07:48 -0400)
When GR is running and attempting to clear up a node
if the node that is currently saved and we are coming
back to happens to be deleted during the time zebra
suspends the GR code due to hitting the node limit
then zebra GR code will just completely stop processing
and potentially leave stale nodes around forever.

Let's just remove this hole and process what we can.
Can you imagine trying to debug this after the fact?

If we remove a node then that counts toward the maximum
to process of ZEBRA_MAX_STALE_ROUTE_COUNT.  This should
prevent any non-processing with a slightly larger cost
of having to look at a few nodes repeatedly

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
zebra/zebra_gr.c
zebra/zserv.c
zebra/zserv.h

index daebfa8c57c627b538055166ee9cc85a9b59497d..13e5e9c4a61434929f305da022ec0964ec2c3a68 100644 (file)
@@ -118,8 +118,6 @@ static void zebra_gr_client_info_delte(struct zserv *client,
 
        EVENT_OFF(info->t_stale_removal);
 
-       XFREE(MTYPE_ZEBRA_GR, info->current_prefix);
-
        LOG_GR("%s: Instance info is being deleted for client %s vrf %s(%u)",
               __func__, zebra_route_string(client->proto), VRF_LOGNAME(vrf),
               info->vrf_id);
@@ -489,7 +487,6 @@ static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread)
                       __func__, zebra_route_string(client->proto),
                       VRF_LOGNAME(vrf), info->vrf_id);
 
-               XFREE(MTYPE_ZEBRA_GR, info->current_prefix);
                info->current_afi = 0;
                zebra_gr_delete_stale_client(info);
        }
@@ -499,14 +496,13 @@ static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread)
 /*
  * Function to process to check if route entry is stale
  * or has been updated.
+ *
+ * Returns true when a node is deleted else false
  */
-static void zebra_gr_process_route_entry(struct zserv *client,
+static bool zebra_gr_process_route_entry(struct zserv *client,
                                         struct route_node *rn,
                                         struct route_entry *re)
 {
-       if ((client == NULL) || (rn == NULL) || (re == NULL))
-               return;
-
        /* If the route is not refreshed after restart, delete the entry */
        if (re->uptime < client->restart_time) {
                if (IS_ZEBRA_DEBUG_RIB)
@@ -514,7 +510,11 @@ static void zebra_gr_process_route_entry(struct zserv *client,
                                   __func__, zebra_route_string(client->proto),
                                   &rn->p);
                rib_delnode(rn, re);
+
+               return true;
        }
+
+       return false;
 }
 
 /*
@@ -525,7 +525,7 @@ static void zebra_gr_process_route_entry(struct zserv *client,
 static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
                                           struct zebra_vrf *zvrf)
 {
-       struct route_node *rn, *curr;
+       struct route_node *rn;
        struct route_entry *re;
        struct route_entry *next;
        struct route_table *table;
@@ -559,21 +559,7 @@ static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
                if (!table)
                        continue;
 
-               /*
-                * If the current prefix is NULL then get the first
-                * route entry in the table
-                */
-               if (info->current_prefix == NULL) {
-                       rn = route_top(table);
-                       if (rn == NULL)
-                               continue;
-                       curr = rn;
-               } else
-                       /* Get the next route entry */
-                       curr = route_table_get_next(table,
-                                                   info->current_prefix);
-
-               for (rn = curr; rn; rn = srcdest_route_next(rn)) {
+               for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) {
                        RNODE_FOREACH_RE_SAFE (rn, re, next) {
                                if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
                                        continue;
@@ -582,11 +568,10 @@ static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
                                 * the route
                                 */
                                if (re->type == proto &&
-                                   re->instance == instance) {
-                                       zebra_gr_process_route_entry(s_client,
-                                                                    rn, re);
+                                   re->instance == instance &&
+                                   zebra_gr_process_route_entry(s_client, rn,
+                                                                re))
                                        n++;
-                               }
 
                                /* If the max route count is reached
                                 * then timer thread will be restarted
@@ -595,21 +580,10 @@ static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
                                if ((n >= ZEBRA_MAX_STALE_ROUTE_COUNT) &&
                                    (info->do_delete == false)) {
                                        info->current_afi = afi;
-                                       info->current_prefix =
-                                               XCALLOC(MTYPE_ZEBRA_GR,
-                                                       sizeof(struct prefix));
-                                       prefix_copy(info->current_prefix,
-                                                   &rn->p);
                                        return n;
                                }
                        }
                }
-
-               /*
-                * Reset the current prefix to indicate processing completion
-                * of the current AFI
-                */
-               XFREE(MTYPE_ZEBRA_GR, info->current_prefix);
        }
        return 0;
 }
index 70707866ee286092a81865de8881c8e1527ea4fc..10227d17c1622906da2aa826f2ac58781725695c 100644 (file)
@@ -1153,9 +1153,6 @@ static void zebra_show_stale_client_detail(struct vty *vty,
                                }
                        }
                        vty_out(vty, "Current AFI : %d\n", info->current_afi);
-                       if (info->current_prefix)
-                               vty_out(vty, "Current prefix : %pFX\n",
-                                       info->current_prefix);
                }
        }
        vty_out(vty, "\n");
index aa58a3a29948c5942c78416caa833314c3712098..9da68612111c14d354e04f94057c62094d29def3 100644 (file)
@@ -68,7 +68,6 @@ struct client_gr_info {
        bool route_sync[AFI_MAX][SAFI_MAX];
 
        /* Book keeping */
-       struct prefix *current_prefix;
        void *stale_client_ptr;
        struct event *t_stale_removal;