From 81322b96b0b1b31a89ce8e292814ed13c2cee9eb Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 29 Mar 2023 15:27:09 -0400 Subject: [PATCH] zebra: Ensure gr events run after Meta Queue has run BGP signals to zebra that a afi has converged immediately after it has finished processing all routes for a given afi/safi. This generates events in zebra in this order a) Routes received from BGP, placed on early-rib Meta-Q b) Signal GR for the afi. Now imagine that zebra reads GR code and immediately processes routes that are in the actual rib and removes some routes. This generates a c) route deletion to the kernel for some number of routes that may be in the the early-rib Meta-Q d) Process the Meta-Q, and re-install the routes This is undesirable behavior in zebra. In that while we may end up in a correct state, there will be a blip for some number of routes that happen to be in the early rib Meta-Q. Modify the GR code to have it's own processing entry at the end of the Meta-Q. This will allow all routes to be processed and ready for handling by the Graceful Restart code. Signed-off-by: Donald Sharp --- zebra/rib.h | 8 ++++- zebra/zebra_gr.c | 78 +++++++++++++++++++++++++---------------------- zebra/zebra_rib.c | 72 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 120 insertions(+), 38 deletions(-) diff --git a/zebra/rib.h b/zebra/rib.h index 0e897881e4..91da469e2e 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -180,7 +180,7 @@ struct route_entry { * sub-queue 9: any other origin (if any) typically those that * don't generate routes */ -#define MQ_SIZE 10 +#define MQ_SIZE 11 struct meta_queue { struct list *subq[MQ_SIZE]; uint32_t size; /* sum of lengths of all subqueues */ @@ -601,6 +601,12 @@ static inline struct nexthop_group *rib_get_fib_backup_nhg( return &(re->fib_backup_ng); } +extern void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto, + uint8_t instance); + +extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto, + uint8_t instance); + extern void zebra_vty_init(void); extern pid_t pid; diff --git a/zebra/zebra_gr.c b/zebra/zebra_gr.c index b74150b8a7..40efbf5db4 100644 --- a/zebra/zebra_gr.c +++ b/zebra/zebra_gr.c @@ -400,26 +400,18 @@ void zread_client_capabilities(ZAPI_HANDLER_ARGS) __func__, zebra_route_string(client->proto), api.afi, api.safi); return; - } else { - struct zebra_gr_afi_clean *gac; - - LOG_GR("%s: Client %s vrf %s(%u) route update complete for AFI %d, SAFI %d", - __func__, zebra_route_string(client->proto), - VRF_LOGNAME(vrf), info->vrf_id, api.afi, - api.safi); - info->route_sync[api.afi] = true; - - gac = XCALLOC(MTYPE_ZEBRA_GR, sizeof(*gac)); - - gac->info = info; - gac->afi = api.afi; - gac->proto = client->proto; - gac->instance = client->instance; - - event_add_event(zrouter.master, - zebra_gr_delete_stale_route_table_afi, - gac, 0, &gac->t_gac); } + + LOG_GR("%s: Client %s vrf %s(%u) route update complete for AFI %d, SAFI %d", + __func__, zebra_route_string(client->proto), + VRF_LOGNAME(vrf), info->vrf_id, api.afi, api.safi); + info->route_sync[api.afi] = true; + + /* + * Schedule for after anything already in the meta Q + */ + rib_add_gr_run(api.afi, api.vrf_id, client->proto, + client->instance); zebra_gr_process_client_stale_routes(client, info); break; case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING: @@ -552,7 +544,6 @@ static void zebra_gr_delete_stale_route_table_afi(struct event *event) done: XFREE(MTYPE_ZEBRA_GR, gac); - return; } /* @@ -583,22 +574,12 @@ static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info, /* Process routes for all AFI */ for (afi = AFI_IP; afi < AFI_MAX; afi++) { - struct zebra_gr_afi_clean *gac = - XCALLOC(MTYPE_ZEBRA_GR, sizeof(*gac)); - - gac->info = info; - gac->afi = afi; - gac->proto = proto; - gac->instance = instance; - - if (info->do_delete) - event_execute(zrouter.master, - zebra_gr_delete_stale_route_table_afi, - gac, 0); - else - event_add_event(zrouter.master, - zebra_gr_delete_stale_route_table_afi, - gac, 0, &gac->t_gac); + + /* + * Schedule for immediately after anything in the + * meta-Q + */ + rib_add_gr_run(afi, info->vrf_id, proto, instance); } return 0; } @@ -662,3 +643,28 @@ static void zebra_gr_process_client_stale_routes(struct zserv *client, EVENT_OFF(info->t_stale_removal); } } + +void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto, + uint8_t instance) +{ + struct zserv *client = zserv_find_client(proto, instance); + struct client_gr_info *info = NULL; + struct zebra_gr_afi_clean *gac; + + TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) { + if (info->vrf_id == vrf_id) + break; + } + + if (info == NULL) + return; + + gac = XCALLOC(MTYPE_ZEBRA_GR, sizeof(*gac)); + gac->info = info; + gac->afi = afi; + gac->proto = proto; + gac->instance = instance; + + event_add_event(zrouter.master, zebra_gr_delete_stale_route_table_afi, + gac, 0, &gac->t_gac); +} diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 5a86b7d0a5..1635dc31ae 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -64,7 +64,12 @@ DEFINE_HOOK(rib_update, (struct route_node * rn, const char *reason), DEFINE_HOOK(rib_shutdown, (struct route_node * rn), (rn)); -/* Meta Q's specific names */ +/* + * Meta Q's specific names + * + * If you add something here ensure that you + * change MQ_SIZE as well over in rib.h + */ enum meta_queue_indexes { META_QUEUE_NHG, META_QUEUE_EVPN, @@ -76,6 +81,7 @@ enum meta_queue_indexes { META_QUEUE_NOTBGP, META_QUEUE_BGP, META_QUEUE_OTHER, + META_QUEUE_GR_RUN, }; /* Each route type's string and default distance value. */ @@ -250,6 +256,8 @@ static const char *subqueue2str(enum meta_queue_indexes index) return "BGP Routes"; case META_QUEUE_OTHER: return "Other Routes"; + case META_QUEUE_GR_RUN: + return "Graceful Restart"; } return "Unknown"; @@ -3089,6 +3097,23 @@ static void process_subq_early_route(struct listnode *lnode) process_subq_early_route_add(ere); } +struct meta_q_gr_run { + afi_t afi; + vrf_id_t vrf_id; + uint8_t proto; + uint8_t instance; +}; + +static void process_subq_gr_run(struct listnode *lnode) +{ + struct meta_q_gr_run *gr_run = listgetdata(lnode); + + zebra_gr_process_client(gr_run->afi, gr_run->vrf_id, gr_run->proto, + gr_run->instance); + + XFREE(MTYPE_WQ_WRAPPER, gr_run); +} + /* * Examine the specified subqueue; process one entry and return 1 if * there is a node, return 0 otherwise. @@ -3122,6 +3147,9 @@ static unsigned int process_subq(struct list *subq, case META_QUEUE_OTHER: process_subq_route(lnode, qindex); break; + case META_QUEUE_GR_RUN: + process_subq_gr_run(lnode); + break; } list_delete_node(subq, lnode); @@ -3727,6 +3755,20 @@ static void early_route_meta_queue_free(struct meta_queue *mq, struct list *l, } } +static void rib_meta_queue_gr_run_free(struct meta_queue *mq, struct list *l, + struct zebra_vrf *zvrf) +{ + struct meta_q_gr_run *gr_run; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS(l, node, nnode, gr_run)) { + if (zvrf && zvrf->vrf->vrf_id != gr_run->vrf_id) + continue; + + XFREE(MTYPE_WQ_WRAPPER, gr_run); + } +} + void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf) { enum meta_queue_indexes i; @@ -3754,6 +3796,9 @@ void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf) case META_QUEUE_OTHER: rib_meta_queue_free(mq, mq->subq[i], zvrf); break; + case META_QUEUE_GR_RUN: + rib_meta_queue_gr_run_free(mq, mq->subq[i], zvrf); + break; } if (!zvrf) list_delete(&mq->subq[i]); @@ -4094,6 +4139,17 @@ void _route_entry_dump(const char *func, union prefixconstptr pp, zlog_debug("%s: dump complete", straddr); } +static int rib_meta_queue_gr_run_add(struct meta_queue *mq, void *data) +{ + listnode_add(mq->subq[META_QUEUE_GR_RUN], data); + mq->size++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("Graceful Run adding"); + + return 0; +} + static int rib_meta_queue_early_route_add(struct meta_queue *mq, void *data) { struct zebra_early_route *ere = data; @@ -4110,6 +4166,20 @@ static int rib_meta_queue_early_route_add(struct meta_queue *mq, void *data) return 0; } +int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto, uint8_t instance) +{ + struct meta_q_gr_run *gr_run; + + gr_run = XCALLOC(MTYPE_WQ_WRAPPER, sizeof(*gr_run)); + + gr_run->afi = afi; + gr_run->proto = proto; + gr_run->vrf_id = vrf_id; + gr_run->instance = instance; + + return mq_add_handler(gr_run, rib_meta_queue_gr_run_add); +} + struct route_entry *zebra_rib_route_entry_new(vrf_id_t vrf_id, int type, uint8_t instance, uint32_t flags, uint32_t nhe_id, -- 2.39.5