From c6ed1cc16d083198cc5774971126aa371e653718 Mon Sep 17 00:00:00 2001 From: vivek Date: Sun, 25 Oct 2020 17:05:48 -0700 Subject: [PATCH] bgpd: Refine restarter operation - R-bit & F-bit Introduce BGP-wide flags to denote if BGP has started gracefully and GR is in progress or not. Use this for setting of the R-bit in the GR capability, and not a timer which is set for any new instance creation. Mark graceful restart is complete when the deferred path selection has been done and route sync with zebra as well as deferred EOR advertisement has been initiated. Introduce a function to check on F-bit setting rather than just base it on configuration. Subsequent commits will extend these functionalities. Signed-off-by: Vivek Venkatraman --- bgpd/bgp_fsm.c | 16 +++++++----- bgpd/bgp_main.c | 2 ++ bgpd/bgp_open.c | 54 +++++++++++++++++++-------------------- bgpd/bgp_packet.c | 2 +- bgpd/bgp_route.c | 18 ++++++++++++- bgpd/bgp_vty.c | 2 +- bgpd/bgpd.h | 65 +++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 121 insertions(+), 38 deletions(-) diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 8e1462f24b..76624fee9e 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -2046,9 +2046,10 @@ static int bgp_start_deferral_timer(struct bgp *bgp, afi_t afi, safi_t safi, } gr_info->eor_required++; /* Send message to RIB indicating route update pending */ - if (gr_info->af_enabled[afi][safi] == false) { - gr_info->af_enabled[afi][safi] = true; - /* Send message to RIB */ + if (gr_info->af_enabled == false) { + gr_info->af_enabled = true; + gr_info->route_sync = false; + bgp->gr_route_sync_pending = true; bgp_zebra_update(bgp, afi, safi, ZEBRA_CLIENT_ROUTE_UPDATE_PENDING); } @@ -2082,7 +2083,7 @@ static int bgp_update_gr_info(struct peer *peer, afi_t afi, safi_t safi) if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer) && BGP_PEER_RESTARTING_MODE(peer)) { /* Check if the forwarding state is preserved */ - if (CHECK_FLAG(bgp->flags, BGP_FLAG_GR_PRESERVE_FWD)) { + if (bgp_gr_is_forwarding_preserved(bgp)) { gr_info = &(bgp->gr_info[afi][safi]); ret = bgp_start_deferral_timer(bgp, afi, safi, gr_info); } @@ -2199,8 +2200,7 @@ bgp_establish(struct peer_connection *connection) } else { if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer) && BGP_PEER_RESTARTING_MODE(peer) && - CHECK_FLAG(peer->bgp->flags, - BGP_FLAG_GR_PRESERVE_FWD)) + bgp_gr_is_forwarding_preserved(peer->bgp)) peer->bgp->gr_info[afi][safi] .eor_required++; } @@ -2702,10 +2702,14 @@ bgp_peer_inherit_global_gr_mode(struct peer *peer, switch (global_gr_mode) { case GLOBAL_HELPER: BGP_PEER_GR_HELPER_ENABLE(peer); + break; case GLOBAL_GR: BGP_PEER_GR_ENABLE(peer); + break; case GLOBAL_DISABLE: BGP_PEER_GR_DISABLE(peer); + break; + case GLOBAL_INVALID: default: zlog_err("Unexpected Global GR mode %d", global_gr_mode); } diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 97658d340b..bfedb50156 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -511,6 +511,7 @@ int main(int argc, char **argv) /* BGP master init. */ bgp_master_init(frr_init(), buffer_size, addresses); + bm->startup_time = monotime(NULL); bm->port = bgp_port; if (bgp_port == 0) bgp_option_set(BGP_OPT_NO_LISTEN); @@ -518,6 +519,7 @@ int main(int argc, char **argv) bgp_option_set(BGP_OPT_NO_FIB); if (no_zebra_flag) bgp_option_set(BGP_OPT_NO_ZEBRA); + SET_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART); bgp_error_init(); /* Initializations. */ libagentx_init(); diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c index 248d478fe1..b516701b80 100644 --- a/bgpd/bgp_open.c +++ b/bgpd/bgp_open.c @@ -1587,15 +1587,12 @@ static void bgp_peer_send_gr_capability(struct stream *s, struct peer *peer, uint32_t restart_time; unsigned long capp = 0; unsigned long rcapp = 0; + struct bgp *bgp = peer->bgp; if (!CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) && !CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART_HELPER)) return; - if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) - zlog_debug("[BGP_GR] Sending helper Capability for Peer :%s :", - peer->host); - SET_FLAG(peer->cap, PEER_CAP_RESTART_ADV); stream_putc(s, BGP_OPEN_OPT_CAP); capp = stream_get_endp(s); /* Set Capability Len Pointer */ @@ -1605,42 +1602,41 @@ static void bgp_peer_send_gr_capability(struct stream *s, struct peer *peer, /* Set Restart Capability Len Pointer */ rcapp = stream_get_endp(s); stream_putc(s, 0); - restart_time = peer->bgp->restart_time; - if (peer->bgp->t_startup) { + restart_time = bgp->restart_time; + if (peer->bgp->t_startup || bgp_in_graceful_restart()) { SET_FLAG(restart_time, GRACEFUL_RESTART_R_BIT); SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_R_BIT_ADV); - if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) - zlog_debug("[BGP_GR] Sending R-Bit for peer: %s", - peer->host); } - if (CHECK_FLAG(peer->bgp->flags, BGP_FLAG_GRACEFUL_NOTIFICATION)) { + if (CHECK_FLAG(bgp->flags, BGP_FLAG_GRACEFUL_NOTIFICATION)) { SET_FLAG(restart_time, GRACEFUL_RESTART_N_BIT); SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV); - if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) - zlog_debug("[BGP_GR] Sending N-Bit for peer: %s", - peer->host); } stream_putw(s, restart_time); + if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) + zlog_debug("%s: Sending GR Capability, Restart time %d R-bit %s, N-bit %s", + peer->host, bgp->restart_time, + CHECK_FLAG(peer->cap, + PEER_CAP_GRACEFUL_RESTART_R_BIT_ADV) + ? "SET" + : "NOT-SET", + CHECK_FLAG(peer->cap, + PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV) + ? "SET" + : "NOT-SET"); + /* Send address-family specific graceful-restart capability * only when GR config is present */ if (CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)) { - if (CHECK_FLAG(peer->bgp->flags, BGP_FLAG_GR_PRESERVE_FWD) - && BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) - zlog_debug("[BGP_GR] F bit Set"); - FOREACH_AFI_SAFI (afi, safi) { + bool f_bit = false; + if (!peer->afc[afi][safi]) continue; - if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) - zlog_debug( - "[BGP_GR] Sending GR Capability for AFI :%d :, SAFI :%d:", - afi, safi); - /* Convert AFI, SAFI to values for * packet. */ @@ -1648,11 +1644,15 @@ static void bgp_peer_send_gr_capability(struct stream *s, struct peer *peer, &pkt_safi); stream_putw(s, pkt_afi); stream_putc(s, pkt_safi); - if (CHECK_FLAG(peer->bgp->flags, - BGP_FLAG_GR_PRESERVE_FWD)) - stream_putc(s, GRACEFUL_RESTART_F_BIT); - else - stream_putc(s, 0); + + f_bit = bgp_gr_is_forwarding_preserved(bgp); + + if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) + zlog_debug("... F-bit %s for %s", + f_bit ? "SET" : "NOT-SET", + get_afi_safi_str(afi, safi, false)); + + stream_putc(s, f_bit ? GRACEFUL_RESTART_F_BIT : 0); } } diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 8825136e17..157468f8c9 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -1296,7 +1296,7 @@ void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi, stream_putc(s, 0); gr_restart_time = peer->bgp->restart_time; - if (peer->bgp->t_startup) { + if (peer->bgp->t_startup || bgp_in_graceful_restart()) { SET_FLAG(gr_restart_time, GRACEFUL_RESTART_R_BIT); SET_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_R_BIT_ADV); } diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 9c3602311f..c7e7b7a740 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -3877,6 +3877,7 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi) struct bgp_dest *dest; int cnt = 0; struct afi_safi_info *thread_info; + bool route_sync_pending = false; if (bgp->gr_info[afi][safi].t_route_select) { struct event *t = bgp->gr_info[afi][safi].t_route_select; @@ -3886,7 +3887,7 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi) EVENT_OFF(bgp->gr_info[afi][safi].t_route_select); } - if (BGP_DEBUG(update, UPDATE_OUT)) { + if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) { zlog_debug("%s: processing route for %s : cnt %d", __func__, get_afi_safi_str(afi, safi, false), bgp->gr_info[afi][safi].gr_deferred); @@ -3919,6 +3920,21 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi) /* Send route processing complete message to RIB */ bgp_zebra_update(bgp, afi, safi, ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE); + bgp->gr_info[afi][safi].route_sync = true; + + /* If this instance is all done, check for GR completion overall */ + FOREACH_AFI_SAFI_NSF (afi, safi) { + if (bgp->gr_info[afi][safi].af_enabled && + !bgp->gr_info[afi][safi].route_sync) { + route_sync_pending = true; + break; + } + } + + if (!route_sync_pending) { + bgp->gr_route_sync_pending = false; + bgp_update_gr_completion(); + } return; } diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index f5aa2e03bf..8ba167364c 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -3652,7 +3652,7 @@ DEFUN (bgp_neighbor_graceful_restart_disable_set, ret = bgp_neighbor_graceful_restart(peer, PEER_DISABLE_CMD); if (ret == BGP_GR_SUCCESS) { - if (peer->bgp->t_startup) + if (peer->bgp->t_startup || bgp_in_graceful_restart()) bgp_peer_gr_flags_update(peer); VTY_BGP_GR_ROUTER_DETECT(bgp, peer, peer->bgp->peer); diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 77955fd5c9..3c5826113d 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -167,6 +167,8 @@ struct bgp_master { #define BM_FLAG_GR_RESTARTER (1 << 3) #define BM_FLAG_GR_DISABLED (1 << 4) #define BM_FLAG_GR_PRESERVE_FWD (1 << 5) +#define BM_FLAG_GRACEFUL_RESTART (1 << 6) +#define BM_FLAG_GR_COMPLETE (1 << 7) #define BM_FLAG_GR_CONFIGURED (BM_FLAG_GR_RESTARTER | BM_FLAG_GR_DISABLED) @@ -176,6 +178,9 @@ struct bgp_master { uint32_t select_defer_time; uint32_t rib_stale_time; + time_t startup_time; + time_t gr_completion_time; + bool terminating; /* global flag that sigint terminate seen */ /* TOS value for outgoing packets in BGP connections */ @@ -305,9 +310,9 @@ struct graceful_restart_info { /* Best route select */ struct event *t_route_select; /* AFI, SAFI enabled */ - bool af_enabled[AFI_MAX][SAFI_MAX]; + bool af_enabled; /* Route update completed */ - bool route_sync[AFI_MAX][SAFI_MAX]; + bool route_sync; }; enum global_mode { @@ -559,6 +564,9 @@ struct bgp { */ enum zebra_gr_mode present_zebra_gr_state; + /* Is deferred path selection still not complete? */ + bool gr_route_sync_pending; + /* BGP Per AF flags */ uint16_t af_flags[AFI_MAX][SAFI_MAX]; #define BGP_CONFIG_DAMPENING (1 << 0) @@ -2754,6 +2762,59 @@ static inline bool bgp_in_graceful_shutdown(struct bgp *bgp) !!CHECK_FLAG(bm->flags, BM_FLAG_GRACEFUL_SHUTDOWN)); } +static inline bool bgp_in_graceful_restart(void) +{ + /* True if BGP has (re)started gracefully (based on flags + * noted at startup) and GR is not complete. + */ + return (CHECK_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART) && + !CHECK_FLAG(bm->flags, BM_FLAG_GR_COMPLETE)); +} + +static inline bool bgp_is_graceful_restart_complete(void) +{ + /* True if BGP has (re)started gracefully (based on flags + * noted at startup) and GR is marked as complete. + */ + return (CHECK_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART) && + CHECK_FLAG(bm->flags, BM_FLAG_GR_COMPLETE)); +} + +static inline void bgp_update_gr_completion(void) +{ + struct listnode *node, *nnode; + struct bgp *bgp; + + /* + * Check and mark GR complete. This is done when deferred + * path selection has been completed for all instances and + * route-advertisement/EOR and route-sync with zebra has + * been invoked. + */ + if (!CHECK_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART) || + CHECK_FLAG(bm->flags, BM_FLAG_GR_COMPLETE)) + return; + + for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp)) { + if (bgp->gr_route_sync_pending) + return; + } + + SET_FLAG(bm->flags, BM_FLAG_GR_COMPLETE); + bm->gr_completion_time = monotime(NULL); +} + +static inline bool bgp_gr_is_forwarding_preserved(struct bgp *bgp) +{ + /* + * Is forwarding state preserved? Based either on config + * or if BGP restarted gracefully. + * TBD: Additional AFI/SAFI based checks etc. + */ + return (CHECK_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART) || + CHECK_FLAG(bgp->flags, BGP_FLAG_GR_PRESERVE_FWD)); +} + /* For benefit of rfapi */ extern struct peer *peer_new(struct bgp *bgp); -- 2.39.5