From: Donatas Abraitis Date: Mon, 20 Dec 2021 21:03:09 +0000 (+0200) Subject: bgpd: Implement LLGR helper mode X-Git-Tag: base_8.2~93^2~2 X-Git-Url: https://git.puffer.fish/?a=commitdiff_plain;h=1479ed2fb35f4a5ae1017201a7ee37ba2727163a;p=mirror%2Ffrr.git bgpd: Implement LLGR helper mode Tested between GoBGP and FRR (this commit). ``` ┌───────────┐ ┌────────────┐ │ │ │ │ │ GoBGPD │ │ FRRouting │ │ (restart) │ │ │ │ │ │ │ └──────┬────┘ └───────┬────┘ │ │ │ │ │ │ │ ┌───────────┐ │ │ │ │ │ │ │ │ │ └─────┤ FRRouting ├────────┘ │ (helper) │ │ │ └───────────┘ // GoBGPD % cat /etc/gobgp/config.toml [global.config] as = 65002 router-id = "2.2.2.2" port = 179 [[neighbors]] [neighbors.config] peer-as = 65001 neighbor-address = "2a02:abc::123" [neighbors.graceful-restart.config] enabled = true restart-time = 3 long-lived-enabled = true [[neighbors.afi-safis]] [neighbors.afi-safis.config] afi-safi-name = "ipv6-unicast" [neighbors.afi-safis.mp-graceful-restart.config] enabled = true [neighbors.afi-safis.long-lived-graceful-restart.config] enabled = true restart-time = 10 [[neighbors.afi-safis]] [neighbors.afi-safis.config] afi-safi-name = "ipv4-unicast" [neighbors.afi-safis.mp-graceful-restart.config] enabled = true [neighbors.afi-safis.long-lived-graceful-restart.config] enabled = true restart-time = 20 % ./gobgp global rib add -a ipv6 2001:db8:4::/64 % ./gobgp global rib add -a ipv6 2001:db8:5::/64 community 65535:7 % ./gobgp global rib add -a ipv4 100.100.100.100/32 % ./gobgp global rib add -a ipv4 100.100.100.200/32 community 65535:7 ``` 1. When killing GoBGPD, graceful restart timer starts in FRR helper router; 2. When GR timer expires in helper router: a) LLGR_STALE community is attached to routes to be retained; b) Clear stale routes that have NO_LLGR community attached; c) Start LLGR timer per AFI/SAFI; d) Recompute bestpath and reannounce routes to peers; e) When LLGR timer expires, clear all routes on particular AFI/SAFI. 
Signed-off-by: Donatas Abraitis --- diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 15e9955872..1e9dd21fd1 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -48,6 +48,7 @@ #include "bgpd/bgp_dump.h" #include "bgpd/bgp_open.h" #include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_community.h" #include "bgpd/bgp_updgrp.h" #include "bgpd/bgp_nht.h" #include "bgpd/bgp_bfd.h" @@ -297,6 +298,7 @@ static struct peer *peer_xfer_conn(struct peer *from_peer) peer->afc_adv[afi][safi] = from_peer->afc_adv[afi][safi]; peer->afc_recv[afi][safi] = from_peer->afc_recv[afi][safi]; peer->orf_plist[afi][safi] = from_peer->orf_plist[afi][safi]; + peer->llgr[afi][safi] = from_peer->llgr[afi][safi]; } if (bgp_getsockname(peer) < 0) { @@ -352,6 +354,9 @@ static struct peer *peer_xfer_conn(struct peer *from_peer) structure. */ void bgp_timer_set(struct peer *peer) { + afi_t afi; + safi_t safi; + switch (peer->status) { case Idle: /* First entry point of peer's finite state machine. In Idle @@ -465,6 +470,10 @@ void bgp_timer_set(struct peer *peer) case Deleted: BGP_TIMER_OFF(peer->t_gr_restart); BGP_TIMER_OFF(peer->t_gr_stale); + + FOREACH_AFI_SAFI (afi, safi) + BGP_TIMER_OFF(peer->t_llgr_stale[afi][safi]); + BGP_TIMER_OFF(peer->t_pmax_restart); BGP_TIMER_OFF(peer->t_refresh_stalepath); /* fallthru */ @@ -641,22 +650,132 @@ const char *const peer_down_str[] = {"", "Reached received prefix count", "Socket Error"}; -static int bgp_graceful_restart_timer_expire(struct thread *thread) +static void bgp_graceful_restart_timer_off(struct peer *peer) { - struct peer *peer; afi_t afi; safi_t safi; - peer = THREAD_ARG(thread); - - /* NSF delete stale route */ - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi <= SAFI_MPLS_VPN; safi++) - if (peer->nsf[afi][safi]) - bgp_clear_stale_route(peer, afi, safi); + FOREACH_AFI_SAFI (afi, safi) + if (CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_LLGR_WAIT)) + return; UNSET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); 
BGP_TIMER_OFF(peer->t_gr_stale); + bgp_timer_set(peer); +} + +static int bgp_llgr_stale_timer_expire(struct thread *thread) +{ + struct peer_af *paf; + struct peer *peer; + afi_t afi; + safi_t safi; + + paf = THREAD_ARG(thread); + + peer = paf->peer; + afi = paf->afi; + safi = paf->safi; + + /* If the timer for the "Long-lived Stale Time" expires before the + * session is re-established, the helper MUST delete all the + * stale routes from the neighbor that it is retaining. + */ + if (bgp_debug_neighbor_events(peer)) + zlog_debug("%s Long-lived stale timer (%s) expired", peer->host, + get_afi_safi_str(afi, safi, false)); + + UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_LLGR_WAIT); + + bgp_clear_stale_route(peer, afi, safi); + + bgp_graceful_restart_timer_off(peer); + + return 0; +} + +static void bgp_set_llgr_stale(struct peer *peer, afi_t afi, safi_t safi) +{ + struct bgp_dest *dest; + struct bgp_path_info *pi; + struct bgp_table *table; + struct attr attr; + + if (safi == SAFI_MPLS_VPN || safi == SAFI_ENCAP || safi == SAFI_EVPN) { + for (dest = bgp_table_top(peer->bgp->rib[afi][safi]); dest; + dest = bgp_route_next(dest)) { + struct bgp_dest *rm; + + table = bgp_dest_get_bgp_table_info(dest); + if (!table) + continue; + + for (rm = bgp_table_top(table); rm; + rm = bgp_route_next(rm)) + for (pi = bgp_dest_get_bgp_path_info(rm); pi; + pi = pi->next) { + if (pi->peer != peer) + continue; + + if (pi->attr->community && + community_include( + pi->attr->community, + COMMUNITY_NO_LLGR)) + continue; + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s Long-lived set stale community (LLGR_STALE) for: %pFX", + peer->host, &dest->p); + + attr = *pi->attr; + bgp_attr_add_llgr_community(&attr); + pi->attr = bgp_attr_intern(&attr); + bgp_recalculate_afi_safi_bestpaths( + peer->bgp, afi, safi); + + break; + } + } + } else { + for (dest = bgp_table_top(peer->bgp->rib[afi][safi]); dest; + dest = bgp_route_next(dest)) + for (pi = bgp_dest_get_bgp_path_info(dest); pi; 
+ pi = pi->next) { + if (pi->peer != peer) + continue; + + if (pi->attr->community && + community_include(pi->attr->community, + COMMUNITY_NO_LLGR)) + continue; + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s Long-lived set stale community (LLGR_STALE) for: %pFX", + peer->host, &dest->p); + + attr = *pi->attr; + bgp_attr_add_llgr_community(&attr); + pi->attr = bgp_attr_intern(&attr); + bgp_recalculate_afi_safi_bestpaths(peer->bgp, + afi, safi); + + break; + } + } +} + +static int bgp_graceful_restart_timer_expire(struct thread *thread) +{ + struct peer *peer, *tmp_peer; + struct listnode *node, *nnode; + struct peer_af *paf; + afi_t afi; + safi_t safi; + + peer = THREAD_ARG(thread); if (bgp_debug_neighbor_events(peer)) { zlog_debug("%s graceful restart timer expired", peer->host); @@ -664,7 +783,54 @@ static int bgp_graceful_restart_timer_expire(struct thread *thread) peer->host); } - bgp_timer_set(peer); + FOREACH_AFI_SAFI (afi, safi) { + if (!peer->nsf[afi][safi]) + continue; + + /* Once the "Restart Time" period ends, the LLGR period is + * said to have begun and the following procedures MUST be + * performed: + * + * The helper router MUST start a timer for the + * "Long-lived Stale Time". + * + * The helper router MUST attach the LLGR_STALE community + * for the stale routes being retained. Note that this + * requirement implies that the routes would need to be + * readvertised, to disseminate the modified community. 
+ */ + if (peer->llgr[afi][safi].stale_time) { + paf = peer_af_find(peer, afi, safi); + if (!paf) + continue; + + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s Long-lived stale timer (%s) started for %d sec", + peer->host, + get_afi_safi_str(afi, safi, false), + peer->llgr[afi][safi].stale_time); + + SET_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_LLGR_WAIT); + + bgp_set_llgr_stale(peer, afi, safi); + bgp_clear_stale_route(peer, afi, safi); + + thread_add_timer(bm->master, + bgp_llgr_stale_timer_expire, paf, + peer->llgr[afi][safi].stale_time, + &peer->t_llgr_stale[afi][safi]); + + for (ALL_LIST_ELEMENTS(peer->bgp->peer, node, nnode, + tmp_peer)) + bgp_announce_route(tmp_peer, afi, safi, false); + } else { + bgp_clear_stale_route(peer, afi, safi); + } + } + + bgp_graceful_restart_timer_off(peer); return 0; } diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c index a05921e7b6..6bdefd0e9b 100644 --- a/bgpd/bgp_open.c +++ b/bgpd/bgp_open.c @@ -606,8 +606,15 @@ static int bgp_capability_llgr(struct peer *peer, peer->host, iana_afi2str(pkt_afi), iana_safi2str(pkt_safi)); } else { + if (bgp_debug_neighbor_events(peer)) + zlog_debug( + "%s Addr-family %s/%s(afi/safi) Long-lived Graceful Restart capability stale time %u sec", + peer->host, iana_afi2str(pkt_afi), + iana_safi2str(pkt_safi), stale_time); + peer->llgr[afi][safi].flags = flags; - peer->llgr[afi][safi].stale_time = stale_time; + peer->llgr[afi][safi].stale_time = + MIN(stale_time, peer->bgp->llgr_stale_time); SET_FLAG(peer->af_cap[afi][safi], PEER_CAP_LLGR_AF_RCV); } } diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index cc0ee9e8f2..4bb08404d6 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -1869,11 +1869,11 @@ static int bgp_update_receive(struct peer *peer, bgp_size_t size) if (peer->nsf[afi][safi]) bgp_clear_stale_route(peer, afi, safi); - zlog_info( - "%s: rcvd End-of-RIB for %s from %s in vrf %s", - __func__, get_afi_safi_str(afi, safi, false), - peer->host, vrf ? 
vrf->name : VRF_DEFAULT_NAME); - } + zlog_info( + "%s: rcvd End-of-RIB for %s from %s in vrf %s", + __func__, get_afi_safi_str(afi, safi, false), + peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME); + } } /* Everything is done. We unintern temporary structures which diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index d8c2c98f4f..d3e058dd09 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -632,6 +632,33 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, newattr = new->attr; existattr = exist->attr; + /* A BGP speaker that has advertised the "Long-lived Graceful Restart + * Capability" to a neighbor MUST perform the following upon receiving + * a route from that neighbor with the "LLGR_STALE" community, or upon + * attaching the "LLGR_STALE" community itself per Section 4.2: + * + * Treat the route as the least-preferred in route selection (see + * below). See the Risks of Depreferencing Routes section (Section 5.2) + * for a discussion of potential risks inherent in doing this. 
+ */ + if (newattr->community && + community_include(newattr->community, COMMUNITY_LLGR_STALE)) { + if (debug) + zlog_debug( + "%s: %s wins over %s due to LLGR_STALE community", + pfx_buf, new_buf, exist_buf); + return 0; + } + + if (existattr->community && + community_include(existattr->community, COMMUNITY_LLGR_STALE)) { + if (debug) + zlog_debug( + "%s: %s loses to %s due to LLGR_STALE community", + pfx_buf, new_buf, exist_buf); + return 1; + } + new_p = bgp_dest_get_prefix(new->net); /* For EVPN routes, we cannot just go by local vs remote, we have to @@ -1708,6 +1735,36 @@ static void bgp_peer_as_override(struct bgp *bgp, afi_t afi, safi_t safi, } } +void bgp_attr_add_llgr_community(struct attr *attr) +{ + struct community *old; + struct community *new; + struct community *merge; + struct community *llgr; + + old = attr->community; + llgr = community_str2com("llgr-stale"); + + assert(llgr); + + if (old) { + merge = community_merge(community_dup(old), llgr); + + if (old->refcnt == 0) + community_free(&old); + + new = community_uniq_sort(merge); + community_free(&merge); + } else { + new = community_dup(llgr); + } + + community_free(&llgr); + + attr->community = new; + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_COMMUNITIES); +} + void bgp_attr_add_gshut_community(struct attr *attr) { struct community *old; @@ -2183,6 +2240,20 @@ bool subgroup_announce_check(struct bgp_dest *dest, struct bgp_path_info *pi, } } + /* A BGP speaker that has advertised the "Long-lived Graceful Restart + * Capability" to a neighbor MUST perform the following upon receiving + * a route from that neighbor with the "LLGR_STALE" community, or upon + * attaching the "LLGR_STALE" community itself per Section 4.2: + * + * The route SHOULD NOT be advertised to any neighbor from which the + * Long-lived Graceful Restart Capability has not been received. 
+ */ + if (attr->community && + community_include(attr->community, COMMUNITY_LLGR_STALE) && + !CHECK_FLAG(peer->cap, PEER_CAP_LLGR_RCV) && + !CHECK_FLAG(peer->cap, PEER_CAP_LLGR_ADV)) + return false; + /* After route-map has been applied, we check to see if the nexthop to * be carried in the attribute (that is used for the announcement) can * be cleared off or not. We do this in all cases where we would be @@ -5269,6 +5340,11 @@ void bgp_clear_adj_in(struct peer *peer, afi_t afi, safi_t safi) } } +/* If any of the routes from the peer have been marked with the NO_LLGR + * community, either as sent by the peer, or as the result of a configured + * policy, they MUST NOT be retained, but MUST be removed as per the normal + * operation of [RFC4271]. + */ void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi) { struct bgp_dest *dest; @@ -5291,6 +5367,14 @@ void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi) pi = pi->next) { if (pi->peer != peer) continue; + if (CHECK_FLAG( + peer->af_sflags[afi][safi], + PEER_STATUS_LLGR_WAIT) && + pi->attr->community && + !community_include( + pi->attr->community, + COMMUNITY_NO_LLGR)) + break; if (!CHECK_FLAG(pi->flags, BGP_PATH_STALE)) break; @@ -5306,6 +5390,12 @@ void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi) pi = pi->next) { if (pi->peer != peer) continue; + if (CHECK_FLAG(peer->af_sflags[afi][safi], + PEER_STATUS_LLGR_WAIT) && + pi->attr->community && + !community_include(pi->attr->community, + COMMUNITY_NO_LLGR)) + break; if (!CHECK_FLAG(pi->flags, BGP_PATH_STALE)) break; bgp_rib_remove(dest, pi, peer, afi, safi); @@ -11412,7 +11502,7 @@ void route_vty_out_detail_header(struct vty *vty, struct bgp *bgp, ", attach RT as-is for VPNv6 route filtering"); else if (llgr_stale) vty_out(vty, - ", mark routes to be retained for a longer time. Requeres support for Long-lived BGP Graceful Restart"); + ", mark routes to be retained for a longer time. 
Requires support for Long-lived BGP Graceful Restart"); else if (no_llgr) vty_out(vty, ", mark routes to not be treated according to Long-lived BGP Graceful Restart operations"); diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 3e957630d8..741690a028 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -802,6 +802,7 @@ extern int bgp_path_info_cmp_compatible(struct bgp *bgp, struct bgp_path_info *exist, char *pfx_buf, afi_t afi, safi_t safi, enum bgp_path_selection_reason *reason); +extern void bgp_attr_add_llgr_community(struct attr *attr); extern void bgp_attr_add_gshut_community(struct attr *attr); extern void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest, diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 5a6d95666f..789c156da4 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1656,8 +1656,7 @@ void bgp_peer_conf_if_to_su_update(struct peer *peer) hash_get(peer->bgp->peerhash, peer, hash_alloc_intern); } -static void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi, - safi_t safi) +void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi, safi_t safi) { struct bgp_dest *dest, *ndest; struct bgp_table *table; diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 7c91065601..2fcebe69d5 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -1402,6 +1402,8 @@ struct peer { #define PEER_STATUS_BORR_RECEIVED (1U << 8) /* BoRR received from peer */ #define PEER_STATUS_EORR_SEND (1U << 9) /* EoRR send to peer */ #define PEER_STATUS_EORR_RECEIVED (1U << 10) /* EoRR received from peer */ +/* LLGR aware peer */ +#define PEER_STATUS_LLGR_WAIT (1U << 11) /* Configured timer values. 
*/ _Atomic uint32_t holdtime; @@ -1433,6 +1435,7 @@ struct peer { struct thread *t_pmax_restart; struct thread *t_gr_restart; struct thread *t_gr_stale; + struct thread *t_llgr_stale[AFI_MAX][SAFI_MAX]; struct thread *t_generate_updgrp_packets; struct thread *t_process_packet; struct thread *t_process_packet_error; @@ -2473,4 +2476,7 @@ void peer_nsf_stop(struct peer *peer); void peer_tcp_mss_set(struct peer *peer, uint32_t tcp_mss); void peer_tcp_mss_unset(struct peer *peer); + +extern void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi, + safi_t safi); #endif /* _QUAGGA_BGPD_H */