Tested between GoBGP and FRR (this commit).
```
┌───────────┐ ┌────────────┐
│ │ │ │
│ GoBGPD │ │ FRRouting │
│ (restart) │ │ │
│ │ │ │
└──────┬────┘ └───────┬────┘
│ │
│ │
│ │
│ ┌───────────┐ │
│ │ │ │
│ │ │ │
└─────┤ FRRouting ├────────┘
│ (helper) │
│ │
└───────────┘
// GoBGPD
% cat /etc/gobgp/config.toml
[global.config]
as = 65002
router-id = "2.2.2.2"
port = 179
[[neighbors]]
[neighbors.config]
peer-as = 65001
neighbor-address = "2a02:abc::123"
[neighbors.graceful-restart.config]
enabled = true
restart-time = 3
long-lived-enabled = true
[[neighbors.afi-safis]]
[neighbors.afi-safis.config]
afi-safi-name = "ipv6-unicast"
[neighbors.afi-safis.mp-graceful-restart.config]
enabled = true
[neighbors.afi-safis.long-lived-graceful-restart.config]
enabled = true
restart-time = 10
[[neighbors.afi-safis]]
[neighbors.afi-safis.config]
afi-safi-name = "ipv4-unicast"
[neighbors.afi-safis.mp-graceful-restart.config]
enabled = true
[neighbors.afi-safis.long-lived-graceful-restart.config]
enabled = true
restart-time = 20
% ./gobgp global rib add -a ipv6 2001:db8:4::/64
% ./gobgp global rib add -a ipv6 2001:db8:5::/64 community 65535:7
% ./gobgp global rib add -a ipv4 100.100.100.100/32
% ./gobgp global rib add -a ipv4 100.100.100.200/32 community 65535:7
```
1. When killing GoBGPD, graceful restart timer starts in FRR helper router;
2. When GR timer expires in helper router:
a) LLGR_STALE community is attached to routes to be retained;
b) Clear stale routes that have NO_LLGR community attached;
c) Start LLGR timer per AFI/SAFI;
d) Recompute bestpath and reannounce routes to peers;
3. When the LLGR timer expires in the helper router, clear all stale routes from the peer for that particular AFI/SAFI.
Signed-off-by: Donatas Abraitis <donatas.abraitis@gmail.com>
#include "bgpd/bgp_dump.h"
#include "bgpd/bgp_open.h"
#include "bgpd/bgp_advertise.h"
+#include "bgpd/bgp_community.h"
#include "bgpd/bgp_updgrp.h"
#include "bgpd/bgp_nht.h"
#include "bgpd/bgp_bfd.h"
peer->afc_adv[afi][safi] = from_peer->afc_adv[afi][safi];
peer->afc_recv[afi][safi] = from_peer->afc_recv[afi][safi];
peer->orf_plist[afi][safi] = from_peer->orf_plist[afi][safi];
+ peer->llgr[afi][safi] = from_peer->llgr[afi][safi];
}
if (bgp_getsockname(peer) < 0) {
structure. */
void bgp_timer_set(struct peer *peer)
{
+ afi_t afi;
+ safi_t safi;
+
switch (peer->status) {
case Idle:
/* First entry point of peer's finite state machine. In Idle
case Deleted:
BGP_TIMER_OFF(peer->t_gr_restart);
BGP_TIMER_OFF(peer->t_gr_stale);
+
+ FOREACH_AFI_SAFI (afi, safi)
+ BGP_TIMER_OFF(peer->t_llgr_stale[afi][safi]);
+
BGP_TIMER_OFF(peer->t_pmax_restart);
BGP_TIMER_OFF(peer->t_refresh_stalepath);
/* fallthru */
"Reached received prefix count",
"Socket Error"};
-static int bgp_graceful_restart_timer_expire(struct thread *thread)
+static void bgp_graceful_restart_timer_off(struct peer *peer)
{
- struct peer *peer;
afi_t afi;
safi_t safi;
- peer = THREAD_ARG(thread);
-
- /* NSF delete stale route */
- for (afi = AFI_IP; afi < AFI_MAX; afi++)
- for (safi = SAFI_UNICAST; safi <= SAFI_MPLS_VPN; safi++)
- if (peer->nsf[afi][safi])
- bgp_clear_stale_route(peer, afi, safi);
+ FOREACH_AFI_SAFI (afi, safi)
+ if (CHECK_FLAG(peer->af_sflags[afi][safi],
+ PEER_STATUS_LLGR_WAIT))
+ return;
UNSET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
BGP_TIMER_OFF(peer->t_gr_stale);
+ bgp_timer_set(peer);
+}
+
+/* Thread callback run when the per-AFI/SAFI "Long-lived Stale Time" timer
+ * fires. The thread argument is the peer_af the timer was armed with.
+ *
+ * Always returns 0.
+ */
+static int bgp_llgr_stale_timer_expire(struct thread *thread)
+{
+	struct peer_af *paf = THREAD_ARG(thread);
+	struct peer *peer = paf->peer;
+	afi_t afi = paf->afi;
+	safi_t safi = paf->safi;
+
+	if (bgp_debug_neighbor_events(peer))
+		zlog_debug("%s Long-lived stale timer (%s) expired", peer->host,
+			   get_afi_safi_str(afi, safi, false));
+
+	/* If the timer for the "Long-lived Stale Time" expires before the
+	 * session is re-established, the helper MUST delete all the
+	 * stale routes from the neighbor that it is retaining.
+	 *
+	 * The LLGR_WAIT flag is dropped before the stale routes are
+	 * cleared for this AFI/SAFI.
+	 */
+	UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_LLGR_WAIT);
+	bgp_clear_stale_route(peer, afi, safi);
+
+	/* Gives the peer-wide graceful restart state a chance to wind down;
+	 * the callee bails out while any AFI/SAFI is still in LLGR_WAIT.
+	 */
+	bgp_graceful_restart_timer_off(peer);
+
+	return 0;
+}
+
+/* Attach the LLGR_STALE community to the first path on 'dest' that was
+ * learned from 'peer' and does not carry the NO_LLGR community.
+ *
+ * Paths from other peers and NO_LLGR paths are skipped. At most one path
+ * per destination is updated.
+ *
+ * Returns true when a path was updated, false otherwise.
+ */
+static bool bgp_set_llgr_stale_dest(struct peer *peer, struct bgp_dest *dest)
+{
+	struct bgp_path_info *pi;
+	struct attr *old_attr;
+	struct attr attr;
+
+	for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
+		if (pi->peer != peer)
+			continue;
+
+		if (pi->attr->community &&
+		    community_include(pi->attr->community, COMMUNITY_NO_LLGR))
+			continue;
+
+		if (bgp_debug_neighbor_events(peer))
+			zlog_debug(
+				"%s Long-lived set stale community (LLGR_STALE) for: %pFX",
+				peer->host, &dest->p);
+
+		/* Build the modified attribute set on the stack, intern it,
+		 * and only then release the reference the path held on the
+		 * previous interned attribute. Without the unintern the old
+		 * attribute's reference is never dropped.
+		 */
+		old_attr = pi->attr;
+		attr = *old_attr;
+		bgp_attr_add_llgr_community(&attr);
+		pi->attr = bgp_attr_intern(&attr);
+		bgp_attr_unintern(&old_attr);
+
+		return true;
+	}
+
+	return false;
+}
+
+/* Mark the routes learned from 'peer' in the given AFI/SAFI with the
+ * LLGR_STALE community, skipping routes that carry NO_LLGR.
+ *
+ * For MPLS_VPN, ENCAP and EVPN the RIB is walked as a two-level table:
+ * each top-level node holds a nested table whose nodes are the routes.
+ * For every other SAFI the top-level nodes are the routes themselves.
+ *
+ * Best paths for the AFI/SAFI are recalculated once after the walk, and
+ * only if at least one path was modified.
+ */
+static void bgp_set_llgr_stale(struct peer *peer, afi_t afi, safi_t safi)
+{
+	struct bgp_dest *dest;
+	struct bgp_dest *rm;
+	struct bgp_table *table;
+	bool two_level = (safi == SAFI_MPLS_VPN || safi == SAFI_ENCAP
+			  || safi == SAFI_EVPN);
+	bool changed = false;
+
+	for (dest = bgp_table_top(peer->bgp->rib[afi][safi]); dest;
+	     dest = bgp_route_next(dest)) {
+		if (!two_level) {
+			if (bgp_set_llgr_stale_dest(peer, dest))
+				changed = true;
+			continue;
+		}
+
+		table = bgp_dest_get_bgp_table_info(dest);
+		if (!table)
+			continue;
+
+		/* The helper receives the inner node, so the debug log
+		 * reports the route's own prefix rather than the outer
+		 * node's prefix.
+		 */
+		for (rm = bgp_table_top(table); rm; rm = bgp_route_next(rm))
+			if (bgp_set_llgr_stale_dest(peer, rm))
+				changed = true;
+	}
+
+	if (changed)
+		bgp_recalculate_afi_safi_bestpaths(peer->bgp, afi, safi);
+}
+
+static int bgp_graceful_restart_timer_expire(struct thread *thread)
+{
+ struct peer *peer, *tmp_peer;
+ struct listnode *node, *nnode;
+ struct peer_af *paf;
+ afi_t afi;
+ safi_t safi;
+
+ peer = THREAD_ARG(thread);
if (bgp_debug_neighbor_events(peer)) {
zlog_debug("%s graceful restart timer expired", peer->host);
peer->host);
}
- bgp_timer_set(peer);
+ FOREACH_AFI_SAFI (afi, safi) {
+ if (!peer->nsf[afi][safi])
+ continue;
+
+ /* Once the "Restart Time" period ends, the LLGR period is
+ * said to have begun and the following procedures MUST be
+ * performed:
+ *
+ * The helper router MUST start a timer for the
+ * "Long-lived Stale Time".
+ *
+ * The helper router MUST attach the LLGR_STALE community
+ * for the stale routes being retained. Note that this
+ * requirement implies that the routes would need to be
+ * readvertised, to disseminate the modified community.
+ */
+ if (peer->llgr[afi][safi].stale_time) {
+ paf = peer_af_find(peer, afi, safi);
+ if (!paf)
+ continue;
+
+ if (bgp_debug_neighbor_events(peer))
+ zlog_debug(
+ "%s Long-lived stale timer (%s) started for %d sec",
+ peer->host,
+ get_afi_safi_str(afi, safi, false),
+ peer->llgr[afi][safi].stale_time);
+
+ SET_FLAG(peer->af_sflags[afi][safi],
+ PEER_STATUS_LLGR_WAIT);
+
+ bgp_set_llgr_stale(peer, afi, safi);
+ bgp_clear_stale_route(peer, afi, safi);
+
+ thread_add_timer(bm->master,
+ bgp_llgr_stale_timer_expire, paf,
+ peer->llgr[afi][safi].stale_time,
+ &peer->t_llgr_stale[afi][safi]);
+
+ for (ALL_LIST_ELEMENTS(peer->bgp->peer, node, nnode,
+ tmp_peer))
+ bgp_announce_route(tmp_peer, afi, safi, false);
+ } else {
+ bgp_clear_stale_route(peer, afi, safi);
+ }
+ }
+
+ bgp_graceful_restart_timer_off(peer);
return 0;
}
peer->host, iana_afi2str(pkt_afi),
iana_safi2str(pkt_safi));
} else {
+ if (bgp_debug_neighbor_events(peer))
+ zlog_debug(
+ "%s Addr-family %s/%s(afi/safi) Long-lived Graceful Restart capability stale time %u sec",
+ peer->host, iana_afi2str(pkt_afi),
+ iana_safi2str(pkt_safi), stale_time);
+
peer->llgr[afi][safi].flags = flags;
- peer->llgr[afi][safi].stale_time = stale_time;
+ peer->llgr[afi][safi].stale_time =
+ MIN(stale_time, peer->bgp->llgr_stale_time);
SET_FLAG(peer->af_cap[afi][safi], PEER_CAP_LLGR_AF_RCV);
}
}
if (peer->nsf[afi][safi])
bgp_clear_stale_route(peer, afi, safi);
- zlog_info(
- "%s: rcvd End-of-RIB for %s from %s in vrf %s",
- __func__, get_afi_safi_str(afi, safi, false),
- peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
- }
+ zlog_info(
+ "%s: rcvd End-of-RIB for %s from %s in vrf %s",
+ __func__, get_afi_safi_str(afi, safi, false),
+ peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
+ }
}
/* Everything is done. We unintern temporary structures which
newattr = new->attr;
existattr = exist->attr;
+	/* A BGP speaker that has advertised the "Long-lived Graceful Restart
+	 * Capability" to a neighbor MUST perform the following upon receiving
+	 * a route from that neighbor with the "LLGR_STALE" community, or upon
+	 * attaching the "LLGR_STALE" community itself per Section 4.2:
+	 *
+	 * Treat the route as the least-preferred in route selection (see
+	 * below). See the Risks of Depreferencing Routes section (Section 5.2)
+	 * for a discussion of potential risks inherent in doing this.
+	 */
+	/* The new path carries LLGR_STALE, so it is the least preferred:
+	 * the existing path is kept (return 0) and the log must say the
+	 * new path loses.
+	 */
+	if (newattr->community &&
+	    community_include(newattr->community, COMMUNITY_LLGR_STALE)) {
+		if (debug)
+			zlog_debug(
+				"%s: %s loses to %s due to LLGR_STALE community",
+				pfx_buf, new_buf, exist_buf);
+		return 0;
+	}
+
+	/* Only the existing path carries LLGR_STALE, so the new path is
+	 * preferred (return 1) and the log must say the new path wins.
+	 */
+	if (existattr->community &&
+	    community_include(existattr->community, COMMUNITY_LLGR_STALE)) {
+		if (debug)
+			zlog_debug(
+				"%s: %s wins over %s due to LLGR_STALE community",
+				pfx_buf, new_buf, exist_buf);
+		return 1;
+	}
+
new_p = bgp_dest_get_prefix(new->net);
/* For EVPN routes, we cannot just go by local vs remote, we have to
}
}
+/* Add the "llgr-stale" community to the community list of 'attr'.
+ *
+ * When 'attr' already has communities, the result is a sorted,
+ * de-duplicated merge of the existing list and "llgr-stale"; otherwise
+ * the result holds only "llgr-stale". The COMMUNITIES attribute flag is
+ * set on 'attr' in both cases.
+ */
+void bgp_attr_add_llgr_community(struct attr *attr)
+{
+	struct community *prev = attr->community;
+	struct community *stale = community_str2com("llgr-stale");
+	struct community *result;
+
+	assert(stale);
+
+	if (!prev) {
+		result = community_dup(stale);
+	} else {
+		struct community *combined =
+			community_merge(community_dup(prev), stale);
+
+		/* A previous list with no references left is released here */
+		if (prev->refcnt == 0)
+			community_free(&prev);
+
+		result = community_uniq_sort(combined);
+		community_free(&combined);
+	}
+
+	community_free(&stale);
+
+	attr->community = result;
+	attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_COMMUNITIES);
+}
+
void bgp_attr_add_gshut_community(struct attr *attr)
{
struct community *old;
}
}
+ /* A BGP speaker that has advertised the "Long-lived Graceful Restart
+ * Capability" to a neighbor MUST perform the following upon receiving
+ * a route from that neighbor with the "LLGR_STALE" community, or upon
+ * attaching the "LLGR_STALE" community itself per Section 4.2:
+ *
+ * The route SHOULD NOT be advertised to any neighbor from which the
+ * Long-lived Graceful Restart Capability has not been received.
+ */
+ if (attr->community &&
+ community_include(attr->community, COMMUNITY_LLGR_STALE) &&
+ !CHECK_FLAG(peer->cap, PEER_CAP_LLGR_RCV) &&
+ !CHECK_FLAG(peer->cap, PEER_CAP_LLGR_ADV))
+ return false;
+
/* After route-map has been applied, we check to see if the nexthop to
* be carried in the attribute (that is used for the announcement) can
* be cleared off or not. We do this in all cases where we would be
}
}
+/* If any of the routes from the peer have been marked with the NO_LLGR
+ * community, either as sent by the peer, or as the result of a configured
+ * policy, they MUST NOT be retained, but MUST be removed as per the normal
+ * operation of [RFC4271].
+ */
void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi)
{
struct bgp_dest *dest;
pi = pi->next) {
if (pi->peer != peer)
continue;
+ if (CHECK_FLAG(
+ peer->af_sflags[afi][safi],
+ PEER_STATUS_LLGR_WAIT) &&
+ pi->attr->community &&
+ !community_include(
+ pi->attr->community,
+ COMMUNITY_NO_LLGR))
+ break;
if (!CHECK_FLAG(pi->flags,
BGP_PATH_STALE))
break;
pi = pi->next) {
if (pi->peer != peer)
continue;
+ if (CHECK_FLAG(peer->af_sflags[afi][safi],
+ PEER_STATUS_LLGR_WAIT) &&
+ pi->attr->community &&
+ !community_include(pi->attr->community,
+ COMMUNITY_NO_LLGR))
+ break;
if (!CHECK_FLAG(pi->flags, BGP_PATH_STALE))
break;
bgp_rib_remove(dest, pi, peer, afi, safi);
", attach RT as-is for VPNv6 route filtering");
else if (llgr_stale)
vty_out(vty,
- ", mark routes to be retained for a longer time. Requeres support for Long-lived BGP Graceful Restart");
+ ", mark routes to be retained for a longer time. Requires support for Long-lived BGP Graceful Restart");
else if (no_llgr)
vty_out(vty,
", mark routes to not be treated according to Long-lived BGP Graceful Restart operations");
struct bgp_path_info *exist,
char *pfx_buf, afi_t afi, safi_t safi,
enum bgp_path_selection_reason *reason);
+extern void bgp_attr_add_llgr_community(struct attr *attr);
extern void bgp_attr_add_gshut_community(struct attr *attr);
extern void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest,
hash_get(peer->bgp->peerhash, peer, hash_alloc_intern);
}
-static void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi,
- safi_t safi)
+void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi, safi_t safi)
{
struct bgp_dest *dest, *ndest;
struct bgp_table *table;
#define PEER_STATUS_BORR_RECEIVED (1U << 8) /* BoRR received from peer */
#define PEER_STATUS_EORR_SEND (1U << 9) /* EoRR send to peer */
#define PEER_STATUS_EORR_RECEIVED (1U << 10) /* EoRR received from peer */
+/* Long-lived stale timer is running for this AFI/SAFI (stale routes retained) */
+#define PEER_STATUS_LLGR_WAIT (1U << 11)
/* Configured timer values. */
_Atomic uint32_t holdtime;
struct thread *t_pmax_restart;
struct thread *t_gr_restart;
struct thread *t_gr_stale;
+ struct thread *t_llgr_stale[AFI_MAX][SAFI_MAX];
struct thread *t_generate_updgrp_packets;
struct thread *t_process_packet;
struct thread *t_process_packet_error;
void peer_tcp_mss_set(struct peer *peer, uint32_t tcp_mss);
void peer_tcp_mss_unset(struct peer *peer);
+
+extern void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi,
+ safi_t safi);
#endif /* _QUAGGA_BGPD_H */