git.puffer.fish Git - mirror/frr.git/commitdiff
bgpd: Implement LLGR helper mode
author Donatas Abraitis <donatas.abraitis@gmail.com>
Mon, 20 Dec 2021 21:03:09 +0000 (23:03 +0200)
committer Donatas Abraitis <donatas.abraitis@gmail.com>
Tue, 28 Dec 2021 14:07:59 +0000 (16:07 +0200)
Tested between GoBGP and FRR (this commit).

```
┌───────────┐             ┌────────────┐
│           │             │            │
│ GoBGPD    │             │ FRRouting  │
│ (restart) │             │            │
│           │             │            │
└──────┬────┘             └───────┬────┘
       │                          │
       │                          │
       │                          │
       │     ┌───────────┐        │
       │     │           │        │
       │     │           │        │
       └─────┤ FRRouting ├────────┘
             │ (helper)  │
             │           │
             └───────────┘

// GoBGPD
% cat /etc/gobgp/config.toml
[global.config]
    as = 65002
    router-id = "2.2.2.2"
    port = 179

[[neighbors]]
    [neighbors.config]
        peer-as = 65001
        neighbor-address = "2a02:abc::123"
    [neighbors.graceful-restart.config]
        enabled = true
        restart-time = 3
        long-lived-enabled = true
    [[neighbors.afi-safis]]
        [neighbors.afi-safis.config]
            afi-safi-name = "ipv6-unicast"
        [neighbors.afi-safis.mp-graceful-restart.config]
            enabled = true
        [neighbors.afi-safis.long-lived-graceful-restart.config]
            enabled = true
            restart-time = 10
    [[neighbors.afi-safis]]
        [neighbors.afi-safis.config]
            afi-safi-name = "ipv4-unicast"
        [neighbors.afi-safis.mp-graceful-restart.config]
            enabled = true
        [neighbors.afi-safis.long-lived-graceful-restart.config]
            enabled = true
            restart-time = 20

% ./gobgp global rib add -a ipv6 2001:db8:4::/64
% ./gobgp global rib add -a ipv6 2001:db8:5::/64 community 65535:7
% ./gobgp global rib add -a ipv4 100.100.100.100/32
% ./gobgp global rib add -a ipv4 100.100.100.200/32 community 65535:7
```

1. When GoBGPD is killed, the graceful restart timer starts on the FRR helper router;
2. When the GR timer expires on the helper router:
   a) The LLGR_STALE community is attached to the routes to be retained;
   b) Stale routes that have the NO_LLGR community attached are cleared;
   c) The LLGR timer is started per AFI/SAFI;
   d) Bestpath is recomputed and routes are reannounced to peers;
   e) When the LLGR timer expires, all routes for that particular AFI/SAFI are cleared.

Signed-off-by: Donatas Abraitis <donatas.abraitis@gmail.com>
bgpd/bgp_fsm.c
bgpd/bgp_open.c
bgpd/bgp_packet.c
bgpd/bgp_route.c
bgpd/bgp_route.h
bgpd/bgpd.c
bgpd/bgpd.h

index 15e9955872d749e50c9a77825d1ad988d8a30def..1e9dd21fd1e47eaeaa5ac4a947e637ce1e03fb52 100644 (file)
@@ -48,6 +48,7 @@
 #include "bgpd/bgp_dump.h"
 #include "bgpd/bgp_open.h"
 #include "bgpd/bgp_advertise.h"
+#include "bgpd/bgp_community.h"
 #include "bgpd/bgp_updgrp.h"
 #include "bgpd/bgp_nht.h"
 #include "bgpd/bgp_bfd.h"
@@ -297,6 +298,7 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
                peer->afc_adv[afi][safi] = from_peer->afc_adv[afi][safi];
                peer->afc_recv[afi][safi] = from_peer->afc_recv[afi][safi];
                peer->orf_plist[afi][safi] = from_peer->orf_plist[afi][safi];
+               peer->llgr[afi][safi] = from_peer->llgr[afi][safi];
        }
 
        if (bgp_getsockname(peer) < 0) {
@@ -352,6 +354,9 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
    structure. */
 void bgp_timer_set(struct peer *peer)
 {
+       afi_t afi;
+       safi_t safi;
+
        switch (peer->status) {
        case Idle:
                /* First entry point of peer's finite state machine.  In Idle
@@ -465,6 +470,10 @@ void bgp_timer_set(struct peer *peer)
        case Deleted:
                BGP_TIMER_OFF(peer->t_gr_restart);
                BGP_TIMER_OFF(peer->t_gr_stale);
+
+               FOREACH_AFI_SAFI (afi, safi)
+                       BGP_TIMER_OFF(peer->t_llgr_stale[afi][safi]);
+
                BGP_TIMER_OFF(peer->t_pmax_restart);
                BGP_TIMER_OFF(peer->t_refresh_stalepath);
        /* fallthru */
@@ -641,22 +650,132 @@ const char *const peer_down_str[] = {"",
                               "Reached received prefix count",
                               "Socket Error"};
 
-static int bgp_graceful_restart_timer_expire(struct thread *thread)
+static void bgp_graceful_restart_timer_off(struct peer *peer)
 {
-       struct peer *peer;
        afi_t afi;
        safi_t safi;
 
-       peer = THREAD_ARG(thread);
-
-       /* NSF delete stale route */
-       for (afi = AFI_IP; afi < AFI_MAX; afi++)
-               for (safi = SAFI_UNICAST; safi <= SAFI_MPLS_VPN; safi++)
-                       if (peer->nsf[afi][safi])
-                               bgp_clear_stale_route(peer, afi, safi);
+       FOREACH_AFI_SAFI (afi, safi)
+               if (CHECK_FLAG(peer->af_sflags[afi][safi],
+                              PEER_STATUS_LLGR_WAIT))
+                       return;
 
        UNSET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
        BGP_TIMER_OFF(peer->t_gr_stale);
+       bgp_timer_set(peer);
+}
+
+static int bgp_llgr_stale_timer_expire(struct thread *thread)
+{
+       struct peer_af *paf;
+       struct peer *peer;
+       afi_t afi;
+       safi_t safi;
+
+       paf = THREAD_ARG(thread);
+
+       peer = paf->peer;
+       afi = paf->afi;
+       safi = paf->safi;
+
+       /* If the timer for the "Long-lived Stale Time" expires before the
+        * session is re-established, the helper MUST delete all the
+        * stale routes from the neighbor that it is retaining.
+        */
+       if (bgp_debug_neighbor_events(peer))
+               zlog_debug("%s Long-lived stale timer (%s) expired", peer->host,
+                          get_afi_safi_str(afi, safi, false));
+
+       UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_LLGR_WAIT);
+
+       bgp_clear_stale_route(peer, afi, safi);
+
+       bgp_graceful_restart_timer_off(peer);
+
+       return 0;
+}
+
+static void bgp_set_llgr_stale(struct peer *peer, afi_t afi, safi_t safi)
+{
+       struct bgp_dest *dest;
+       struct bgp_path_info *pi;
+       struct bgp_table *table;
+       struct attr attr;
+
+       if (safi == SAFI_MPLS_VPN || safi == SAFI_ENCAP || safi == SAFI_EVPN) {
+               for (dest = bgp_table_top(peer->bgp->rib[afi][safi]); dest;
+                    dest = bgp_route_next(dest)) {
+                       struct bgp_dest *rm;
+
+                       table = bgp_dest_get_bgp_table_info(dest);
+                       if (!table)
+                               continue;
+
+                       for (rm = bgp_table_top(table); rm;
+                            rm = bgp_route_next(rm))
+                               for (pi = bgp_dest_get_bgp_path_info(rm); pi;
+                                    pi = pi->next) {
+                                       if (pi->peer != peer)
+                                               continue;
+
+                                       if (pi->attr->community &&
+                                           community_include(
+                                                   pi->attr->community,
+                                                   COMMUNITY_NO_LLGR))
+                                               continue;
+
+                                       if (bgp_debug_neighbor_events(peer))
+                                               zlog_debug(
+                                                       "%s Long-lived set stale community (LLGR_STALE) for: %pFX",
+                                                       peer->host, &dest->p);
+
+                                       attr = *pi->attr;
+                                       bgp_attr_add_llgr_community(&attr);
+                                       pi->attr = bgp_attr_intern(&attr);
+                                       bgp_recalculate_afi_safi_bestpaths(
+                                               peer->bgp, afi, safi);
+
+                                       break;
+                               }
+               }
+       } else {
+               for (dest = bgp_table_top(peer->bgp->rib[afi][safi]); dest;
+                    dest = bgp_route_next(dest))
+                       for (pi = bgp_dest_get_bgp_path_info(dest); pi;
+                            pi = pi->next) {
+                               if (pi->peer != peer)
+                                       continue;
+
+                               if (pi->attr->community &&
+                                   community_include(pi->attr->community,
+                                                     COMMUNITY_NO_LLGR))
+                                       continue;
+
+                               if (bgp_debug_neighbor_events(peer))
+                                       zlog_debug(
+                                               "%s Long-lived set stale community (LLGR_STALE) for: %pFX",
+                                               peer->host, &dest->p);
+
+                               attr = *pi->attr;
+                               bgp_attr_add_llgr_community(&attr);
+                               pi->attr = bgp_attr_intern(&attr);
+                               bgp_recalculate_afi_safi_bestpaths(peer->bgp,
+                                                                  afi, safi);
+
+                               break;
+                       }
+       }
+}
+
+static int bgp_graceful_restart_timer_expire(struct thread *thread)
+{
+       struct peer *peer, *tmp_peer;
+       struct listnode *node, *nnode;
+       struct peer_af *paf;
+       afi_t afi;
+       safi_t safi;
+
+       peer = THREAD_ARG(thread);
 
        if (bgp_debug_neighbor_events(peer)) {
                zlog_debug("%s graceful restart timer expired", peer->host);
@@ -664,7 +783,54 @@ static int bgp_graceful_restart_timer_expire(struct thread *thread)
                           peer->host);
        }
 
-       bgp_timer_set(peer);
+       FOREACH_AFI_SAFI (afi, safi) {
+               if (!peer->nsf[afi][safi])
+                       continue;
+
+               /* Once the "Restart Time" period ends, the LLGR period is
+                * said to have begun and the following procedures MUST be
+                * performed:
+                *
+                * The helper router MUST start a timer for the
+                * "Long-lived Stale Time".
+                *
+                * The helper router MUST attach the LLGR_STALE community
+                * for the stale routes being retained. Note that this
+                * requirement implies that the routes would need to be
+                * readvertised, to disseminate the modified community.
+                */
+               if (peer->llgr[afi][safi].stale_time) {
+                       paf = peer_af_find(peer, afi, safi);
+                       if (!paf)
+                               continue;
+
+                       if (bgp_debug_neighbor_events(peer))
+                               zlog_debug(
+                                       "%s Long-lived stale timer (%s) started for %d sec",
+                                       peer->host,
+                                       get_afi_safi_str(afi, safi, false),
+                                       peer->llgr[afi][safi].stale_time);
+
+                       SET_FLAG(peer->af_sflags[afi][safi],
+                                PEER_STATUS_LLGR_WAIT);
+
+                       bgp_set_llgr_stale(peer, afi, safi);
+                       bgp_clear_stale_route(peer, afi, safi);
+
+                       thread_add_timer(bm->master,
+                                        bgp_llgr_stale_timer_expire, paf,
+                                        peer->llgr[afi][safi].stale_time,
+                                        &peer->t_llgr_stale[afi][safi]);
+
+                       for (ALL_LIST_ELEMENTS(peer->bgp->peer, node, nnode,
+                                              tmp_peer))
+                               bgp_announce_route(tmp_peer, afi, safi, false);
+               } else {
+                       bgp_clear_stale_route(peer, afi, safi);
+               }
+       }
+
+       bgp_graceful_restart_timer_off(peer);
 
        return 0;
 }
index a05921e7b60fc4c9619bd3f00d06da907b0747b1..6bdefd0e9b7e3ae6fd3b4635d587b27f433dfb13 100644 (file)
@@ -606,8 +606,15 @@ static int bgp_capability_llgr(struct peer *peer,
                                        peer->host, iana_afi2str(pkt_afi),
                                        iana_safi2str(pkt_safi));
                } else {
+                       if (bgp_debug_neighbor_events(peer))
+                               zlog_debug(
+                                       "%s Addr-family %s/%s(afi/safi) Long-lived Graceful Restart capability stale time %u sec",
+                                       peer->host, iana_afi2str(pkt_afi),
+                                       iana_safi2str(pkt_safi), stale_time);
+
                        peer->llgr[afi][safi].flags = flags;
-                       peer->llgr[afi][safi].stale_time = stale_time;
+                       peer->llgr[afi][safi].stale_time =
+                               MIN(stale_time, peer->bgp->llgr_stale_time);
                        SET_FLAG(peer->af_cap[afi][safi], PEER_CAP_LLGR_AF_RCV);
                }
        }
index cc0ee9e8f26f2a7cc11bb5f130009a75a0e9cc29..4bb08404d604e6a7c574ff7a977ea2f3f89c515a 100644 (file)
@@ -1869,11 +1869,11 @@ static int bgp_update_receive(struct peer *peer, bgp_size_t size)
                        if (peer->nsf[afi][safi])
                                bgp_clear_stale_route(peer, afi, safi);
 
-                        zlog_info(
-                            "%s: rcvd End-of-RIB for %s from %s in vrf %s",
-                            __func__, get_afi_safi_str(afi, safi, false),
-                            peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
-                }
+                       zlog_info(
+                               "%s: rcvd End-of-RIB for %s from %s in vrf %s",
+                               __func__, get_afi_safi_str(afi, safi, false),
+                               peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
+               }
        }
 
        /* Everything is done.  We unintern temporary structures which
index d8c2c98f4f2011d56f25d43684c7412e3c03b082..d3e058dd09d9ec7616fe74c7489595f2597fd8c3 100644 (file)
@@ -632,6 +632,33 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
        newattr = new->attr;
        existattr = exist->attr;
 
+       /* A BGP speaker that has advertised the "Long-lived Graceful Restart
+        * Capability" to a neighbor MUST perform the following upon receiving
+        * a route from that neighbor with the "LLGR_STALE" community, or upon
+        * attaching the "LLGR_STALE" community itself per Section 4.2:
+        *
+        * Treat the route as the least-preferred in route selection (see
+        * below). See the Risks of Depreferencing Routes section (Section 5.2)
+        * for a discussion of potential risks inherent in doing this.
+        */
+       if (newattr->community &&
+           community_include(newattr->community, COMMUNITY_LLGR_STALE)) {
+               if (debug)
+                       zlog_debug(
+                               "%s: %s wins over %s due to LLGR_STALE community",
+                               pfx_buf, new_buf, exist_buf);
+               return 0;
+       }
+
+       if (existattr->community &&
+           community_include(existattr->community, COMMUNITY_LLGR_STALE)) {
+               if (debug)
+                       zlog_debug(
+                               "%s: %s loses to %s due to LLGR_STALE community",
+                               pfx_buf, new_buf, exist_buf);
+               return 1;
+       }
+
        new_p = bgp_dest_get_prefix(new->net);
 
        /* For EVPN routes, we cannot just go by local vs remote, we have to
@@ -1708,6 +1735,36 @@ static void bgp_peer_as_override(struct bgp *bgp, afi_t afi, safi_t safi,
        }
 }
 
+void bgp_attr_add_llgr_community(struct attr *attr)
+{
+       struct community *old;
+       struct community *new;
+       struct community *merge;
+       struct community *llgr;
+
+       old = attr->community;
+       llgr = community_str2com("llgr-stale");
+
+       assert(llgr);
+
+       if (old) {
+               merge = community_merge(community_dup(old), llgr);
+
+               if (old->refcnt == 0)
+                       community_free(&old);
+
+               new = community_uniq_sort(merge);
+               community_free(&merge);
+       } else {
+               new = community_dup(llgr);
+       }
+
+       community_free(&llgr);
+
+       attr->community = new;
+       attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_COMMUNITIES);
+}
+
 void bgp_attr_add_gshut_community(struct attr *attr)
 {
        struct community *old;
@@ -2183,6 +2240,20 @@ bool subgroup_announce_check(struct bgp_dest *dest, struct bgp_path_info *pi,
                }
        }
 
+       /* A BGP speaker that has advertised the "Long-lived Graceful Restart
+        * Capability" to a neighbor MUST perform the following upon receiving
+        * a route from that neighbor with the "LLGR_STALE" community, or upon
+        * attaching the "LLGR_STALE" community itself per Section 4.2:
+        *
+        * The route SHOULD NOT be advertised to any neighbor from which the
+        * Long-lived Graceful Restart Capability has not been received.
+        */
+       if (attr->community &&
+           community_include(attr->community, COMMUNITY_LLGR_STALE) &&
+           !CHECK_FLAG(peer->cap, PEER_CAP_LLGR_RCV) &&
+           !CHECK_FLAG(peer->cap, PEER_CAP_LLGR_ADV))
+               return false;
+
        /* After route-map has been applied, we check to see if the nexthop to
         * be carried in the attribute (that is used for the announcement) can
         * be cleared off or not. We do this in all cases where we would be
@@ -5269,6 +5340,11 @@ void bgp_clear_adj_in(struct peer *peer, afi_t afi, safi_t safi)
        }
 }
 
+/* If any of the routes from the peer have been marked with the NO_LLGR
+ * community, either as sent by the peer, or as the result of a configured
+ * policy, they MUST NOT be retained, but MUST be removed as per the normal
+ * operation of [RFC4271].
+ */
 void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi)
 {
        struct bgp_dest *dest;
@@ -5291,6 +5367,14 @@ void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi)
                                     pi = pi->next) {
                                        if (pi->peer != peer)
                                                continue;
+                                       if (CHECK_FLAG(
+                                                   peer->af_sflags[afi][safi],
+                                                   PEER_STATUS_LLGR_WAIT) &&
+                                           pi->attr->community &&
+                                           !community_include(
+                                                   pi->attr->community,
+                                                   COMMUNITY_NO_LLGR))
+                                               break;
                                        if (!CHECK_FLAG(pi->flags,
                                                        BGP_PATH_STALE))
                                                break;
@@ -5306,6 +5390,12 @@ void bgp_clear_stale_route(struct peer *peer, afi_t afi, safi_t safi)
                             pi = pi->next) {
                                if (pi->peer != peer)
                                        continue;
+                               if (CHECK_FLAG(peer->af_sflags[afi][safi],
+                                              PEER_STATUS_LLGR_WAIT) &&
+                                   pi->attr->community &&
+                                   !community_include(pi->attr->community,
+                                                      COMMUNITY_NO_LLGR))
+                                       break;
                                if (!CHECK_FLAG(pi->flags, BGP_PATH_STALE))
                                        break;
                                bgp_rib_remove(dest, pi, peer, afi, safi);
@@ -11412,7 +11502,7 @@ void route_vty_out_detail_header(struct vty *vty, struct bgp *bgp,
                        ", attach RT as-is for VPNv6 route filtering");
                else if (llgr_stale)
                        vty_out(vty,
-                       ", mark routes to be retained for a longer time. Requeres support for Long-lived BGP Graceful Restart");
+                               ", mark routes to be retained for a longer time. Requires support for Long-lived BGP Graceful Restart");
                else if (no_llgr)
                        vty_out(vty,
                        ", mark routes to not be treated according to Long-lived BGP Graceful Restart operations");
index 3e957630d8ca041f03fb217a475e60dbf3cf28a5..741690a0280517d65f9e941548028712451aa40d 100644 (file)
@@ -802,6 +802,7 @@ extern int bgp_path_info_cmp_compatible(struct bgp *bgp,
                                        struct bgp_path_info *exist,
                                        char *pfx_buf, afi_t afi, safi_t safi,
                                        enum bgp_path_selection_reason *reason);
+extern void bgp_attr_add_llgr_community(struct attr *attr);
 extern void bgp_attr_add_gshut_community(struct attr *attr);
 
 extern void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest,
index 5a6d95666f35ccb20d1ec4683607833421c84744..789c156da412919c36b89b15a5436ec12c84a1c1 100644 (file)
@@ -1656,8 +1656,7 @@ void bgp_peer_conf_if_to_su_update(struct peer *peer)
        hash_get(peer->bgp->peerhash, peer, hash_alloc_intern);
 }
 
-static void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi,
-                                              safi_t safi)
+void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi, safi_t safi)
 {
        struct bgp_dest *dest, *ndest;
        struct bgp_table *table;
index 7c9106560118cc63e8bad609e56513c195930141..2fcebe69d5d20775e78680735312986516b0d9eb 100644 (file)
@@ -1402,6 +1402,8 @@ struct peer {
 #define PEER_STATUS_BORR_RECEIVED (1U << 8) /* BoRR received from peer */
 #define PEER_STATUS_EORR_SEND (1U << 9) /* EoRR send to peer */
 #define PEER_STATUS_EORR_RECEIVED (1U << 10) /* EoRR received from peer */
+/* LLGR aware peer */
+#define PEER_STATUS_LLGR_WAIT (1U << 11)
 
        /* Configured timer values. */
        _Atomic uint32_t holdtime;
@@ -1433,6 +1435,7 @@ struct peer {
        struct thread *t_pmax_restart;
        struct thread *t_gr_restart;
        struct thread *t_gr_stale;
+       struct thread *t_llgr_stale[AFI_MAX][SAFI_MAX];
        struct thread *t_generate_updgrp_packets;
        struct thread *t_process_packet;
        struct thread *t_process_packet_error;
@@ -2473,4 +2476,7 @@ void peer_nsf_stop(struct peer *peer);
 
 void peer_tcp_mss_set(struct peer *peer, uint32_t tcp_mss);
 void peer_tcp_mss_unset(struct peer *peer);
+
+extern void bgp_recalculate_afi_safi_bestpaths(struct bgp *bgp, afi_t afi,
+                                              safi_t safi);
 #endif /* _QUAGGA_BGPD_H */