]> git.puffer.fish Git - mirror/frr.git/commitdiff
bgpd: Handle TCP connection errors with connection callbacks for RPKI 10647/head
author Donatas Abraitis <donatas.abraitis@gmail.com>
Thu, 27 Jan 2022 09:14:11 +0000 (11:14 +0200)
committer mergify-bot <noreply@mergify.com>
Thu, 24 Feb 2022 06:56:39 +0000 (06:56 +0000)
Before this patch, if the first server crashed or was terminated, the RPKI
connection stayed _active_ forever.

With this patch, if we catch a connection problem (FATAL), we reset RPKI to
switch to another available RTR-Server according to the configured preference.

Signed-off-by: Donatas Abraitis <donatas.abraitis@gmail.com>
(cherry picked from commit 38775a3c5fdfece7002f0495a76e3c96a7d6417c)

bgpd/bgp_rpki.c

index 6a89a7195c3aa3e683e26f75e76a4eeb927e962a..e5027359e71a56feb0dfff31deea4afee1c8460a 100644 (file)
@@ -63,6 +63,8 @@
 #include "bgpd/bgp_rpki_clippy.c"
 #endif
 
+static struct thread *t_rpki;
+
 DEFINE_MTYPE_STATIC(BGPD, BGP_RPKI_CACHE, "BGP RPKI Cache server");
 DEFINE_MTYPE_STATIC(BGPD, BGP_RPKI_CACHE_GROUP, "BGP RPKI Cache server group");
 
@@ -361,14 +363,13 @@ static int bgpd_sync_callback(struct thread *thread)
        struct listnode *node;
        struct prefix *prefix;
        struct pfx_record rec;
+       int retval;
+       int socket = THREAD_FD(thread);
 
-       thread_add_read(bm->master, bgpd_sync_callback, NULL,
-                       rpki_sync_socket_bgpd, NULL);
+       thread_add_read(bm->master, bgpd_sync_callback, NULL, socket, &t_rpki);
 
        if (atomic_load_explicit(&rtr_update_overflow, memory_order_seq_cst)) {
-               while (read(rpki_sync_socket_bgpd, &rec,
-                           sizeof(struct pfx_record))
-                      != -1)
+               while (read(socket, &rec, sizeof(rec) != -1))
                        ;
 
                atomic_store_explicit(&rtr_update_overflow, 0,
@@ -377,12 +378,20 @@ static int bgpd_sync_callback(struct thread *thread)
                return 0;
        }
 
-       int retval =
-               read(rpki_sync_socket_bgpd, &rec, sizeof(struct pfx_record));
-       if (retval != sizeof(struct pfx_record)) {
-               RPKI_DEBUG("Could not read from rpki_sync_socket_bgpd");
+       retval = read(socket, &rec, sizeof(rec));
+       if (retval != sizeof(rec)) {
+               RPKI_DEBUG("Could not read from socket");
                return retval;
        }
+
+       /* RTR-Server crashed/terminated, let's handle and switch
+        * to the second available RTR-Server according to preference.
+        */
+       if (rec.socket && rec.socket->state == RTR_ERROR_FATAL) {
+               reset(true);
+               return 0;
+       }
+
        prefix = pfx_record_to_prefix(&rec);
 
        afi_t afi = (rec.prefix.ver == LRTR_IPV4) ? AFI_IP : AFI_IP6;
@@ -441,29 +450,53 @@ static void revalidate_all_routes(void)
 {
        struct bgp *bgp;
        struct listnode *node;
+       afi_t afi;
+       safi_t safi;
 
        for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp)) {
                struct peer *peer;
                struct listnode *peer_listnode;
 
                for (ALL_LIST_ELEMENTS_RO(bgp->peer, peer_listnode, peer)) {
+                       FOREACH_AFI_SAFI (afi, safi) {
+                               if (!peer->afc_nego[afi][safi])
+                                       continue;
 
-                       for (size_t i = 0; i < 2; i++) {
-                               safi_t safi;
-                               afi_t afi = (i == 0) ? AFI_IP : AFI_IP6;
-
-                               for (safi = SAFI_UNICAST; safi < SAFI_MAX;
-                                    safi++) {
-                                       if (!peer->bgp->rib[afi][safi])
-                                               continue;
+                               if (!peer->bgp->rib[afi][safi])
+                                       continue;
 
-                                       bgp_soft_reconfig_in(peer, afi, safi);
-                               }
+                               bgp_soft_reconfig_in(peer, afi, safi);
                        }
                }
        }
 }
 
+/*
+ * Connection status callback handed to rtr_mgr_init().
+ *
+ * Notifies the reader of the sync socket about a status change by writing
+ * one pfx_record to rpki_sync_socket_rtr. The record is zero-initialized;
+ * only when status is RTR_MGR_ERROR does it carry the affected socket in
+ * rec.socket, which bgpd_sync_callback() inspects to decide whether RPKI
+ * has to be reset.
+ *
+ * Nothing is written while RTR is stopping or while rtr_update_overflow
+ * is already set.
+ *
+ * group:  unused.
+ * status: manager status reported by the caller.
+ * socket: socket the status refers to; stored in the record on error.
+ * data:   unused.
+ */
+static void rpki_connection_status_cb(const struct rtr_mgr_group *group
+                                     __attribute__((unused)),
+                                     enum rtr_mgr_status status,
+                                     const struct rtr_socket *socket,
+                                     void *data __attribute__((unused)))
+{
+       struct pfx_record rec = {0};
+       int retval;
+
+       if (rtr_is_stopping ||
+           atomic_load_explicit(&rtr_update_overflow, memory_order_seq_cst))
+               return;
+
+       /* Only an error status tags the record with its socket; for any
+        * other status the record stays all-zero.
+        * NOTE(review): the all-zero record is still written below and will
+        * be read by bgpd_sync_callback() - confirm this is intended.
+        */
+       if (status == RTR_MGR_ERROR)
+               rec.socket = socket;
+
+       retval = write(rpki_sync_socket_rtr, &rec, sizeof(rec));
+       if (retval == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
+               /* The write would block: raise the overflow flag instead of
+                * waiting.
+                */
+               atomic_store_explicit(&rtr_update_overflow, 1,
+                                     memory_order_seq_cst);
+       } else if (retval != sizeof(rec)) {
+               RPKI_DEBUG("Could not write to rpki_sync_socket_rtr");
+       }
+}
+
 static void rpki_update_cb_sync_rtr(struct pfx_table *p __attribute__((unused)),
                                    const struct pfx_record rec,
                                    const bool added __attribute__((unused)))
@@ -505,9 +538,8 @@ static void rpki_init_sync_socket(void)
                goto err;
        }
 
-
        thread_add_read(bm->master, bgpd_sync_callback, NULL,
-                       rpki_sync_socket_bgpd, NULL);
+                       rpki_sync_socket_bgpd, &t_rpki);
 
        return;
 
@@ -575,7 +607,8 @@ static int start(void)
        RPKI_DEBUG("Polling period: %d", polling_period);
        ret = rtr_mgr_init(&rtr_config, groups, groups_len, polling_period,
                           expire_interval, retry_interval,
-                          rpki_update_cb_sync_rtr, NULL, NULL, NULL);
+                          rpki_update_cb_sync_rtr, NULL,
+                          rpki_connection_status_cb, NULL);
        if (ret == RTR_ERROR) {
                RPKI_DEBUG("Init rtr_mgr failed.");
                return ERROR;