From 38775a3c5fdfece7002f0495a76e3c96a7d6417c Mon Sep 17 00:00:00 2001 From: Donatas Abraitis Date: Thu, 27 Jan 2022 11:14:11 +0200 Subject: [PATCH] bgpd: Handle TCP connection errors with connection callbacks for RPKI Before this patch, if the first server crashed or was terminated, RPKI connection keeps _active_ forever. With this patch, if we catch connection problem (FATAL), we reset RPKI, to switch to another available RTR-Server by using configured preference. Signed-off-by: Donatas Abraitis --- bgpd/bgp_rpki.c | 77 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/bgpd/bgp_rpki.c b/bgpd/bgp_rpki.c index 6a89a7195c..e5027359e7 100644 --- a/bgpd/bgp_rpki.c +++ b/bgpd/bgp_rpki.c @@ -63,6 +63,8 @@ #include "bgpd/bgp_rpki_clippy.c" #endif +static struct thread *t_rpki; + DEFINE_MTYPE_STATIC(BGPD, BGP_RPKI_CACHE, "BGP RPKI Cache server"); DEFINE_MTYPE_STATIC(BGPD, BGP_RPKI_CACHE_GROUP, "BGP RPKI Cache server group"); @@ -361,14 +363,13 @@ static int bgpd_sync_callback(struct thread *thread) struct listnode *node; struct prefix *prefix; struct pfx_record rec; + int retval; + int socket = THREAD_FD(thread); - thread_add_read(bm->master, bgpd_sync_callback, NULL, - rpki_sync_socket_bgpd, NULL); + thread_add_read(bm->master, bgpd_sync_callback, NULL, socket, &t_rpki); if (atomic_load_explicit(&rtr_update_overflow, memory_order_seq_cst)) { - while (read(rpki_sync_socket_bgpd, &rec, - sizeof(struct pfx_record)) - != -1) + while (read(socket, &rec, sizeof(rec) != -1)) ; atomic_store_explicit(&rtr_update_overflow, 0, @@ -377,12 +378,20 @@ static int bgpd_sync_callback(struct thread *thread) return 0; } - int retval = - read(rpki_sync_socket_bgpd, &rec, sizeof(struct pfx_record)); - if (retval != sizeof(struct pfx_record)) { - RPKI_DEBUG("Could not read from rpki_sync_socket_bgpd"); + retval = read(socket, &rec, sizeof(rec)); + if (retval != sizeof(rec)) { + RPKI_DEBUG("Could not read from socket"); return retval; } + + /* RTR-Server crashed/terminated, let's handle and switch + * to the second available RTR-Server according to preference. + */ + if (rec.socket && rec.socket->state == RTR_ERROR_FATAL) { + reset(true); + return 0; + } + prefix = pfx_record_to_prefix(&rec); afi_t afi = (rec.prefix.ver == LRTR_IPV4) ? AFI_IP : AFI_IP6; @@ -441,29 +450,53 @@ static void revalidate_all_routes(void) { struct bgp *bgp; struct listnode *node; + afi_t afi; + safi_t safi; for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp)) { struct peer *peer; struct listnode *peer_listnode; for (ALL_LIST_ELEMENTS_RO(bgp->peer, peer_listnode, peer)) { + FOREACH_AFI_SAFI (afi, safi) { + if (!peer->afc_nego[afi][safi]) + continue; - for (size_t i = 0; i < 2; i++) { - safi_t safi; - afi_t afi = (i == 0) ? AFI_IP : AFI_IP6; - - for (safi = SAFI_UNICAST; safi < SAFI_MAX; - safi++) { - if (!peer->bgp->rib[afi][safi]) - continue; + if (!peer->bgp->rib[afi][safi]) + continue; - bgp_soft_reconfig_in(peer, afi, safi); - } + bgp_soft_reconfig_in(peer, afi, safi); } } } } +static void rpki_connection_status_cb(const struct rtr_mgr_group *group + __attribute__((unused)), + enum rtr_mgr_status status, + const struct rtr_socket *socket + __attribute__((unused)), + void *data __attribute__((unused))) +{ + struct pfx_record rec = {0}; + int retval; + + if (rtr_is_stopping || + atomic_load_explicit(&rtr_update_overflow, memory_order_seq_cst)) + return; + + if (status == RTR_MGR_ERROR) + rec.socket = socket; + + retval = write(rpki_sync_socket_rtr, &rec, sizeof(rec)); + if (retval == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) + atomic_store_explicit(&rtr_update_overflow, 1, + memory_order_seq_cst); + + else if (retval != sizeof(rec)) + RPKI_DEBUG("Could not write to rpki_sync_socket_rtr"); +} + static void rpki_update_cb_sync_rtr(struct pfx_table *p __attribute__((unused)), const struct pfx_record rec, const bool added __attribute__((unused))) @@ -505,9 +538,8 @@ static void rpki_init_sync_socket(void) goto err; } - thread_add_read(bm->master, bgpd_sync_callback, NULL, - rpki_sync_socket_bgpd, NULL); + rpki_sync_socket_bgpd, &t_rpki); return; @@ -575,7 +607,8 @@ static int start(void) RPKI_DEBUG("Polling period: %d", polling_period); ret = rtr_mgr_init(&rtr_config, groups, groups_len, polling_period, expire_interval, retry_interval, - rpki_update_cb_sync_rtr, NULL, NULL, NULL); + rpki_update_cb_sync_rtr, NULL, + rpki_connection_status_cb, NULL); if (ret == RTR_ERROR) { RPKI_DEBUG("Init rtr_mgr failed."); return ERROR; -- 2.39.5