From 9bf904cc8b05b0771665291d9a7df34a253ac6de Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Tue, 11 Sep 2018 08:13:42 -0400 Subject: [PATCH] bgpd: Try to notice when configuration changes during startup During peer startup there exists the possibility that both the local and remote peers try to start communication at the same time. In addition it is possible for local configuration to change at the same time this is going on. When this happens try to notice that the remote peer may be in opensent or openconfirm and if so we need to restart the connection from both sides. Additionally try to write a bit of extra code in peer_xfer_conn to notice when this happens and to emit an error message to the end user about this happening so that it can be cleaned up. Signed-off-by: Donald Sharp --- bgpd/bgp_errors.c | 6 ++++++ bgpd/bgp_errors.h | 1 + bgpd/bgp_fsm.c | 14 ++++++++++++++ bgpd/bgpd.c | 18 ++++++++++++++++++ 4 files changed, 39 insertions(+) diff --git a/bgpd/bgp_errors.c b/bgpd/bgp_errors.c index bd42901c2d..7cebd0e484 100644 --- a/bgpd/bgp_errors.c +++ b/bgpd/bgp_errors.c @@ -462,6 +462,12 @@ static struct log_ref ferr_bgp_err[] = { .description = "The BGP flowspec subsystem has detected that there was a failure for installation/removal/modification of Flowspec from the dataplane", .suggestion = "Gather log files from the router and open an issue, Restart FRR" }, + { + .code = EC_BGP_DOPPELGANGER_CONFIG, + .title = "BGP has detected a configuration overwrite during peer collision resolution", + .description = "As part of BGP startup, the peer and ourselves can start connections to each other at the same time. During this process BGP received additional configuration, but it was only applied to one of the two nascent connections. Depending on the result of collision detection and resolution this configuration might be lost. 
To remedy this, after performing collision detection and resolution the peer session has been reset in order to apply the new configuration.", + .suggestion = "Gather data and open a Issue so that this developmental escape can be fixed, the peer should have been reset", + }, { .code = END_FERR, } diff --git a/bgpd/bgp_errors.h b/bgpd/bgp_errors.h index 853f2da222..13bd318e27 100644 --- a/bgpd/bgp_errors.h +++ b/bgpd/bgp_errors.h @@ -99,6 +99,7 @@ enum bgp_log_refs { EC_BGP_CAPABILITY_VENDOR, EC_BGP_CAPABILITY_UNKNOWN, EC_BGP_INVALID_NEXTHOP_LENGTH, + EC_BGP_DOPPELGANGER_CONFIG, }; extern void bgp_error_init(void); diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 384d2bca82..65b8b5bd2d 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -125,6 +125,20 @@ static struct peer *peer_xfer_conn(struct peer *from_peer) if (!peer || !CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE)) return from_peer; + /* + * Let's check that we are not going to lose known configuration + * state based upon doppelganger rules. 
+ */ + FOREACH_AFI_SAFI (afi, safi) { + if (from_peer->afc[afi][safi] != peer->afc[afi][safi]) { + flog_err( + EC_BGP_DOPPELGANGER_CONFIG, + "from_peer->afc[%d][%d] is not the same as what we are overwriting", + afi, safi); + return NULL; + } + } + if (bgp_debug_neighbor_events(peer)) zlog_debug("%s: peer transfer %p fd %d -> %p fd %d)", from_peer->host, from_peer, from_peer->fd, peer, diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index e4dedc2420..fcb7eca0f1 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1802,6 +1802,7 @@ static void peer_group2peer_config_copy_af(struct peer_group *group, static int peer_activate_af(struct peer *peer, afi_t afi, safi_t safi) { int active; + struct peer *other; if (CHECK_FLAG(peer->sflags, PEER_STATUS_GROUP)) { flog_err(EC_BGP_PEER_GROUP, "%s was called for peer-group %s", @@ -1852,6 +1853,23 @@ static int peer_activate_af(struct peer *peer, afi_t afi, safi_t safi) bgp_notify_send(peer, BGP_NOTIFY_CEASE, BGP_NOTIFY_CEASE_CONFIG_CHANGE); } + /* + * If we are turning on an AFI/SAFI locally and we've + * started bringing a peer up, we need to tell + * the other peer to restart because we might lose + * configuration here because when the doppelganger + * gets to an established state due to how + * we resolve we could just overwrite the afi/safi + * activation. + */ + other = peer->doppelganger; + if (other + && (other->status == OpenSent + || other->status == OpenConfirm)) { + other->last_reset = PEER_DOWN_AF_ACTIVATE; + bgp_notify_send(other, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } } return 0; -- 2.39.5