summaryrefslogtreecommitdiff
path: root/bgpd
diff options
context:
space:
mode:
authorDavid Lamparter <equinox@opensourcerouting.org>2021-04-22 11:04:52 +0200
committerDavid Lamparter <equinox@opensourcerouting.org>2022-05-19 12:14:40 +0200
commitbd9fb6f368049bd5f1f6a2b7bc97fbd51c9300cc (patch)
treef172dc19ee071f21387bc212197d4913b99a881a /bgpd
parent18028bdb9b5d0e5cd6081d8df387551c206ce7ab (diff)
bgpd: implement SendHoldTimer
As described by https://www.ietf.org/archive/id/draft-spaghetti-idr-bgp-sendholdtimer-04.html Since this replicates the HoldTime check on the receiver that is already part of the protocol, I do not believe it necessary to wait for IETF progress on this draft. It's just replicating an existing element of the protocol at the other side of the session. Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
Diffstat (limited to 'bgpd')
-rw-r--r--bgpd/bgp_errors.c12
-rw-r--r--bgpd/bgp_errors.h2
-rw-r--r--bgpd/bgp_io.c10
-rw-r--r--bgpd/bgp_packet.c37
-rw-r--r--bgpd/bgpd.h5
5 files changed, 63 insertions, 3 deletions
diff --git a/bgpd/bgp_errors.c b/bgpd/bgp_errors.c
index 193c96a169..9f87f8ce7a 100644
--- a/bgpd/bgp_errors.c
+++ b/bgpd/bgp_errors.c
@@ -182,6 +182,12 @@ static struct log_ref ferr_bgp_warn[] = {
.suggestion = "Gather log files and open an Issue, restart FRR",
},
{
+ .code = EC_BGP_SENDQ_STUCK_WARN,
+ .title = "BGP has been unable to send anything to a peer for an extended time",
+ .description = "The BGP peer does not seem to be receiving or processing any data received from us, causing updates to be delayed.",
+ .suggestion = "Check connectivity to the peer and that it is not overloaded",
+ },
+ {
.code = END_FERR,
}
};
@@ -481,6 +487,12 @@ static struct log_ref ferr_bgp_err[] = {
.suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC",
},
{
+ .code = EC_BGP_SENDQ_STUCK_PROPER,
+ .title = "BGP is shutting down a peer due to being unable to send anything for an extended time",
+ .description = "No BGP updates were successfully sent to the peer for more than twice the holdtime.",
+ .suggestion = "Check connectivity to the peer and that it is not overloaded",
+ },
+ {
.code = END_FERR,
}
};
diff --git a/bgpd/bgp_errors.h b/bgpd/bgp_errors.h
index 0b71af3fc6..0c0917c49e 100644
--- a/bgpd/bgp_errors.h
+++ b/bgpd/bgp_errors.h
@@ -102,6 +102,8 @@ enum bgp_log_refs {
EC_BGP_INVALID_BGP_INSTANCE,
EC_BGP_INVALID_ROUTE,
EC_BGP_NO_LL_ADDRESS_AVAILABLE,
+ EC_BGP_SENDQ_STUCK_WARN,
+ EC_BGP_SENDQ_STUCK_PROPER,
};
extern void bgp_error_init(void);
diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c
index bd0dfb3a6d..75d34a84e0 100644
--- a/bgpd/bgp_io.c
+++ b/bgpd/bgp_io.c
@@ -298,6 +298,7 @@ static uint16_t bgp_write(struct peer *peer)
unsigned int iovsz;
unsigned int strmsz;
unsigned int total_written;
+ time_t now;
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
memory_order_relaxed);
@@ -430,19 +431,22 @@ static uint16_t bgp_write(struct peer *peer)
}
done : {
+ now = bgp_clock();
/*
* Update last_update if UPDATEs were written.
* Note: that these are only updated at end,
* not per message (i.e., per loop)
*/
if (uo)
- atomic_store_explicit(&peer->last_update, bgp_clock(),
+ atomic_store_explicit(&peer->last_update, now,
memory_order_relaxed);
/* If we TXed any flavor of packet */
- if (update_last_write)
- atomic_store_explicit(&peer->last_write, bgp_clock(),
+ if (update_last_write) {
+ atomic_store_explicit(&peer->last_write, now,
memory_order_relaxed);
+ peer->last_sendq_ok = now;
+ }
}
return status;
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index cacc4995fc..e4697fdb2b 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -122,8 +122,45 @@ void bgp_packet_set_size(struct stream *s)
*/
static void bgp_packet_add(struct peer *peer, struct stream *s)
{
+ intmax_t delta;
+ uint32_t holdtime;
+
frr_with_mutex(&peer->io_mtx) {
+ /* if the queue is empty, reset the "last OK" timestamp to
+ * now, otherwise if we write another packet immediately
+ * after it'll get confused
+ */
+ if (!stream_fifo_count_safe(peer->obuf))
+ peer->last_sendq_ok = bgp_clock();
+
stream_fifo_push(peer->obuf, s);
+
+ delta = bgp_clock() - peer->last_sendq_ok;
+ holdtime = atomic_load_explicit(&peer->holdtime,
+ memory_order_relaxed);
+
+ /* Note that when we're here, we're adding some packet to the
+ * OutQ. That includes keepalives when there is nothing to
+ * do, so there's a guarantee we pass by here once in a while.
+ *
+ * That implies there is no need to go set up another separate
+ * timer that ticks down SendHoldTime, as we'll be here sooner
+ * or later anyway and will see the checks below failing.
+ */
+ if (delta > 2 * (intmax_t)holdtime) {
+ flog_err(
+ EC_BGP_SENDQ_STUCK_PROPER,
+ "%s has not made any SendQ progress for 2 holdtimes, terminating session",
+ peer->host);
+ BGP_EVENT_ADD(peer, TCP_fatal_error);
+ } else if (delta > (intmax_t)holdtime &&
+ bgp_clock() - peer->last_sendq_warn > 5) {
+ flog_warn(
+ EC_BGP_SENDQ_STUCK_WARN,
+ "%s has not made any SendQ progress for 1 holdtime, peer overloaded?",
+ peer->host);
+ peer->last_sendq_warn = bgp_clock();
+ }
}
}
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 6b58c101a7..d380d9d678 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -1533,6 +1533,11 @@ struct peer {
/* timestamp when the last msg was written */
_Atomic time_t last_update;
+ /* only updated under io_mtx.
+ * last_sendq_warn is only for ratelimiting log warning messages.
+ */
+ time_t last_sendq_ok, last_sendq_warn;
+
/* Notify data. */
struct bgp_notify notify;