diff options
| author | David Lamparter <equinox@opensourcerouting.org> | 2021-04-22 11:04:52 +0200 |
|---|---|---|
| committer | David Lamparter <equinox@opensourcerouting.org> | 2022-05-19 12:14:40 +0200 |
| commit | bd9fb6f368049bd5f1f6a2b7bc97fbd51c9300cc (patch) | |
| tree | f172dc19ee071f21387bc212197d4913b99a881a /bgpd | |
| parent | 18028bdb9b5d0e5cd6081d8df387551c206ce7ab (diff) | |
bgpd: implement SendHoldTimer
As described by
https://www.ietf.org/archive/id/draft-spaghetti-idr-bgp-sendholdtimer-04.html
Since this replicates the HoldTime check on the receiver that is already
part of the protocol, I do not believe it necessary to wait for IETF
progress on this draft. It's just replicating an existing element of
the protocol at the other side of the session.
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
Diffstat (limited to 'bgpd')
| -rw-r--r-- | bgpd/bgp_errors.c | 12 | ||||
| -rw-r--r-- | bgpd/bgp_errors.h | 2 | ||||
| -rw-r--r-- | bgpd/bgp_io.c | 10 | ||||
| -rw-r--r-- | bgpd/bgp_packet.c | 37 | ||||
| -rw-r--r-- | bgpd/bgpd.h | 5 |
5 files changed, 63 insertions, 3 deletions
diff --git a/bgpd/bgp_errors.c b/bgpd/bgp_errors.c index 193c96a169..9f87f8ce7a 100644 --- a/bgpd/bgp_errors.c +++ b/bgpd/bgp_errors.c @@ -182,6 +182,12 @@ static struct log_ref ferr_bgp_warn[] = { .suggestion = "Gather log files and open an Issue, restart FRR", }, { + .code = EC_BGP_SENDQ_STUCK_WARN, + .title = "BGP has been unable to send anything to a peer for an extended time", + .description = "The BGP peer does not seem to be receiving or processing any data received from us, causing updates to be delayed.", + .suggestion = "Check connectivity to the peer and that it is not overloaded", + }, + { .code = END_FERR, } }; @@ -481,6 +487,12 @@ static struct log_ref ferr_bgp_err[] = { .suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC", }, { + .code = EC_BGP_SENDQ_STUCK_PROPER, + .title = "BGP is shutting down a peer due to being unable to send anything for an extended time", + .description = "No BGP updates were successfully sent to the peer for more than twice the holdtime.", + .suggestion = "Check connectivity to the peer and that it is not overloaded", + }, + { .code = END_FERR, } }; diff --git a/bgpd/bgp_errors.h b/bgpd/bgp_errors.h index 0b71af3fc6..0c0917c49e 100644 --- a/bgpd/bgp_errors.h +++ b/bgpd/bgp_errors.h @@ -102,6 +102,8 @@ enum bgp_log_refs { EC_BGP_INVALID_BGP_INSTANCE, EC_BGP_INVALID_ROUTE, EC_BGP_NO_LL_ADDRESS_AVAILABLE, + EC_BGP_SENDQ_STUCK_WARN, + EC_BGP_SENDQ_STUCK_PROPER, }; extern void bgp_error_init(void); diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c index bd0dfb3a6d..75d34a84e0 100644 --- a/bgpd/bgp_io.c +++ b/bgpd/bgp_io.c @@ -298,6 +298,7 @@ static uint16_t bgp_write(struct peer *peer) unsigned int iovsz; unsigned int strmsz; unsigned int total_written; + time_t now; wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta, memory_order_relaxed); @@ -430,19 +431,22 @@ static uint16_t bgp_write(struct peer *peer) } done : { + now = bgp_clock(); /* * Update last_update if UPDATEs were written. * Note: that these are only updated at end, * not per message (i.e., per loop) */ if (uo) - atomic_store_explicit(&peer->last_update, bgp_clock(), + atomic_store_explicit(&peer->last_update, now, memory_order_relaxed); /* If we TXed any flavor of packet */ - if (update_last_write) - atomic_store_explicit(&peer->last_write, bgp_clock(), + if (update_last_write) { + atomic_store_explicit(&peer->last_write, now, memory_order_relaxed); + peer->last_sendq_ok = now; + } } return status; diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index cacc4995fc..e4697fdb2b 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -122,8 +122,45 @@ void bgp_packet_set_size(struct stream *s) */ static void bgp_packet_add(struct peer *peer, struct stream *s) { + intmax_t delta; + uint32_t holdtime; + frr_with_mutex(&peer->io_mtx) { + /* if the queue is empty, reset the "last OK" timestamp to + * now, otherwise if we write another packet immediately + * after it'll get confused + */ + if (!stream_fifo_count_safe(peer->obuf)) + peer->last_sendq_ok = bgp_clock(); + stream_fifo_push(peer->obuf, s); + + delta = bgp_clock() - peer->last_sendq_ok; + holdtime = atomic_load_explicit(&peer->holdtime, + memory_order_relaxed); + + /* Note that when we're here, we're adding some packet to the + * OutQ. That includes keepalives when there is nothing to + * do, so there's a guarantee we pass by here once in a while. + * + * That implies there is no need to go set up another separate + * timer that ticks down SendHoldTime, as we'll be here sooner + * or later anyway and will see the checks below failing. + */ + if (delta > 2 * (intmax_t)holdtime) { + flog_err( + EC_BGP_SENDQ_STUCK_PROPER, + "%s has not made any SendQ progress for 2 holdtimes, terminating session", + peer->host); + BGP_EVENT_ADD(peer, TCP_fatal_error); + } else if (delta > (intmax_t)holdtime && + bgp_clock() - peer->last_sendq_warn > 5) { + flog_warn( + EC_BGP_SENDQ_STUCK_WARN, + "%s has not made any SendQ progress for 1 holdtime, peer overloaded?", + peer->host); + peer->last_sendq_warn = bgp_clock(); + } } } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 6b58c101a7..d380d9d678 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -1533,6 +1533,11 @@ struct peer { /* timestamp when the last msg was written */ _Atomic time_t last_update; + /* only updated under io_mtx. + * last_sendq_warn is only for ratelimiting log warning messages. + */ + time_t last_sendq_ok, last_sendq_warn; + /* Notify data. */ struct bgp_notify notify; |
