.description = "BGP is in the process of building NLRI information for a peer and has discovered an inconsistent internal state",
.suggestion = "Gather log files and open an Issue, restart FRR",
},
+ {
+ .code = EC_BGP_SENDQ_STUCK_WARN,
+ .title = "BGP has been unable to send anything to a peer for an extended time",
+ .description = "The BGP peer does not seem to be receiving or processing any data received from us, causing updates to be delayed.",
+ .suggestion = "Check connectivity to the peer and that it is not overloaded",
+ },
{
.code = END_FERR,
}
.description = "BGP when using a v6 peer requires a v6 LL address to be configured on the outgoing interface as per RFC 4291 section 2.1",
.suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC",
},
+ {
+ .code = EC_BGP_SENDQ_STUCK_PROPER,
+ .title = "BGP is shutting down a peer due to being unable to send anything for an extended time",
+ .description = "No BGP updates were successfully sent to the peer for more than twice the holdtime.",
+ .suggestion = "Check connectivity to the peer and that it is not overloaded",
+ },
{
.code = END_FERR,
}
EC_BGP_INVALID_BGP_INSTANCE,
EC_BGP_INVALID_ROUTE,
EC_BGP_NO_LL_ADDRESS_AVAILABLE,
+ EC_BGP_SENDQ_STUCK_WARN,
+ EC_BGP_SENDQ_STUCK_PROPER,
};
extern void bgp_error_init(void);
unsigned int iovsz;
unsigned int strmsz;
unsigned int total_written;
+ time_t now;
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
memory_order_relaxed);
}
done : {
+ now = bgp_clock();
/*
* Update last_update if UPDATEs were written.
* Note: that these are only updated at end,
* not per message (i.e., per loop)
*/
if (uo)
- atomic_store_explicit(&peer->last_update, bgp_clock(),
+ atomic_store_explicit(&peer->last_update, now,
memory_order_relaxed);
/* If we TXed any flavor of packet */
- if (update_last_write)
- atomic_store_explicit(&peer->last_write, bgp_clock(),
+ if (update_last_write) {
+ atomic_store_explicit(&peer->last_write, now,
memory_order_relaxed);
+ peer->last_sendq_ok = now;
+ }
}
return status;
*/
static void bgp_packet_add(struct peer *peer, struct stream *s)
{
+ intmax_t delta;
+ uint32_t holdtime;
+
frr_with_mutex(&peer->io_mtx) {
+ /* if the queue is empty, reset the "last OK" timestamp to
+ * now, otherwise if we write another packet immediately
+ * after it'll get confused
+ */
+ if (!stream_fifo_count_safe(peer->obuf))
+ peer->last_sendq_ok = bgp_clock();
+
stream_fifo_push(peer->obuf, s);
+
+ delta = bgp_clock() - peer->last_sendq_ok;
+ holdtime = atomic_load_explicit(&peer->holdtime,
+ memory_order_relaxed);
+
+ /* Note that when we're here, we're adding some packet to the
+ * OutQ. That includes keepalives when there is nothing to
+ * do, so there's a guarantee we pass by here once in a while.
+ *
+ * That implies there is no need to go set up another separate
+ * timer that ticks down SendHoldTime, as we'll be here sooner
+ * or later anyway and will see the checks below failing.
+ */
+ if (delta > 2 * (intmax_t)holdtime) {
+ flog_err(
+ EC_BGP_SENDQ_STUCK_PROPER,
+ "%s has not made any SendQ progress for 2 holdtimes, terminating session",
+ peer->host);
+ BGP_EVENT_ADD(peer, TCP_fatal_error);
+ } else if (delta > (intmax_t)holdtime &&
+ bgp_clock() - peer->last_sendq_warn > 5) {
+ flog_warn(
+ EC_BGP_SENDQ_STUCK_WARN,
+ "%s has not made any SendQ progress for 1 holdtime, peer overloaded?",
+ peer->host);
+ peer->last_sendq_warn = bgp_clock();
+ }
}
}
/* timestamp when the last msg was written */
_Atomic time_t last_update;
+ /* only updated under io_mtx.
+ * last_sendq_warn is only for ratelimiting log warning messages.
+ */
+ time_t last_sendq_ok, last_sendq_warn;
+
/* Notify data. */
struct bgp_notify notify;