path: root/bgpd
author	Donald Sharp <sharpd@nvidia.com>	2024-10-24 14:17:51 -0400
committer	Donald Sharp <sharpd@nvidia.com>	2025-01-24 13:24:40 -0500
commit	4d52a7056c3c5b886f2ae226cc4ebcd54f26d042 (patch)
tree	b077c6c3be0703d01f2262d41652a62799b6396c /bgpd
parent	989328c7e47c7bbcf3364d5424ac17f49fd05c57 (diff)
bgpd: Fix deadlock in bgp_keepalive and master pthreads
(gdb) bt
#0  futex_wait (private=0, expected=2, futex_word=0x5c438e9a98d8) at ../sysdeps/nptl/futex-internal.h:146
#1  __GI___lll_lock_wait (futex=futex@entry=0x5c438e9a98d8, private=0) at ./nptl/lowlevellock.c:49
#2  0x00007af16d698002 in lll_mutex_lock_optimized (mutex=0x5c438e9a98d8) at ./nptl/pthread_mutex_lock.c:48
#3  ___pthread_mutex_lock (mutex=0x5c438e9a98d8) at ./nptl/pthread_mutex_lock.c:93
#4  0x00005c4369c17e70 in _frr_mtx_lock (mutex=0x5c438e9a98d8, func=0x5c4369dc2750 <__func__.265> "bgp_notify_send_internal") at ./lib/frr_pthread.h:258
#5  0x00005c4369c1a07a in bgp_notify_send_internal (connection=0x5c438e9a98c0, code=8 '\b', sub_code=0 '\000', data=0x0, datalen=0, use_curr=true) at bgpd/bgp_packet.c:928
#6  0x00005c4369c1a707 in bgp_notify_send (connection=0x5c438e9a98c0, code=8 '\b', sub_code=0 '\000') at bgpd/bgp_packet.c:1069
#7  0x00005c4369bea422 in bgp_stop_with_notify (connection=0x5c438e9a98c0, code=8 '\b', sub_code=0 '\000') at bgpd/bgp_fsm.c:1597
#8  0x00005c4369c18480 in bgp_packet_add (connection=0x5c438e9a98c0, peer=0x5c438e9b6010, s=0x7af15c06bf70) at bgpd/bgp_packet.c:151
#9  0x00005c4369c19816 in bgp_keepalive_send (peer=0x5c438e9b6010) at bgpd/bgp_packet.c:639
#10 0x00005c4369bf01fd in peer_process (hb=0x5c438ed05520, arg=0x7af16bdffaf0) at bgpd/bgp_keepalives.c:111
#11 0x00007af16dacd8e6 in hash_iterate (hash=0x7af15c000be0, func=0x5c4369bf005e <peer_process>, arg=0x7af16bdffaf0) at lib/hash.c:252
#12 0x00005c4369bf0679 in bgp_keepalives_start (arg=0x5c438e0db110) at bgpd/bgp_keepalives.c:214
#13 0x00007af16dac9932 in frr_pthread_inner (arg=0x5c438e0db110) at lib/frr_pthread.c:180
#14 0x00007af16d694ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#15 0x00007af16d726850 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
(gdb)

The bgp keepalive pthread deadlocks with itself, and the bgp master pthread then blocks as well when it attempts to lock peerhash_mtx, since that mutex is still held by the keepalive pthread.

The keepalive pthread first locks peerhash_mtx in bgp_keepalives_start(). It then locks connection->io_mtx in bgp_packet_add() (reached via bgp_keepalive_send()); when it detects a stuck SendQ there, it invokes bgp_stop_with_notify(), which attempts to lock the same mutex again. The mutex is not recursive, so the relock blocks on itself, producing the deadlock.

Modify the code to hold connection->io_mtx for as short a time as possible.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
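As a minimal standalone sketch of the bug pattern (hypothetical names; the real code goes through FRR's frr_with_mutex wrappers, not raw pthread calls): a default, non-recursive pthread mutex is taken, and an error path entered while it is still held tries to take it again, so the second lock never returns.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t io_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Stands in for bgp_notify_send_internal(): needs io_mtx itself. */
static void send_notification(void)
{
	pthread_mutex_lock(&io_mtx); /* already held by this thread: blocks forever */
	/* ... queue the NOTIFICATION ... */
	pthread_mutex_unlock(&io_mtx);
}

/* Stands in for the old bgp_packet_add(): checks run under the lock. */
static void packet_add(int sendq_stuck)
{
	pthread_mutex_lock(&io_mtx);
	/* ... push packet onto the OutQ ... */
	if (sendq_stuck)
		send_notification(); /* relock of io_mtx -> self-deadlock */
	pthread_mutex_unlock(&io_mtx);
}

int main(void)
{
	packet_add(1); /* hangs here, like frame #4 in the backtrace */
	puts("never reached");
	return 0;
}

The default mutex type on Linux is PTHREAD_MUTEX_NORMAL, so the relock in send_notification() simply blocks on the futex, matching frames #0-#4 above.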
Diffstat (limited to 'bgpd')
-rw-r--r--	bgpd/bgp_packet.c	59
1 file changed, 27 insertions(+), 32 deletions(-)
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index 62be7ffbf7..8051dfb622 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -122,42 +122,37 @@ static void bgp_packet_add(struct peer_connection *connection,
peer->last_sendq_ok = monotime(NULL);
stream_fifo_push(connection->obuf, s);
+ }
- delta = monotime(NULL) - peer->last_sendq_ok;
+ delta = monotime(NULL) - peer->last_sendq_ok;
- if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
- holdtime = atomic_load_explicit(&peer->holdtime,
- memory_order_relaxed);
- else
- holdtime = peer->bgp->default_holdtime;
+ if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
+ holdtime = atomic_load_explicit(&peer->holdtime, memory_order_relaxed);
+ else
+ holdtime = peer->bgp->default_holdtime;
- sendholdtime = holdtime * 2;
+ sendholdtime = holdtime * 2;
- /* Note that when we're here, we're adding some packet to the
- * OutQ. That includes keepalives when there is nothing to
- * do, so there's a guarantee we pass by here once in a while.
- *
- * That implies there is no need to go set up another separate
- * timer that ticks down SendHoldTime, as we'll be here sooner
- * or later anyway and will see the checks below failing.
- */
- if (!holdtime) {
- /* no holdtime, do nothing. */
- } else if (delta > sendholdtime) {
- flog_err(
- EC_BGP_SENDQ_STUCK_PROPER,
- "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
- peer, sendholdtime);
- bgp_stop_with_notify(connection,
- BGP_NOTIFY_SEND_HOLD_ERR, 0);
- } else if (delta > (intmax_t)holdtime &&
- monotime(NULL) - peer->last_sendq_warn > 5) {
- flog_warn(
- EC_BGP_SENDQ_STUCK_WARN,
- "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
- peer, holdtime);
- peer->last_sendq_warn = monotime(NULL);
- }
+ /* Note that when we're here, we're adding some packet to the
+ * OutQ. That includes keepalives when there is nothing to
+ * do, so there's a guarantee we pass by here once in a while.
+ *
+ * That implies there is no need to go set up another separate
+ * timer that ticks down SendHoldTime, as we'll be here sooner
+ * or later anyway and will see the checks below failing.
+ */
+ if (!holdtime) {
+ /* no holdtime, do nothing. */
+ } else if (delta > sendholdtime) {
+ flog_err(EC_BGP_SENDQ_STUCK_PROPER,
+ "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
+ peer, sendholdtime);
+ bgp_stop_with_notify(connection, BGP_NOTIFY_SEND_HOLD_ERR, 0);
+ } else if (delta > (intmax_t)holdtime && monotime(NULL) - peer->last_sendq_warn > 5) {
+ flog_warn(EC_BGP_SENDQ_STUCK_WARN,
+ "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
+ peer, holdtime);
+ peer->last_sendq_warn = monotime(NULL);
}
}
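The hunk above implements the narrowing: the `+ }` early in the hunk closes the mutex-protected scope right after the stream_fifo_push(), and the SendHoldTime checks are dedented out of it, so bgp_stop_with_notify() no longer runs with io_mtx held. Reusing the hypothetical io_mtx and send_notification() from the sketch earlier, the fixed shape looks roughly like this (a sketch, not the actual FRR code):

/* The mutex now guards only the queue push; the stall checks run
 * after the unlock, so the notify path is free to take io_mtx. */
static void packet_add_fixed(int sendq_stuck)
{
	pthread_mutex_lock(&io_mtx);
	/* ... push packet onto the OutQ, update last_sendq_ok ... */
	pthread_mutex_unlock(&io_mtx);

	if (sendq_stuck)
		send_notification(); /* takes io_mtx itself: no self-deadlock */
}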