| author | Donald Sharp <sharpd@nvidia.com> | 2024-10-24 14:17:51 -0400 |
|---|---|---|
| committer | Donald Sharp <sharpd@nvidia.com> | 2025-01-24 13:25:25 -0500 |
| commit | 4785e84f4329dc0bbe2942a0cf9b7e7df8cfd914 | |
| tree | baa5f43b740e137656641d5b44dcdcf7936b7a8d | |
| parent | 3ff48febe7b95d69fec6e14e686c9497ec517dfd | |
bgpd: Fix deadlock in bgp_keepalive and master pthreads
(gdb) bt
#0  futex_wait (private=0, expected=2, futex_word=0x5c438e9a98d8) at ../sysdeps/nptl/futex-internal.h:146
#1  __GI___lll_lock_wait (futex=futex@entry=0x5c438e9a98d8, private=0) at ./nptl/lowlevellock.c:49
#2  0x00007af16d698002 in lll_mutex_lock_optimized (mutex=0x5c438e9a98d8) at ./nptl/pthread_mutex_lock.c:48
#3  ___pthread_mutex_lock (mutex=0x5c438e9a98d8) at ./nptl/pthread_mutex_lock.c:93
#4  0x00005c4369c17e70 in _frr_mtx_lock (mutex=0x5c438e9a98d8, func=0x5c4369dc2750 <__func__.265> "bgp_notify_send_internal") at ./lib/frr_pthread.h:258
#5  0x00005c4369c1a07a in bgp_notify_send_internal (connection=0x5c438e9a98c0, code=8 '\b', sub_code=0 '\000', data=0x0, datalen=0, use_curr=true) at bgpd/bgp_packet.c:928
#6  0x00005c4369c1a707 in bgp_notify_send (connection=0x5c438e9a98c0, code=8 '\b', sub_code=0 '\000') at bgpd/bgp_packet.c:1069
#7  0x00005c4369bea422 in bgp_stop_with_notify (connection=0x5c438e9a98c0, code=8 '\b', sub_code=0 '\000') at bgpd/bgp_fsm.c:1597
#8  0x00005c4369c18480 in bgp_packet_add (connection=0x5c438e9a98c0, peer=0x5c438e9b6010, s=0x7af15c06bf70) at bgpd/bgp_packet.c:151
#9  0x00005c4369c19816 in bgp_keepalive_send (peer=0x5c438e9b6010) at bgpd/bgp_packet.c:639
#10 0x00005c4369bf01fd in peer_process (hb=0x5c438ed05520, arg=0x7af16bdffaf0) at bgpd/bgp_keepalives.c:111
#11 0x00007af16dacd8e6 in hash_iterate (hash=0x7af15c000be0, func=0x5c4369bf005e <peer_process>, arg=0x7af16bdffaf0) at lib/hash.c:252
#12 0x00005c4369bf0679 in bgp_keepalives_start (arg=0x5c438e0db110) at bgpd/bgp_keepalives.c:214
#13 0x00007af16dac9932 in frr_pthread_inner (arg=0x5c438e0db110) at lib/frr_pthread.c:180
#14 0x00007af16d694ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#15 0x00007af16d726850 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
(gdb)
The bgp keepalive pthread deadlocks with itself, and consequently the
bgp master pthread blocks when it attempts to lock the peerhash_mtx,
since that mutex is still held by the keepalive pthread.

The keepalive pthread locks the peerhash_mtx in bgp_keepalives_start.
It then locks connection->io_mtx in bgp_packet_add (reached via
bgp_keepalive_send), and when it notices that the SendQ has made no
progress it invokes bgp_stop_with_notify, which tries to relock the
same mutex.  The relock blocks on itself, producing the deadlock.
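To make the relock failure concrete, here is a minimal, hypothetical sketch in plain pthreads (not FRR code; the function names only stand in for bgp_packet_add and bgp_notify_send_internal). With the default, non-recursive mutex type on Linux, a second lock attempt by the owning thread simply blocks, which is exactly the futex_wait frame in the backtrace above.

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t io_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for bgp_notify_send_internal(), which takes io_mtx itself. */
static void send_notify(void)
{
	pthread_mutex_lock(&io_mtx);	/* blocks forever: this thread already owns io_mtx */
	/* ... queue the NOTIFICATION ... */
	pthread_mutex_unlock(&io_mtx);
}

/* Stand-in for bgp_packet_add() before the fix: the stuck-SendQ check
 * (and the resulting notify) ran while io_mtx was still held. */
static void packet_add(void)
{
	pthread_mutex_lock(&io_mtx);
	/* push the packet, then notice the SendQ made no progress ... */
	send_notify();			/* relocks io_mtx -> self-deadlock */
	pthread_mutex_unlock(&io_mtx);	/* never reached */
}

int main(void)
{
	packet_add();
	puts("never printed");
	return 0;
}
```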
Modify the code to hold connection->io_mtx for as short a time as
possible.
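A hedged sketch of that shape, again in plain pthreads with hypothetical helper names rather than the real change (the actual diff follows below): the mutex covers only the queue manipulation, and the stuck-SendQ check runs after it is released, so the notification path is free to take the mutex itself.

```c
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t io_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical helpers standing in for stream_fifo_push() and the
 * delta/holdtime comparison done in bgp_packet_add(). */
static void queue_push(void) { /* enqueue the packet */ }
static bool sendq_stuck(void) { return true; /* pretend the SendQ is stuck */ }

static void send_notify(void)
{
	pthread_mutex_lock(&io_mtx);	/* safe: the caller no longer holds io_mtx */
	/* ... queue the NOTIFICATION ... */
	pthread_mutex_unlock(&io_mtx);
}

static void packet_add_fixed(void)
{
	/* Critical section covers only the queue work. */
	pthread_mutex_lock(&io_mtx);
	queue_push();
	pthread_mutex_unlock(&io_mtx);

	/* SendHoldTimer checks run unlocked, so terminating the session
	 * (which relocks io_mtx) can no longer deadlock. */
	if (sendq_stuck())
		send_notify();
}

int main(void)
{
	packet_add_fixed();
	return 0;
}
```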
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
| -rw-r--r-- | bgpd/bgp_packet.c | 59 | 
1 file changed, 27 insertions(+), 32 deletions(-)
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index 010a31a3a8..c0b5deb6fc 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -122,42 +122,37 @@ static void bgp_packet_add(struct peer_connection *connection,
 			peer->last_sendq_ok = monotime(NULL);
 
 		stream_fifo_push(connection->obuf, s);
+	}
 
-		delta = monotime(NULL) - peer->last_sendq_ok;
+	delta = monotime(NULL) - peer->last_sendq_ok;
 
-		if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
-			holdtime = atomic_load_explicit(&peer->holdtime,
-							memory_order_relaxed);
-		else
-			holdtime = peer->bgp->default_holdtime;
+	if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
+		holdtime = atomic_load_explicit(&peer->holdtime, memory_order_relaxed);
+	else
+		holdtime = peer->bgp->default_holdtime;
 
-		sendholdtime = holdtime * 2;
+	sendholdtime = holdtime * 2;
 
-		/* Note that when we're here, we're adding some packet to the
-		 * OutQ.  That includes keepalives when there is nothing to
-		 * do, so there's a guarantee we pass by here once in a while.
-		 *
-		 * That implies there is no need to go set up another separate
-		 * timer that ticks down SendHoldTime, as we'll be here sooner
-		 * or later anyway and will see the checks below failing.
-		 */
-		if (!holdtime) {
-			/* no holdtime, do nothing. */
-		} else if (delta > sendholdtime) {
-			flog_err(
-				EC_BGP_SENDQ_STUCK_PROPER,
-				"%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
-				peer, sendholdtime);
-			bgp_stop_with_notify(connection,
-					     BGP_NOTIFY_SEND_HOLD_ERR, 0);
-		} else if (delta > (intmax_t)holdtime &&
-			   monotime(NULL) - peer->last_sendq_warn > 5) {
-			flog_warn(
-				EC_BGP_SENDQ_STUCK_WARN,
-				"%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
-				peer, holdtime);
-			peer->last_sendq_warn = monotime(NULL);
-		}
+	/* Note that when we're here, we're adding some packet to the
+	 * OutQ.  That includes keepalives when there is nothing to
+	 * do, so there's a guarantee we pass by here once in a while.
+	 *
+	 * That implies there is no need to go set up another separate
+	 * timer that ticks down SendHoldTime, as we'll be here sooner
+	 * or later anyway and will see the checks below failing.
+	 */
+	if (!holdtime) {
+		/* no holdtime, do nothing. */
+	} else if (delta > sendholdtime) {
+		flog_err(EC_BGP_SENDQ_STUCK_PROPER,
+			 "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
+			 peer, sendholdtime);
+		bgp_stop_with_notify(connection, BGP_NOTIFY_SEND_HOLD_ERR, 0);
+	} else if (delta > (intmax_t)holdtime && monotime(NULL) - peer->last_sendq_warn > 5) {
+		flog_warn(EC_BGP_SENDQ_STUCK_WARN,
+			  "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
+			  peer, holdtime);
+		peer->last_sendq_warn = monotime(NULL);
 	}
 }
