From 8c9d306c8d2adf34d11bb1566fda6f0a8bfe7990 Mon Sep 17 00:00:00 2001 From: Samanvitha B Bhargav Date: Sun, 15 Jan 2023 20:40:54 -0800 Subject: [PATCH] bgpd: Fix crash during shutdown due to race condition MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit [New LWP 2524] [New LWP 2539] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Core was generated by `/opt/avi/bin/bgpd -f /run/frr/avi_ns3_bgpd.config -i /opt/avi/etc/avi_ns3_bgpd.'. Program terminated with signal SIGABRT, Aborted. [Current thread is 1 (Thread 0x7f92ac8f1740 (LWP 2524))] 0 0x00007f92acb3800b in raise () from /lib/x86_64-linux-gnu/libc.so.6 [Current thread is 1 (Thread 0x7f92ac8f1740 (LWP 2524))] 0 0x00007f92acb3800b in raise () from /lib/x86_64-linux-gnu/libc.so.6 1 0x00007f92acb17859 in abort () from /lib/x86_64-linux-gnu/libc.so.6 2 0x00007f92acb17729 in ?? () from /lib/x86_64-linux-gnu/libc.so.6 3 0x00007f92acb28fd6 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6 4 0x00007f92accf2164 in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 5 0x000055b46be1ef63 in bgp_keepalives_wake () at bgpd/bgp_keepalives.c:311 6 0x000055b46be1f111 in bgp_keepalives_stop (fpt=0x55b46cfacf20, result=) at bgpd/bgp_keepalives.c:323 7 0x00007f92acea9521 in frr_pthread_stop (fpt=0x55b46cfacf20, result=result@entry=0x0) at lib/frr_pthread.c:176 8 0x00007f92acea9586 in frr_pthread_stop_all () at lib/frr_pthread.c:188 9 0x000055b46bdde54a in bgp_pthreads_finish () at bgpd/bgpd.c:8150 10 0x000055b46bd696ca in bgp_exit (status=0) at bgpd/bgp_main.c:210 11 sigint () at bgpd/bgp_main.c:154 12 0x00007f92acecc1e9 in quagga_sigevent_process () at lib/sigevent.c:105 13 0x00007f92aced689a in thread_fetch (m=m@entry=0x55b46cf23540, fetch=fetch@entry=0x7fff95379238) at lib/thread.c:1487 14 0x00007f92aceb2681 in frr_run (master=0x55b46cf23540) at lib/libfrr.c:1010 15 0x000055b46bd676f4 in main (argc=11, argv=0x7fff953795a8) at bgpd/bgp_main.c:482 Root cause: This is due to race condition between main thread & keepalive thread during clean-up. This happens when the keepalive thread is processing a wake signal owning the mutex, when meanwhile the main thread tries to stop the keepalives thread. In main thread, the keepalive thread’s running bit (fpt->running) is set to false, without taking the mutex & then it blocks on mutex. Meanwhile, keepalive thread which owns the mutex sees that the running bit is false & executes bgp_keepalives_finish() which also frees up mutex. Main thread that is waiting on mutex with pthread_mutex_lock() will cause core while trying to access mutex. Fix: Take the lock in main thread while setting the fpt->running to false. Signed-off-by: Samanvitha B Bhargav --- bgpd/bgp_keepalives.c | 17 +++++++---------- bgpd/bgp_keepalives.h | 12 ------------ 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/bgpd/bgp_keepalives.c b/bgpd/bgp_keepalives.c index 604d6c9509..d9c653f562 100644 --- a/bgpd/bgp_keepalives.c +++ b/bgpd/bgp_keepalives.c @@ -269,8 +269,9 @@ void bgp_keepalives_on(struct peer *peer) peer_lock(peer); } SET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON); + /* Force the keepalive thread to wake up */ + pthread_cond_signal(peerhash_cond); } - bgp_keepalives_wake(); } void bgp_keepalives_off(struct peer *peer) @@ -300,19 +301,15 @@ void bgp_keepalives_off(struct peer *peer) } } -void bgp_keepalives_wake(void) -{ - frr_with_mutex (peerhash_mtx) { - pthread_cond_signal(peerhash_cond); - } -} - int bgp_keepalives_stop(struct frr_pthread *fpt, void **result) { assert(fpt->running); - atomic_store_explicit(&fpt->running, false, memory_order_relaxed); - bgp_keepalives_wake(); + frr_with_mutex (peerhash_mtx) { + atomic_store_explicit(&fpt->running, false, + memory_order_relaxed); + pthread_cond_signal(peerhash_cond); + } pthread_join(fpt->thread, result); return 0; diff --git a/bgpd/bgp_keepalives.h b/bgpd/bgp_keepalives.h index d1cb7d2462..e8d0d3d401 100644 --- a/bgpd/bgp_keepalives.h +++ b/bgpd/bgp_keepalives.h @@ -73,18 +73,6 @@ extern void bgp_keepalives_init(void); */ extern void *bgp_keepalives_start(void *arg); -/** - * Poking function for keepalives pthread. - * - * Under normal circumstances the pthread will automatically wake itself - * whenever it is necessary to do work. This function may be used to force the - * thread to wake up and see if there is any work to do, or if it is time to - * die. - * - * It is not necessary to call this after bgp_keepalives_on(). - */ -extern void bgp_keepalives_wake(void); - /** * Stops the thread and blocks until it terminates. */ -- 2.39.5