From 27ccfd9aa69f05646439e46db6e25945a9ce8c19 Mon Sep 17 00:00:00 2001 From: Rajasekar Raja Date: Thu, 17 Aug 2023 00:47:05 -0700 Subject: [PATCH] zebra: Fix zebra crash when replacing NHE during shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit During replace of a NHE from upper proto in zebra_nhg_proto_add(), - rib_handle_nhg_replace() is invoked with old NHE where we walk all RNs/REs & replace the re->nhe whose address points to old NHE. - In this walk, if prev re->nhe refcnt is decremented to 0, we free up the memory which the old NHE is pointing to. Later in zebra_nhg_proto_add(), we end up accessing this freed memory and crash. Logs: 1380766 2023/08/16 22:34:11.994671 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (70312519[2756/2762/2810]) 2 => 1 1380773 2023/08/16 22:34:11.994678 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (70312519[2756/2762/2810]) 1 => 0 1380777 2023/08/16 22:34:11.994844 ZEBRA: [JE46R-G2NEE] zebra_nhg_release: nhe 0x56091d890840 (70312519[2756/2762/2810]) 1380778 2023/08/16 22:34:11.994849 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (70312519[2756/2762/2810]), refcnt 0 1380782 2023/08/16 22:34:11.995000 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (0[]), refcnt 0 1380783 2023/08/16 22:34:11.995011 ZEBRA: lib/memory.c:84: mt_count_free(): assertion (mt->n_alloc) failed Backtrace: 0  0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6 1  0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 2  0x00007f833f636648 in ?? () from /lib/x86_64-linux-gnu/libc.so.6 3  0x00007f833f63cd6a in ?? () from /lib/x86_64-linux-gnu/libc.so.6 4  0x00007f833f63cfb4 in ?? () from /lib/x86_64-linux-gnu/libc.so.6 5  0x00007f833f63fbc8 in ?? () from /lib/x86_64-linux-gnu/libc.so.6 6  0x00007f833f64172a in malloc () from /lib/x86_64-linux-gnu/libc.so.6 7  0x00007f833f6c3fd2 in backtrace_symbols () from /lib/x86_64-linux-gnu/libc.so.6 8  0x00007f833f9013fc in zlog_backtrace_sigsafe (priority=priority@entry=2, program_counter=program_counter@entry=0x7f833f5f48eb ) at lib/log.c:222 9  0x00007f833f901593 in zlog_signal (signo=signo@entry=6, action=action@entry=0x7f833f988ee8 "aborting...", siginfo_v=siginfo_v@entry=0x7ffee1ce4a30,     program_counter=program_counter@entry=0x7f833f5f48eb ) at lib/log.c:154 10 0x00007f833f92dbd1 in core_handler (signo=6, siginfo=0x7ffee1ce4a30, context=) at lib/sigevent.c:254 11 12 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6 13 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 14 0x00007f833f958f96 in _zlog_assert_failed (xref=xref@entry=0x7f833f9e4080 <_xref.10705>, extra=extra@entry=0x0) at lib/zlog.c:680 15 0x00007f833f905400 in mt_count_free (mt=0x7f833fa02800 , ptr=0x51) at lib/memory.c:84 16 mt_count_free (ptr=0x51, mt=0x7f833fa02800 ) at lib/memory.c:80 17 qfree (mt=0x7f833fa02800 , ptr=0x51) at lib/memory.c:140 18 0x00007f833f90799c in nexthop_del_labels (nexthop=nexthop@entry=0x56091d776640) at lib/nexthop.c:563 19 0x00007f833f907b91 in nexthop_free (nexthop=0x56091d776640) at lib/nexthop.c:393 20 0x00007f833f907be8 in nexthops_free (nexthop=) at lib/nexthop.c:408 21 0x000056091c21aa76 in zebra_nhg_free_members (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628 22 zebra_nhg_free (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628 23 0x000056091c21bab2 in zebra_nhg_proto_add (id=, type=9, instance=, session=0, nhg=nhg@entry=0x56091d7da028, afi=afi@entry=AFI_UNSPEC)     at zebra/zebra_nhg.c:3532 24 0x000056091c22bc4e in process_subq_nhg (lnode=0x56091d88c540) at zebra/zebra_rib.c:2689 25 process_subq (qindex=META_QUEUE_NHG, subq=0x56091d24cea0) at zebra/zebra_rib.c:3290 26 meta_queue_process (dummy=, data=0x56091d24d4c0) at zebra/zebra_rib.c:3343 27 0x00007f833f9492c8 in work_queue_run (thread=0x7ffee1ce55a0) at lib/workqueue.c:285 28 0x00007f833f93f60d in thread_call (thread=thread@entry=0x7ffee1ce55a0) at lib/thread.c:2008 29 0x00007f833f8f9888 in frr_run (master=0x56091d068660) at lib/libfrr.c:1223 30 0x000056091c1b8366 in main (argc=12, argv=0x7ffee1ce5988) at zebra/main.c:551 Issue: 3492162 Ticket# 3492162 Signed-off-by: Chirag Shah Signed-off-by: Rajasekar Raja --- zebra/rib.h | 4 ++-- zebra/zebra_nhg.c | 38 ++++++++++++++++++++++++-------------- zebra/zebra_rib.c | 26 ++++++++++++++++++++++---- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/zebra/rib.h b/zebra/rib.h index e20084f8cf..e70b5c1423 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -337,8 +337,8 @@ int route_entry_update_nhe(struct route_entry *re, struct nhg_hash_entry *new_nhghe); /* NHG replace has happend, we have to update route_entry pointers to new one */ -void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, - struct nhg_hash_entry *new_entry); +int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry); #define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re) extern void _route_entry_dump(const char *func, union prefixconstptr pp, diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index d3c86e2a37..3a56bf2a50 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -3420,6 +3420,7 @@ struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type, struct nhg_connected *rb_node_dep = NULL; struct nexthop *newhop; bool replace = false; + int ret = 0; if (!nhg->nexthop) { if (IS_ZEBRA_DEBUG_NHG) @@ -3517,22 +3518,31 @@ struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type, if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED)) zebra_nhg_increment_ref(old); - rib_handle_nhg_replace(old, new); + ret = rib_handle_nhg_replace(old, new); + if (ret) + /* + * if ret > 0, some previous re->nhe has freed the + * address to which old_entry is pointing. Hence mark + * the old NHE as NULL + */ + old = NULL; + else { + /* We have to decrement its singletons + * because some might not exist in NEW. + */ + if (!zebra_nhg_depends_is_empty(old)) { + frr_each (nhg_connected_tree, &old->nhg_depends, + rb_node_dep) + zebra_nhg_decrement_ref( + rb_node_dep->nhe); + } - /* We have to decrement its singletons - * because some might not exist in NEW. - */ - if (!zebra_nhg_depends_is_empty(old)) { - frr_each (nhg_connected_tree, &old->nhg_depends, - rb_node_dep) - zebra_nhg_decrement_ref(rb_node_dep->nhe); + /* Dont call the dec API, we dont want to uninstall the ID */ + old->refcnt = 0; + EVENT_OFF(old->timer); + zebra_nhg_free(old); + old = NULL; } - - /* Dont call the dec API, we dont want to uninstall the ID */ - old->refcnt = 0; - EVENT_OFF(old->timer); - zebra_nhg_free(old); - old = NULL; } if (IS_ZEBRA_DEBUG_NHG_DETAIL) diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 5a32cf6bb9..c05d69a2dd 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -428,18 +428,29 @@ int route_entry_update_nhe(struct route_entry *re, done: /* Detach / deref previous nhg */ - if (old_nhg) + + if (old_nhg) { + /* + * Return true if we are deleting the previous NHE + * Note: we dont check the return value of the function anywhere + * except at rib_handle_nhg_replace(). + */ + if (old_nhg->refcnt == 1) + ret = 1; + zebra_nhg_decrement_ref(old_nhg); + } return ret; } -void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, - struct nhg_hash_entry *new_entry) +int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry) { struct zebra_router_table *zrt; struct route_node *rn; struct route_entry *re, *next; + int ret = 0; if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p", @@ -451,10 +462,17 @@ void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, rn = srcdest_route_next(rn)) { RNODE_FOREACH_RE_SAFE (rn, re, next) { if (re->nhe && re->nhe == old_entry) - route_entry_update_nhe(re, new_entry); + ret += route_entry_update_nhe(re, + new_entry); } } } + + /* + * if ret > 0, some previous re->nhe has freed the address to which + * old_entry is pointing. + */ + return ret; } struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, -- 2.39.5