diff options
| author | Rajasekar Raja <rajasekarr@nvidia.com> | 2023-08-17 00:47:05 -0700 |
|---|---|---|
| committer | Donatas Abraitis <donatas@opensourcerouting.org> | 2023-09-05 16:45:46 +0300 |
| commit | 6b26eb543d7074fc064614f754a6bdc24799ff4c (patch) | |
| tree | 15affae16a9cfebfa98f19d0556df716d297dfaa | |
| parent | d55c06ff26c7b984e83de1331d0adecfedc5a26c (diff) | |
zebra: Fix zebra crash when replacing NHE during shutdown
During replace of a NHE from upper proto in zebra_nhg_proto_add(),
- rib_handle_nhg_replace() is invoked with old NHE where we walk all
RNs/REs & replace the re->nhe whose address points to old NHE.
- In this walk, if prev re->nhe refcnt is decremented to 0, we free up
the memory which the old NHE is pointing to.
Later in zebra_nhg_proto_add(), we end up accessing this freed memory
and crash.
Logs:
1380766 2023/08/16 22:34:11.994671 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (70312519[2756/2762/2810]) 2 => 1
1380773 2023/08/16 22:34:11.994678 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (70312519[2756/2762/2810]) 1 => 0
1380777 2023/08/16 22:34:11.994844 ZEBRA: [JE46R-G2NEE] zebra_nhg_release: nhe 0x56091d890840 (70312519[2756/2762/2810])
1380778 2023/08/16 22:34:11.994849 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (70312519[2756/2762/2810]), refcnt 0
1380782 2023/08/16 22:34:11.995000 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (0[]), refcnt 0
1380783 2023/08/16 22:34:11.995011 ZEBRA: lib/memory.c:84: mt_count_free(): assertion (mt->n_alloc) failed
Backtrace:
0 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
1 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
2 0x00007f833f636648 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
3 0x00007f833f63cd6a in ?? () from /lib/x86_64-linux-gnu/libc.so.6
4 0x00007f833f63cfb4 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
5 0x00007f833f63fbc8 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
6 0x00007f833f64172a in malloc () from /lib/x86_64-linux-gnu/libc.so.6
7 0x00007f833f6c3fd2 in backtrace_symbols () from /lib/x86_64-linux-gnu/libc.so.6
8 0x00007f833f9013fc in zlog_backtrace_sigsafe (priority=priority@entry=2, program_counter=program_counter@entry=0x7f833f5f48eb <raise+267>) at lib/log.c:222
9 0x00007f833f901593 in zlog_signal (signo=signo@entry=6, action=action@entry=0x7f833f988ee8 "aborting...", siginfo_v=siginfo_v@entry=0x7ffee1ce4a30,
program_counter=program_counter@entry=0x7f833f5f48eb <raise+267>) at lib/log.c:154
10 0x00007f833f92dbd1 in core_handler (signo=6, siginfo=0x7ffee1ce4a30, context=<optimized out>) at lib/sigevent.c:254
11 <signal handler called>
12 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
13 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
14 0x00007f833f958f96 in _zlog_assert_failed (xref=xref@entry=0x7f833f9e4080 <_xref.10705>, extra=extra@entry=0x0) at lib/zlog.c:680
15 0x00007f833f905400 in mt_count_free (mt=0x7f833fa02800 <MTYPE_NH_LABEL>, ptr=0x51) at lib/memory.c:84
16 mt_count_free (ptr=0x51, mt=0x7f833fa02800 <MTYPE_NH_LABEL>) at lib/memory.c:80
17 qfree (mt=0x7f833fa02800 <MTYPE_NH_LABEL>, ptr=0x51) at lib/memory.c:140
18 0x00007f833f90799c in nexthop_del_labels (nexthop=nexthop@entry=0x56091d776640) at lib/nexthop.c:563
19 0x00007f833f907b91 in nexthop_free (nexthop=0x56091d776640) at lib/nexthop.c:393
20 0x00007f833f907be8 in nexthops_free (nexthop=<optimized out>) at lib/nexthop.c:408
21 0x000056091c21aa76 in zebra_nhg_free_members (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628
22 zebra_nhg_free (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628
23 0x000056091c21bab2 in zebra_nhg_proto_add (id=<optimized out>, type=9, instance=<optimized out>, session=0, nhg=nhg@entry=0x56091d7da028, afi=afi@entry=AFI_UNSPEC)
at zebra/zebra_nhg.c:3532
24 0x000056091c22bc4e in process_subq_nhg (lnode=0x56091d88c540) at zebra/zebra_rib.c:2689
25 process_subq (qindex=META_QUEUE_NHG, subq=0x56091d24cea0) at zebra/zebra_rib.c:3290
26 meta_queue_process (dummy=<optimized out>, data=0x56091d24d4c0) at zebra/zebra_rib.c:3343
27 0x00007f833f9492c8 in work_queue_run (thread=0x7ffee1ce55a0) at lib/workqueue.c:285
28 0x00007f833f93f60d in thread_call (thread=thread@entry=0x7ffee1ce55a0) at lib/thread.c:2008
29 0x00007f833f8f9888 in frr_run (master=0x56091d068660) at lib/libfrr.c:1223
30 0x000056091c1b8366 in main (argc=12, argv=0x7ffee1ce5988) at zebra/main.c:551
Issue: 3492162
Ticket# 3492162
Signed-off-by: Chirag Shah <chirag@nvidia.com>
Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
| -rw-r--r-- | zebra/rib.h | 4 | ||||
| -rw-r--r-- | zebra/zebra_nhg.c | 39 | ||||
| -rw-r--r-- | zebra/zebra_rib.c | 26 |
3 files changed, 49 insertions, 20 deletions
diff --git a/zebra/rib.h b/zebra/rib.h index dd22d79835..a02a461e89 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -344,8 +344,8 @@ int route_entry_update_nhe(struct route_entry *re, struct nhg_hash_entry *new_nhghe); /* NHG replace has happend, we have to update route_entry pointers to new one */ -void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, - struct nhg_hash_entry *new_entry); +int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry); #define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re) extern void _route_entry_dump(const char *func, union prefixconstptr pp, diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 58fa245eba..b3336abc3c 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -3320,6 +3320,7 @@ struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type, struct nhg_connected *rb_node_dep = NULL; struct nexthop *newhop; bool replace = false; + int ret = 0; if (!nhg->nexthop) { if (IS_ZEBRA_DEBUG_NHG) @@ -3417,22 +3418,32 @@ struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type, if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED)) zebra_nhg_increment_ref(old); - rib_handle_nhg_replace(old, new); + ret = rib_handle_nhg_replace(old, new); + if (ret) + /* + * if ret > 0, some previous re->nhe has freed the + * address to which old_entry is pointing. Hence mark + * the old NHE as NULL + */ + old = NULL; + else { + /* We have to decrement its singletons + * because some might not exist in NEW. + */ + if (!zebra_nhg_depends_is_empty(old)) { + frr_each (nhg_connected_tree, &old->nhg_depends, + rb_node_dep) + zebra_nhg_decrement_ref( + rb_node_dep->nhe); + } - /* We have to decrement its singletons - * because some might not exist in NEW. - */ - if (!zebra_nhg_depends_is_empty(old)) { - frr_each (nhg_connected_tree, &old->nhg_depends, - rb_node_dep) - zebra_nhg_decrement_ref(rb_node_dep->nhe); + /* Dont call the dec API, we dont want to uninstall the + * ID */ + old->refcnt = 0; + THREAD_OFF(old->timer); + zebra_nhg_free(old); + old = NULL; } - - /* Dont call the dec API, we dont want to uninstall the ID */ - old->refcnt = 0; - THREAD_OFF(old->timer); - zebra_nhg_free(old); - old = NULL; } if (IS_ZEBRA_DEBUG_NHG_DETAIL) diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index bfe40f1026..186740b013 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -398,18 +398,29 @@ int route_entry_update_nhe(struct route_entry *re, done: /* Detach / deref previous nhg */ - if (old_nhg) + + if (old_nhg) { + /* + * Return true if we are deleting the previous NHE + * Note: we dont check the return value of the function anywhere + * except at rib_handle_nhg_replace(). + */ + if (old_nhg->refcnt == 1) + ret = 1; + zebra_nhg_decrement_ref(old_nhg); + } return ret; } -void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, - struct nhg_hash_entry *new_entry) +int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry) { struct zebra_router_table *zrt; struct route_node *rn; struct route_entry *re, *next; + int ret = 0; if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p", @@ -421,10 +432,17 @@ void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, rn = srcdest_route_next(rn)) { RNODE_FOREACH_RE_SAFE (rn, re, next) { if (re->nhe && re->nhe == old_entry) - route_entry_update_nhe(re, new_entry); + ret += route_entry_update_nhe( + re, new_entry); } } } + + /* + * if ret > 0, some previous re->nhe has freed the address to which + * old_entry is pointing. + */ + return ret; } struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, |
