During replace of a NHE from upper proto in zebra_nhg_proto_add(),
- rib_handle_nhg_replace() is invoked with old NHE where we walk all
RNs/REs & replace the re->nhe whose address points to old NHE.
- In this walk, if prev re->nhe refcnt is decremented to 0, we free up
the memory which the old NHE is pointing to.
Later in zebra_nhg_proto_add(), we end up accessing this freed memory
and crash.
Logs:
1380766 2023/08/16 22:34:11.994671 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (
70312519[2756/2762/2810]) 2 => 1
1380773 2023/08/16 22:34:11.994678 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (
70312519[2756/2762/2810]) 1 => 0
1380777 2023/08/16 22:34:11.994844 ZEBRA: [JE46R-G2NEE] zebra_nhg_release: nhe 0x56091d890840 (
70312519[2756/2762/2810])
1380778 2023/08/16 22:34:11.994849 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (
70312519[2756/2762/2810]), refcnt 0
1380782 2023/08/16 22:34:11.995000 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (0[]), refcnt 0
1380783 2023/08/16 22:34:11.995011 ZEBRA: lib/memory.c:84: mt_count_free(): assertion (mt->n_alloc) failed
Backtrace:
0 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
1 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
2 0x00007f833f636648 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
3 0x00007f833f63cd6a in ?? () from /lib/x86_64-linux-gnu/libc.so.6
4 0x00007f833f63cfb4 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
5 0x00007f833f63fbc8 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
6 0x00007f833f64172a in malloc () from /lib/x86_64-linux-gnu/libc.so.6
7 0x00007f833f6c3fd2 in backtrace_symbols () from /lib/x86_64-linux-gnu/libc.so.6
8 0x00007f833f9013fc in zlog_backtrace_sigsafe (priority=priority@entry=2, program_counter=program_counter@entry=0x7f833f5f48eb <raise+267>) at lib/log.c:222
9 0x00007f833f901593 in zlog_signal (signo=signo@entry=6, action=action@entry=0x7f833f988ee8 "aborting...", siginfo_v=siginfo_v@entry=0x7ffee1ce4a30,
program_counter=program_counter@entry=0x7f833f5f48eb <raise+267>) at lib/log.c:154
10 0x00007f833f92dbd1 in core_handler (signo=6, siginfo=0x7ffee1ce4a30, context=<optimized out>) at lib/sigevent.c:254
11 <signal handler called>
12 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
13 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
14 0x00007f833f958f96 in _zlog_assert_failed (xref=xref@entry=0x7f833f9e4080 <_xref.10705>, extra=extra@entry=0x0) at lib/zlog.c:680
15 0x00007f833f905400 in mt_count_free (mt=0x7f833fa02800 <MTYPE_NH_LABEL>, ptr=0x51) at lib/memory.c:84
16 mt_count_free (ptr=0x51, mt=0x7f833fa02800 <MTYPE_NH_LABEL>) at lib/memory.c:80
17 qfree (mt=0x7f833fa02800 <MTYPE_NH_LABEL>, ptr=0x51) at lib/memory.c:140
18 0x00007f833f90799c in nexthop_del_labels (nexthop=nexthop@entry=0x56091d776640) at lib/nexthop.c:563
19 0x00007f833f907b91 in nexthop_free (nexthop=0x56091d776640) at lib/nexthop.c:393
20 0x00007f833f907be8 in nexthops_free (nexthop=<optimized out>) at lib/nexthop.c:408
21 0x000056091c21aa76 in zebra_nhg_free_members (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628
22 zebra_nhg_free (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628
23 0x000056091c21bab2 in zebra_nhg_proto_add (id=<optimized out>, type=9, instance=<optimized out>, session=0, nhg=nhg@entry=0x56091d7da028, afi=afi@entry=AFI_UNSPEC)
at zebra/zebra_nhg.c:3532
24 0x000056091c22bc4e in process_subq_nhg (lnode=0x56091d88c540) at zebra/zebra_rib.c:2689
25 process_subq (qindex=META_QUEUE_NHG, subq=0x56091d24cea0) at zebra/zebra_rib.c:3290
26 meta_queue_process (dummy=<optimized out>, data=0x56091d24d4c0) at zebra/zebra_rib.c:3343
27 0x00007f833f9492c8 in work_queue_run (thread=0x7ffee1ce55a0) at lib/workqueue.c:285
28 0x00007f833f93f60d in thread_call (thread=thread@entry=0x7ffee1ce55a0) at lib/thread.c:2008
29 0x00007f833f8f9888 in frr_run (master=0x56091d068660) at lib/libfrr.c:1223
30 0x000056091c1b8366 in main (argc=12, argv=0x7ffee1ce5988) at zebra/main.c:551
Issue:
3492162
Ticket#
3492162
Signed-off-by: Chirag Shah <chirag@nvidia.com>
Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
(cherry picked from commit
27ccfd9aa69f05646439e46db6e25945a9ce8c19)
struct nhg_hash_entry *new_nhghe);
/* NHG replace has happend, we have to update route_entry pointers to new one */
-void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry,
- struct nhg_hash_entry *new_entry);
+int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry,
+ struct nhg_hash_entry *new_entry);
#define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re)
extern void _route_entry_dump(const char *func, union prefixconstptr pp,
struct nhg_connected *rb_node_dep = NULL;
struct nexthop *newhop;
bool replace = false;
+ int ret = 0;
if (!nhg->nexthop) {
if (IS_ZEBRA_DEBUG_NHG)
if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED))
zebra_nhg_increment_ref(old);
- rib_handle_nhg_replace(old, new);
+ ret = rib_handle_nhg_replace(old, new);
+ if (ret)
+ /*
+ * if ret > 0, some previous re->nhe has freed the
+ * address to which old_entry is pointing. Hence mark
+ * the old NHE as NULL
+ */
+ old = NULL;
+ else {
+ /* We have to decrement its singletons
+ * because some might not exist in NEW.
+ */
+ if (!zebra_nhg_depends_is_empty(old)) {
+ frr_each (nhg_connected_tree, &old->nhg_depends,
+ rb_node_dep)
+ zebra_nhg_decrement_ref(
+ rb_node_dep->nhe);
+ }
- /* We have to decrement its singletons
- * because some might not exist in NEW.
- */
- if (!zebra_nhg_depends_is_empty(old)) {
- frr_each (nhg_connected_tree, &old->nhg_depends,
- rb_node_dep)
- zebra_nhg_decrement_ref(rb_node_dep->nhe);
+ /* Dont call the dec API, we dont want to uninstall the ID */
+ old->refcnt = 0;
+ EVENT_OFF(old->timer);
+ zebra_nhg_free(old);
+ old = NULL;
}
-
- /* Dont call the dec API, we dont want to uninstall the ID */
- old->refcnt = 0;
- EVENT_OFF(old->timer);
- zebra_nhg_free(old);
- old = NULL;
}
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
done:
/* Detach / deref previous nhg */
- if (old_nhg)
+
+ if (old_nhg) {
+ /*
+ * Return true if we are deleting the previous NHE
+ * Note: we dont check the return value of the function anywhere
+ * except at rib_handle_nhg_replace().
+ */
+ if (old_nhg->refcnt == 1)
+ ret = 1;
+
zebra_nhg_decrement_ref(old_nhg);
+ }
return ret;
}
-void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry,
- struct nhg_hash_entry *new_entry)
+int rib_handle_nhg_replace(struct nhg_hash_entry *old_entry,
+ struct nhg_hash_entry *new_entry)
{
struct zebra_router_table *zrt;
struct route_node *rn;
struct route_entry *re, *next;
+ int ret = 0;
if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL)
zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p",
rn = srcdest_route_next(rn)) {
RNODE_FOREACH_RE_SAFE (rn, re, next) {
if (re->nhe && re->nhe == old_entry)
- route_entry_update_nhe(re, new_entry);
+ ret += route_entry_update_nhe(re,
+ new_entry);
}
}
}
+
+ /*
+ * if ret > 0, some previous re->nhe has freed the address to which
+ * old_entry is pointing.
+ */
+ return ret;
}
struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id,