From 1629c05924fe96ab487d3ba6bafbe7f11679274c Mon Sep 17 00:00:00 2001 From: "G. Paul Ziemba" Date: Sun, 30 Mar 2025 15:43:04 -0700 Subject: [PATCH] bgpd: rfapi: track outstanding rib and import timers, free mem at exit While here, also make "VPN SAFI clear" test wait for clear result (tests/topotests/bgp_rfapi_basic_sanity{,_config2}) Original RFAPI code relied on the frr timer system to remember various allocations that were supposed to be freed at future times rather than manage a parallel database. However, if bgpd is terminated before the times expire, those pending allocations are marked as memory leaks, even though they wouldn't be leaks under normal operation. This change adds some hash tables to track these outstanding allocations that are associated with pending timers, and uses those tables to free the allocations when bgpd exits. Signed-off-by: G. Paul Ziemba --- bgpd/bgpd.c | 3 + bgpd/rfapi/rfapi.c | 8 + bgpd/rfapi/rfapi_backend.h | 1 + bgpd/rfapi/rfapi_import.c | 66 +++++++++ bgpd/rfapi/rfapi_import.h | 3 + bgpd/rfapi/rfapi_rib.c | 138 ++++++++++++------ bgpd/rfapi/rfapi_rib.h | 3 + .../bgp_rfapi_basic_sanity/r3/bgpd.conf | 1 + .../scripts/cleanup_all.py | 8 +- .../r3/bgpd.conf | 1 + 10 files changed, 181 insertions(+), 51 deletions(-) diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index efb2c00fa5..2a17dd668d 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -8846,6 +8846,9 @@ void bgp_terminate(void) EVENT_OFF(bm->t_bgp_zebra_l3_vni); bgp_mac_finish(); +#if ENABLE_BGP_VNC + rfapi_terminate(); +#endif } struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp, diff --git a/bgpd/rfapi/rfapi.c b/bgpd/rfapi/rfapi.c index 241cbcb359..dcd1400e69 100644 --- a/bgpd/rfapi/rfapi.c +++ b/bgpd/rfapi/rfapi.c @@ -3546,6 +3546,8 @@ DEFUN (skiplist_debug_cli, void rfapi_init(void) { + rfapi_rib_init(); + rfapi_import_init(); bgp_rfapi_cfg_init(); vnc_debug_init(); @@ -3576,6 +3578,12 @@ void rfapi_init(void) rfapi_vty_init(); } +void rfapi_terminate(void) +{ + rfapi_import_terminate(); + rfapi_rib_terminate(); +} + #ifdef DEBUG_RFAPI static void rfapi_print_exported(struct bgp *bgp) { diff --git a/bgpd/rfapi/rfapi_backend.h b/bgpd/rfapi/rfapi_backend.h index 32ea0a2821..b89df74b9a 100644 --- a/bgpd/rfapi/rfapi_backend.h +++ b/bgpd/rfapi/rfapi_backend.h @@ -14,6 +14,7 @@ #include "bgpd/bgp_nexthop.h" extern void rfapi_init(void); +extern void rfapi_terminate(void); extern void vnc_zebra_init(struct event_loop *master); extern void vnc_zebra_destroy(void); diff --git a/bgpd/rfapi/rfapi_import.c b/bgpd/rfapi/rfapi_import.c index 99d8bcfce4..e2ee3b8e1c 100644 --- a/bgpd/rfapi/rfapi_import.c +++ b/bgpd/rfapi/rfapi_import.c @@ -52,14 +52,23 @@ #undef DEBUG_IT_NODES #undef DEBUG_BI_SEARCH +/* + * Hash to keep track of outstanding timers so we can force them to + * expire at shutdown time, thus freeing their allocated memory. + */ +PREDECL_HASH(rwcb); + /* * Allocated for each withdraw timer instance; freed when the timer * expires or is canceled */ struct rfapi_withdraw { + struct rwcb_item rwcbi; + struct rfapi_import_table *import_table; struct agg_node *node; struct bgp_path_info *info; + void (*timer_service_func)(struct event *t); /* for cleanup */ safi_t safi; /* used only for bulk operations */ /* * For import table node reference count checking (i.e., debugging). @@ -72,6 +81,19 @@ struct rfapi_withdraw { int lockoffset; }; +static int _rwcb_cmp(const struct rfapi_withdraw *w1, const struct rfapi_withdraw *w2) +{ + return (w1 != w2); +} + +static uint32_t _rwcb_hash(const struct rfapi_withdraw *w) +{ + return (uintptr_t)w & 0xffffffff; +} + +DECLARE_HASH(rwcb, struct rfapi_withdraw, rwcbi, _rwcb_cmp, _rwcb_hash); +static struct rwcb_head _rwcbhash; + /* * DEBUG FUNCTION * Count remote routes and compare with actively-maintained values. @@ -826,6 +848,7 @@ static void rfapiBgpInfoChainFree(struct bgp_path_info *bpi) struct rfapi_withdraw *wcb = EVENT_ARG(bpi->extra->vnc->vnc.import.timer); + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); EVENT_OFF(bpi->extra->vnc->vnc.import.timer); } @@ -2349,6 +2372,7 @@ static void rfapiWithdrawTimerVPN(struct event *t) /* This callback is responsible for the withdraw object's memory */ if (early_exit) { + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); return; } @@ -2462,6 +2486,7 @@ done: RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_MPLS_VPN, 1 + wcb->lockoffset); agg_unlock_node(wcb->node); /* decr ref count */ + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); } @@ -2705,6 +2730,7 @@ static void rfapiWithdrawTimerEncap(struct event *t) done: RFAPI_CHECK_REFCOUNT(wcb->node, SAFI_ENCAP, 1); agg_unlock_node(wcb->node); /* decr ref count */ + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); skiplist_free(vpn_node_sl); } @@ -2754,6 +2780,8 @@ rfapiBiStartWithdrawTimer(struct rfapi_import_table *import_table, wcb->node = rn; wcb->info = bpi; wcb->import_table = import_table; + wcb->timer_service_func = timer_service_func; + rwcb_add(&_rwcbhash, wcb); bgp_attr_intern(bpi->attr); if (VNC_DEBUG(VERBOSE)) { @@ -2819,6 +2847,7 @@ static void rfapiExpireEncapNow(struct rfapi_import_table *it, wcb->info = bpi; wcb->node = rn; wcb->import_table = it; + rwcb_add(&_rwcbhash, wcb); memset(&t, 0, sizeof(t)); t.arg = wcb; rfapiWithdrawTimerEncap(&t); /* frees wcb */ @@ -3057,6 +3086,7 @@ static void rfapiBgpInfoFilteredImportEncap( struct rfapi_withdraw *wcb = EVENT_ARG( bpi->extra->vnc->vnc.import.timer); + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); EVENT_OFF(bpi->extra->vnc->vnc.import .timer); @@ -3083,6 +3113,7 @@ static void rfapiBgpInfoFilteredImportEncap( wcb->info = bpi; wcb->node = rn; wcb->import_table = import_table; + rwcb_add(&_rwcbhash, wcb); memset(&t, 0, sizeof(t)); t.arg = wcb; rfapiWithdrawTimerEncap( @@ -3149,6 +3180,7 @@ static void rfapiBgpInfoFilteredImportEncap( struct rfapi_withdraw *wcb = EVENT_ARG(bpi->extra->vnc->vnc.import.timer); + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); EVENT_OFF(bpi->extra->vnc->vnc.import.timer); } @@ -3293,6 +3325,7 @@ static void rfapiExpireVpnNow(struct rfapi_import_table *it, wcb->node = rn; wcb->import_table = it; wcb->lockoffset = lockoffset; + rwcb_add(&_rwcbhash, wcb); memset(&t, 0, sizeof(t)); t.arg = wcb; rfapiWithdrawTimerVPN(&t); /* frees wcb */ @@ -3510,6 +3543,7 @@ void rfapiBgpInfoFilteredImportVPN( struct rfapi_withdraw *wcb = EVENT_ARG( bpi->extra->vnc->vnc.import.timer); + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); EVENT_OFF(bpi->extra->vnc->vnc.import .timer); @@ -3729,6 +3763,7 @@ void rfapiBgpInfoFilteredImportVPN( struct rfapi_withdraw *wcb = EVENT_ARG(bpi->extra->vnc->vnc.import.timer); + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); EVENT_OFF(bpi->extra->vnc->vnc.import.timer); } @@ -4474,6 +4509,7 @@ static void rfapiDeleteRemotePrefixesIt( RFAPI_UPDATE_ITABLE_COUNT( bpi, wcb->import_table, afi, 1); + rwcb_del(&_rwcbhash, wcb); XFREE(MTYPE_RFAPI_WITHDRAW, wcb); EVENT_OFF(bpi->extra->vnc->vnc @@ -4798,3 +4834,33 @@ uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime) else return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY; } + +void rfapi_import_init(void) +{ + rwcb_init(&_rwcbhash); +} + +void rfapi_import_terminate(void) +{ + struct rfapi_withdraw *wcb; + struct bgp_path_info *bpi; + void (*timer_service_func)(struct event *t); + struct event t; + + vnc_zlog_debug_verbose("%s: cleaning up %zu pending timers", __func__, + rwcb_count(&_rwcbhash)); + + /* + * clean up memory allocations stored in pending timers + */ + while ((wcb = rwcb_pop(&_rwcbhash))) { + bpi = wcb->info; + assert(wcb == EVENT_ARG(bpi->extra->vnc->vnc.import.timer)); + EVENT_OFF(bpi->extra->vnc->vnc.import.timer); + + timer_service_func = wcb->timer_service_func; + memset(&t, 0, sizeof(t)); + t.arg = wcb; + (*timer_service_func)(&t); /* frees wcb */ + } +} diff --git a/bgpd/rfapi/rfapi_import.h b/bgpd/rfapi/rfapi_import.h index 1a37e1c2db..536b8f0525 100644 --- a/bgpd/rfapi/rfapi_import.h +++ b/bgpd/rfapi/rfapi_import.h @@ -225,4 +225,7 @@ extern void rfapiCountAllItRoutes(int *pALRcount, /* active local routes */ --------------------------------------------*/ extern uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime); +extern void rfapi_import_init(void); +extern void rfapi_import_terminate(void); + #endif /* QUAGGA_HGP_RFAPI_IMPORT_H */ diff --git a/bgpd/rfapi/rfapi_rib.c b/bgpd/rfapi/rfapi_rib.c index 53e416b2ee..fe6ad50f92 100644 --- a/bgpd/rfapi/rfapi_rib.c +++ b/bgpd/rfapi/rfapi_rib.c @@ -18,6 +18,7 @@ #include "lib/log.h" #include "lib/skiplist.h" #include "lib/workqueue.h" +#include #include "bgpd/bgpd.h" #include "bgpd/bgp_route.h" @@ -40,12 +41,11 @@ #define DEBUG_PENDING_DELETE_ROUTE 0 #define DEBUG_NHL 0 #define DEBUG_RIB_SL_RD 0 -#define DEBUG_CLEANUP 0 +#define DEBUG_CLEANUP 0 /* forward decl */ #if DEBUG_NHL -static void rfapiRibShowRibSl(void *stream, struct prefix *pfx, - struct skiplist *sl); +static void rfapiRibShowRibSl(void *stream, const struct prefix *pfx, struct skiplist *sl); #endif /* @@ -234,9 +234,45 @@ void rfapiFreeRfapiVnOptionChain(struct rfapi_vn_option *p) } +/* + * Hash to keep track of outstanding timers so we can force them to + * expire at shutdown time, thus freeing their allocated memory. + */ +PREDECL_HASH(rrtcb); + +/* + * Timer control block for recently-deleted and expired routes + */ +struct rfapi_rib_tcb { + struct rrtcb_item tcbi; + + struct rfapi_descriptor *rfd; + struct skiplist *sl; + struct rfapi_info *ri; + struct agg_node *rn; + int flags; +#define RFAPI_RIB_TCB_FLAG_DELETED 0x00000001 +}; + +static int _rrtcb_cmp(const struct rfapi_rib_tcb *t1, const struct rfapi_rib_tcb *t2) +{ + return (t1 != t2); +} + +static uint32_t _rrtcb_hash(const struct rfapi_rib_tcb *t) +{ + return (uintptr_t)t & 0xffffffff; +} + +DECLARE_HASH(rrtcb, struct rfapi_rib_tcb, tcbi, _rrtcb_cmp, _rrtcb_hash); +static struct rrtcb_head _rrtcbhash; + static void rfapi_info_free(struct rfapi_info *goner) { if (goner) { +#if DEBUG_CLEANUP + zlog_debug("%s: ri %p, timer %p", __func__, goner, goner->timer); +#endif if (goner->tea_options) { rfapiFreeBgpTeaOptionChain(goner->tea_options); goner->tea_options = NULL; @@ -253,32 +289,19 @@ static void rfapi_info_free(struct rfapi_info *goner) struct rfapi_rib_tcb *tcb; tcb = EVENT_ARG(goner->timer); +#if DEBUG_CLEANUP + zlog_debug("%s: ri %p, tcb %p", __func__, goner, tcb); +#endif EVENT_OFF(goner->timer); + rrtcb_del(&_rrtcbhash, tcb); XFREE(MTYPE_RFAPI_RECENT_DELETE, tcb); } XFREE(MTYPE_RFAPI_INFO, goner); } } -/* - * Timer control block for recently-deleted and expired routes - */ -struct rfapi_rib_tcb { - struct rfapi_descriptor *rfd; - struct skiplist *sl; - struct rfapi_info *ri; - struct agg_node *rn; - int flags; -#define RFAPI_RIB_TCB_FLAG_DELETED 0x00000001 -}; - -/* - * remove route from rib - */ -static void rfapiRibExpireTimer(struct event *t) +static void _rfapiRibExpireTimer(struct rfapi_rib_tcb *tcb) { - struct rfapi_rib_tcb *tcb = EVENT_ARG(t); - RFAPI_RIB_CHECK_COUNTS(1, 0); /* @@ -309,11 +332,22 @@ static void rfapiRibExpireTimer(struct event *t) agg_unlock_node(tcb->rn); } + rrtcb_del(&_rrtcbhash, tcb); XFREE(MTYPE_RFAPI_RECENT_DELETE, tcb); RFAPI_RIB_CHECK_COUNTS(1, 0); } +/* + * remove route from rib + */ +static void rfapiRibExpireTimer(struct event *t) +{ + struct rfapi_rib_tcb *tcb = EVENT_ARG(t); + + _rfapiRibExpireTimer(tcb); +} + static void rfapiRibStartTimer(struct rfapi_descriptor *rfd, struct rfapi_info *ri, struct agg_node *rn, /* route node attached to */ @@ -349,6 +383,8 @@ static void rfapiRibStartTimer(struct rfapi_descriptor *rfd, event_add_timer(bm->master, rfapiRibExpireTimer, tcb, ri->lifetime, &ri->timer); + + rrtcb_add(&_rrtcbhash, tcb); } extern void rfapi_rib_key_init(struct prefix *prefix, /* may be NULL */ @@ -519,6 +555,7 @@ void rfapiRibClear(struct rfapi_descriptor *rfd) tcb = EVENT_ARG( ri->timer); EVENT_OFF(ri->timer); + rrtcb_del(&_rrtcbhash, tcb); XFREE(MTYPE_RFAPI_RECENT_DELETE, tcb); } @@ -852,11 +889,6 @@ static void process_pending_node(struct bgp *bgp, struct rfapi_descriptor *rfd, int rib_node_started_nonempty = 0; int sendingsomeroutes = 0; const struct prefix *p; -#if DEBUG_PROCESS_PENDING_NODE - unsigned int count_rib_initial = 0; - unsigned int count_pend_vn_initial = 0; - unsigned int count_pend_cost_initial = 0; -#endif assert(pn); p = agg_node_get_prefix(pn); @@ -885,19 +917,6 @@ static void process_pending_node(struct bgp *bgp, struct rfapi_descriptor *rfd, slPendPt = (struct skiplist *)(pn->aggregate); lPendCost = (struct list *)(pn->info); -#if DEBUG_PROCESS_PENDING_NODE - /* debugging */ - if (slRibPt) - count_rib_initial = skiplist_count(slRibPt); - - if (slPendPt) - count_pend_vn_initial = skiplist_count(slPendPt); - - if (lPendCost && lPendCost != (struct list *)1) - count_pend_cost_initial = lPendCost->count; -#endif - - /* * Handle special case: delete all routes at prefix */ @@ -920,6 +939,7 @@ static void process_pending_node(struct bgp *bgp, struct rfapi_descriptor *rfd, tcb = EVENT_ARG(ri->timer); EVENT_OFF(ri->timer); + rrtcb_del(&_rrtcbhash, tcb); XFREE(MTYPE_RFAPI_RECENT_DELETE, tcb); } @@ -1005,6 +1025,7 @@ static void process_pending_node(struct bgp *bgp, struct rfapi_descriptor *rfd, tcb = EVENT_ARG(ori->timer); EVENT_OFF(ori->timer); + rrtcb_del(&_rrtcbhash, tcb); XFREE(MTYPE_RFAPI_RECENT_DELETE, tcb); } @@ -1017,6 +1038,11 @@ static void process_pending_node(struct bgp *bgp, struct rfapi_descriptor *rfd, #endif } else { +#if DEBUG_PROCESS_PENDING_NODE + vnc_zlog_debug_verbose("%s: slRibPt ri %p matched in pending list", + __func__, ori); +#endif + /* * Found in pending list. If same lifetime, * cost, options, @@ -1040,14 +1066,10 @@ static void process_pending_node(struct bgp *bgp, struct rfapi_descriptor *rfd, rfapi_info_free( ri); /* grr... */ } - } #if DEBUG_PROCESS_PENDING_NODE - vnc_zlog_debug_verbose( - "%s: slRibPt ri %p matched in pending list, %s", - __func__, ori, - (same ? "same info" - : "different info")); + vnc_zlog_debug_verbose("%s: same info", __func__); #endif + } } } /* @@ -1339,6 +1361,7 @@ callback: tcb = EVENT_ARG(ri->timer); EVENT_OFF(ri->timer); + rrtcb_del(&_rrtcbhash, tcb); XFREE(MTYPE_RFAPI_RECENT_DELETE, tcb); } RFAPI_RIB_CHECK_COUNTS(0, delete_list->count); @@ -2285,8 +2308,7 @@ static int print_rib_sl(int (*fp)(void *, const char *, ...), struct vty *vty, /* * This one is for debugging (set stream to NULL to send output to log) */ -static void rfapiRibShowRibSl(void *stream, struct prefix *pfx, - struct skiplist *sl) +static void rfapiRibShowRibSl(void *stream, const struct prefix *pfx, struct skiplist *sl) { int (*fp)(void *, const char *, ...); struct vty *vty; @@ -2426,3 +2448,25 @@ void rfapiRibShowResponses(void *stream, struct prefix *pfx_match, fp(out, "\n"); } } + +void rfapi_rib_init(void) +{ + rrtcb_init(&_rrtcbhash); +} + +void rfapi_rib_terminate(void) +{ + struct rfapi_rib_tcb *tcb; + + vnc_zlog_debug_verbose("%s: cleaning up %zu pending timers", __func__, + rrtcb_count(&_rrtcbhash)); + + /* + * Clean up memory allocations stored in pending timers + */ + while ((tcb = rrtcb_pop(&_rrtcbhash))) { + assert(tcb == EVENT_ARG(tcb->ri->timer)); + EVENT_OFF(tcb->ri->timer); + _rfapiRibExpireTimer(tcb); /* deletes hash entry, frees tcb */ + } +} diff --git a/bgpd/rfapi/rfapi_rib.h b/bgpd/rfapi/rfapi_rib.h index 5fa838bd34..3e34f26fcd 100644 --- a/bgpd/rfapi/rfapi_rib.h +++ b/bgpd/rfapi/rfapi_rib.h @@ -138,4 +138,7 @@ extern int rfapi_rib_key_cmp(const void *k1, const void *k2); extern void rfapiAdbFree(struct rfapi_adb *adb); +extern void rfapi_rib_init(void); +extern void rfapi_rib_terminate(void); + #endif /* QUAGGA_HGP_RFAPI_RIB_H */ diff --git a/tests/topotests/bgp_rfapi_basic_sanity/r3/bgpd.conf b/tests/topotests/bgp_rfapi_basic_sanity/r3/bgpd.conf index 2210f24589..e1c533c1f3 100644 --- a/tests/topotests/bgp_rfapi_basic_sanity/r3/bgpd.conf +++ b/tests/topotests/bgp_rfapi_basic_sanity/r3/bgpd.conf @@ -4,6 +4,7 @@ hostname r3 password zebra log stdout notifications log commands +#debug bgp vnc verbose router bgp 5226 bgp router-id 3.3.3.3 bgp cluster-id 3.3.3.3 diff --git a/tests/topotests/bgp_rfapi_basic_sanity/scripts/cleanup_all.py b/tests/topotests/bgp_rfapi_basic_sanity/scripts/cleanup_all.py index 7201ac8111..aaa43d5a7e 100644 --- a/tests/topotests/bgp_rfapi_basic_sanity/scripts/cleanup_all.py +++ b/tests/topotests/bgp_rfapi_basic_sanity/scripts/cleanup_all.py @@ -91,10 +91,10 @@ luCommand( ) num = "0 exist" -luCommand("r1", 'vtysh -c "show bgp ipv4 vpn"', num, "pass", "VPN SAFI clear") -luCommand("r2", 'vtysh -c "show bgp ipv4 vpn"', num, "pass", "VPN SAFI clear") -luCommand("r3", 'vtysh -c "show bgp ipv4 vpn"', num, "pass", "VPN SAFI clear") -luCommand("r4", 'vtysh -c "show bgp ipv4 vpn"', num, "pass", "VPN SAFI clear") +luCommand("r1", 'vtysh -c "show bgp ipv4 vpn"', num, "wait", "VPN SAFI clear") +luCommand("r2", 'vtysh -c "show bgp ipv4 vpn"', num, "wait", "VPN SAFI clear") +luCommand("r3", 'vtysh -c "show bgp ipv4 vpn"', num, "wait", "VPN SAFI clear") +luCommand("r4", 'vtysh -c "show bgp ipv4 vpn"', num, "wait", "VPN SAFI clear") luCommand( "r1", diff --git a/tests/topotests/bgp_rfapi_basic_sanity_config2/r3/bgpd.conf b/tests/topotests/bgp_rfapi_basic_sanity_config2/r3/bgpd.conf index e74fc0b3de..04b10b4d22 100644 --- a/tests/topotests/bgp_rfapi_basic_sanity_config2/r3/bgpd.conf +++ b/tests/topotests/bgp_rfapi_basic_sanity_config2/r3/bgpd.conf @@ -4,6 +4,7 @@ hostname r3 password zebra log stdout notifications log commands +#debug bgp vnc verbose router bgp 5226 bgp router-id 3.3.3.3 bgp cluster-id 3.3.3.3 -- 2.39.5