From: sudhanshukumar22 Date: Sat, 31 Oct 2020 14:30:24 +0000 (-0700) Subject: bgpd:'bgpd' core generated on Leaf device with system-test config X-Git-Tag: base_8.0~460^2 X-Git-Url: https://git.puffer.fish/?a=commitdiff_plain;h=a4559740ea64ca2951be89d8b3a7ad5b0d47f9c4;p=mirror%2Ffrr.git bgpd:'bgpd' core generated on Leaf device with system-test config Description: aggregate member route was enqueued for recalculation while bgp instance was deleted. As part of aggregate member route deletion, the aggregate route is reinstalled with self-peer as source, but self-peer is already removed. Assert() for null peer pointer is path attribute aborts bgp. Problem Description/Summary : BGP crashed while cleaning up aggregate route as part of bgp instance deletion. ----------------------- Leaf-4(config)# Leaf-4(config)# no router bgp 65179 vrf Vrf-red Leaf-4(config)# no router bgp 65179 Leaf-4(config)# Leaf-4(config)# Leaf-4(config)# root@Leaf-4:~# Sep 26 15:38:21.257554 System is not ready - Core services are down ------------ router bgp 65179 bgp router-id 100.2.0.3 no bgp default ipv4-unicast bgp network import-check neighbor LeafToHostv4 peer-group neighbor LeafToHostv4 remote-as 65003 neighbor LeafToHostv6 peer-group neighbor LeafToHostv6 remote-as 65003 neighbor LeafToSpinev4 peer-group neighbor LeafToSpinev4 remote-as 65134 neighbor LeafToSpinev4 bfd neighbor LeafToSpinev6 peer-group neighbor LeafToSpinev6 remote-as 65134 neighbor LeafToSpinev6 bfd neighbor WindowsServer peer-group neighbor WindowsServer remote-as 65201 neighbor 155.1.0.4 peer-group LeafToSpinev4 neighbor 155.2.0.4 peer-group LeafToSpinev4 neighbor 2000:155:1::4 peer-group LeafToSpinev6 neighbor 2000:155:2::4 peer-group LeafToSpinev6 neighbor 172.16.11.2 peer-group WindowsServer neighbor 172.16.1.2 remote-as 65101 neighbor 2000:172:16:1::2 remote-as 65101 bgp listen limit 400 bgp listen range 133.3.0.0/16 peer-group LeafToHostv4 bgp listen range 2000:133:3::/48 peer-group LeafToHostv6 ! address-family ipv4 unicast aggregate-address 133.1.0.0/16 as-set aggregate-address 133.2.0.0/16 as-set aggregate-address 133.3.0.0/16 as-set aggregate-address 133.4.0.0/16 as-set redistribute connected neighbor LeafToHostv4 activate neighbor LeafToSpinev4 activate neighbor LeafToSpinev4 allowas-in 1 neighbor LeafToSpinev4 route-map spine_v4_export out neighbor WindowsServer activate neighbor 172.16.1.2 activate exit-address-family ! address-family ipv6 unicast aggregate-address 2000:133:1::/48 as-set aggregate-address 2000:133:2::/48 as-set aggregate-address 2000:133:3::/48 as-set aggregate-address 2000:133:4::/48 as-set redistribute connected .. ------------ (gdb) bt name=0x55607dd49090 <_FUNCTION_.23915> "bgp_path_info_add") at bgpd/bgpd.c:1159 name=name@entry=0x55607dd49090 <_FUNCTION_.23915> "bgp_path_info_add", peer=) at bgpd/bgpd.c:1158 pi=) at bgpd/bgp_route.c:313 afi=afi@entry=AFI_IP, safi=safi@entry=SAFI_UNICAST, p=p@entry=0x55607f1c4e10, origin=, aspath=0x55607f4bc8a0, community=, ecommunity=, lcommunity=, atomic_aggregate=0 '\000', aggregate=0x55607f1c4ee0) at bgpd/bgp_route.c:5926 aggr_p=, aggregate=, pi=0x55607f41f9f0, safi=SAFI_UNICAST, afi=AFI_IP, bgp=0x55607eeba5d0) at bgpd/bgp_route.c:6385 del=del@entry=0x55607f41f9f0, afi=afi@entry=AFI_IP, --Type to continue, or q to quit-- safi=safi@entry=SAFI_UNICAST) at bgpd/bgp_route.c:6446 pi=0x55607f41f9f0, peer=0x55607ef22c10, afi=AFI_IP, safi=SAFI_UNICAST) at bgpd/bgp_route.c:2885 data=) at bgpd/bgp_route.c:4125 at lib/workqueue.c:291 at lib/thread.c:1540 at bgpd/bgp_main.c:498 (gdb) fr 5 name=name@entry=0x55607dd49090 <_FUNCTION_.23915> "bgp_path_info_add", peer=) at bgpd/bgpd.c:1158 1158 bgpd/bgpd.c: No such file or directory. (gdb) fr 10 pi=0x55607f41f9f0, peer=0x55607ef22c10, afi=AFI_IP, safi=SAFI_UNICAST) at bgpd/bgp_route.c:2885 2885 bgpd/bgp_route.c: No such file or directory. (gdb) p peer->lock $2 = 210 (gdb) p peer->status $3 = 8 (gdb) (gdb) p bgp $11 = (struct bgp *) 0x56121ba315d0 (gdb) p bgp->peer_self $12 = (struct peer *) 0x0 (gdb) p bgp->name $13 = 0x0 (gdb) p bgp->name_pretty $14 = 0x56121bb046a0 "VRF default" (gdb) p bgp->inst_type $15 = BGP_INSTANCE_TYPE_DEFAULT (gdb) bgp_aggregate_install(): 5920 5921 new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, 0, 5922 bgp->peer_self, attr, rn); 5923 5924 SET_FLAG(new->flags, BGP_PATH_VALID); 5925 5926 bgp_path_info_add(rn, new); 5927 bgp_process(bgp, rn, afi, safi); 299 void bgp_path_info_add(struct bgp_node *rn, struct bgp_path_info *pi): ... 310 311 bgp_path_info_lock(pi); 312 bgp_lock_node(rn); 313 peer_lock(pi->peer); /* bgp_path_info peer reference */ <<< This points to bgp->peer_self = NULL 314 } 1573 #define peer_lock(B) peer_lock_with_caller(_FUNCTION_, (B)) 1156 /* increase reference count on a struct peer */ 1157 struct peer *peer_lock_with_caller(const char *name, struct peer *peer) 1158 { 1159 assert(peer && (peer->lock >= 0)); <<< asserted here 1160 Similar issue was fixed in community and we already have the fix: https://github.com/FRRouting/frr/pull/4816 root@sr407497_lxc2:/home/ubuntu/frr_repo/frr/bgpd# git diff dfb6fd1dd119a5bd660012e940e8328534547e76~ dfb6fd1dd119a5bd660012e940e8328534547e76 diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index abad1db..a372568 100644 — a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -5332,6 +5332,13 @@ static void bgp_purge_af_static_redist_routes(struct bgp *bgp, afi_t afi, struct bgp_node *rn; struct bgp_path_info *pi; + /* Do not install the aggregate route if BGP is in the + * process of termination. + */ + if (bgp_flag_check(bgp, BGP_FLAG_DELETE_IN_PROGRESS) || + (bgp->peer_self == NULL)) + return; + table = bgp->rib[afi][safi]; for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) { for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next) { But looks like similar handling is required at other places as well: Expected Behavior : BGP daemon should not crash Signed-off-by: sudhanshukumar22 --- diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index c4ab223b7f..647f154c9e 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -6657,6 +6657,9 @@ static void bgp_aggregate_install( if (!attr) { bgp_aggregate_delete(bgp, p, afi, safi, aggregate); + if (BGP_DEBUG(update_groups, UPDATE_GROUPS)) + zlog_debug("%s: %pFX null attribute", __func__, + p); return; } @@ -7175,6 +7178,13 @@ static void bgp_add_route_to_aggregate(struct bgp *bgp, struct ecommunity *ecommunity = NULL; struct lcommunity *lcommunity = NULL; + /* If the bgp instance is being deleted or self peer is deleted + * then do not create aggregate route + */ + if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS) + || (bgp->peer_self == NULL)) + return; + /* ORIGIN attribute: If at least one route among routes that are * aggregated has ORIGIN with the value INCOMPLETE, then the * aggregated route must have the ORIGIN attribute with the value @@ -7291,6 +7301,13 @@ static void bgp_remove_route_from_aggregate(struct bgp *bgp, afi_t afi, struct lcommunity *lcommunity = NULL; unsigned long match = 0; + /* If the bgp instance is being deleted or self peer is deleted + * then do not create aggregate route + */ + if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS) + || (bgp->peer_self == NULL)) + return; + if (BGP_PATH_HOLDDOWN(pi)) return; @@ -7487,6 +7504,13 @@ int bgp_aggregate_unset(struct bgp *bgp, struct prefix *prefix, afi_t afi, struct bgp_dest *dest; struct bgp_aggregate *aggregate; + /* If the bgp instance is being deleted or self peer is deleted + * then do not create aggregate route + */ + if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS) + || (bgp->peer_self == NULL)) + return 0; + apply_mask(prefix); /* Old configuration check. */ dest = bgp_node_lookup(bgp->aggregate[afi][safi], prefix);