From 9fb7d677d3584eadd1d4568bedd542eb880afcd7 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Thu, 30 May 2024 15:47:11 +0200 Subject: [PATCH] bgpd: fix do not skip paths with same nexthop Under a setup where two BGP prefixes are available from multiple sources, if one of the two prefixes is recursive over the other BGP prefix, then it will not be considered as multipath. The below output shows the two prefixes 192.0.2.24/32 and 192.0.2.21/32. The 192.0.2.[5,6,8] are the known IP addresses visible from the IGP. > # show bgp ipv4 192.0.2.24/32 > *>i 192.0.2.24/32 192.0.2.21 0 100 0 i > * i 192.0.2.21 0 100 0 i > * i 192.0.2.21 0 100 0 i > # show bgp ipv4 192.0.2.21/32 > *>i 192.0.2.21/32 192.0.2.5 0 100 0 i > *=i 192.0.2.6 0 100 0 i > *=i 192.0.2.8 0 100 0 i The bgp best selection algorithm refuses to consider the paths to '192.0.2.24/32' as multipath, whereas the BGP paths which use the BGP peer as nexthop are considered multipath. > ... has the same nexthop as the bestpath, skip it ... Previously, this condition has been added to prevent ZEBRA from installing routes with same nexthop: > Here you can see the two paths with nexthop 210.2.2.2 > superm-redxp-05# show ip route 2.23.24.192/28 > Routing entry for 2.23.24.192/28 > Known via "bgp", distance 20, metric 0, best > Last update 00:32:12 ago > * 210.2.2.2, via swp3 > * 210.2.0.2, via swp1 > * 210.2.1.2, via swp2 > * 210.2.2.2, via swp3 > [..] But today, ZEBRA knows how to handle it. When receiving incoming routes, nexthop groups are used. At creation, duplicated nexthops are identified, and will not be installed. The below output illustrate the duplicate paths to 172.16.0.200 received by an other peer. > r1# show ip route 172.18.1.100 nexthop-group > Routing entry for 172.18.1.100/32 > Known via "bgp", distance 200, metric 0, best > Last update 00:03:03 ago > Nexthop Group ID: 75757580 > 172.16.0.200 (recursive), weight 1 > * 172.31.0.3, via r1-eth1, label 16055, weight 1 > * 172.31.2.4, via r1-eth2, label 16055, weight 1 > * 172.31.0.3, via r1-eth1, label 16006, weight 1 > * 172.31.2.4, via r1-eth2, label 16006, weight 1 > * 172.31.8.7, via r1-eth4, label 16008, weight 1 > 172.16.0.200 (duplicate nexthop removed) (recursive), weight 1 > 172.31.0.3, via r1-eth1 (duplicate nexthop removed), label 16055, weight 1 > 172.31.2.4, via r1-eth2 (duplicate nexthop removed), label 16055, weight 1 > 172.31.0.3, via r1-eth1 (duplicate nexthop removed), label 16006, weight 1 > 172.31.2.4, via r1-eth2 (duplicate nexthop removed), label 16006, weight 1 > 172.31.8.7, via r1-eth4 (duplicate nexthop removed), label 16008, weight 1 Fix this by proposing to let ZEBRA handle this duplicate decision. Fixes: 7dc9d4e4e360 ("bgp may add multiple path entries with the same nexthop") Signed-off-by: Philippe Guibert --- bgpd/bgp_mpath.c | 115 +++++++++++++++++++++++++++++------------------ bgpd/bgp_route.c | 9 ---- 2 files changed, 71 insertions(+), 53 deletions(-) diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c index e12d84b84c..eadd52b8e0 100644 --- a/bgpd/bgp_mpath.c +++ b/bgpd/bgp_mpath.c @@ -605,31 +605,43 @@ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, if (mp_node && (listgetdata(mp_node) == cur_mpath)) { list_delete_node(mp_list, mp_node); bgp_path_info_mpath_dequeue(cur_mpath); - if ((mpath_count < maxpaths) - && prev_mpath - && bgp_path_info_nexthop_cmp(prev_mpath, - cur_mpath)) { + if ((mpath_count < maxpaths) && prev_mpath) { + mpath_count++; + if (bgp_path_info_nexthop_cmp(prev_mpath, + cur_mpath)) { + if (ecommunity_linkbw_present( + bgp_attr_get_ecommunity( + cur_mpath->attr), + &bwval) || + ecommunity_linkbw_present( + bgp_attr_get_ipv6_ecommunity( + cur_mpath->attr), + &bwval)) + cum_bw += bwval; + else + all_paths_lb = false; + if (debug) { + bgp_path_info_path_with_addpath_rx_str( + cur_mpath, path_buf, + sizeof(path_buf)); + zlog_debug("%pBD: %s is still multipath, cur count %d", + dest, path_buf, + mpath_count); + } + } else { + if (debug) { + bgp_path_info_path_with_addpath_rx_str( + cur_mpath, path_buf, + sizeof(path_buf)); + zlog_debug("%pBD: nexthop equal, however add mpath %s nexthop %pI4, cur count %d", + dest, path_buf, + &cur_mpath->attr->nexthop, + mpath_count); + } + } bgp_path_info_mpath_enqueue(prev_mpath, cur_mpath); prev_mpath = cur_mpath; - mpath_count++; - if (ecommunity_linkbw_present(bgp_attr_get_ecommunity( - cur_mpath->attr), - &bwval) || - ecommunity_linkbw_present( - bgp_attr_get_ipv6_ecommunity( - cur_mpath->attr), - &bwval)) - cum_bw += bwval; - else - all_paths_lb = false; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: %s is still multipath, cur count %d", - dest, path_buf, mpath_count); - } } else { mpath_changed = 1; if (debug) { @@ -693,35 +705,50 @@ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, list_delete_node(mp_list, mp_node); assert(new_mpath); assert(prev_mpath); - if ((mpath_count < maxpaths) && (new_mpath != new_best) - && bgp_path_info_nexthop_cmp(prev_mpath, - new_mpath)) { + if ((mpath_count < maxpaths) && (new_mpath != new_best)) { + /* keep duplicate nexthop */ bgp_path_info_mpath_dequeue(new_mpath); bgp_path_info_mpath_enqueue(prev_mpath, new_mpath); - prev_mpath = new_mpath; mpath_changed = 1; mpath_count++; - if (ecommunity_linkbw_present(bgp_attr_get_ecommunity( - new_mpath->attr), - &bwval) || - ecommunity_linkbw_present( - bgp_attr_get_ipv6_ecommunity( - new_mpath->attr), - &bwval)) - cum_bw += bwval; - else - all_paths_lb = false; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - new_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: add mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &new_mpath->attr->nexthop, - mpath_count); + if (bgp_path_info_nexthop_cmp(prev_mpath, + new_mpath)) { + if (ecommunity_linkbw_present( + bgp_attr_get_ecommunity( + new_mpath->attr), + &bwval) || + ecommunity_linkbw_present( + bgp_attr_get_ipv6_ecommunity( + new_mpath->attr), + &bwval)) + cum_bw += bwval; + else + all_paths_lb = false; + if (debug) { + bgp_path_info_path_with_addpath_rx_str( + new_mpath, path_buf, + sizeof(path_buf)); + zlog_debug("%pBD: add mpath %s nexthop %pI4, cur count %d", + dest, path_buf, + &new_mpath->attr + ->nexthop, + mpath_count); + } + } else { + if (debug) { + bgp_path_info_path_with_addpath_rx_str( + new_mpath, path_buf, + sizeof(path_buf)); + zlog_debug("%pBD: nexthop equal, however add mpath %s nexthop %pI4, cur count %d", + dest, path_buf, + &new_mpath->attr + ->nexthop, + mpath_count); + } } + prev_mpath = new_mpath; } mp_node = mp_next_node; } diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 94c21e1861..4dcb22234a 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -3256,15 +3256,6 @@ void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest, if (!peer_established(pi->peer->connection)) continue; - if (!bgp_path_info_nexthop_cmp(pi, new_select)) { - if (debug) - zlog_debug( - "%pBD(%s): %s has the same nexthop as the bestpath, skip it", - dest, bgp->name_pretty, - path_buf); - continue; - } - bgp_path_info_cmp(bgp, pi, new_select, &paths_eq, mpath_cfg, debug, pfx_buf, afi, safi, &dest->reason); -- 2.39.5