From 31a4638f7d49753e904c3518de2b1aa51a47ab94 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 12 Jun 2015 07:59:10 -0700 Subject: [PATCH] BGP: bestpath needs to prefer confed-external over confed-internal Topology: +-----------------------------------------+ | | | AS 100 | | | | +----------------+ | +-----------+ | | | | | | | | SubAS 65001 | | | AS 90 | | | | +-------------+ | | r9----------------r1---------r2----\ | | | | | | | | | | | | | SubAS 65002 | | +-----|-----+ | | \--- r3 --/ | \-------r4 | | \---------------------/ \---------------/ | | | | | | | | | | | +----------------+ | | | | | | | | | | +----------------+ | r5 | | +-----------+ | | | | | | | | | | | SubAS 65003 | +-----|-------+ | | AS 80 | | | | | | | r8----------------r7--------r6--------------/ | | | | | | | +-----------+ | +----------------+ | +-----------------------------------------+ Important info: - r8 originates 8.8.8.8/32 - r1, r2, r3 -> r7 are 10.0.0.1, 10.0.0.2, etc - 'bgp bestpath compare-routerid' is configured everywhere (we could still hit the problem without this though) Bestpath selection for 8.8.8.8/32 on r2 and r3 is inconsistent. Here r4 advertised the 8.8.8.8/32 to r2 first, r2 then advertised it to r3, r3 selected the path from r2 as the bestpath due to lowest router-id. r2 BGP routing table entry for 8.8.8.8/32 Paths: (1 available, best #1, table Default-IP-Routing-Table) Advertised to non peer-group peers: 10.0.0.1 10.0.0.3 10.0.0.4 (65002 65003) 80 10.0.0.7 (metric 50) from 10.0.0.4 (10.0.0.4) Origin IGP, metric 0, localpref 100, valid, confed-external, best Last update: Fri May 1 14:46:57 2015 r3 BGP routing table entry for 8.8.8.8/32 Paths: (2 available, best #1, table Default-IP-Routing-Table) Advertised to non peer-group peers: 10.0.0.4 90.1.1.6 (65002 65003) 80 10.0.0.7 (metric 50) from 10.0.0.2 (10.0.0.2) Origin IGP, metric 0, localpref 100, valid, confed-internal, best Last update: Fri May 1 14:46:58 2015 (65002 65003) 80 10.0.0.7 (metric 50) from 10.0.0.4 (10.0.0.4) Origin IGP, metric 0, localpref 100, valid, confed-external Last update: Fri May 1 14:46:57 2015 Here r4 advertised the 8.8.8.8/32 to r3 first, r3 then advertised it to r2, r2 selected the path from r3 as the bestpath due to lowest router-id. r2 BGP routing table entry for 8.8.8.8/32 Paths: (2 available, best #2, table Default-IP-Routing-Table) Advertised to non peer-group peers: 10.0.0.4 (65002 65003) 80 10.0.0.7 (metric 50) from 10.0.0.4 (10.0.0.4) Origin IGP, metric 0, localpref 100, valid, confed-external Last update: Fri May 1 15:37:27 2015 (65002 65003) 80 10.0.0.7 (metric 50) from 10.0.0.3 (10.0.0.3) Origin IGP, metric 0, localpref 100, valid, confed-internal, best Last update: Fri May 1 15:37:27 2015 r3 BGP routing table entry for 8.8.8.8/32 Paths: (1 available, best #1, table Default-IP-Routing-Table) Advertised to non peer-group peers: 10.0.0.1 10.0.0.2 10.0.0.4 90.1.1.6 (65002 65003) 80 10.0.0.7 (metric 50) from 10.0.0.4 (10.0.0.4) Origin IGP, metric 0, localpref 100, valid, confed-external, best Last update: Fri May 1 15:37:22 2015 The fix is to have bestpath prefer a confed-external path over a confed-internal path. I added this just after the "nexthop IGP cost" step because some confed customers will have one IGP covering multiple sub-ASs, in that case you want to compare nexthop IGP cost. --- bgpd/bgp_route.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index e5e6209674..e892501125 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -561,7 +561,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, && (exist_sort == BGP_PEER_IBGP || exist_sort == BGP_PEER_CONFED)) { if (debug) - zlog_debug("%s: path %s wins over path %s due to eBGP peer > iBGP peeer", + zlog_debug("%s: path %s wins over path %s due to eBGP peer > iBGP peer", pfx_buf, new->peer->host, exist->peer->host); return 1; } @@ -570,7 +570,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, && (new_sort == BGP_PEER_IBGP || new_sort == BGP_PEER_CONFED)) { if (debug) - zlog_debug("%s: path %s loses to path %s due to iBGP peer < eBGP peeer", + zlog_debug("%s: path %s loses to path %s due to iBGP peer < eBGP peer", pfx_buf, new->peer->host, exist->peer->host); return 0; } @@ -599,7 +599,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, ret = 0; } - /* 8.1. Same IGP metric. Compare the cluster list length as + /* 9. Same IGP metric. Compare the cluster list length as representative of IGP hops metric. Rewrite the metric value pair (newm, existm) with the cluster list length. Prefer the path with smaller cluster list length. */ @@ -633,7 +633,27 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, } } - /* 9. Maximum path check. */ + /* 10. confed-external vs. confed-internal */ + if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)) + { + if (new_sort == BGP_PEER_CONFED && exist_sort == BGP_PEER_IBGP) + { + if (debug) + zlog_debug("%s: path %s wins over path %s due to confed-external peer > confed-internal peer", + pfx_buf, new->peer->host, exist->peer->host); + return 1; + } + + if (exist_sort == BGP_PEER_CONFED && new_sort == BGP_PEER_IBGP) + { + if (debug) + zlog_debug("%s: path %s loses to path %s due to confed-internal peer < confed-external peer", + pfx_buf, new->peer->host, exist->peer->host); + return 0; + } + } + + /* 11. Maximum path check. */ if (newm == existm) { if (bgp_flag_check(bgp, BGP_FLAG_ASPATH_MULTIPATH_RELAX)) @@ -682,7 +702,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, return ret; } - /* 10. If both paths are external, prefer the path that was received + /* 12. If both paths are external, prefer the path that was received first (the oldest one). This step minimizes route-flap, since a newer path won't displace an older one, even if it was the preferred route based on the additional decision criteria below. */ @@ -707,7 +727,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, } } - /* 11. Router-ID comparision. */ + /* 13. Router-ID comparision. */ /* If one of the paths is "stale", the corresponding peer router-id will * be 0 and would always win over the other path. If originator id is * used for the comparision, it will decide which path is better. @@ -737,7 +757,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, return 0; } - /* 12. Cluster length comparision. */ + /* 14. Cluster length comparision. */ new_cluster = BGP_CLUSTER_LIST_LENGTH(new->attr); exist_cluster = BGP_CLUSTER_LIST_LENGTH(exist->attr); @@ -759,7 +779,7 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, return 0; } - /* 13. Neighbor address comparision. */ + /* 15. Neighbor address comparision. */ /* Do this only if neither path is "stale" as stale paths do not have * valid peer information (as the connection may or may not be up). */ -- 2.39.5