diff options
Diffstat (limited to 'zebra/zebra_nhg.c')
| -rw-r--r-- | zebra/zebra_nhg.c | 388 |
1 files changed, 298 insertions, 90 deletions
diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 2864b96c83..12ed024a66 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -36,7 +36,6 @@ #include "zebra/zebra_nhg_private.h" #include "zebra/zebra_rnh.h" #include "zebra/zebra_routemap.h" -#include "zebra/zebra_memory.h" #include "zebra/zebra_srte.h" #include "zebra/zserv.h" #include "zebra/rt.h" @@ -49,12 +48,23 @@ DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry"); DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected"); DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context"); +/* Map backup nexthop indices between two nhes */ +struct backup_nh_map_s { + int map_count; + + struct { + uint8_t orig_idx; + uint8_t new_idx; + } map[MULTIPATH_NUM]; +}; + /* id counter to keep in sync with kernel */ uint32_t id_counter; -/* */ +/* Controlled through ui */ static bool g_nexthops_enabled = true; static bool proto_nexthops_only; +static bool use_recursive_backups = true; static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi, int type, bool from_dplane); @@ -371,8 +381,8 @@ void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi, */ if (nh && (nh->next == NULL)) { switch (nh->type) { - case (NEXTHOP_TYPE_IFINDEX): - case (NEXTHOP_TYPE_BLACKHOLE): + case NEXTHOP_TYPE_IFINDEX: + case NEXTHOP_TYPE_BLACKHOLE: /* * This switch case handles setting the afi different * for ipv4/v6 routes. Ifindex/blackhole nexthop @@ -383,12 +393,12 @@ void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi, */ nhe->afi = afi; break; - case (NEXTHOP_TYPE_IPV4_IFINDEX): - case (NEXTHOP_TYPE_IPV4): + case NEXTHOP_TYPE_IPV4_IFINDEX: + case NEXTHOP_TYPE_IPV4: nhe->afi = AFI_IP; break; - case (NEXTHOP_TYPE_IPV6_IFINDEX): - case (NEXTHOP_TYPE_IPV6): + case NEXTHOP_TYPE_IPV6_IFINDEX: + case NEXTHOP_TYPE_IPV6: nhe->afi = AFI_IP6; break; } @@ -1626,9 +1636,10 @@ void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe) nhg_connected_tree_increment_ref(&nhe->nhg_depends); } -static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop, - struct nexthop *nexthop, - struct zebra_sr_policy *policy) +static struct nexthop *nexthop_set_resolved(afi_t afi, + const struct nexthop *newhop, + struct nexthop *nexthop, + struct zebra_sr_policy *policy) { struct nexthop *resolved_hop; uint8_t num_labels = 0; @@ -1746,6 +1757,8 @@ static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop, resolved_hop->rparent = nexthop; _nexthop_add(&nexthop->resolved, resolved_hop); + + return resolved_hop; } /* Checks if nexthop we are trying to resolve to is valid */ @@ -1784,13 +1797,109 @@ static bool nexthop_valid_resolve(const struct nexthop *nexthop, } /* - * Given a nexthop we need to properly recursively resolve - * the route. As such, do a table lookup to find and match - * if at all possible. Set the nexthop->ifindex and resolved_id - * as appropriate + * When resolving a recursive nexthop, capture backup nexthop(s) also + * so they can be conveyed through the dataplane to the FIB. We'll look + * at the backups in the resolving nh 'nexthop' and its nhe, and copy them + * into the route's resolved nh 'resolved' and its nhe 'nhe'. + */ +static int resolve_backup_nexthops(const struct nexthop *nexthop, + const struct nhg_hash_entry *nhe, + struct nexthop *resolved, + struct nhg_hash_entry *resolve_nhe, + struct backup_nh_map_s *map) +{ + int i, j, idx; + const struct nexthop *bnh; + struct nexthop *nh, *newnh; + + assert(nexthop->backup_num <= NEXTHOP_MAX_BACKUPS); + + if (resolve_nhe->backup_info->nhe == NULL) + resolve_nhe->backup_info->nhe = zebra_nhg_alloc(); + + /* Locate backups from the original nexthop's backup index and nhe */ + for (i = 0; i < nexthop->backup_num; i++) { + idx = nexthop->backup_idx[i]; + + /* Do we already know about this particular backup? */ + for (j = 0; j < map->map_count; j++) { + if (map->map[j].orig_idx == idx) + break; + } + + if (j < map->map_count) { + resolved->backup_idx[resolved->backup_num] = + map->map[j].new_idx; + resolved->backup_num++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: found map idx orig %d, new %d", + __func__, map->map[j].orig_idx, + map->map[j].new_idx); + + continue; + } + + /* We can't handle any new map entries at this point. */ + if (map->map_count == MULTIPATH_NUM) + break; + + /* Need to create/copy a new backup */ + bnh = nhe->backup_info->nhe->nhg.nexthop; + for (j = 0; j < idx; j++) { + if (bnh == NULL) + break; + bnh = bnh->next; + } + + /* Whoops - bad index in the nexthop? */ + if (bnh == NULL) + continue; + + /* Update backup info in the resolving nexthop and its nhe */ + newnh = nexthop_dup_no_recurse(bnh, NULL); + + /* Need to compute the new backup index in the new + * backup list, and add to map struct. + */ + j = 0; + nh = resolve_nhe->backup_info->nhe->nhg.nexthop; + if (nh) { + while (nh->next) { + nh = nh->next; + j++; + } + + nh->next = newnh; + + } else /* First one */ + resolve_nhe->backup_info->nhe->nhg.nexthop = newnh; + + /* Capture index */ + resolved->backup_idx[resolved->backup_num] = j; + resolved->backup_num++; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("%s: added idx orig %d, new %d", + __func__, idx, j); + + /* Update map/cache */ + map->map[map->map_count].orig_idx = idx; + map->map[map->map_count].new_idx = j; + map->map_count++; + } + + return 0; +} + +/* + * Given a nexthop we need to properly recursively resolve, + * do a table lookup to find and match if at all possible. + * Set the nexthop->ifindex and resolution info as appropriate. */ -static int nexthop_active(afi_t afi, struct route_entry *re, - struct nexthop *nexthop, struct route_node *top) +static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe, + const struct prefix *top, int type, uint32_t flags, + uint32_t *pmtu) { struct prefix p; struct route_table *table; @@ -1804,33 +1913,65 @@ static int nexthop_active(afi_t afi, struct route_entry *re, struct zebra_vrf *zvrf; struct in_addr local_ipv4; struct in_addr *ipv4; + afi_t afi = AFI_IP; + /* Reset some nexthop attributes that we'll recompute if necessary */ if ((nexthop->type == NEXTHOP_TYPE_IPV4) - || nexthop->type == NEXTHOP_TYPE_IPV6) + || (nexthop->type == NEXTHOP_TYPE_IPV6)) nexthop->ifindex = 0; - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE); nexthops_free(nexthop->resolved); nexthop->resolved = NULL; - re->nexthop_mtu = 0; - - if (IS_ZEBRA_DEBUG_NHG_DETAIL) - zlog_debug("%s: re %p, nexthop %pNHv", - __func__, re, nexthop); /* - * If the kernel has sent us a NEW route, then - * by golly gee whiz it's a good route. - * - * If its an already INSTALLED route we have already handled, then the - * kernel route's nexthop might have became unreachable - * and we have to handle that. + * Set afi based on nexthop type. + * Some nexthop types get special handling, possibly skipping + * the normal processing. */ - if (!CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) - && (re->type == ZEBRA_ROUTE_KERNEL - || re->type == ZEBRA_ROUTE_SYSTEM)) + switch (nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + + ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); + /* + * If the interface exists and its operative or its a kernel + * route and interface is up, its active. We trust kernel routes + * to be good. + */ + if (ifp + && (if_is_operative(ifp) + || (if_is_up(ifp) + && (type == ZEBRA_ROUTE_KERNEL + || type == ZEBRA_ROUTE_SYSTEM)))) + return 1; + else + return 0; + break; + + case NEXTHOP_TYPE_IPV6_IFINDEX: + afi = AFI_IP6; + + if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) { + ifp = if_lookup_by_index(nexthop->ifindex, + nexthop->vrf_id); + if (ifp && if_is_operative(ifp)) + return 1; + else + return 0; + } + break; + + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + afi = AFI_IP; + break; + case NEXTHOP_TYPE_IPV6: + afi = AFI_IP6; + break; + + case NEXTHOP_TYPE_BLACKHOLE: return 1; + } /* * If the nexthop has been marked as 'onlink' we just need to make @@ -1853,10 +1994,11 @@ static int nexthop_active(afi_t afi, struct route_entry *re, return 1; } - if ((top->p.family == AF_INET && top->p.prefixlen == 32 - && nexthop->gate.ipv4.s_addr == top->p.u.prefix4.s_addr) - || (top->p.family == AF_INET6 && top->p.prefixlen == 128 - && memcmp(&nexthop->gate.ipv6, &top->p.u.prefix6, 16) == 0)) { + if (top && + ((top->family == AF_INET && top->prefixlen == 32 + && nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) + || (top->family == AF_INET6 && top->prefixlen == 128 + && memcmp(&nexthop->gate.ipv6, &top->u.prefix6, 16) == 0))) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( " :%s: Attempting to install a max prefixlength route through itself", @@ -1873,6 +2015,9 @@ static int nexthop_active(afi_t afi, struct route_entry *re, ipv4 = &nexthop->gate.ipv4; } + /* Processing for nexthops with SR 'color' attribute, using + * the corresponding SR policy object. + */ if (nexthop->srte_color) { struct ipaddr endpoint = {0}; struct zebra_sr_policy *policy; @@ -1950,7 +2095,7 @@ static int nexthop_active(afi_t afi, struct route_entry *re, * resolved by a route NH1. The exception is if the route is a * host route. */ - if (rn == top) + if (prefix_same(&rn->p, top)) if (((afi == AFI_IP) && (rn->p.prefixlen != 32)) || ((afi == AFI_IP6) && (rn->p.prefixlen != 128))) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) @@ -2020,8 +2165,10 @@ static int nexthop_active(afi_t afi, struct route_entry *re, match->nhe->id, newhop); return 1; - } else if (CHECK_FLAG(re->flags, ZEBRA_FLAG_ALLOW_RECURSION)) { + } else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) { struct nexthop_group *nhg; + struct nexthop *resolver; + struct backup_nh_map_s map = {}; resolved = 0; @@ -2051,17 +2198,29 @@ static int nexthop_active(afi_t afi, struct route_entry *re, SET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE); - nexthop_set_resolved(afi, newhop, nexthop, - NULL); + resolver = nexthop_set_resolved(afi, newhop, + nexthop, NULL); resolved = 1; + + /* If there are backup nexthops, capture + * that info with the resolving nexthop. + */ + if (resolver && newhop->backup_num > 0) { + resolve_backup_nexthops(newhop, + match->nhe, + resolver, nhe, + &map); + } } /* Examine installed backup nexthops, if any. There * are only installed backups *if* there is a - * dedicated fib list. + * dedicated fib list. The UI can also control use + * of backups for resolution. */ nhg = rib_get_fib_backup_nhg(match); - if (nhg == NULL || nhg->nexthop == NULL) + if (!use_recursive_backups || + nhg == NULL || nhg->nexthop == NULL) goto done_with_match; for (ALL_NEXTHOPS_PTR(nhg, newhop)) { @@ -2079,10 +2238,14 @@ static int nexthop_active(afi_t afi, struct route_entry *re, NULL); resolved = 1; } + done_with_match: - if (resolved) - re->nexthop_mtu = match->mtu; - else if (IS_ZEBRA_DEBUG_RIB_DETAILED) + /* Capture resolving mtu */ + if (resolved) { + if (pmtu) + *pmtu = match->mtu; + + } else if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug( " %s: Recursion failed to find", __func__); @@ -2092,9 +2255,9 @@ done_with_match: if (IS_ZEBRA_DEBUG_RIB_DETAILED) { zlog_debug( " %s: Route Type %s has not turned on recursion", - __func__, zebra_route_string(re->type)); - if (re->type == ZEBRA_ROUTE_BGP - && !CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP)) + __func__, zebra_route_string(type)); + if (type == ZEBRA_ROUTE_BGP + && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP)) zlog_debug( " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\""); } @@ -2110,23 +2273,23 @@ done_with_match: /* This function verifies reachability of one given nexthop, which can be * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored * in nexthop->flags field. The nexthop->ifindex will be updated - * appropriately as well. An existing route map can turn an - * otherwise active nexthop into inactive, but not vice versa. + * appropriately as well. * - * If it finds a nexthop recursively, set the resolved_id - * to match that nexthop's nhg_hash_entry ID; + * An existing route map can turn an otherwise active nexthop into inactive, + * but not vice versa. * * The return value is the final value of 'ACTIVE' flag. */ static unsigned nexthop_active_check(struct route_node *rn, struct route_entry *re, - struct nexthop *nexthop) + struct nexthop *nexthop, + struct nhg_hash_entry *nhe) { - struct interface *ifp; route_map_result_t ret = RMAP_PERMITMATCH; afi_t family; const struct prefix *p, *src_p; struct zebra_vrf *zvrf; + uint32_t mtu = 0; srcdest_rnode_prefixes(rn, &p, &src_p); @@ -2137,19 +2300,28 @@ static unsigned nexthop_active_check(struct route_node *rn, else family = 0; + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: re %p, nexthop %pNHv", __func__, re, nexthop); + + /* + * If the kernel has sent us a NEW route, then + * by golly gee whiz it's a good route. + * + * If its an already INSTALLED route we have already handled, then the + * kernel route's nexthop might have became unreachable + * and we have to handle that. + */ + if (!CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) && + (re->type == ZEBRA_ROUTE_KERNEL || + re->type == ZEBRA_ROUTE_SYSTEM)) { + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + goto skip_check; + } + switch (nexthop->type) { case NEXTHOP_TYPE_IFINDEX: - ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); - /* - * If the interface exists and its operative or its a kernel - * route and interface is up, its active. We trust kernel routes - * to be good. - */ - if (ifp - && (if_is_operative(ifp) - || (if_is_up(ifp) - && (re->type == ZEBRA_ROUTE_KERNEL - || re->type == ZEBRA_ROUTE_SYSTEM)))) + if (nexthop_active(nexthop, nhe, &rn->p, re->type, + re->flags, &mtu)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); @@ -2157,14 +2329,16 @@ static unsigned nexthop_active_check(struct route_node *rn, case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: family = AFI_IP; - if (nexthop_active(AFI_IP, re, nexthop, rn)) + if (nexthop_active(nexthop, nhe, &rn->p, re->type, + re->flags, &mtu)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); break; case NEXTHOP_TYPE_IPV6: family = AFI_IP6; - if (nexthop_active(AFI_IP6, re, nexthop, rn)) + if (nexthop_active(nexthop, nhe, &rn->p, re->type, + re->flags, &mtu)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); @@ -2173,19 +2347,12 @@ static unsigned nexthop_active_check(struct route_node *rn, /* RFC 5549, v4 prefix with v6 NH */ if (rn->p.family != AF_INET) family = AFI_IP6; - if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) { - ifp = if_lookup_by_index(nexthop->ifindex, - nexthop->vrf_id); - if (ifp && if_is_operative(ifp)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - } else { - if (nexthop_active(AFI_IP6, re, nexthop, rn)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - } + + if (nexthop_active(nexthop, nhe, &rn->p, re->type, + re->flags, &mtu)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); break; case NEXTHOP_TYPE_BLACKHOLE: SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); @@ -2194,6 +2361,8 @@ static unsigned nexthop_active_check(struct route_node *rn, break; } +skip_check: + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) { if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug(" %s: Unable to find active nexthop", @@ -2201,6 +2370,18 @@ static unsigned nexthop_active_check(struct route_node *rn, return 0; } + /* Capture recursive nexthop mtu. + * TODO -- the code used to just reset the re's value to zero + * for each nexthop, and then jam any resolving route's mtu value in, + * whether or not that was zero, or lt/gt any existing value? The + * way this is used appears to be as a floor value, so let's try + * using it that way here. + */ + if (mtu > 0) { + if (re->nexthop_mtu == 0 || re->nexthop_mtu > mtu) + re->nexthop_mtu = mtu; + } + /* XXX: What exactly do those checks do? Do we support * e.g. IPv4 routes with IPv6 nexthops or vice versa? */ @@ -2214,7 +2395,7 @@ static unsigned nexthop_active_check(struct route_node *rn, * Possibly it may be better to use only the rib_table_info * in every case. */ - if (!family) { + if (family == 0) { struct rib_table_info *info; info = srcdest_rnode_table_info(rn); @@ -2277,21 +2458,26 @@ done: } /* - * Process a list of nexthops, given an nhg, determining + * Process a list of nexthops, given an nhe, determining * whether each one is ACTIVE/installable at this time. */ static uint32_t nexthop_list_active_update(struct route_node *rn, struct route_entry *re, - struct nexthop_group *nhg) + struct nhg_hash_entry *nhe, + bool is_backup) { union g_addr prev_src; unsigned int prev_active, new_active; ifindex_t prev_index; uint32_t counter = 0; struct nexthop *nexthop; + struct nexthop_group *nhg = &nhe->nhg; nexthop = nhg->nexthop; + /* Init recursive nh mtu */ + re->nexthop_mtu = 0; + /* Process nexthops one-by-one */ for ( ; nexthop; nexthop = nexthop->next) { @@ -2301,15 +2487,20 @@ static uint32_t nexthop_list_active_update(struct route_node *rn, prev_src = nexthop->rmap_src; prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); prev_index = nexthop->ifindex; + + /* Include the containing nhe for primary nexthops: if there's + * recursive resolution, we capture the backup info also. + */ + new_active = + nexthop_active_check(rn, re, nexthop, + (is_backup ? NULL : nhe)); + /* * We need to respect the multipath_num here * as that what we should be able to install from * a multipath perspective should not be a data plane * decision point. */ - new_active = - nexthop_active_check(rn, re, nexthop); - if (new_active && counter >= zrouter.multipath_num) { struct nexthop *nh; @@ -2323,7 +2514,7 @@ static uint32_t nexthop_list_active_update(struct route_node *rn, if (new_active) counter++; - /* Don't allow src setting on IPv6 addr for now */ + /* Check for changes to the nexthop - set ROUTE_ENTRY_CHANGED */ if (prev_active != new_active || prev_index != nexthop->ifindex || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX && nexthop->type < NEXTHOP_TYPE_IPV6) @@ -2392,7 +2583,7 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re) curr_nhe->id = 0; /* Process nexthops */ - curr_active = nexthop_list_active_update(rn, re, &curr_nhe->nhg); + curr_active = nexthop_list_active_update(rn, re, curr_nhe, false); if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: re %p curr_active %u", __func__, re, @@ -2403,7 +2594,7 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re) goto backups_done; backup_active = nexthop_list_active_update( - rn, re, zebra_nhg_get_backup_nhg(curr_nhe)); + rn, re, curr_nhe->backup_info->nhe, true /*is_backup*/); if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: re %p backup_active %u", __func__, re, @@ -2715,6 +2906,12 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) case DPLANE_OP_NEIGH_DISCOVER: case DPLANE_OP_BR_PORT_UPDATE: case DPLANE_OP_NONE: + case DPLANE_OP_IPTABLE_ADD: + case DPLANE_OP_IPTABLE_DELETE: + case DPLANE_OP_IPSET_ADD: + case DPLANE_OP_IPSET_DELETE: + case DPLANE_OP_IPSET_ENTRY_ADD: + case DPLANE_OP_IPSET_ENTRY_DELETE: break; } @@ -2776,6 +2973,17 @@ bool zebra_nhg_kernel_nexthops_enabled(void) return g_nexthops_enabled; } +/* Global control for use of activated backups for recursive resolution. */ +void zebra_nhg_set_recursive_use_backups(bool set) +{ + use_recursive_backups = set; +} + +bool zebra_nhg_recursive_use_backups(void) +{ + return use_recursive_backups; +} + /* * Global control to only use kernel nexthops for protocol created NHGs. * There are some use cases where you may not want zebra to implicitly |
