diff options
Diffstat (limited to 'zebra/zebra_rib.c')
| -rw-r--r-- | zebra/zebra_rib.c | 1037 |
1 files changed, 505 insertions, 532 deletions
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index b31b6a1250..600e820bc4 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -54,6 +54,7 @@ #include "zebra/zebra_vxlan.h" #include "zebra/zapi_msg.h" #include "zebra/zebra_dplane.h" +#include "zebra/zebra_nhg.h" /* * Event, list, and mutex for delivery of dataplane results @@ -336,298 +337,6 @@ struct nexthop *route_entry_nexthop_blackhole_add(struct route_entry *re, return nexthop; } -static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop, - struct nexthop *nexthop) -{ - struct nexthop *resolved_hop; - - resolved_hop = nexthop_new(); - SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE); - - resolved_hop->vrf_id = nexthop->vrf_id; - switch (newhop->type) { - case NEXTHOP_TYPE_IPV4: - case NEXTHOP_TYPE_IPV4_IFINDEX: - /* If the resolving route specifies a gateway, use it */ - resolved_hop->type = newhop->type; - resolved_hop->gate.ipv4 = newhop->gate.ipv4; - - if (newhop->ifindex) { - resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX; - resolved_hop->ifindex = newhop->ifindex; - } - break; - case NEXTHOP_TYPE_IPV6: - case NEXTHOP_TYPE_IPV6_IFINDEX: - resolved_hop->type = newhop->type; - resolved_hop->gate.ipv6 = newhop->gate.ipv6; - - if (newhop->ifindex) { - resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX; - resolved_hop->ifindex = newhop->ifindex; - } - break; - case NEXTHOP_TYPE_IFINDEX: - /* If the resolving route is an interface route, - * it means the gateway we are looking up is connected - * to that interface. (The actual network is _not_ onlink). - * Therefore, the resolved route should have the original - * gateway as nexthop as it is directly connected. - * - * On Linux, we have to set the onlink netlink flag because - * otherwise, the kernel won't accept the route. - */ - resolved_hop->flags |= NEXTHOP_FLAG_ONLINK; - if (afi == AFI_IP) { - resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX; - resolved_hop->gate.ipv4 = nexthop->gate.ipv4; - } else if (afi == AFI_IP6) { - resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX; - resolved_hop->gate.ipv6 = nexthop->gate.ipv6; - } - resolved_hop->ifindex = newhop->ifindex; - break; - case NEXTHOP_TYPE_BLACKHOLE: - resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE; - resolved_hop->bh_type = nexthop->bh_type; - break; - } - - if (newhop->flags & NEXTHOP_FLAG_ONLINK) - resolved_hop->flags |= NEXTHOP_FLAG_ONLINK; - - /* Copy labels of the resolved route */ - if (newhop->nh_label) - nexthop_add_labels(resolved_hop, newhop->nh_label_type, - newhop->nh_label->num_labels, - &newhop->nh_label->label[0]); - - resolved_hop->rparent = nexthop; - nexthop_add(&nexthop->resolved, resolved_hop); -} - -/* - * Given a nexthop we need to properly recursively resolve - * the route. As such, do a table lookup to find and match - * if at all possible. Set the nexthop->ifindex as appropriate - */ -static int nexthop_active(afi_t afi, struct route_entry *re, - struct nexthop *nexthop, - struct route_node *top) -{ - struct prefix p; - struct route_table *table; - struct route_node *rn; - struct route_entry *match = NULL; - int resolved; - struct nexthop *newhop; - struct interface *ifp; - rib_dest_t *dest; - - if ((nexthop->type == NEXTHOP_TYPE_IPV4) - || nexthop->type == NEXTHOP_TYPE_IPV6) - nexthop->ifindex = 0; - - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE); - nexthops_free(nexthop->resolved); - nexthop->resolved = NULL; - re->nexthop_mtu = 0; - - /* - * If the kernel has sent us a route, then - * by golly gee whiz it's a good route. - */ - if (re->type == ZEBRA_ROUTE_KERNEL || - re->type == ZEBRA_ROUTE_SYSTEM) - return 1; - - /* - * Check to see if we should trust the passed in information - * for UNNUMBERED interfaces as that we won't find the GW - * address in the routing table. - * This check should suffice to handle IPv4 or IPv6 routes - * sourced from EVPN routes which are installed with the - * next hop as the remote VTEP IP. - */ - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) { - ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); - if (!ifp) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t%s: Onlink and interface: %u[%u] does not exist", - __PRETTY_FUNCTION__, nexthop->ifindex, - nexthop->vrf_id); - return 0; - } - if (connected_is_unnumbered(ifp)) { - if (if_is_operative(ifp)) - return 1; - else { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t%s: Onlink and interface %s is not operative", - __PRETTY_FUNCTION__, ifp->name); - return 0; - } - } - if (!if_is_operative(ifp)) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t%s: Interface %s is not unnumbered", - __PRETTY_FUNCTION__, ifp->name); - return 0; - } - } - - /* Make lookup prefix. */ - memset(&p, 0, sizeof(struct prefix)); - switch (afi) { - case AFI_IP: - p.family = AF_INET; - p.prefixlen = IPV4_MAX_PREFIXLEN; - p.u.prefix4 = nexthop->gate.ipv4; - break; - case AFI_IP6: - p.family = AF_INET6; - p.prefixlen = IPV6_MAX_PREFIXLEN; - p.u.prefix6 = nexthop->gate.ipv6; - break; - default: - assert(afi != AFI_IP && afi != AFI_IP6); - break; - } - /* Lookup table. */ - table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id); - if (!table) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug("\t%s: Table not found", - __PRETTY_FUNCTION__); - return 0; - } - - rn = route_node_match(table, (struct prefix *)&p); - while (rn) { - route_unlock_node(rn); - - /* Lookup should halt if we've matched against ourselves ('top', - * if specified) - i.e., we cannot have a nexthop NH1 is - * resolved by a route NH1. The exception is if the route is a - * host route. - */ - if (top && rn == top) - if (((afi == AFI_IP) && (rn->p.prefixlen != 32)) - || ((afi == AFI_IP6) && (rn->p.prefixlen != 128))) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t%s: Matched against ourself and prefix length is not max bit length", - __PRETTY_FUNCTION__); - return 0; - } - - /* Pick up selected route. */ - /* However, do not resolve over default route unless explicitly - * allowed. */ - if (is_default_prefix(&rn->p) - && !rnh_resolve_via_default(p.family)) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t:%s: Resolved against default route", - __PRETTY_FUNCTION__); - return 0; - } - - dest = rib_dest_from_rnode(rn); - if (dest && dest->selected_fib - && !CHECK_FLAG(dest->selected_fib->status, - ROUTE_ENTRY_REMOVED) - && dest->selected_fib->type != ZEBRA_ROUTE_TABLE) - match = dest->selected_fib; - - /* If there is no selected route or matched route is EGP, go up - tree. */ - if (!match) { - do { - rn = rn->parent; - } while (rn && rn->info == NULL); - if (rn) - route_lock_node(rn); - - continue; - } - - if (match->type == ZEBRA_ROUTE_CONNECT) { - /* Directly point connected route. */ - newhop = match->ng.nexthop; - if (newhop) { - if (nexthop->type == NEXTHOP_TYPE_IPV4 - || nexthop->type == NEXTHOP_TYPE_IPV6) - nexthop->ifindex = newhop->ifindex; - } - return 1; - } else if (CHECK_FLAG(re->flags, ZEBRA_FLAG_ALLOW_RECURSION)) { - resolved = 0; - for (ALL_NEXTHOPS(match->ng, newhop)) { - if (!CHECK_FLAG(match->status, - ROUTE_ENTRY_INSTALLED)) - continue; - if (CHECK_FLAG(newhop->flags, - NEXTHOP_FLAG_RECURSIVE)) - continue; - - SET_FLAG(nexthop->flags, - NEXTHOP_FLAG_RECURSIVE); - SET_FLAG(re->status, - ROUTE_ENTRY_NEXTHOPS_CHANGED); - nexthop_set_resolved(afi, newhop, nexthop); - resolved = 1; - } - if (resolved) - re->nexthop_mtu = match->mtu; - if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug("\t%s: Recursion failed to find", - __PRETTY_FUNCTION__); - return resolved; - } else if (re->type == ZEBRA_ROUTE_STATIC) { - resolved = 0; - for (ALL_NEXTHOPS(match->ng, newhop)) { - if (!CHECK_FLAG(match->status, - ROUTE_ENTRY_INSTALLED)) - continue; - if (CHECK_FLAG(newhop->flags, - NEXTHOP_FLAG_RECURSIVE)) - continue; - - SET_FLAG(nexthop->flags, - NEXTHOP_FLAG_RECURSIVE); - nexthop_set_resolved(afi, newhop, nexthop); - resolved = 1; - } - if (resolved) - re->nexthop_mtu = match->mtu; - - if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - "\t%s: Static route unable to resolve", - __PRETTY_FUNCTION__); - return resolved; - } else { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) { - zlog_debug("\t%s: Route Type %s has not turned on recursion", - __PRETTY_FUNCTION__, - zebra_route_string(re->type)); - if (re->type == ZEBRA_ROUTE_BGP && - !CHECK_FLAG(re->flags, ZEBRA_FLAG_IBGP)) - zlog_debug("\tEBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\""); - } - return 0; - } - } - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug("\t%s: Nexthop did not lookup in table", - __PRETTY_FUNCTION__); - return 0; -} - struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, union g_addr *addr, struct route_node **rn_out) { @@ -798,190 +507,6 @@ struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, vrf_id_t vrf_id) return NULL; } -/* This function verifies reachability of one given nexthop, which can be - * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored - * in nexthop->flags field. The nexthop->ifindex will be updated - * appropriately as well. An existing route map can turn - * (otherwise active) nexthop into inactive, but not vice versa. - * - * The return value is the final value of 'ACTIVE' flag. - */ -static unsigned nexthop_active_check(struct route_node *rn, - struct route_entry *re, - struct nexthop *nexthop) -{ - struct interface *ifp; - route_map_result_t ret = RMAP_MATCH; - int family; - char buf[SRCDEST2STR_BUFFER]; - const struct prefix *p, *src_p; - struct zebra_vrf *zvrf; - - srcdest_rnode_prefixes(rn, &p, &src_p); - - if (rn->p.family == AF_INET) - family = AFI_IP; - else if (rn->p.family == AF_INET6) - family = AFI_IP6; - else - family = 0; - switch (nexthop->type) { - case NEXTHOP_TYPE_IFINDEX: - ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); - if (ifp && if_is_operative(ifp)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - break; - case NEXTHOP_TYPE_IPV4: - case NEXTHOP_TYPE_IPV4_IFINDEX: - family = AFI_IP; - if (nexthop_active(AFI_IP, re, nexthop, rn)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - break; - case NEXTHOP_TYPE_IPV6: - family = AFI_IP6; - if (nexthop_active(AFI_IP6, re, nexthop, rn)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - break; - case NEXTHOP_TYPE_IPV6_IFINDEX: - /* RFC 5549, v4 prefix with v6 NH */ - if (rn->p.family != AF_INET) - family = AFI_IP6; - if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) { - ifp = if_lookup_by_index(nexthop->ifindex, - nexthop->vrf_id); - if (ifp && if_is_operative(ifp)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - } else { - if (nexthop_active(AFI_IP6, re, nexthop, rn)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - } - break; - case NEXTHOP_TYPE_BLACKHOLE: - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - break; - default: - break; - } - if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug("\t%s: Unable to find a active nexthop", - __PRETTY_FUNCTION__); - return 0; - } - - /* XXX: What exactly do those checks do? Do we support - * e.g. IPv4 routes with IPv6 nexthops or vice versa? - */ - if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET) - || (family == AFI_IP6 && p->family != AF_INET6)) - return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - - /* The original code didn't determine the family correctly - * e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi - * from the rib_table_info in those cases. - * Possibly it may be better to use only the rib_table_info - * in every case. - */ - if (!family) { - rib_table_info_t *info; - - info = srcdest_rnode_table_info(rn); - family = info->afi; - } - - memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr)); - - zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id); - if (!zvrf) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug("\t%s: zvrf is NULL", __PRETTY_FUNCTION__); - return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - } - - /* It'll get set if required inside */ - ret = zebra_route_map_check(family, re->type, re->instance, p, - nexthop, zvrf, re->tag); - if (ret == RMAP_DENYMATCH) { - if (IS_ZEBRA_DEBUG_RIB) { - srcdest_rnode2str(rn, buf, sizeof(buf)); - zlog_debug( - "%u:%s: Filtering out with NH out %s due to route map", - re->vrf_id, buf, - ifindex2ifname(nexthop->ifindex, - nexthop->vrf_id)); - } - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - } - return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); -} - -/* - * Iterate over all nexthops of the given RIB entry and refresh their - * ACTIVE flag. re->nexthop_active_num is updated accordingly. If any - * nexthop is found to toggle the ACTIVE flag, the whole re structure - * is flagged with ROUTE_ENTRY_CHANGED. - * - * Return value is the new number of active nexthops. - */ -static int nexthop_active_update(struct route_node *rn, struct route_entry *re) -{ - struct nexthop *nexthop; - union g_addr prev_src; - unsigned int prev_active, new_active; - ifindex_t prev_index; - - re->nexthop_active_num = 0; - UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); - - for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) { - /* No protocol daemon provides src and so we're skipping - * tracking it */ - prev_src = nexthop->rmap_src; - prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - prev_index = nexthop->ifindex; - /* - * We need to respect the multipath_num here - * as that what we should be able to install from - * a multipath perpsective should not be a data plane - * decision point. - */ - new_active = nexthop_active_check(rn, re, nexthop); - if (new_active - && re->nexthop_active_num >= zrouter.multipath_num) { - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - new_active = 0; - } - if (new_active) - re->nexthop_active_num++; - /* Don't allow src setting on IPv6 addr for now */ - if (prev_active != new_active || prev_index != nexthop->ifindex - || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX - && nexthop->type < NEXTHOP_TYPE_IPV6) - && prev_src.ipv4.s_addr - != nexthop->rmap_src.ipv4.s_addr) - || ((nexthop->type >= NEXTHOP_TYPE_IPV6 - && nexthop->type < NEXTHOP_TYPE_BLACKHOLE) - && !(IPV6_ADDR_SAME(&prev_src.ipv6, - &nexthop->rmap_src.ipv6))) - || CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED)) { - SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); - SET_FLAG(re->status, ROUTE_ENTRY_NEXTHOPS_CHANGED); - } - } - - return re->nexthop_active_num; -} - /* * Is this RIB labeled-unicast? It must be of type BGP and all paths * (nexthops) must have a label. @@ -1062,8 +587,25 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, switch (ret) { case ZEBRA_DPLANE_REQUEST_QUEUED: SET_FLAG(re->status, ROUTE_ENTRY_QUEUED); - if (old) + + if (old) { SET_FLAG(old->status, ROUTE_ENTRY_QUEUED); + + /* Free old FIB nexthop group */ + if (old->fib_ng.nexthop) { + nexthops_free(old->fib_ng.nexthop); + old->fib_ng.nexthop = NULL; + } + + if (!RIB_SYSTEM_ROUTE(old)) { + /* Clear old route's FIB flags */ + for (ALL_NEXTHOPS(old->ng, nexthop)) { + UNSET_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB); + } + } + } + if (zvrf) zvrf->installs_queued++; break; @@ -1149,6 +691,12 @@ static void rib_uninstall(struct route_node *rn, struct route_entry *re) dest->selected_fib = NULL; + /* Free FIB nexthop group, if present */ + if (re->fib_ng.nexthop) { + nexthops_free(re->fib_ng.nexthop); + re->fib_ng.nexthop = NULL; + } + for (ALL_NEXTHOPS(re->ng, nexthop)) UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } @@ -1841,21 +1389,239 @@ static void zebra_rib_fixup_system(struct route_node *rn) } /* - * Route-update results processing after async dataplane update. + * Update a route from a dplane context. This consolidates common code + * that can be used in processing of results from FIB updates, and in + * async notification processing. + * The return is 'true' if the installed nexthops changed; 'false' otherwise. */ -static void rib_process_result(struct zebra_dplane_ctx *ctx) +static bool rib_update_re_from_ctx(struct route_entry *re, + struct route_node *rn, + struct zebra_dplane_ctx *ctx) +{ + char dest_str[PREFIX_STRLEN] = ""; + char nh_str[NEXTHOP_STRLEN]; + struct nexthop *nexthop, *ctx_nexthop; + bool matched; + const struct nexthop_group *ctxnhg; + bool is_selected = false; /* Is 're' currently the selected re? */ + bool changed_p = false; /* Change to nexthops? */ + rib_dest_t *dest; + + /* Note well: only capturing the prefix string if debug is enabled here; + * unconditional log messages will have to generate the string. + */ + if (IS_ZEBRA_DEBUG_RIB) + prefix2str(&(rn->p), dest_str, sizeof(dest_str)); + + dest = rib_dest_from_rnode(rn); + if (dest) + is_selected = (re == dest->selected_fib); + + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("update_from_ctx: %u:%s: %sSELECTED", + re->vrf_id, dest_str, (is_selected ? "" : "NOT ")); + + /* Update zebra's nexthop FIB flag for each nexthop that was installed. + * If the installed set differs from the set requested by the rib/owner, + * we use the fib-specific nexthop-group to record the actual FIB + * status. + */ + + /* + * First check the fib nexthop-group, if it's present. The comparison + * here is quite strict: we require that the fib sets match exactly. + */ + matched = false; + do { + if (re->fib_ng.nexthop == NULL) + break; + + matched = true; + + /* First check the route's fib nexthops */ + for (ALL_NEXTHOPS(re->fib_ng, nexthop)) { + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + ctx_nexthop = NULL; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), + ctx_nexthop)) { + if (nexthop_same(ctx_nexthop, nexthop)) + break; + } + + if (ctx_nexthop == NULL) { + /* Nexthop not in the new installed set */ + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + nexthop2str(nexthop, nh_str, + sizeof(nh_str)); + zlog_debug("update_from_ctx: no match for fib nh %s", + nh_str); + } + + matched = false; + break; + } + } + + if (!matched) + break; + + /* Check the new installed set */ + ctx_nexthop = NULL; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) { + + if (CHECK_FLAG(ctx_nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) + continue; + + /* Compare with the current group's nexthops */ + nexthop = NULL; + for (ALL_NEXTHOPS(re->fib_ng, nexthop)) { + if (nexthop_same(nexthop, ctx_nexthop)) + break; + } + + if (nexthop == NULL) { + /* Nexthop not in the old installed set */ + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + nexthop2str(ctx_nexthop, nh_str, + sizeof(nh_str)); + zlog_debug("update_from_ctx: no fib match for notif nh %s", + nh_str); + } + matched = false; + break; + } + } + + } while (0); + + /* If the new FIB set matches the existing FIB set, we're done. */ + if (matched) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%u:%s update_from_ctx(): existing fib nhg, no change", + re->vrf_id, dest_str); + goto done; + + } else if (re->fib_ng.nexthop) { + /* + * Free stale fib list and move on to check the rib nhg. + */ + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%u:%s update_from_ctx(): replacing fib nhg", + re->vrf_id, dest_str); + nexthops_free(re->fib_ng.nexthop); + re->fib_ng.nexthop = NULL; + + /* Note that the installed nexthops have changed */ + changed_p = true; + } else { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%u:%s update_from_ctx(): no fib nhg", + re->vrf_id, dest_str); + } + + /* + * Compare with the rib nexthop group. The comparison here is different: + * the RIB group may be a superset of the list installed in the FIB. We + * walk the RIB group, looking for the 'installable' candidate + * nexthops, and then check those against the set + * that is actually installed. + */ + matched = true; + for (ALL_NEXTHOPS(re->ng, nexthop)) { + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + continue; + + /* Check for a FIB nexthop corresponding to the RIB nexthop */ + ctx_nexthop = NULL; + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) { + if (nexthop_same(ctx_nexthop, nexthop)) + break; + } + + /* If the FIB doesn't know about the nexthop, + * it's not installed + */ + if (ctx_nexthop == NULL) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) { + nexthop2str(nexthop, nh_str, sizeof(nh_str)); + zlog_debug("update_from_ctx: no notif match for rib nh %s", + nh_str); + } + matched = false; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + changed_p = true; + + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + break; + } + + if (CHECK_FLAG(ctx_nexthop->flags, NEXTHOP_FLAG_FIB)) { + if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + changed_p = true; + + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + } else { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + changed_p = true; + + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + } + } + + /* If all nexthops were processed, we're done */ + if (matched) { + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%u:%s update_from_ctx(): rib nhg matched, changed '%s'", + re->vrf_id, dest_str, + (changed_p ? "true" : "false")); + goto done; + } + + /* FIB nexthop set differs from the RIB set: + * create a fib-specific nexthop-group + */ + if (IS_ZEBRA_DEBUG_RIB) + zlog_debug("%u:%s update_from_ctx(): changed %s, adding new fib nhg", + re->vrf_id, dest_str, + (changed_p ? "true" : "false")); + + ctxnhg = dplane_ctx_get_ng(ctx); + + if (ctxnhg->nexthop) + copy_nexthops(&(re->fib_ng.nexthop), ctxnhg->nexthop, NULL); + else { + /* Bit of a special case when the fib has _no_ installed + * nexthops. + */ + nexthop = nexthop_new(); + nexthop->type = NEXTHOP_TYPE_IPV4; + nexthop_add(&(re->fib_ng.nexthop), nexthop); + } + +done: + return changed_p; +} + +/* + * Helper to locate a zebra route-node from a dplane context. This is used + * when processing dplane results, e.g. Note well: the route-node is returned + * with a ref held - route_unlock_node() must be called eventually. + */ +static struct route_node * +rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx) { struct route_table *table = NULL; - struct zebra_vrf *zvrf = NULL; struct route_node *rn = NULL; - struct route_entry *re = NULL, *old_re = NULL, *rib; - bool is_update = false; - struct nexthop *nexthop, *ctx_nexthop; - char dest_str[PREFIX_STRLEN] = ""; - enum dplane_op_e op; - enum zebra_dplane_result status; const struct prefix *dest_pfx, *src_pfx; - uint32_t seq; /* Locate rn and re(s) from ctx */ @@ -1865,7 +1631,7 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) dplane_ctx_get_table(ctx)); if (table == NULL) { if (IS_ZEBRA_DEBUG_DPLANE) { - zlog_debug("Failed to process dplane results: no table for afi %d, safi %d, vrf %u", + zlog_debug("Failed to find route for ctx: no table for afi %d, safi %d, vrf %u", dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx), dplane_ctx_get_vrf(ctx)); @@ -1873,8 +1639,35 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) goto done; } - zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx)); + dest_pfx = dplane_ctx_get_dest(ctx); + src_pfx = dplane_ctx_get_src(ctx); + + rn = srcdest_rnode_get(table, dest_pfx, + src_pfx ? (struct prefix_ipv6 *)src_pfx : NULL); + +done: + return rn; +} + + + +/* + * Route-update results processing after async dataplane update. + */ +static void rib_process_result(struct zebra_dplane_ctx *ctx) +{ + struct zebra_vrf *zvrf = NULL; + struct route_node *rn = NULL; + struct route_entry *re = NULL, *old_re = NULL, *rib; + bool is_update = false; + char dest_str[PREFIX_STRLEN] = ""; + enum dplane_op_e op; + enum zebra_dplane_result status; + const struct prefix *dest_pfx, *src_pfx; + uint32_t seq; + bool fib_changed = false; + zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx)); dest_pfx = dplane_ctx_get_dest(ctx); /* Note well: only capturing the prefix string if debug is enabled here; @@ -1883,9 +1676,8 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) if (IS_ZEBRA_DEBUG_DPLANE) prefix2str(dest_pfx, dest_str, sizeof(dest_str)); - src_pfx = dplane_ctx_get_src(ctx); - rn = srcdest_rnode_get(table, dplane_ctx_get_dest(ctx), - src_pfx ? (struct prefix_ipv6 *)src_pfx : NULL); + /* Locate rn and re(s) from ctx */ + rn = rib_find_rn_from_ctx(ctx); if (rn == NULL) { if (IS_ZEBRA_DEBUG_DPLANE) { zlog_debug("Failed to process dplane results: no route for %u:%s", @@ -1979,34 +1771,25 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) UNSET_FLAG(old_re->status, ROUTE_ENTRY_INSTALLED); } - /* Update zebra nexthop FIB flag for each - * nexthop that was installed. - */ - for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), - ctx_nexthop)) { - - if (!re) - continue; - for (ALL_NEXTHOPS(re->ng, nexthop)) { - if (nexthop_same(ctx_nexthop, nexthop)) - break; + /* Update zebra route based on the results in + * the context struct. + */ + if (re) { + fib_changed = + rib_update_re_from_ctx(re, rn, ctx); + + if (!fib_changed) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%u:%s no fib change for re", + dplane_ctx_get_vrf( + ctx), + dest_str); } - if (nexthop == NULL) - continue; - - if (CHECK_FLAG(nexthop->flags, - NEXTHOP_FLAG_RECURSIVE)) - continue; - - if (CHECK_FLAG(ctx_nexthop->flags, - NEXTHOP_FLAG_FIB)) - SET_FLAG(nexthop->flags, - NEXTHOP_FLAG_FIB); - else - UNSET_FLAG(nexthop->flags, - NEXTHOP_FLAG_FIB); + /* Redistribute */ + redistribute_update(dest_pfx, src_pfx, + re, NULL); } /* @@ -2023,19 +1806,6 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) if (zvrf) zvrf->installs++; - /* Redistribute */ - /* - * TODO -- still calling the redist api using the - * route_entries, and there's a corner-case here: - * if there's no client for the 'new' route, a redist - * deleting the 'old' route will be sent. But if the - * 'old' context info was stale, 'old_re' will be - * NULL here and that delete will not be sent. - */ - if (re) - redistribute_update(dest_pfx, src_pfx, - re, old_re); - /* Notify route owner */ zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_INSTALLED); @@ -2110,6 +1880,179 @@ done: dplane_ctx_fini(&ctx); } +/* + * Handle notification from async dataplane: the dataplane has detected + * some change to a route, and notifies zebra so that the control plane + * can reflect that change. + */ +static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx) +{ + struct route_node *rn = NULL; + struct route_entry *re = NULL; + struct nexthop *nexthop; + char dest_str[PREFIX_STRLEN] = ""; + const struct prefix *dest_pfx, *src_pfx; + rib_dest_t *dest; + bool fib_changed = false; + bool debug_p = IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_RIB; + int start_count, end_count; + dest_pfx = dplane_ctx_get_dest(ctx); + + /* Note well: only capturing the prefix string if debug is enabled here; + * unconditional log messages will have to generate the string. + */ + if (debug_p) + prefix2str(dest_pfx, dest_str, sizeof(dest_str)); + + /* Locate rn and re(s) from ctx */ + rn = rib_find_rn_from_ctx(ctx); + if (rn == NULL) { + if (debug_p) { + zlog_debug("Failed to process dplane notification: no routes for %u:%s", + dplane_ctx_get_vrf(ctx), dest_str); + } + goto done; + } + + dest = rib_dest_from_rnode(rn); + srcdest_rnode_prefixes(rn, &dest_pfx, &src_pfx); + + if (debug_p) + zlog_debug("%u:%s Processing dplane notif ctx %p", + dplane_ctx_get_vrf(ctx), dest_str, ctx); + + /* + * Take a pass through the routes, look for matches with the context + * info. + */ + RNODE_FOREACH_RE(rn, re) { + if (rib_route_match_ctx(re, ctx, false /*!update*/)) + break; + } + + /* No match? Nothing we can do */ + if (re == NULL) { + if (debug_p) + zlog_debug("%u:%s Unable to process dplane notification: no entry for type %s", + dplane_ctx_get_vrf(ctx), dest_str, + zebra_route_string( + dplane_ctx_get_type(ctx))); + + goto done; + } + + /* Is this a notification that ... matters? We only really care about + * the route that is currently selected for installation. + */ + if (re != dest->selected_fib) { + /* TODO -- don't skip processing entirely? We might like to + * at least report on the event. + */ + if (debug_p) + zlog_debug("%u:%s dplane notif, but type %s not selected_fib", + dplane_ctx_get_vrf(ctx), dest_str, + zebra_route_string( + dplane_ctx_get_type(ctx))); + goto done; + } + + /* We'll want to determine whether the installation status of the + * route has changed: we'll check the status before processing, + * and then again if there's been a change. + */ + start_count = 0; + for (ALL_NEXTHOPS_PTR(rib_active_nhg(re), nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + start_count++; + } + + /* Update zebra's nexthop FIB flags based on the context struct's + * nexthops. + */ + fib_changed = rib_update_re_from_ctx(re, rn, ctx); + + if (!fib_changed) { + if (debug_p) + zlog_debug("%u:%s No change from dplane notification", + dplane_ctx_get_vrf(ctx), dest_str); + + goto done; + } + + /* + * Perform follow-up work if the actual status of the prefix + * changed. + */ + + end_count = 0; + for (ALL_NEXTHOPS_PTR(rib_active_nhg(re), nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) + end_count++; + } + + /* Various fib transitions: changed nexthops; from installed to + * not-installed; or not-installed to installed. + */ + if (start_count > 0 && end_count > 0) { + + /* Changed nexthops - update kernel/others */ + dplane_route_notif_update(rn, re, + DPLANE_OP_ROUTE_UPDATE, ctx); + + } else if (start_count == 0 && end_count > 0) { + if (debug_p) + zlog_debug("%u:%s installed transition from dplane notification", + dplane_ctx_get_vrf(ctx), dest_str); + + /* We expect this to be the selected route, so we want + * to tell others about this transistion. + */ + SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + + /* Changed nexthops - update kernel/others */ + dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_INSTALL, ctx); + + /* Redistribute, lsp, and nht update */ + redistribute_update(dest_pfx, src_pfx, re, NULL); + + zebra_rib_evaluate_rn_nexthops( + rn, zebra_router_get_next_sequence()); + + zebra_rib_evaluate_mpls(rn); + + } else if (start_count > 0 && end_count == 0) { + if (debug_p) + zlog_debug("%u:%s un-installed transition from dplane notification", + dplane_ctx_get_vrf(ctx), dest_str); + + /* Transition from _something_ installed to _nothing_ + * installed. + */ + /* We expect this to be the selected route, so we want + * to tell others about this transistion. + */ + UNSET_FLAG(re->status, ROUTE_ENTRY_INSTALLED); + + /* Changed nexthops - update kernel/others */ + dplane_route_notif_update(rn, re, DPLANE_OP_ROUTE_DELETE, ctx); + + /* Redistribute, lsp, and nht update */ + redistribute_delete(dest_pfx, src_pfx, re); + + zebra_rib_evaluate_rn_nexthops( + rn, zebra_router_get_next_sequence()); + + zebra_rib_evaluate_mpls(rn); + } + +done: + if (rn) + route_unlock_node(rn); + + /* Return context to dataplane module */ + dplane_ctx_fini(&ctx); +} + /* Take a list of route_node structs and return 1, if there was a record * picked from it and processed by rib_process(). Don't process more, * than one RN record; operate only in the specified sub-queue. @@ -2133,6 +2076,7 @@ static unsigned int process_subq(struct list *subq, uint8_t qindex) if (IS_ZEBRA_DEBUG_RIB_DETAILED) { char buf[SRCDEST2STR_BUFFER]; + srcdest_rnode2str(rnode, buf, sizeof(buf)); zlog_debug("%u:%s: rn %p dequeued from sub-queue %u", zvrf ? zvrf_id(zvrf) : 0, buf, rnode, qindex); @@ -2468,6 +2412,8 @@ void rib_unlink(struct route_node *rn, struct route_entry *re) dest->selected_fib = NULL; nexthops_free(re->ng.nexthop); + nexthops_free(re->fib_ng.nexthop); + XFREE(MTYPE_RE, re); } @@ -3331,13 +3277,40 @@ static int rib_process_dplane_results(struct thread *thread) case DPLANE_OP_ROUTE_INSTALL: case DPLANE_OP_ROUTE_UPDATE: case DPLANE_OP_ROUTE_DELETE: - rib_process_result(ctx); + { + /* Bit of special case for route updates + * that were generated by async notifications: + * we don't want to continue processing these + * in the rib. + */ + if (dplane_ctx_get_notif_provider(ctx) == 0) + rib_process_result(ctx); + else + dplane_ctx_fini(&ctx); + } + break; + + case DPLANE_OP_ROUTE_NOTIFY: + rib_process_dplane_notify(ctx); break; case DPLANE_OP_LSP_INSTALL: case DPLANE_OP_LSP_UPDATE: case DPLANE_OP_LSP_DELETE: - zebra_mpls_lsp_dplane_result(ctx); + { + /* Bit of special case for LSP updates + * that were generated by async notifications: + * we don't want to continue processing these. + */ + if (dplane_ctx_get_notif_provider(ctx) == 0) + zebra_mpls_lsp_dplane_result(ctx); + else + dplane_ctx_fini(&ctx); + } + break; + + case DPLANE_OP_LSP_NOTIFY: + zebra_mpls_process_dplane_notify(ctx); break; case DPLANE_OP_PW_INSTALL: |
