diff options
| author | Rafael Zalamena <rzalamena@users.noreply.github.com> | 2024-12-16 09:57:31 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-16 09:57:31 -0300 |
| commit | 3bebb7be9204a51f194317438aa883f1b10c8646 (patch) | |
| tree | 6e4e508a35f6d03491661b25b645008d8ae7dd5a /pimd/pim_nht.c | |
| parent | 30467f8f2b3a671973aa4b0aa2bcc1e3a208cc56 (diff) | |
| parent | 06c3436a12226d1f7e18e549f562ba9ecde4b394 (diff) | |
Merge pull request #17252 from nabahr/mcast-mode
Fix PIMD RPF lookup mode and nexthop tracking
Diffstat (limited to 'pimd/pim_nht.c')
| -rw-r--r-- | pimd/pim_nht.c | 1314 |
1 files changed, 796 insertions, 518 deletions
diff --git a/pimd/pim_nht.c b/pimd/pim_nht.c index f2dbfa9765..00ab46b4cd 100644 --- a/pimd/pim_nht.c +++ b/pimd/pim_nht.c @@ -38,118 +38,267 @@ * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister * command to Zebra. */ -void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient, - struct pim_nexthop_cache *pnc, int command) +static void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient, pim_addr addr, + int command) { struct prefix p; int ret; - pim_addr_to_prefix(&p, pnc->rpf.rpf_addr); - ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false, - pim->vrf->vrf_id); + pim_addr_to_prefix(&p, addr); + + /* Register to track nexthops from the MRIB */ + ret = zclient_send_rnh(zclient, command, &p, SAFI_MULTICAST, false, false, pim->vrf->vrf_id); + if (ret == ZCLIENT_SEND_FAILURE) + zlog_warn( + "sendmsg_nexthop: zclient_send_message() failed registering MRIB tracking"); + + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: MRIB NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", __func__, + (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, pim->vrf->name, + ret); + + /* Also register to track nexthops from the URIB */ + ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false, pim->vrf->vrf_id); if (ret == ZCLIENT_SEND_FAILURE) - zlog_warn("sendmsg_nexthop: zclient_send_message() failed"); + zlog_warn( + "sendmsg_nexthop: zclient_send_message() failed registering URIB tracking"); if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", - __func__, - (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, - pim->vrf->name, ret); + zlog_debug("%s: URIB NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", __func__, + (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, pim->vrf->name, + ret); return; } -struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim, - struct pim_rpf *rpf) +static struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim, pim_addr addr) { struct pim_nexthop_cache *pnc = NULL; struct pim_nexthop_cache lookup; - lookup.rpf.rpf_addr = rpf->rpf_addr; - pnc = hash_lookup(pim->rpf_hash, &lookup); + lookup.addr = addr; + pnc = hash_lookup(pim->nht_hash, &lookup); return pnc; } -static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim, - struct pim_rpf *rpf_addr) +static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim, pim_addr addr) { struct pim_nexthop_cache *pnc; char hash_name[64]; - pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE, - sizeof(struct pim_nexthop_cache)); - pnc->rpf.rpf_addr = rpf_addr->rpf_addr; + /* This function is only ever called if we are unable to find an entry, so + * the hash_get should always add a new entry + */ + pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE, sizeof(struct pim_nexthop_cache)); + pnc->addr = addr; - pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern); + pnc = hash_get(pim->nht_hash, pnc, hash_alloc_intern); pnc->rp_list = list_new(); pnc->rp_list->cmp = pim_rp_list_cmp; - snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash", - &pnc->rpf.rpf_addr, pim->vrf->name); - pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key, - pim_upstream_equal, hash_name); + snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash", &pnc->addr, + pim->vrf->name); + pnc->upstream_hash = hash_create_size(32, pim_upstream_hash_key, pim_upstream_equal, + hash_name); return pnc; } -static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim, - pim_addr addr) +static bool pim_nht_pnc_has_answer(struct pim_instance *pim, struct pim_nexthop_cache *pnc) +{ + switch (pim->rpf_mode) { + case MCAST_MRIB_ONLY: + return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_ANSWER_RECEIVED); + + case MCAST_URIB_ONLY: + return CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_ANSWER_RECEIVED); + + case MCAST_MIX_MRIB_FIRST: + case MCAST_NO_CONFIG: + case MCAST_MIX_DISTANCE: + case MCAST_MIX_PFXLEN: + /* This check is to determine if we've received an answer necessary to make a NH decision. + * For the mixed modes, where we may lookup from MRIB or URIB, let's require an answer + * for both tables. + */ + return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_ANSWER_RECEIVED) && + CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_ANSWER_RECEIVED); + + default: + break; + } + return false; +} + +static struct pim_nexthop_cache_rib *pim_pnc_get_rib(struct pim_instance *pim, + struct pim_nexthop_cache *pnc) +{ + struct pim_nexthop_cache_rib *pnc_rib = NULL; + + if (pim->rpf_mode == MCAST_MRIB_ONLY) + pnc_rib = &pnc->mrib; + else if (pim->rpf_mode == MCAST_URIB_ONLY) + pnc_rib = &pnc->urib; + else if (pim->rpf_mode == MCAST_MIX_MRIB_FIRST || pim->rpf_mode == MCAST_NO_CONFIG) { + if (pnc->mrib.nexthop_num > 0) + pnc_rib = &pnc->mrib; + else + pnc_rib = &pnc->urib; + } else if (pim->rpf_mode == MCAST_MIX_DISTANCE) { + if (pnc->mrib.distance <= pnc->urib.distance) + pnc_rib = &pnc->mrib; + else + pnc_rib = &pnc->urib; + } else if (pim->rpf_mode == MCAST_MIX_PFXLEN) { + if (pnc->mrib.prefix_len >= pnc->urib.prefix_len) + pnc_rib = &pnc->mrib; + else + pnc_rib = &pnc->urib; + } + + return pnc_rib; +} + +bool pim_nht_pnc_is_valid(struct pim_instance *pim, struct pim_nexthop_cache *pnc) +{ + switch (pim->rpf_mode) { + case MCAST_MRIB_ONLY: + return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_VALID); + + case MCAST_URIB_ONLY: + return CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_VALID); + + case MCAST_MIX_MRIB_FIRST: + case MCAST_NO_CONFIG: + case MCAST_MIX_DISTANCE: + case MCAST_MIX_PFXLEN: + /* The valid flag is set if there are nexthops...so when doing mixed, mrib might not have + * any nexthops, so consider valid if at least one RIB is valid + */ + return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_VALID) || + CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_VALID); + + default: + break; + } + return false; +} + +struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim, pim_addr addr) { struct pim_nexthop_cache *pnc = NULL; - struct pim_rpf rpf; struct zclient *zclient = NULL; zclient = pim_zebra_zclient_get(); - memset(&rpf, 0, sizeof(rpf)); - rpf.rpf_addr = addr; + pnc = pim_nexthop_cache_find(pim, addr); - pnc = pim_nexthop_cache_find(pim, &rpf); - if (!pnc) { - pnc = pim_nexthop_cache_add(pim, &rpf); - pim_sendmsg_zebra_rnh(pim, zclient, pnc, - ZEBRA_NEXTHOP_REGISTER); - if (PIM_DEBUG_PIM_NHT_DETAIL) - zlog_debug( - "%s: NHT cache and zebra notification added for %pPA(%s)", - __func__, &addr, pim->vrf->name); - } + if (pnc) + return pnc; + + pnc = pim_nexthop_cache_add(pim, addr); + pim_sendmsg_zebra_rnh(pim, zclient, pnc->addr, ZEBRA_NEXTHOP_REGISTER); + + if (PIM_DEBUG_PIM_NHT_DETAIL) + zlog_debug("%s: NHT cache and zebra notification added for %pPA(%s)", __func__, + &addr, pim->vrf->name); return pnc; } -/* TBD: this does several distinct things and should probably be split up. - * (checking state vs. returning pnc vs. adding upstream vs. adding rp) +void pim_nht_set_gateway(struct pim_instance *pim, struct pim_nexthop_cache *pnc, pim_addr addr, + struct interface *ifp) +{ + struct nexthop *nh_node = NULL; + struct interface *ifp1 = NULL; + + for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) { + /* If the gateway is already set, then keep it */ +#if PIM_IPV == 4 + if (!pim_addr_is_any(nh_node->gate.ipv4)) + continue; +#else + if (!pim_addr_is_any(nh_node->gate.ipv6)) + continue; +#endif + + /* Only set gateway on the correct interface */ + ifp1 = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); + if (ifp != ifp1) + continue; + + /* Update the gateway address with the given address */ +#if PIM_IPV == 4 + nh_node->gate.ipv4 = addr; +#else + nh_node->gate.ipv6 = addr; +#endif + if (PIM_DEBUG_PIM_NHT_RP) + zlog_debug("%s: addr %pPA new MRIB nexthop addr %pPAs interface %s", + __func__, &pnc->addr, &addr, ifp1->name); + } + + /* Now do the same with URIB nexthop entries */ + for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) { +#if PIM_IPV == 4 + if (!pim_addr_is_any(nh_node->gate.ipv4)) + continue; +#else + if (!pim_addr_is_any(nh_node->gate.ipv6)) + continue; +#endif + + ifp1 = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); + + if (ifp != ifp1) + continue; + +#if PIM_IPV == 4 + nh_node->gate.ipv4 = addr; +#else + nh_node->gate.ipv6 = addr; +#endif + if (PIM_DEBUG_PIM_NHT_RP) + zlog_debug("%s: addr %pPA new URIB nexthop addr %pPAs interface %s", + __func__, &pnc->addr, &addr, ifp1->name); + } +} + +/* Finds the nexthop cache entry for the given address. If no cache, add it for tracking. + * Up and/or rp may be given to add to the nexthop cache entry so that they get updates when the nexthop changes + * If out_pnc is not null, then copy the nexthop cache entry to it. + * Return true if an entry was found and is valid. */ -int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr, - struct pim_upstream *up, struct rp_info *rp, - struct pim_nexthop_cache *out_pnc) +bool pim_nht_find_or_track(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up, + struct rp_info *rp, struct pim_nexthop_cache *out_pnc) { struct pim_nexthop_cache *pnc; struct listnode *ch_node = NULL; + /* This will find the entry and add it to tracking if not found */ pnc = pim_nht_get(pim, addr); assertf(up || rp, "addr=%pPA", &addr); + /* Store the RP if provided and not currently in the list */ if (rp != NULL) { ch_node = listnode_lookup(pnc->rp_list, rp); if (ch_node == NULL) listnode_add_sort(pnc->rp_list, rp); } + /* Store the upstream if provided and not currently in the list */ if (up != NULL) (void)hash_get(pnc->upstream_hash, up, hash_alloc_intern); - if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) { + if (pim_nht_pnc_is_valid(pim, pnc)) { if (out_pnc) memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache)); - return 1; + return true; } - return 0; + return false; } void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr) @@ -157,7 +306,6 @@ void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr) struct pim_nexthop_cache *pnc; pnc = pim_nht_get(pim, addr); - pnc->bsr_count++; } @@ -166,47 +314,47 @@ bool pim_nht_candrp_add(struct pim_instance *pim, pim_addr addr) struct pim_nexthop_cache *pnc; pnc = pim_nht_get(pim, addr); - pnc->candrp_count++; - return CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID); + return pim_nht_pnc_is_valid(pim, pnc); } -static void pim_nht_drop_maybe(struct pim_instance *pim, - struct pim_nexthop_cache *pnc) +static void pim_nht_drop_maybe(struct pim_instance *pim, struct pim_nexthop_cache *pnc) { if (PIM_DEBUG_PIM_NHT) zlog_debug("%s: NHT %pPA(%s) rp_list count:%d upstream count:%ld BSR count:%u Cand-RP count:%u", - __func__, &pnc->rpf.rpf_addr, pim->vrf->name, - pnc->rp_list->count, pnc->upstream_hash->count, - pnc->bsr_count, pnc->candrp_count); + __func__, &pnc->addr, pim->vrf->name, pnc->rp_list->count, + pnc->upstream_hash->count, pnc->bsr_count, pnc->candrp_count); - if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0 && - pnc->bsr_count == 0 && pnc->candrp_count == 0) { + if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0 && pnc->bsr_count == 0 && + pnc->candrp_count == 0) { struct zclient *zclient = pim_zebra_zclient_get(); - pim_sendmsg_zebra_rnh(pim, zclient, pnc, - ZEBRA_NEXTHOP_UNREGISTER); + pim_sendmsg_zebra_rnh(pim, zclient, pnc->addr, ZEBRA_NEXTHOP_UNREGISTER); list_delete(&pnc->rp_list); + hash_free(pnc->upstream_hash); + hash_release(pim->nht_hash, pnc); + + if (pnc->urib.nexthop) + nexthops_free(pnc->urib.nexthop); + if (pnc->mrib.nexthop) + nexthops_free(pnc->mrib.nexthop); - hash_release(pim->rpf_hash, pnc); - if (pnc->nexthop) - nexthops_free(pnc->nexthop); XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc); } } -void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr, - struct pim_upstream *up, struct rp_info *rp) +void pim_nht_delete_tracked(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up, + struct rp_info *rp) { struct pim_nexthop_cache *pnc = NULL; struct pim_nexthop_cache lookup; struct pim_upstream *upstream = NULL; /* Remove from RPF hash if it is the last entry */ - lookup.rpf.rpf_addr = addr; - pnc = hash_lookup(pim->rpf_hash, &lookup); + lookup.addr = addr; + pnc = hash_lookup(pim->nht_hash, &lookup); if (!pnc) { zlog_warn("attempting to delete nonexistent NHT entry %pPA", &addr); @@ -251,9 +399,9 @@ void pim_nht_bsr_del(struct pim_instance *pim, pim_addr addr) if (pim_addr_is_any(addr)) return; - lookup.rpf.rpf_addr = addr; + lookup.addr = addr; - pnc = hash_lookup(pim->rpf_hash, &lookup); + pnc = hash_lookup(pim->nht_hash, &lookup); if (!pnc) { zlog_warn("attempting to delete nonexistent NHT BSR entry %pPA", @@ -272,9 +420,9 @@ void pim_nht_candrp_del(struct pim_instance *pim, pim_addr addr) struct pim_nexthop_cache *pnc = NULL; struct pim_nexthop_cache lookup; - lookup.rpf.rpf_addr = addr; + lookup.addr = addr; - pnc = hash_lookup(pim->rpf_hash, &lookup); + pnc = hash_lookup(pim->nht_hash, &lookup); if (!pnc) { zlog_warn("attempting to delete nonexistent NHT C-RP entry %pPA", @@ -297,10 +445,10 @@ bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr, struct nexthop *nh; struct interface *ifp; - lookup.rpf.rpf_addr = bsr_addr; + lookup.addr = bsr_addr; - pnc = hash_lookup(pim->rpf_hash, &lookup); - if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) { + pnc = hash_lookup(pim->nht_hash, &lookup); + if (!pnc || !pim_nht_pnc_has_answer(pim, pnc)) { /* BSM from a new freshly registered BSR - do a synchronous * zebra query since otherwise we'd drop the first packet, * leading to additional delay in picking up BSM data @@ -359,91 +507,92 @@ bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr, return false; } - if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) - return false; - - /* if we accept BSMs from more than one ECMP nexthop, this will cause - * BSM message "multiplication" for each ECMP hop. i.e. if you have - * 4-way ECMP and 4 hops you end up with 256 copies of each BSM - * message. - * - * so... only accept the first (IPv4) valid nexthop as source. - */ + if (pim_nht_pnc_is_valid(pim, pnc)) { + /* if we accept BSMs from more than one ECMP nexthop, this will cause + * BSM message "multiplication" for each ECMP hop. i.e. if you have + * 4-way ECMP and 4 hops you end up with 256 copies of each BSM + * message. + * + * so... only accept the first (IPv4) valid nexthop as source. + */ + struct pim_nexthop_cache_rib *rib = pim_pnc_get_rib(pim, pnc); - for (nh = pnc->nexthop; nh; nh = nh->next) { - pim_addr nhaddr; + for (nh = rib->nexthop; nh; nh = nh->next) { + pim_addr nhaddr; - switch (nh->type) { + switch (nh->type) { #if PIM_IPV == 4 - case NEXTHOP_TYPE_IPV4: - if (nh->ifindex == IFINDEX_INTERNAL) - continue; + case NEXTHOP_TYPE_IPV4: + if (nh->ifindex == IFINDEX_INTERNAL) + continue; - fallthrough; - case NEXTHOP_TYPE_IPV4_IFINDEX: - nhaddr = nh->gate.ipv4; - break; - case NEXTHOP_TYPE_IPV6: - case NEXTHOP_TYPE_IPV6_IFINDEX: - continue; -#else - case NEXTHOP_TYPE_IPV6: - if (nh->ifindex == IFINDEX_INTERNAL) + fallthrough; + case NEXTHOP_TYPE_IPV4_IFINDEX: + nhaddr = nh->gate.ipv4; + break; + + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: continue; +#else + case NEXTHOP_TYPE_IPV6: + if (nh->ifindex == IFINDEX_INTERNAL) + continue; - fallthrough; - case NEXTHOP_TYPE_IPV6_IFINDEX: - nhaddr = nh->gate.ipv6; - break; - case NEXTHOP_TYPE_IPV4: - case NEXTHOP_TYPE_IPV4_IFINDEX: - continue; + fallthrough; + case NEXTHOP_TYPE_IPV6_IFINDEX: + nhaddr = nh->gate.ipv6; + break; + + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + continue; #endif - case NEXTHOP_TYPE_IFINDEX: - nhaddr = bsr_addr; - break; + case NEXTHOP_TYPE_IFINDEX: + nhaddr = bsr_addr; + break; - case NEXTHOP_TYPE_BLACKHOLE: - continue; - } + case NEXTHOP_TYPE_BLACKHOLE: + continue; + } - ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id); - if (!ifp || !ifp->info) - continue; + ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id); + if (!ifp || !ifp->info) + continue; - if (if_is_loopback(ifp) && if_is_loopback(src_ifp)) - return true; + if (if_is_loopback(ifp) && if_is_loopback(src_ifp)) + return true; - /* MRIB (IGP) may be pointing at a router where PIM is down */ - nbr = pim_neighbor_find(ifp, nhaddr, true); - if (!nbr) - continue; + /* MRIB (IGP) may be pointing at a router where PIM is down */ + nbr = pim_neighbor_find(ifp, nhaddr, true); + if (!nbr) + continue; - /* Are we on the correct interface? */ - if (nh->ifindex == src_ifp->ifindex) { - /* Do we have the correct NH ? */ - if (!pim_addr_cmp(nhaddr, src_ip)) - return true; - /* - * check If the packet came from the neighbor, - * and the dst is a secondary address on the connected interface - */ - return (!pim_addr_cmp(nbr->source_addr, src_ip) && - pim_if_connected_to_source(ifp, nhaddr)); + /* Are we on the correct interface? */ + if (nh->ifindex == src_ifp->ifindex) { + /* Do we have the correct NH ? */ + if (!pim_addr_cmp(nhaddr, src_ip)) + return true; + /* + * check If the packet came from the neighbor, + * and the dst is a secondary address on the connected interface + */ + return (!pim_addr_cmp(nbr->source_addr, src_ip) && + pim_if_connected_to_source(ifp, nhaddr)); + } + return false; } - return false; } return false; } -void pim_rp_nexthop_del(struct rp_info *rp_info) +void pim_nht_rp_del(struct rp_info *rp_info) { rp_info->rp.source_nexthop.interface = NULL; rp_info->rp.source_nexthop.mrib_nexthop_addr = PIMADDR_ANY; rp_info->rp.source_nexthop.mrib_metric_preference = router->infinite_assert_metric.metric_preference; - rp_info->rp.source_nexthop.mrib_route_metric = - router->infinite_assert_metric.route_metric; + rp_info->rp.source_nexthop.mrib_route_metric = router->infinite_assert_metric.route_metric; } /* Update RP nexthop info based on Nexthop update received from Zebra.*/ @@ -461,10 +610,9 @@ static void pim_update_rp_nh(struct pim_instance *pim, ifp = rp_info->rp.source_nexthop.interface; // Compute PIM RPF using cached nexthop - if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, - rp_info->rp.rpf_addr, - &rp_info->group, 1)) - pim_rp_nexthop_del(rp_info); + if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, rp_info->rp.rpf_addr, + &rp_info->group, true)) + pim_nht_rp_del(rp_info); /* * If we transition from no path to a path @@ -544,33 +692,43 @@ static int pim_upstream_nh_if_update_helper(struct hash_bucket *bucket, struct pim_instance *pim = pwd->pim; struct interface *ifp = pwd->ifp; struct nexthop *nh_node = NULL; - ifindex_t first_ifindex; - for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) { - first_ifindex = nh_node->ifindex; - if (ifp != if_lookup_by_index(first_ifindex, pim->vrf->vrf_id)) - continue; + /* This update happens when an interface is added to/removed from pim. + * So go through both MRIB and URIB and update any upstreams for any + * matching nexthop + */ + for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) { + if (ifp->ifindex == nh_node->ifindex) { + if (pnc->upstream_hash->count) { + pim_update_upstream_nh(pim, pnc); + break; + } + } + } - if (pnc->upstream_hash->count) { - pim_update_upstream_nh(pim, pnc); - break; + for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) { + if (ifp->ifindex == nh_node->ifindex) { + if (pnc->upstream_hash->count) { + pim_update_upstream_nh(pim, pnc); + break; + } } } return HASHWALK_CONTINUE; } -void pim_upstream_nh_if_update(struct pim_instance *pim, struct interface *ifp) +void pim_nht_upstream_if_update(struct pim_instance *pim, struct interface *ifp) { struct pnc_hash_walk_data pwd; pwd.pim = pim; pwd.ifp = ifp; - hash_walk(pim->rpf_hash, pim_upstream_nh_if_update_helper, &pwd); + hash_walk(pim->nht_hash, pim_upstream_nh_if_update_helper, &pwd); } -uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp) +static uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp) { uint32_t hash_val; @@ -583,47 +741,42 @@ uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp) return hash_val; } -static int pim_ecmp_nexthop_search(struct pim_instance *pim, - struct pim_nexthop_cache *pnc, - struct pim_nexthop *nexthop, pim_addr src, - struct prefix *grp, int neighbor_needed) +static bool pim_ecmp_nexthop_search(struct pim_instance *pim, struct pim_nexthop_cache *pnc, + struct pim_nexthop *nexthop, pim_addr src, struct prefix *grp, + bool neighbor_needed) { - struct pim_neighbor *nbrs[router->multipath], *nbr = NULL; - struct interface *ifps[router->multipath]; struct nexthop *nh_node = NULL; - ifindex_t first_ifindex; - struct interface *ifp = NULL; - uint32_t hash_val = 0, mod_val = 0; - uint16_t nh_iter = 0, found = 0; - uint32_t i, num_nbrs = 0; - struct pim_interface *pim_ifp; - - if (!pnc || !pnc->nexthop_num || !nexthop) - return 0; - - pim_addr nh_addr = nexthop->mrib_nexthop_addr; - pim_addr grp_addr = pim_addr_from_prefix(grp); + uint32_t hash_val = 0; + uint32_t mod_val = 0; + uint16_t nh_iter = 0; + bool found = false; + uint32_t num_nbrs = 0; + pim_addr nh_addr; + pim_addr grp_addr; + struct pim_nexthop_cache_rib *rib; - memset(&nbrs, 0, sizeof(nbrs)); - memset(&ifps, 0, sizeof(ifps)); + /* Early return if required parameters aren't provided */ + if (!pim || !pnc || !pim_nht_pnc_is_valid(pim, pnc) || !nexthop || !grp) + return false; + nh_addr = nexthop->mrib_nexthop_addr; + grp_addr = pim_addr_from_prefix(grp); + rib = pim_pnc_get_rib(pim, pnc); - // Current Nexthop is VALID, check to stay on the current path. + /* Current Nexthop is VALID, check to stay on the current path. */ if (nexthop->interface && nexthop->interface->info && (!pim_addr_is_any(nh_addr))) { - /* User configured knob to explicitly switch - to new path is disabled or current path - metric is less than nexthop update. + /* User configured knob to explicitly switch to new path is disabled or + * current path metric is less than nexthop update. */ + if (!pim->ecmp_rebalance_enable) { + bool curr_route_valid = false; - if (pim->ecmp_rebalance_enable == 0) { - uint8_t curr_route_valid = 0; - // Check if current nexthop is present in new updated - // Nexthop list. - // If the current nexthop is not valid, candidate to - // choose new Nexthop. - for (nh_node = pnc->nexthop; nh_node; - nh_node = nh_node->next) { + /* Check if current nexthop is present in new updated Nexthop list. + * If the current nexthop is not valid, candidate to choose new + * Nexthop. + */ + for (nh_node = rib->nexthop; nh_node; nh_node = nh_node->next) { curr_route_valid = (nexthop->interface->ifindex == nh_node->ifindex); if (curr_route_valid) @@ -633,9 +786,9 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, if (curr_route_valid && !pim_if_connected_to_source(nexthop->interface, src)) { - nbr = pim_neighbor_find( - nexthop->interface, - nexthop->mrib_nexthop_addr, true); + struct pim_neighbor *nbr = + pim_neighbor_find(nexthop->interface, + nexthop->mrib_nexthop_addr, true); if (!nbr && !if_is_loopback(nexthop->interface)) { if (PIM_DEBUG_PIM_NHT) @@ -646,10 +799,8 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, /* update metric even if the upstream * neighbor stays unchanged */ - nexthop->mrib_metric_preference = - pnc->distance; - nexthop->mrib_route_metric = - pnc->metric; + nexthop->mrib_metric_preference = rib->distance; + nexthop->mrib_route_metric = rib->metric; if (PIM_DEBUG_PIM_NHT) zlog_debug( "%s: (%pPA,%pPA)(%s) current nexthop %s is valid, skipping new path selection", @@ -657,40 +808,39 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, &grp_addr, pim->vrf->name, nexthop->interface->name); - return 1; + return true; } } } } - /* - * Look up all interfaces and neighbors, - * store for later usage - */ - for (nh_node = pnc->nexthop, i = 0; nh_node; - nh_node = nh_node->next, i++) { - ifps[i] = - if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); - if (ifps[i]) { + /* Count the number of neighbors for ECMP */ + for (nh_node = rib->nexthop; nh_node; nh_node = nh_node->next) { + struct pim_neighbor *nbr; + struct interface *ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); + + if (!ifp) + continue; + #if PIM_IPV == 4 - pim_addr nhaddr = nh_node->gate.ipv4; + pim_addr nhaddr = nh_node->gate.ipv4; #else - pim_addr nhaddr = nh_node->gate.ipv6; + pim_addr nhaddr = nh_node->gate.ipv6; #endif - nbrs[i] = pim_neighbor_find(ifps[i], nhaddr, true); - if (nbrs[i] || pim_if_connected_to_source(ifps[i], src)) - num_nbrs++; - } + nbr = pim_neighbor_find(ifp, nhaddr, true); + if (nbr || pim_if_connected_to_source(ifp, src)) + num_nbrs++; } + if (pim->ecmp_enable) { struct prefix src_pfx; - uint32_t consider = pnc->nexthop_num; + uint32_t consider = rib->nexthop_num; if (neighbor_needed && num_nbrs < consider) consider = num_nbrs; if (consider == 0) - return 0; + return false; // PIM ECMP flag is enable then choose ECMP path. pim_addr_to_prefix(&src_pfx, src); @@ -698,16 +848,16 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, mod_val = hash_val % consider; } - for (nh_node = pnc->nexthop; nh_node && (found == 0); - nh_node = nh_node->next) { - first_ifindex = nh_node->ifindex; - ifp = ifps[nh_iter]; + for (nh_node = rib->nexthop; nh_node && !found; nh_node = nh_node->next) { + struct pim_neighbor *nbr = NULL; + struct pim_interface *pim_ifp; + struct interface *ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); + if (!ifp) { if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s %s: could not find interface for ifindex %d (address %pPA(%s))", - __FILE__, __func__, first_ifindex, &src, - pim->vrf->name); + zlog_debug("%s %s: could not find interface for ifindex %d (address %pPA(%s))", + __FILE__, __func__, nh_node->ifindex, &src, + pim->vrf->name); if (nh_iter == mod_val) mod_val++; // Select nexthpath nh_iter++; @@ -718,10 +868,9 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, if (!pim_ifp || !pim_ifp->pim_enable) { if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", - __func__, ifp->name, pim->vrf->name, - first_ifindex, &src); + zlog_debug("%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", + __func__, ifp->name, pim->vrf->name, nh_node->ifindex, + &src); if (nh_iter == mod_val) mod_val++; // Select nexthpath nh_iter++; @@ -729,7 +878,12 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, } if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) { - nbr = nbrs[nh_iter]; +#if PIM_IPV == 4 + nbr = pim_neighbor_find(ifp, nh_node->gate.ipv4, true); +#else + nbr = pim_neighbor_find(ifp, nh_node->gate.ipv6, true); +#endif + if (!nbr && !if_is_loopback(ifp)) { if (PIM_DEBUG_PIM_NHT) zlog_debug( @@ -750,12 +904,12 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, #else nexthop->mrib_nexthop_addr = nh_node->gate.ipv6; #endif - nexthop->mrib_metric_preference = pnc->distance; - nexthop->mrib_route_metric = pnc->metric; + nexthop->mrib_metric_preference = rib->distance; + nexthop->mrib_route_metric = rib->metric; nexthop->last_lookup = src; nexthop->last_lookup_time = pim_time_monotonic_usec(); nexthop->nbr = nbr; - found = 1; + found = true; if (PIM_DEBUG_PIM_NHT) zlog_debug( "%s: (%pPA,%pPA)(%s) selected nhop interface %s addr %pPAs mod_val %u iter %d ecmp %d", @@ -766,260 +920,55 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim, nh_iter++; } - if (found) - return 1; - else - return 0; + return found; } -/* This API is used to parse Registered address nexthop update coming from Zebra - */ -void pim_nexthop_update(struct vrf *vrf, struct prefix *match, - struct zapi_route *nhr) -{ - struct nexthop *nexthop; - struct nexthop *nhlist_head = NULL; - struct nexthop *nhlist_tail = NULL; - int i; - struct pim_rpf rpf; - struct pim_nexthop_cache *pnc = NULL; - struct interface *ifp = NULL; - struct pim_instance *pim; - - pim = vrf->info; - - rpf.rpf_addr = pim_addr_from_prefix(match); - pnc = pim_nexthop_cache_find(pim, &rpf); - if (!pnc) { - if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: Skipping NHT update, addr %pPA is not in local cached DB.", - __func__, &rpf.rpf_addr); - return; - } - - pnc->last_update = pim_time_monotonic_usec(); - - if (nhr->nexthop_num) { - pnc->nexthop_num = 0; - - for (i = 0; i < nhr->nexthop_num; i++) { - nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); - switch (nexthop->type) { - case NEXTHOP_TYPE_IFINDEX: - /* - * Connected route (i.e. no nexthop), use - * RPF address from nexthop cache (i.e. - * destination) as PIM nexthop. - */ -#if PIM_IPV == 4 - nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX; - nexthop->gate.ipv4 = pnc->rpf.rpf_addr; -#else - nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX; - nexthop->gate.ipv6 = pnc->rpf.rpf_addr; -#endif - break; -#if PIM_IPV == 4 - /* RFC5549 IPv4-over-IPv6 nexthop handling: - * if we get an IPv6 nexthop in IPv4 PIM, hunt down a - * PIM neighbor and use that instead. - */ - case NEXTHOP_TYPE_IPV6_IFINDEX: { - struct interface *ifp1 = NULL; - struct pim_neighbor *nbr = NULL; - - ifp1 = if_lookup_by_index(nexthop->ifindex, - pim->vrf->vrf_id); - - if (!ifp1) - nbr = NULL; - else - /* FIXME: should really use nbr's - * secondary address list here - */ - nbr = pim_neighbor_find_if(ifp1); - - /* Overwrite with Nbr address as NH addr */ - if (nbr) - nexthop->gate.ipv4 = nbr->source_addr; - else - // Mark nexthop address to 0 until PIM - // Nbr is resolved. - nexthop->gate.ipv4 = PIMADDR_ANY; - - break; - } -#else - case NEXTHOP_TYPE_IPV6_IFINDEX: -#endif - case NEXTHOP_TYPE_IPV6: - case NEXTHOP_TYPE_IPV4: - case NEXTHOP_TYPE_IPV4_IFINDEX: - case NEXTHOP_TYPE_BLACKHOLE: - /* nothing to do for the other nexthop types */ - break; - } - - ifp = if_lookup_by_index(nexthop->ifindex, - pim->vrf->vrf_id); - if (!ifp) { - if (PIM_DEBUG_PIM_NHT) { - char buf[NEXTHOP_STRLEN]; - zlog_debug( - "%s: could not find interface for ifindex %d(%s) (addr %s)", - __func__, nexthop->ifindex, - pim->vrf->name, - nexthop2str(nexthop, buf, - sizeof(buf))); - } - nexthop_free(nexthop); - continue; - } - - if (PIM_DEBUG_PIM_NHT) { -#if PIM_IPV == 4 - pim_addr nhaddr = nexthop->gate.ipv4; -#else - pim_addr nhaddr = nexthop->gate.ipv6; -#endif - zlog_debug("%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ", - __func__, match, pim->vrf->name, - i + 1, &nhaddr, ifp->name, - nexthop->type, nhr->distance, - nhr->metric); - } - - if (!ifp->info) { - /* - * Though Multicast is not enabled on this - * Interface store it in database otheriwse we - * may miss this update and this will not cause - * any issue, because while choosing the path we - * are ommitting the Interfaces which are not - * multicast enabled - */ - if (PIM_DEBUG_PIM_NHT) { - char buf[NEXTHOP_STRLEN]; - - zlog_debug( - "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)", - __func__, ifp->name, - pim->vrf->name, - nexthop->ifindex, - nexthop2str(nexthop, buf, - sizeof(buf))); - } - } - - if (nhlist_tail) { - nhlist_tail->next = nexthop; - nhlist_tail = nexthop; - } else { - nhlist_tail = nexthop; - nhlist_head = nexthop; - } - - // Keep track of all nexthops, even PIM-disabled ones. - pnc->nexthop_num++; - } - /* Reset existing pnc->nexthop before assigning new list */ - nexthops_free(pnc->nexthop); - pnc->nexthop = nhlist_head; - if (pnc->nexthop_num) { - pnc->flags |= PIM_NEXTHOP_VALID; - pnc->distance = nhr->distance; - pnc->metric = nhr->metric; - } - } else { - pnc->flags &= ~PIM_NEXTHOP_VALID; - pnc->nexthop_num = nhr->nexthop_num; - nexthops_free(pnc->nexthop); - pnc->nexthop = NULL; - } - SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED); - - if (PIM_DEBUG_PIM_NHT) - zlog_debug("%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d", - __func__, match, pim->vrf->name, nhr->nexthop_num, - pnc->nexthop_num, vrf->vrf_id, - pnc->upstream_hash->count, listcount(pnc->rp_list)); - - pim_rpf_set_refresh_time(pim); - - if (listcount(pnc->rp_list)) - pim_update_rp_nh(pim, pnc); - if (pnc->upstream_hash->count) - pim_update_upstream_nh(pim, pnc); - - if (pnc->candrp_count) - pim_crp_nht_update(pim, pnc); -} - -int pim_ecmp_nexthop_lookup(struct pim_instance *pim, - struct pim_nexthop *nexthop, pim_addr src, - struct prefix *grp, int neighbor_needed) +bool pim_nht_lookup_ecmp(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr src, + struct prefix *grp, bool neighbor_needed) { struct pim_nexthop_cache *pnc; struct pim_zlookup_nexthop nexthop_tab[router->multipath]; - struct pim_neighbor *nbrs[router->multipath], *nbr = NULL; - struct pim_rpf rpf; int num_ifindex; - struct interface *ifps[router->multipath], *ifp; - int first_ifindex; - int found = 0; + bool found = false; uint16_t i = 0; - uint32_t hash_val = 0, mod_val = 0; + uint32_t hash_val = 0; + uint32_t mod_val = 0; uint32_t num_nbrs = 0; - struct pim_interface *pim_ifp; if (PIM_DEBUG_PIM_NHT_DETAIL) - zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld", - __func__, &src, pim->vrf->name, - nexthop->last_lookup_time); - - rpf.rpf_addr = src; + zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld", __func__, &src, + pim->vrf->name, nexthop->last_lookup_time); - pnc = pim_nexthop_cache_find(pim, &rpf); + pnc = pim_nexthop_cache_find(pim, src); if (pnc) { - if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) - return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp, - neighbor_needed); + if (pim_nht_pnc_has_answer(pim, pnc)) + return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp, neighbor_needed); } - memset(nexthop_tab, 0, - sizeof(struct pim_zlookup_nexthop) * router->multipath); - num_ifindex = - zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src, - PIM_NEXTHOP_LOOKUP_MAX); + memset(nexthop_tab, 0, sizeof(struct pim_zlookup_nexthop) * router->multipath); + num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src, + PIM_NEXTHOP_LOOKUP_MAX); if (num_ifindex < 1) { if (PIM_DEBUG_PIM_NHT) - zlog_warn( - "%s: could not find nexthop ifindex for address %pPA(%s)", - __func__, &src, pim->vrf->name); - return 0; + zlog_warn("%s: could not find nexthop ifindex for address %pPA(%s)", + __func__, &src, pim->vrf->name); + return false; } - memset(&nbrs, 0, sizeof(nbrs)); - memset(&ifps, 0, sizeof(ifps)); - - /* - * Look up all interfaces and neighbors, - * store for later usage - */ + /* Count the number of neighbors for ECMP computation */ for (i = 0; i < num_ifindex; i++) { - ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex, - pim->vrf->vrf_id); - if (ifps[i]) { - nbrs[i] = pim_neighbor_find( - ifps[i], nexthop_tab[i].nexthop_addr, true); - - if (nbrs[i] || pim_if_connected_to_source(ifps[i], src)) - num_nbrs++; - } + struct pim_neighbor *nbr; + struct interface *ifp = if_lookup_by_index(nexthop_tab[i].ifindex, pim->vrf->vrf_id); + + if (!ifp) + continue; + + nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true); + if (nbr || pim_if_connected_to_source(ifp, src)) + num_nbrs++; } - // If PIM ECMP enable then choose ECMP path. + /* If PIM ECMP enable then choose ECMP path. */ if (pim->ecmp_enable) { struct prefix src_pfx; uint32_t consider = num_ifindex; @@ -1028,30 +977,27 @@ int pim_ecmp_nexthop_lookup(struct pim_instance *pim, consider = num_nbrs; if (consider == 0) - return 0; + return false; pim_addr_to_prefix(&src_pfx, src); hash_val = pim_compute_ecmp_hash(&src_pfx, grp); mod_val = hash_val % consider; if (PIM_DEBUG_PIM_NHT_DETAIL) - zlog_debug("%s: hash_val %u mod_val %u", __func__, - hash_val, mod_val); + zlog_debug("%s: hash_val %u mod_val %u", __func__, hash_val, mod_val); } - i = 0; - while (!found && (i < num_ifindex)) { - first_ifindex = nexthop_tab[i].ifindex; + for (i = 0; i < num_ifindex && !found; i++) { + struct pim_neighbor *nbr = NULL; + struct pim_interface *pim_ifp; + struct interface *ifp = if_lookup_by_index(nexthop_tab[i].ifindex, pim->vrf->vrf_id); - ifp = ifps[i]; if (!ifp) { if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s %s: could not find interface for ifindex %d (address %pPA(%s))", - __FILE__, __func__, first_ifindex, &src, - pim->vrf->name); + zlog_debug("%s %s: could not find interface for ifindex %d (address %pPA(%s))", + __FILE__, __func__, nexthop_tab[i].ifindex, &src, + pim->vrf->name); if (i == mod_val) mod_val++; - i++; continue; } @@ -1059,99 +1005,431 @@ int pim_ecmp_nexthop_lookup(struct pim_instance *pim, if (!pim_ifp || !pim_ifp->pim_enable) { if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", - __func__, ifp->name, pim->vrf->name, - first_ifindex, &src); + zlog_debug("%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", + __func__, ifp->name, pim->vrf->name, + nexthop_tab[i].ifindex, &src); if (i == mod_val) mod_val++; - i++; continue; } + if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) { - nbr = nbrs[i]; + nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true); if (PIM_DEBUG_PIM_NHT_DETAIL) - zlog_debug("ifp name: %s(%s), pim nbr: %p", - ifp->name, pim->vrf->name, nbr); + zlog_debug("ifp name: %s(%s), pim nbr: %p", ifp->name, + pim->vrf->name, nbr); if (!nbr && !if_is_loopback(ifp)) { + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)", + __func__, &nexthop_tab[i].nexthop_addr, + ifp->name, pim->vrf->name, &src); if (i == mod_val) mod_val++; - if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)", - __func__, - &nexthop_tab[i].nexthop_addr, - ifp->name, pim->vrf->name, - &src); - i++; continue; } } if (i == mod_val) { if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d", - __func__, &nexthop_tab[i].nexthop_addr, - &src, ifp->name, pim->vrf->name, - nexthop_tab[i].route_metric, - nexthop_tab[i].protocol_distance); + zlog_debug("%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d", + __func__, &nexthop_tab[i].nexthop_addr, &src, ifp->name, + pim->vrf->name, nexthop_tab[i].route_metric, + nexthop_tab[i].protocol_distance); /* update nexthop data */ nexthop->interface = ifp; - nexthop->mrib_nexthop_addr = - nexthop_tab[i].nexthop_addr; - nexthop->mrib_metric_preference = - nexthop_tab[i].protocol_distance; - nexthop->mrib_route_metric = - nexthop_tab[i].route_metric; + nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr; + nexthop->mrib_metric_preference = nexthop_tab[i].protocol_distance; + nexthop->mrib_route_metric = nexthop_tab[i].route_metric; nexthop->last_lookup = src; nexthop->last_lookup_time = pim_time_monotonic_usec(); nexthop->nbr = nbr; - found = 1; + found = true; } - i++; } - if (found) - return 1; - else - return 0; + return found; } -int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src, - struct prefix *grp) +bool pim_nht_lookup(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr addr, + int neighbor_needed) +{ + struct pim_zlookup_nexthop nexthop_tab[router->multipath]; + struct pim_neighbor *nbr = NULL; + int num_ifindex; + struct interface *ifp = NULL; + ifindex_t first_ifindex = 0; + bool found = false; + int i = 0; + struct pim_interface *pim_ifp; + +#if PIM_IPV == 4 + /* + * We should not attempt to lookup a + * 255.255.255.255 address, since + * it will never work + */ + if (pim_addr_is_any(addr)) + return false; +#endif + + if ((!pim_addr_cmp(nexthop->last_lookup, addr)) && + (nexthop->last_lookup_time > pim->last_route_change_time)) { + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: Using last lookup for %pPAs at %lld, %" PRId64 " addr %pPAs", + __func__, &addr, nexthop->last_lookup_time, + pim->last_route_change_time, &nexthop->mrib_nexthop_addr); + pim->nexthop_lookups_avoided++; + return true; + } + + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: Looking up: %pPAs, last lookup time: %lld, %" PRId64, __func__, + &addr, nexthop->last_lookup_time, pim->last_route_change_time); + + memset(nexthop_tab, 0, sizeof(struct pim_zlookup_nexthop) * router->multipath); + num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, addr, + PIM_NEXTHOP_LOOKUP_MAX); + if (num_ifindex < 1) { + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: could not find nexthop ifindex for address %pPAs", __func__, + &addr); + return false; + } + + while (!found && (i < num_ifindex)) { + first_ifindex = nexthop_tab[i].ifindex; + + ifp = if_lookup_by_index(first_ifindex, pim->vrf->vrf_id); + if (!ifp) { + if (PIM_DEBUG_ZEBRA) + zlog_debug("%s: could not find interface for ifindex %d (address %pPAs)", + __func__, first_ifindex, &addr); + i++; + continue; + } + + pim_ifp = ifp->info; + if (!pim_ifp || !pim_ifp->pim_enable) { + if (PIM_DEBUG_ZEBRA) + zlog_debug("%s: pim not enabled on input interface %s (ifindex=%d, RPF for source %pPAs)", + __func__, ifp->name, first_ifindex, &addr); + i++; + } else if (neighbor_needed && !pim_if_connected_to_source(ifp, addr)) { + nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true); + if (PIM_DEBUG_PIM_TRACE_DETAIL) + zlog_debug("ifp name: %s, pim nbr: %p", ifp->name, nbr); + if (!nbr && !if_is_loopback(ifp)) + i++; + else + found = true; + } else + found = true; + } + + if (found) { + if (PIM_DEBUG_ZEBRA) + zlog_debug("%s: found nexthop %pPAs for address %pPAs: interface %s ifindex=%d metric=%d pref=%d", + __func__, &nexthop_tab[i].nexthop_addr, &addr, ifp->name, + first_ifindex, nexthop_tab[i].route_metric, + nexthop_tab[i].protocol_distance); + + /* update nexthop data */ + nexthop->interface = ifp; + nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr; + nexthop->mrib_metric_preference = nexthop_tab[i].protocol_distance; + nexthop->mrib_route_metric = nexthop_tab[i].route_metric; + nexthop->last_lookup = addr; + nexthop->last_lookup_time = pim_time_monotonic_usec(); + nexthop->nbr = nbr; + return true; + } else + return false; +} + +int pim_nht_lookup_ecmp_if_vif_index(struct pim_instance *pim, pim_addr src, struct prefix *grp) { struct pim_nexthop nhop; int vif_index; ifindex_t ifindex; memset(&nhop, 0, sizeof(nhop)); - if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) { + if (!pim_nht_lookup_ecmp(pim, &nhop, src, grp, true)) { if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: could not find nexthop ifindex for address %pPA(%s)", - __func__, &src, pim->vrf->name); + zlog_debug("%s: could not find nexthop ifindex for address %pPA(%s)", + __func__, &src, pim->vrf->name); return -1; } ifindex = nhop.interface->ifindex; if (PIM_DEBUG_PIM_NHT) - zlog_debug( - "%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA", - __func__, ifindex, - ifindex2ifname(ifindex, pim->vrf->vrf_id), - pim->vrf->name, &src); + zlog_debug("%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA", + __func__, ifindex, ifindex2ifname(ifindex, pim->vrf->vrf_id), + pim->vrf->name, &src); vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex); if (vif_index < 0) { if (PIM_DEBUG_PIM_NHT) { - zlog_debug( - "%s: low vif_index=%d(%s) < 1 nexthop for address %pPA", - __func__, vif_index, pim->vrf->name, &src); + zlog_debug("%s: low vif_index=%d(%s) < 1 nexthop for address %pPA", + __func__, vif_index, pim->vrf->name, &src); } return -2; } return vif_index; } + +/* This API is used to parse Registered address nexthop update coming from Zebra + */ +void pim_nexthop_update(struct vrf *vrf, struct prefix *match, struct zapi_route *nhr) +{ + struct nexthop *nhlist_head = NULL; + struct nexthop *nhlist_tail = NULL; + struct pim_nexthop_cache *pnc = NULL; + struct pim_nexthop_cache_rib *pnc_rib = NULL; + struct interface *ifp = NULL; + struct pim_instance *pim; + pim_addr addr; + + pim = vrf->info; + addr = pim_addr_from_prefix(match); + pnc = pim_nexthop_cache_find(pim, addr); + if (!pnc) { + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: Skipping NHT update, addr %pPA is not in local cached DB.", + __func__, &addr); + return; + } + + if (nhr->safi == SAFI_UNICAST) + pnc_rib = &pnc->urib; + else if (nhr->safi == SAFI_MULTICAST) + pnc_rib = &pnc->mrib; + else + return; + + pnc_rib->last_update = pim_time_monotonic_usec(); + SET_FLAG(pnc_rib->flags, PIM_NEXTHOP_ANSWER_RECEIVED); + UNSET_FLAG(pnc_rib->flags, PIM_NEXTHOP_VALID); + pnc_rib->nexthop_num = 0; + /* Free the existing nexthop list, resets with any valid nexthops from the update */ + nexthops_free(pnc_rib->nexthop); + pnc_rib->nexthop = NULL; + + for (int i = 0; i < nhr->nexthop_num; i++) { + struct nexthop *nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); + + switch (nexthop->type) { + case NEXTHOP_TYPE_IFINDEX: + /* + * Connected route (i.e. no nexthop), use + * RPF address from nexthop cache (i.e. + * destination) as PIM nexthop. + */ +#if PIM_IPV == 4 + nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX; + nexthop->gate.ipv4 = pnc->addr; +#else + nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX; + nexthop->gate.ipv6 = pnc->addr; +#endif + break; + +#if PIM_IPV == 4 + /* RFC5549 IPv4-over-IPv6 nexthop handling: + * if we get an IPv6 nexthop in IPv4 PIM, hunt down a + * PIM neighbor and use that instead. + */ + case NEXTHOP_TYPE_IPV6_IFINDEX: { + struct pim_neighbor *nbr = NULL; + struct interface *ifp1 = if_lookup_by_index(nexthop->ifindex, + pim->vrf->vrf_id); + + if (ifp1) + /* FIXME: should really use nbr's + * secondary address list here + */ + nbr = pim_neighbor_find_if(ifp1); + + /* Overwrite with Nbr address as NH addr */ + if (nbr) + nexthop->gate.ipv4 = nbr->source_addr; + else + /* Mark nexthop address to 0 until PIM Nbr is resolved. */ + nexthop->gate.ipv4 = PIMADDR_ANY; + + break; + } +#else + case NEXTHOP_TYPE_IPV6_IFINDEX: +#endif + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + case NEXTHOP_TYPE_BLACKHOLE: + /* nothing to do for the other nexthop types */ + break; + } + + ifp = if_lookup_by_index(nexthop->ifindex, pim->vrf->vrf_id); + if (!ifp) { + if (PIM_DEBUG_PIM_NHT) { + char buf[NEXTHOP_STRLEN]; + zlog_debug("%s: could not find interface for ifindex %d(%s) (addr %s)", + __func__, nexthop->ifindex, pim->vrf->name, + nexthop2str(nexthop, buf, sizeof(buf))); + } + nexthop_free(nexthop); + continue; + } + + if (PIM_DEBUG_PIM_NHT) { +#if PIM_IPV == 4 + pim_addr nhaddr = nexthop->gate.ipv4; +#else + pim_addr nhaddr = nexthop->gate.ipv6; +#endif + zlog_debug("%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ", + __func__, match, pim->vrf->name, i + 1, &nhaddr, ifp->name, + nexthop->type, nhr->distance, nhr->metric); + } + + if (!ifp->info) { + /* + * Though Multicast is not enabled on this + * Interface store it in database otheriwse we + * may miss this update and this will not cause + * any issue, because while choosing the path we + * are ommitting the Interfaces which are not + * multicast enabled + */ + if (PIM_DEBUG_PIM_NHT) { + char buf[NEXTHOP_STRLEN]; + + zlog_debug("%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)", + __func__, ifp->name, pim->vrf->name, nexthop->ifindex, + nexthop2str(nexthop, buf, sizeof(buf))); + } + } + + if (nhlist_tail) { + nhlist_tail->next = nexthop; + nhlist_tail = nexthop; + } else { + nhlist_tail = nexthop; + nhlist_head = nexthop; + } + + /* Keep track of all nexthops, even PIM-disabled ones. */ + pnc_rib->nexthop_num++; + } /* End for nexthops */ + + /* Assign the list if there are nexthops */ + if (pnc_rib->nexthop_num) { + SET_FLAG(pnc_rib->flags, PIM_NEXTHOP_VALID); + pnc_rib->nexthop = nhlist_head; + pnc_rib->distance = nhr->distance; + pnc_rib->metric = nhr->metric; + pnc_rib->prefix_len = nhr->prefix.prefixlen; + } + + if (PIM_DEBUG_PIM_NHT) + zlog_debug("%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d", + __func__, match, pim->vrf->name, nhr->nexthop_num, pnc_rib->nexthop_num, + vrf->vrf_id, pnc->upstream_hash->count, listcount(pnc->rp_list)); + + pim_rpf_set_refresh_time(pim); + + if (listcount(pnc->rp_list)) + pim_update_rp_nh(pim, pnc); + if (pnc->upstream_hash->count) + pim_update_upstream_nh(pim, pnc); + + if (pnc->candrp_count) + pim_crp_nht_update(pim, pnc); +} + +static int pim_nht_hash_mode_update_helper(struct hash_bucket *bucket, void *arg) +{ + struct pim_nexthop_cache *pnc = bucket->data; + struct pnc_hash_walk_data *pwd = arg; + struct pim_instance *pim = pwd->pim; + + if (listcount(pnc->rp_list)) + pim_update_rp_nh(pim, pnc); + + if (pnc->upstream_hash->count) + pim_update_upstream_nh(pim, pnc); + + if (pnc->candrp_count) + pim_crp_nht_update(pim, pnc); + + return HASHWALK_CONTINUE; +} + +void pim_nht_mode_changed(struct pim_instance *pim) +{ + struct pnc_hash_walk_data pwd; + + /* Update the refresh time to force new lookups if needed */ + pim_rpf_set_refresh_time(pim); + + /* Force update the registered RP and upstreams for all cache entries */ + pwd.pim = pim; + hash_walk(pim->nht_hash, pim_nht_hash_mode_update_helper, &pwd); +} + +/* Cleanup pim->nht_hash each node data */ +static void pim_nht_hash_clean(void *data) +{ + struct pim_nexthop_cache *pnc = (struct pim_nexthop_cache *)data; + + list_delete(&pnc->rp_list); + hash_clean_and_free(&pnc->upstream_hash, NULL); + + if (pnc->mrib.nexthop) + nexthops_free(pnc->mrib.nexthop); + + if (pnc->urib.nexthop) + nexthops_free(pnc->urib.nexthop); + + XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc); +} + +static unsigned int pim_nht_hash_key(const void *arg) +{ + const struct pim_nexthop_cache *r = arg; + +#if PIM_IPV == 4 + return jhash_1word(r->addr.s_addr, 0); +#else + return jhash2(r->addr.s6_addr32, array_size(r->addr.s6_addr32), 0); +#endif +} + +static bool pim_nht_equal(const void *arg1, const void *arg2) +{ + const struct pim_nexthop_cache *r1 = arg1; + const struct pim_nexthop_cache *r2 = arg2; + + return (!pim_addr_cmp(r1->addr, r2->addr)); +} + +void pim_nht_init(struct pim_instance *pim) +{ + char hash_name[64]; + + snprintf(hash_name, sizeof(hash_name), "PIM %s NHT Hash", pim->vrf->name); + pim->nht_hash = hash_create_size(256, pim_nht_hash_key, pim_nht_equal, hash_name); + + pim->rpf_mode = MCAST_NO_CONFIG; + + if (PIM_DEBUG_ZEBRA) + zlog_debug("%s: NHT hash init: %s ", __func__, hash_name); +} + +void pim_nht_terminate(struct pim_instance *pim) +{ + /* Traverse and cleanup nht_hash */ + hash_clean_and_free(&pim->nht_hash, (void *)pim_nht_hash_clean); +} |
