summary refs log tree commit diff
path: root/pimd/pim_nht.c
diff options
context:
space:
mode:
author Rafael Zalamena <rzalamena@users.noreply.github.com> 2024-12-16 09:57:31 -0300
committer GitHub <noreply@github.com> 2024-12-16 09:57:31 -0300
commit 3bebb7be9204a51f194317438aa883f1b10c8646 (patch)
tree 6e4e508a35f6d03491661b25b645008d8ae7dd5a /pimd/pim_nht.c
parent 30467f8f2b3a671973aa4b0aa2bcc1e3a208cc56 (diff)
parent 06c3436a12226d1f7e18e549f562ba9ecde4b394 (diff)
Merge pull request #17252 from nabahr/mcast-mode
Fix PIMD RPF lookup mode and nexthop tracking
Diffstat (limited to 'pimd/pim_nht.c')
-rw-r--r-- pimd/pim_nht.c 1314
1 files changed, 796 insertions, 518 deletions
diff --git a/pimd/pim_nht.c b/pimd/pim_nht.c
index f2dbfa9765..00ab46b4cd 100644
--- a/pimd/pim_nht.c
+++ b/pimd/pim_nht.c
@@ -38,118 +38,267 @@
* pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
* command to Zebra.
*/
-void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient,
- struct pim_nexthop_cache *pnc, int command)
+static void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient, pim_addr addr,
+ int command)
{
struct prefix p;
int ret;
- pim_addr_to_prefix(&p, pnc->rpf.rpf_addr);
- ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false,
- pim->vrf->vrf_id);
+ pim_addr_to_prefix(&p, addr);
+
+ /* Register to track nexthops from the MRIB */
+ ret = zclient_send_rnh(zclient, command, &p, SAFI_MULTICAST, false, false, pim->vrf->vrf_id);
+ if (ret == ZCLIENT_SEND_FAILURE)
+ zlog_warn(
+ "sendmsg_nexthop: zclient_send_message() failed registering MRIB tracking");
+
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: MRIB NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", __func__,
+ (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, pim->vrf->name,
+ ret);
+
+ /* Also register to track nexthops from the URIB */
+ ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false, pim->vrf->vrf_id);
if (ret == ZCLIENT_SEND_FAILURE)
- zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
+ zlog_warn(
+ "sendmsg_nexthop: zclient_send_message() failed registering URIB tracking");
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
- __func__,
- (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p,
- pim->vrf->name, ret);
+ zlog_debug("%s: URIB NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", __func__,
+ (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, pim->vrf->name,
+ ret);
return;
}
-struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim,
- struct pim_rpf *rpf)
+static struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim, pim_addr addr)
{
struct pim_nexthop_cache *pnc = NULL;
struct pim_nexthop_cache lookup;
- lookup.rpf.rpf_addr = rpf->rpf_addr;
- pnc = hash_lookup(pim->rpf_hash, &lookup);
+ lookup.addr = addr;
+ pnc = hash_lookup(pim->nht_hash, &lookup);
return pnc;
}
-static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
- struct pim_rpf *rpf_addr)
+static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim, pim_addr addr)
{
struct pim_nexthop_cache *pnc;
char hash_name[64];
- pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE,
- sizeof(struct pim_nexthop_cache));
- pnc->rpf.rpf_addr = rpf_addr->rpf_addr;
+ /* This function is only ever called if we are unable to find an entry, so
+ * the hash_get should always add a new entry
+ */
+ pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE, sizeof(struct pim_nexthop_cache));
+ pnc->addr = addr;
- pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern);
+ pnc = hash_get(pim->nht_hash, pnc, hash_alloc_intern);
pnc->rp_list = list_new();
pnc->rp_list->cmp = pim_rp_list_cmp;
- snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash",
- &pnc->rpf.rpf_addr, pim->vrf->name);
- pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
- pim_upstream_equal, hash_name);
+ snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash", &pnc->addr,
+ pim->vrf->name);
+ pnc->upstream_hash = hash_create_size(32, pim_upstream_hash_key, pim_upstream_equal,
+ hash_name);
return pnc;
}
-static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim,
- pim_addr addr)
+static bool pim_nht_pnc_has_answer(struct pim_instance *pim, struct pim_nexthop_cache *pnc)
+{
+ switch (pim->rpf_mode) {
+ case MCAST_MRIB_ONLY:
+ return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_ANSWER_RECEIVED);
+
+ case MCAST_URIB_ONLY:
+ return CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_ANSWER_RECEIVED);
+
+ case MCAST_MIX_MRIB_FIRST:
+ case MCAST_NO_CONFIG:
+ case MCAST_MIX_DISTANCE:
+ case MCAST_MIX_PFXLEN:
+ /* This check is to determine if we've received an answer necessary to make a NH decision.
+ * For the mixed modes, where we may lookup from MRIB or URIB, let's require an answer
+ * for both tables.
+ */
+ return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_ANSWER_RECEIVED) &&
+ CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_ANSWER_RECEIVED);
+
+ default:
+ break;
+ }
+ return false;
+}
+
+static struct pim_nexthop_cache_rib *pim_pnc_get_rib(struct pim_instance *pim,
+ struct pim_nexthop_cache *pnc)
+{
+ struct pim_nexthop_cache_rib *pnc_rib = NULL;
+
+ if (pim->rpf_mode == MCAST_MRIB_ONLY)
+ pnc_rib = &pnc->mrib;
+ else if (pim->rpf_mode == MCAST_URIB_ONLY)
+ pnc_rib = &pnc->urib;
+ else if (pim->rpf_mode == MCAST_MIX_MRIB_FIRST || pim->rpf_mode == MCAST_NO_CONFIG) {
+ if (pnc->mrib.nexthop_num > 0)
+ pnc_rib = &pnc->mrib;
+ else
+ pnc_rib = &pnc->urib;
+ } else if (pim->rpf_mode == MCAST_MIX_DISTANCE) {
+ if (pnc->mrib.distance <= pnc->urib.distance)
+ pnc_rib = &pnc->mrib;
+ else
+ pnc_rib = &pnc->urib;
+ } else if (pim->rpf_mode == MCAST_MIX_PFXLEN) {
+ if (pnc->mrib.prefix_len >= pnc->urib.prefix_len)
+ pnc_rib = &pnc->mrib;
+ else
+ pnc_rib = &pnc->urib;
+ }
+
+ return pnc_rib;
+}
+
+bool pim_nht_pnc_is_valid(struct pim_instance *pim, struct pim_nexthop_cache *pnc)
+{
+ switch (pim->rpf_mode) {
+ case MCAST_MRIB_ONLY:
+ return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_VALID);
+
+ case MCAST_URIB_ONLY:
+ return CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_VALID);
+
+ case MCAST_MIX_MRIB_FIRST:
+ case MCAST_NO_CONFIG:
+ case MCAST_MIX_DISTANCE:
+ case MCAST_MIX_PFXLEN:
+ /* The valid flag is set if there are nexthops...so when doing mixed, mrib might not have
+ * any nexthops, so consider valid if at least one RIB is valid
+ */
+ return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_VALID) ||
+ CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_VALID);
+
+ default:
+ break;
+ }
+ return false;
+}
+
+struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim, pim_addr addr)
{
struct pim_nexthop_cache *pnc = NULL;
- struct pim_rpf rpf;
struct zclient *zclient = NULL;
zclient = pim_zebra_zclient_get();
- memset(&rpf, 0, sizeof(rpf));
- rpf.rpf_addr = addr;
+ pnc = pim_nexthop_cache_find(pim, addr);
- pnc = pim_nexthop_cache_find(pim, &rpf);
- if (!pnc) {
- pnc = pim_nexthop_cache_add(pim, &rpf);
- pim_sendmsg_zebra_rnh(pim, zclient, pnc,
- ZEBRA_NEXTHOP_REGISTER);
- if (PIM_DEBUG_PIM_NHT_DETAIL)
- zlog_debug(
- "%s: NHT cache and zebra notification added for %pPA(%s)",
- __func__, &addr, pim->vrf->name);
- }
+ if (pnc)
+ return pnc;
+
+ pnc = pim_nexthop_cache_add(pim, addr);
+ pim_sendmsg_zebra_rnh(pim, zclient, pnc->addr, ZEBRA_NEXTHOP_REGISTER);
+
+ if (PIM_DEBUG_PIM_NHT_DETAIL)
+ zlog_debug("%s: NHT cache and zebra notification added for %pPA(%s)", __func__,
+ &addr, pim->vrf->name);
return pnc;
}
-/* TBD: this does several distinct things and should probably be split up.
- * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
+void pim_nht_set_gateway(struct pim_instance *pim, struct pim_nexthop_cache *pnc, pim_addr addr,
+ struct interface *ifp)
+{
+ struct nexthop *nh_node = NULL;
+ struct interface *ifp1 = NULL;
+
+ for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) {
+ /* If the gateway is already set, then keep it */
+#if PIM_IPV == 4
+ if (!pim_addr_is_any(nh_node->gate.ipv4))
+ continue;
+#else
+ if (!pim_addr_is_any(nh_node->gate.ipv6))
+ continue;
+#endif
+
+ /* Only set gateway on the correct interface */
+ ifp1 = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
+ if (ifp != ifp1)
+ continue;
+
+ /* Update the gateway address with the given address */
+#if PIM_IPV == 4
+ nh_node->gate.ipv4 = addr;
+#else
+ nh_node->gate.ipv6 = addr;
+#endif
+ if (PIM_DEBUG_PIM_NHT_RP)
+ zlog_debug("%s: addr %pPA new MRIB nexthop addr %pPAs interface %s",
+ __func__, &pnc->addr, &addr, ifp1->name);
+ }
+
+ /* Now do the same with URIB nexthop entries */
+ for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) {
+#if PIM_IPV == 4
+ if (!pim_addr_is_any(nh_node->gate.ipv4))
+ continue;
+#else
+ if (!pim_addr_is_any(nh_node->gate.ipv6))
+ continue;
+#endif
+
+ ifp1 = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
+
+ if (ifp != ifp1)
+ continue;
+
+#if PIM_IPV == 4
+ nh_node->gate.ipv4 = addr;
+#else
+ nh_node->gate.ipv6 = addr;
+#endif
+ if (PIM_DEBUG_PIM_NHT_RP)
+ zlog_debug("%s: addr %pPA new URIB nexthop addr %pPAs interface %s",
+ __func__, &pnc->addr, &addr, ifp1->name);
+ }
+}
+
+/* Finds the nexthop cache entry for the given address. If no cache, add it for tracking.
+ * Up and/or rp may be given to add to the nexthop cache entry so that they get updates when the nexthop changes
+ * If out_pnc is not null, then copy the nexthop cache entry to it.
+ * Return true if an entry was found and is valid.
*/
-int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr,
- struct pim_upstream *up, struct rp_info *rp,
- struct pim_nexthop_cache *out_pnc)
+bool pim_nht_find_or_track(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up,
+ struct rp_info *rp, struct pim_nexthop_cache *out_pnc)
{
struct pim_nexthop_cache *pnc;
struct listnode *ch_node = NULL;
+ /* This will find the entry and add it to tracking if not found */
pnc = pim_nht_get(pim, addr);
assertf(up || rp, "addr=%pPA", &addr);
+ /* Store the RP if provided and not currently in the list */
if (rp != NULL) {
ch_node = listnode_lookup(pnc->rp_list, rp);
if (ch_node == NULL)
listnode_add_sort(pnc->rp_list, rp);
}
+ /* Store the upstream if provided and not currently in the list */
if (up != NULL)
(void)hash_get(pnc->upstream_hash, up, hash_alloc_intern);
- if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
+ if (pim_nht_pnc_is_valid(pim, pnc)) {
if (out_pnc)
memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
- return 1;
+ return true;
}
- return 0;
+ return false;
}
void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr)
@@ -157,7 +306,6 @@ void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr)
struct pim_nexthop_cache *pnc;
pnc = pim_nht_get(pim, addr);
-
pnc->bsr_count++;
}
@@ -166,47 +314,47 @@ bool pim_nht_candrp_add(struct pim_instance *pim, pim_addr addr)
struct pim_nexthop_cache *pnc;
pnc = pim_nht_get(pim, addr);
-
pnc->candrp_count++;
- return CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID);
+ return pim_nht_pnc_is_valid(pim, pnc);
}
-static void pim_nht_drop_maybe(struct pim_instance *pim,
- struct pim_nexthop_cache *pnc)
+static void pim_nht_drop_maybe(struct pim_instance *pim, struct pim_nexthop_cache *pnc)
{
if (PIM_DEBUG_PIM_NHT)
zlog_debug("%s: NHT %pPA(%s) rp_list count:%d upstream count:%ld BSR count:%u Cand-RP count:%u",
- __func__, &pnc->rpf.rpf_addr, pim->vrf->name,
- pnc->rp_list->count, pnc->upstream_hash->count,
- pnc->bsr_count, pnc->candrp_count);
+ __func__, &pnc->addr, pim->vrf->name, pnc->rp_list->count,
+ pnc->upstream_hash->count, pnc->bsr_count, pnc->candrp_count);
- if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0 &&
- pnc->bsr_count == 0 && pnc->candrp_count == 0) {
+ if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0 && pnc->bsr_count == 0 &&
+ pnc->candrp_count == 0) {
struct zclient *zclient = pim_zebra_zclient_get();
- pim_sendmsg_zebra_rnh(pim, zclient, pnc,
- ZEBRA_NEXTHOP_UNREGISTER);
+ pim_sendmsg_zebra_rnh(pim, zclient, pnc->addr, ZEBRA_NEXTHOP_UNREGISTER);
list_delete(&pnc->rp_list);
+
hash_free(pnc->upstream_hash);
+ hash_release(pim->nht_hash, pnc);
+
+ if (pnc->urib.nexthop)
+ nexthops_free(pnc->urib.nexthop);
+ if (pnc->mrib.nexthop)
+ nexthops_free(pnc->mrib.nexthop);
- hash_release(pim->rpf_hash, pnc);
- if (pnc->nexthop)
- nexthops_free(pnc->nexthop);
XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
}
}
-void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr,
- struct pim_upstream *up, struct rp_info *rp)
+void pim_nht_delete_tracked(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up,
+ struct rp_info *rp)
{
struct pim_nexthop_cache *pnc = NULL;
struct pim_nexthop_cache lookup;
struct pim_upstream *upstream = NULL;
/* Remove from RPF hash if it is the last entry */
- lookup.rpf.rpf_addr = addr;
- pnc = hash_lookup(pim->rpf_hash, &lookup);
+ lookup.addr = addr;
+ pnc = hash_lookup(pim->nht_hash, &lookup);
if (!pnc) {
zlog_warn("attempting to delete nonexistent NHT entry %pPA",
&addr);
@@ -251,9 +399,9 @@ void pim_nht_bsr_del(struct pim_instance *pim, pim_addr addr)
if (pim_addr_is_any(addr))
return;
- lookup.rpf.rpf_addr = addr;
+ lookup.addr = addr;
- pnc = hash_lookup(pim->rpf_hash, &lookup);
+ pnc = hash_lookup(pim->nht_hash, &lookup);
if (!pnc) {
zlog_warn("attempting to delete nonexistent NHT BSR entry %pPA",
@@ -272,9 +420,9 @@ void pim_nht_candrp_del(struct pim_instance *pim, pim_addr addr)
struct pim_nexthop_cache *pnc = NULL;
struct pim_nexthop_cache lookup;
- lookup.rpf.rpf_addr = addr;
+ lookup.addr = addr;
- pnc = hash_lookup(pim->rpf_hash, &lookup);
+ pnc = hash_lookup(pim->nht_hash, &lookup);
if (!pnc) {
zlog_warn("attempting to delete nonexistent NHT C-RP entry %pPA",
@@ -297,10 +445,10 @@ bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,
struct nexthop *nh;
struct interface *ifp;
- lookup.rpf.rpf_addr = bsr_addr;
+ lookup.addr = bsr_addr;
- pnc = hash_lookup(pim->rpf_hash, &lookup);
- if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) {
+ pnc = hash_lookup(pim->nht_hash, &lookup);
+ if (!pnc || !pim_nht_pnc_has_answer(pim, pnc)) {
/* BSM from a new freshly registered BSR - do a synchronous
* zebra query since otherwise we'd drop the first packet,
* leading to additional delay in picking up BSM data
@@ -359,91 +507,92 @@ bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,
return false;
}
- if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID))
- return false;
-
- /* if we accept BSMs from more than one ECMP nexthop, this will cause
- * BSM message "multiplication" for each ECMP hop. i.e. if you have
- * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
- * message.
- *
- * so... only accept the first (IPv4) valid nexthop as source.
- */
+ if (pim_nht_pnc_is_valid(pim, pnc)) {
+ /* if we accept BSMs from more than one ECMP nexthop, this will cause
+ * BSM message "multiplication" for each ECMP hop. i.e. if you have
+ * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
+ * message.
+ *
+ * so... only accept the first (IPv4) valid nexthop as source.
+ */
+ struct pim_nexthop_cache_rib *rib = pim_pnc_get_rib(pim, pnc);
- for (nh = pnc->nexthop; nh; nh = nh->next) {
- pim_addr nhaddr;
+ for (nh = rib->nexthop; nh; nh = nh->next) {
+ pim_addr nhaddr;
- switch (nh->type) {
+ switch (nh->type) {
#if PIM_IPV == 4
- case NEXTHOP_TYPE_IPV4:
- if (nh->ifindex == IFINDEX_INTERNAL)
- continue;
+ case NEXTHOP_TYPE_IPV4:
+ if (nh->ifindex == IFINDEX_INTERNAL)
+ continue;
- fallthrough;
- case NEXTHOP_TYPE_IPV4_IFINDEX:
- nhaddr = nh->gate.ipv4;
- break;
- case NEXTHOP_TYPE_IPV6:
- case NEXTHOP_TYPE_IPV6_IFINDEX:
- continue;
-#else
- case NEXTHOP_TYPE_IPV6:
- if (nh->ifindex == IFINDEX_INTERNAL)
+ fallthrough;
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ nhaddr = nh->gate.ipv4;
+ break;
+
+ case NEXTHOP_TYPE_IPV6:
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
continue;
+#else
+ case NEXTHOP_TYPE_IPV6:
+ if (nh->ifindex == IFINDEX_INTERNAL)
+ continue;
- fallthrough;
- case NEXTHOP_TYPE_IPV6_IFINDEX:
- nhaddr = nh->gate.ipv6;
- break;
- case NEXTHOP_TYPE_IPV4:
- case NEXTHOP_TYPE_IPV4_IFINDEX:
- continue;
+ fallthrough;
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ nhaddr = nh->gate.ipv6;
+ break;
+
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ continue;
#endif
- case NEXTHOP_TYPE_IFINDEX:
- nhaddr = bsr_addr;
- break;
+ case NEXTHOP_TYPE_IFINDEX:
+ nhaddr = bsr_addr;
+ break;
- case NEXTHOP_TYPE_BLACKHOLE:
- continue;
- }
+ case NEXTHOP_TYPE_BLACKHOLE:
+ continue;
+ }
- ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
- if (!ifp || !ifp->info)
- continue;
+ ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
+ if (!ifp || !ifp->info)
+ continue;
- if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
- return true;
+ if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
+ return true;
- /* MRIB (IGP) may be pointing at a router where PIM is down */
- nbr = pim_neighbor_find(ifp, nhaddr, true);
- if (!nbr)
- continue;
+ /* MRIB (IGP) may be pointing at a router where PIM is down */
+ nbr = pim_neighbor_find(ifp, nhaddr, true);
+ if (!nbr)
+ continue;
- /* Are we on the correct interface? */
- if (nh->ifindex == src_ifp->ifindex) {
- /* Do we have the correct NH ? */
- if (!pim_addr_cmp(nhaddr, src_ip))
- return true;
- /*
- * check If the packet came from the neighbor,
- * and the dst is a secondary address on the connected interface
- */
- return (!pim_addr_cmp(nbr->source_addr, src_ip) &&
- pim_if_connected_to_source(ifp, nhaddr));
+ /* Are we on the correct interface? */
+ if (nh->ifindex == src_ifp->ifindex) {
+ /* Do we have the correct NH ? */
+ if (!pim_addr_cmp(nhaddr, src_ip))
+ return true;
+ /*
+ * check If the packet came from the neighbor,
+ * and the dst is a secondary address on the connected interface
+ */
+ return (!pim_addr_cmp(nbr->source_addr, src_ip) &&
+ pim_if_connected_to_source(ifp, nhaddr));
+ }
+ return false;
}
- return false;
}
return false;
}
-void pim_rp_nexthop_del(struct rp_info *rp_info)
+void pim_nht_rp_del(struct rp_info *rp_info)
{
rp_info->rp.source_nexthop.interface = NULL;
rp_info->rp.source_nexthop.mrib_nexthop_addr = PIMADDR_ANY;
rp_info->rp.source_nexthop.mrib_metric_preference =
router->infinite_assert_metric.metric_preference;
- rp_info->rp.source_nexthop.mrib_route_metric =
- router->infinite_assert_metric.route_metric;
+ rp_info->rp.source_nexthop.mrib_route_metric = router->infinite_assert_metric.route_metric;
}
/* Update RP nexthop info based on Nexthop update received from Zebra.*/
@@ -461,10 +610,9 @@ static void pim_update_rp_nh(struct pim_instance *pim,
ifp = rp_info->rp.source_nexthop.interface;
// Compute PIM RPF using cached nexthop
- if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop,
- rp_info->rp.rpf_addr,
- &rp_info->group, 1))
- pim_rp_nexthop_del(rp_info);
+ if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, rp_info->rp.rpf_addr,
+ &rp_info->group, true))
+ pim_nht_rp_del(rp_info);
/*
* If we transition from no path to a path
@@ -544,33 +692,43 @@ static int pim_upstream_nh_if_update_helper(struct hash_bucket *bucket,
struct pim_instance *pim = pwd->pim;
struct interface *ifp = pwd->ifp;
struct nexthop *nh_node = NULL;
- ifindex_t first_ifindex;
- for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) {
- first_ifindex = nh_node->ifindex;
- if (ifp != if_lookup_by_index(first_ifindex, pim->vrf->vrf_id))
- continue;
+ /* This update happens when an interface is added to/removed from pim.
+ * So go through both MRIB and URIB and update any upstreams for any
+ * matching nexthop
+ */
+ for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) {
+ if (ifp->ifindex == nh_node->ifindex) {
+ if (pnc->upstream_hash->count) {
+ pim_update_upstream_nh(pim, pnc);
+ break;
+ }
+ }
+ }
- if (pnc->upstream_hash->count) {
- pim_update_upstream_nh(pim, pnc);
- break;
+ for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) {
+ if (ifp->ifindex == nh_node->ifindex) {
+ if (pnc->upstream_hash->count) {
+ pim_update_upstream_nh(pim, pnc);
+ break;
+ }
}
}
return HASHWALK_CONTINUE;
}
-void pim_upstream_nh_if_update(struct pim_instance *pim, struct interface *ifp)
+void pim_nht_upstream_if_update(struct pim_instance *pim, struct interface *ifp)
{
struct pnc_hash_walk_data pwd;
pwd.pim = pim;
pwd.ifp = ifp;
- hash_walk(pim->rpf_hash, pim_upstream_nh_if_update_helper, &pwd);
+ hash_walk(pim->nht_hash, pim_upstream_nh_if_update_helper, &pwd);
}
-uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
+static uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
{
uint32_t hash_val;
@@ -583,47 +741,42 @@ uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
return hash_val;
}
-static int pim_ecmp_nexthop_search(struct pim_instance *pim,
- struct pim_nexthop_cache *pnc,
- struct pim_nexthop *nexthop, pim_addr src,
- struct prefix *grp, int neighbor_needed)
+static bool pim_ecmp_nexthop_search(struct pim_instance *pim, struct pim_nexthop_cache *pnc,
+ struct pim_nexthop *nexthop, pim_addr src, struct prefix *grp,
+ bool neighbor_needed)
{
- struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
- struct interface *ifps[router->multipath];
struct nexthop *nh_node = NULL;
- ifindex_t first_ifindex;
- struct interface *ifp = NULL;
- uint32_t hash_val = 0, mod_val = 0;
- uint16_t nh_iter = 0, found = 0;
- uint32_t i, num_nbrs = 0;
- struct pim_interface *pim_ifp;
-
- if (!pnc || !pnc->nexthop_num || !nexthop)
- return 0;
-
- pim_addr nh_addr = nexthop->mrib_nexthop_addr;
- pim_addr grp_addr = pim_addr_from_prefix(grp);
+ uint32_t hash_val = 0;
+ uint32_t mod_val = 0;
+ uint16_t nh_iter = 0;
+ bool found = false;
+ uint32_t num_nbrs = 0;
+ pim_addr nh_addr;
+ pim_addr grp_addr;
+ struct pim_nexthop_cache_rib *rib;
- memset(&nbrs, 0, sizeof(nbrs));
- memset(&ifps, 0, sizeof(ifps));
+ /* Early return if required parameters aren't provided */
+ if (!pim || !pnc || !pim_nht_pnc_is_valid(pim, pnc) || !nexthop || !grp)
+ return false;
+ nh_addr = nexthop->mrib_nexthop_addr;
+ grp_addr = pim_addr_from_prefix(grp);
+ rib = pim_pnc_get_rib(pim, pnc);
- // Current Nexthop is VALID, check to stay on the current path.
+ /* Current Nexthop is VALID, check to stay on the current path. */
if (nexthop->interface && nexthop->interface->info &&
(!pim_addr_is_any(nh_addr))) {
- /* User configured knob to explicitly switch
- to new path is disabled or current path
- metric is less than nexthop update.
+ /* User configured knob to explicitly switch to new path is disabled or
+ * current path metric is less than nexthop update.
*/
+ if (!pim->ecmp_rebalance_enable) {
+ bool curr_route_valid = false;
- if (pim->ecmp_rebalance_enable == 0) {
- uint8_t curr_route_valid = 0;
- // Check if current nexthop is present in new updated
- // Nexthop list.
- // If the current nexthop is not valid, candidate to
- // choose new Nexthop.
- for (nh_node = pnc->nexthop; nh_node;
- nh_node = nh_node->next) {
+ /* Check if current nexthop is present in new updated Nexthop list.
+ * If the current nexthop is not valid, candidate to choose new
+ * Nexthop.
+ */
+ for (nh_node = rib->nexthop; nh_node; nh_node = nh_node->next) {
curr_route_valid = (nexthop->interface->ifindex
== nh_node->ifindex);
if (curr_route_valid)
@@ -633,9 +786,9 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
if (curr_route_valid &&
!pim_if_connected_to_source(nexthop->interface,
src)) {
- nbr = pim_neighbor_find(
- nexthop->interface,
- nexthop->mrib_nexthop_addr, true);
+ struct pim_neighbor *nbr =
+ pim_neighbor_find(nexthop->interface,
+ nexthop->mrib_nexthop_addr, true);
if (!nbr
&& !if_is_loopback(nexthop->interface)) {
if (PIM_DEBUG_PIM_NHT)
@@ -646,10 +799,8 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
/* update metric even if the upstream
* neighbor stays unchanged
*/
- nexthop->mrib_metric_preference =
- pnc->distance;
- nexthop->mrib_route_metric =
- pnc->metric;
+ nexthop->mrib_metric_preference = rib->distance;
+ nexthop->mrib_route_metric = rib->metric;
if (PIM_DEBUG_PIM_NHT)
zlog_debug(
"%s: (%pPA,%pPA)(%s) current nexthop %s is valid, skipping new path selection",
@@ -657,40 +808,39 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
&grp_addr,
pim->vrf->name,
nexthop->interface->name);
- return 1;
+ return true;
}
}
}
}
- /*
- * Look up all interfaces and neighbors,
- * store for later usage
- */
- for (nh_node = pnc->nexthop, i = 0; nh_node;
- nh_node = nh_node->next, i++) {
- ifps[i] =
- if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
- if (ifps[i]) {
+ /* Count the number of neighbors for ECMP */
+ for (nh_node = rib->nexthop; nh_node; nh_node = nh_node->next) {
+ struct pim_neighbor *nbr;
+ struct interface *ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
+
+ if (!ifp)
+ continue;
+
#if PIM_IPV == 4
- pim_addr nhaddr = nh_node->gate.ipv4;
+ pim_addr nhaddr = nh_node->gate.ipv4;
#else
- pim_addr nhaddr = nh_node->gate.ipv6;
+ pim_addr nhaddr = nh_node->gate.ipv6;
#endif
- nbrs[i] = pim_neighbor_find(ifps[i], nhaddr, true);
- if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
- num_nbrs++;
- }
+ nbr = pim_neighbor_find(ifp, nhaddr, true);
+ if (nbr || pim_if_connected_to_source(ifp, src))
+ num_nbrs++;
}
+
if (pim->ecmp_enable) {
struct prefix src_pfx;
- uint32_t consider = pnc->nexthop_num;
+ uint32_t consider = rib->nexthop_num;
if (neighbor_needed && num_nbrs < consider)
consider = num_nbrs;
if (consider == 0)
- return 0;
+ return false;
// PIM ECMP flag is enable then choose ECMP path.
pim_addr_to_prefix(&src_pfx, src);
@@ -698,16 +848,16 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
mod_val = hash_val % consider;
}
- for (nh_node = pnc->nexthop; nh_node && (found == 0);
- nh_node = nh_node->next) {
- first_ifindex = nh_node->ifindex;
- ifp = ifps[nh_iter];
+ for (nh_node = rib->nexthop; nh_node && !found; nh_node = nh_node->next) {
+ struct pim_neighbor *nbr = NULL;
+ struct pim_interface *pim_ifp;
+ struct interface *ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
+
if (!ifp) {
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
- __FILE__, __func__, first_ifindex, &src,
- pim->vrf->name);
+ zlog_debug("%s %s: could not find interface for ifindex %d (address %pPA(%s))",
+ __FILE__, __func__, nh_node->ifindex, &src,
+ pim->vrf->name);
if (nh_iter == mod_val)
mod_val++; // Select nexthpath
nh_iter++;
@@ -718,10 +868,9 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
if (!pim_ifp || !pim_ifp->pim_enable) {
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
- __func__, ifp->name, pim->vrf->name,
- first_ifindex, &src);
+ zlog_debug("%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
+ __func__, ifp->name, pim->vrf->name, nh_node->ifindex,
+ &src);
if (nh_iter == mod_val)
mod_val++; // Select nexthpath
nh_iter++;
@@ -729,7 +878,12 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
}
if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
- nbr = nbrs[nh_iter];
+#if PIM_IPV == 4
+ nbr = pim_neighbor_find(ifp, nh_node->gate.ipv4, true);
+#else
+ nbr = pim_neighbor_find(ifp, nh_node->gate.ipv6, true);
+#endif
+
if (!nbr && !if_is_loopback(ifp)) {
if (PIM_DEBUG_PIM_NHT)
zlog_debug(
@@ -750,12 +904,12 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
#else
nexthop->mrib_nexthop_addr = nh_node->gate.ipv6;
#endif
- nexthop->mrib_metric_preference = pnc->distance;
- nexthop->mrib_route_metric = pnc->metric;
+ nexthop->mrib_metric_preference = rib->distance;
+ nexthop->mrib_route_metric = rib->metric;
nexthop->last_lookup = src;
nexthop->last_lookup_time = pim_time_monotonic_usec();
nexthop->nbr = nbr;
- found = 1;
+ found = true;
if (PIM_DEBUG_PIM_NHT)
zlog_debug(
"%s: (%pPA,%pPA)(%s) selected nhop interface %s addr %pPAs mod_val %u iter %d ecmp %d",
@@ -766,260 +920,55 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,
nh_iter++;
}
- if (found)
- return 1;
- else
- return 0;
+ return found;
}
-/* This API is used to parse Registered address nexthop update coming from Zebra
- */
-void pim_nexthop_update(struct vrf *vrf, struct prefix *match,
- struct zapi_route *nhr)
-{
- struct nexthop *nexthop;
- struct nexthop *nhlist_head = NULL;
- struct nexthop *nhlist_tail = NULL;
- int i;
- struct pim_rpf rpf;
- struct pim_nexthop_cache *pnc = NULL;
- struct interface *ifp = NULL;
- struct pim_instance *pim;
-
- pim = vrf->info;
-
- rpf.rpf_addr = pim_addr_from_prefix(match);
- pnc = pim_nexthop_cache_find(pim, &rpf);
- if (!pnc) {
- if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: Skipping NHT update, addr %pPA is not in local cached DB.",
- __func__, &rpf.rpf_addr);
- return;
- }
-
- pnc->last_update = pim_time_monotonic_usec();
-
- if (nhr->nexthop_num) {
- pnc->nexthop_num = 0;
-
- for (i = 0; i < nhr->nexthop_num; i++) {
- nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
- switch (nexthop->type) {
- case NEXTHOP_TYPE_IFINDEX:
- /*
- * Connected route (i.e. no nexthop), use
- * RPF address from nexthop cache (i.e.
- * destination) as PIM nexthop.
- */
-#if PIM_IPV == 4
- nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
- nexthop->gate.ipv4 = pnc->rpf.rpf_addr;
-#else
- nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
- nexthop->gate.ipv6 = pnc->rpf.rpf_addr;
-#endif
- break;
-#if PIM_IPV == 4
- /* RFC5549 IPv4-over-IPv6 nexthop handling:
- * if we get an IPv6 nexthop in IPv4 PIM, hunt down a
- * PIM neighbor and use that instead.
- */
- case NEXTHOP_TYPE_IPV6_IFINDEX: {
- struct interface *ifp1 = NULL;
- struct pim_neighbor *nbr = NULL;
-
- ifp1 = if_lookup_by_index(nexthop->ifindex,
- pim->vrf->vrf_id);
-
- if (!ifp1)
- nbr = NULL;
- else
- /* FIXME: should really use nbr's
- * secondary address list here
- */
- nbr = pim_neighbor_find_if(ifp1);
-
- /* Overwrite with Nbr address as NH addr */
- if (nbr)
- nexthop->gate.ipv4 = nbr->source_addr;
- else
- // Mark nexthop address to 0 until PIM
- // Nbr is resolved.
- nexthop->gate.ipv4 = PIMADDR_ANY;
-
- break;
- }
-#else
- case NEXTHOP_TYPE_IPV6_IFINDEX:
-#endif
- case NEXTHOP_TYPE_IPV6:
- case NEXTHOP_TYPE_IPV4:
- case NEXTHOP_TYPE_IPV4_IFINDEX:
- case NEXTHOP_TYPE_BLACKHOLE:
- /* nothing to do for the other nexthop types */
- break;
- }
-
- ifp = if_lookup_by_index(nexthop->ifindex,
- pim->vrf->vrf_id);
- if (!ifp) {
- if (PIM_DEBUG_PIM_NHT) {
- char buf[NEXTHOP_STRLEN];
- zlog_debug(
- "%s: could not find interface for ifindex %d(%s) (addr %s)",
- __func__, nexthop->ifindex,
- pim->vrf->name,
- nexthop2str(nexthop, buf,
- sizeof(buf)));
- }
- nexthop_free(nexthop);
- continue;
- }
-
- if (PIM_DEBUG_PIM_NHT) {
-#if PIM_IPV == 4
- pim_addr nhaddr = nexthop->gate.ipv4;
-#else
- pim_addr nhaddr = nexthop->gate.ipv6;
-#endif
- zlog_debug("%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ",
- __func__, match, pim->vrf->name,
- i + 1, &nhaddr, ifp->name,
- nexthop->type, nhr->distance,
- nhr->metric);
- }
-
- if (!ifp->info) {
- /*
- * Though Multicast is not enabled on this
- * Interface store it in database otheriwse we
- * may miss this update and this will not cause
- * any issue, because while choosing the path we
- * are ommitting the Interfaces which are not
- * multicast enabled
- */
- if (PIM_DEBUG_PIM_NHT) {
- char buf[NEXTHOP_STRLEN];
-
- zlog_debug(
- "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
- __func__, ifp->name,
- pim->vrf->name,
- nexthop->ifindex,
- nexthop2str(nexthop, buf,
- sizeof(buf)));
- }
- }
-
- if (nhlist_tail) {
- nhlist_tail->next = nexthop;
- nhlist_tail = nexthop;
- } else {
- nhlist_tail = nexthop;
- nhlist_head = nexthop;
- }
-
- // Keep track of all nexthops, even PIM-disabled ones.
- pnc->nexthop_num++;
- }
- /* Reset existing pnc->nexthop before assigning new list */
- nexthops_free(pnc->nexthop);
- pnc->nexthop = nhlist_head;
- if (pnc->nexthop_num) {
- pnc->flags |= PIM_NEXTHOP_VALID;
- pnc->distance = nhr->distance;
- pnc->metric = nhr->metric;
- }
- } else {
- pnc->flags &= ~PIM_NEXTHOP_VALID;
- pnc->nexthop_num = nhr->nexthop_num;
- nexthops_free(pnc->nexthop);
- pnc->nexthop = NULL;
- }
- SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
-
- if (PIM_DEBUG_PIM_NHT)
- zlog_debug("%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
- __func__, match, pim->vrf->name, nhr->nexthop_num,
- pnc->nexthop_num, vrf->vrf_id,
- pnc->upstream_hash->count, listcount(pnc->rp_list));
-
- pim_rpf_set_refresh_time(pim);
-
- if (listcount(pnc->rp_list))
- pim_update_rp_nh(pim, pnc);
- if (pnc->upstream_hash->count)
- pim_update_upstream_nh(pim, pnc);
-
- if (pnc->candrp_count)
- pim_crp_nht_update(pim, pnc);
-}
-
-int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
- struct pim_nexthop *nexthop, pim_addr src,
- struct prefix *grp, int neighbor_needed)
+bool pim_nht_lookup_ecmp(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr src,
+ struct prefix *grp, bool neighbor_needed)
{
struct pim_nexthop_cache *pnc;
struct pim_zlookup_nexthop nexthop_tab[router->multipath];
- struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
- struct pim_rpf rpf;
int num_ifindex;
- struct interface *ifps[router->multipath], *ifp;
- int first_ifindex;
- int found = 0;
+ bool found = false;
uint16_t i = 0;
- uint32_t hash_val = 0, mod_val = 0;
+ uint32_t hash_val = 0;
+ uint32_t mod_val = 0;
uint32_t num_nbrs = 0;
- struct pim_interface *pim_ifp;
if (PIM_DEBUG_PIM_NHT_DETAIL)
- zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld",
- __func__, &src, pim->vrf->name,
- nexthop->last_lookup_time);
-
- rpf.rpf_addr = src;
+ zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld", __func__, &src,
+ pim->vrf->name, nexthop->last_lookup_time);
- pnc = pim_nexthop_cache_find(pim, &rpf);
+ pnc = pim_nexthop_cache_find(pim, src);
if (pnc) {
- if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED))
- return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp,
- neighbor_needed);
+ if (pim_nht_pnc_has_answer(pim, pnc))
+ return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp, neighbor_needed);
}
- memset(nexthop_tab, 0,
- sizeof(struct pim_zlookup_nexthop) * router->multipath);
- num_ifindex =
- zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src,
- PIM_NEXTHOP_LOOKUP_MAX);
+ memset(nexthop_tab, 0, sizeof(struct pim_zlookup_nexthop) * router->multipath);
+ num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src,
+ PIM_NEXTHOP_LOOKUP_MAX);
if (num_ifindex < 1) {
if (PIM_DEBUG_PIM_NHT)
- zlog_warn(
- "%s: could not find nexthop ifindex for address %pPA(%s)",
- __func__, &src, pim->vrf->name);
- return 0;
+ zlog_warn("%s: could not find nexthop ifindex for address %pPA(%s)",
+ __func__, &src, pim->vrf->name);
+ return false;
}
- memset(&nbrs, 0, sizeof(nbrs));
- memset(&ifps, 0, sizeof(ifps));
-
- /*
- * Look up all interfaces and neighbors,
- * store for later usage
- */
+ /* Count the number of neighbors for ECMP computation */
for (i = 0; i < num_ifindex; i++) {
- ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex,
- pim->vrf->vrf_id);
- if (ifps[i]) {
- nbrs[i] = pim_neighbor_find(
- ifps[i], nexthop_tab[i].nexthop_addr, true);
-
- if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
- num_nbrs++;
- }
+ struct pim_neighbor *nbr;
+ struct interface *ifp = if_lookup_by_index(nexthop_tab[i].ifindex, pim->vrf->vrf_id);
+
+ if (!ifp)
+ continue;
+
+ nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true);
+ if (nbr || pim_if_connected_to_source(ifp, src))
+ num_nbrs++;
}
- // If PIM ECMP enable then choose ECMP path.
+ /* If PIM ECMP enable then choose ECMP path. */
if (pim->ecmp_enable) {
struct prefix src_pfx;
uint32_t consider = num_ifindex;
@@ -1028,30 +977,27 @@ int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
consider = num_nbrs;
if (consider == 0)
- return 0;
+ return false;
pim_addr_to_prefix(&src_pfx, src);
hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
mod_val = hash_val % consider;
if (PIM_DEBUG_PIM_NHT_DETAIL)
- zlog_debug("%s: hash_val %u mod_val %u", __func__,
- hash_val, mod_val);
+ zlog_debug("%s: hash_val %u mod_val %u", __func__, hash_val, mod_val);
}
- i = 0;
- while (!found && (i < num_ifindex)) {
- first_ifindex = nexthop_tab[i].ifindex;
+ for (i = 0; i < num_ifindex && !found; i++) {
+ struct pim_neighbor *nbr = NULL;
+ struct pim_interface *pim_ifp;
+ struct interface *ifp = if_lookup_by_index(nexthop_tab[i].ifindex, pim->vrf->vrf_id);
- ifp = ifps[i];
if (!ifp) {
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
- __FILE__, __func__, first_ifindex, &src,
- pim->vrf->name);
+ zlog_debug("%s %s: could not find interface for ifindex %d (address %pPA(%s))",
+ __FILE__, __func__, nexthop_tab[i].ifindex, &src,
+ pim->vrf->name);
if (i == mod_val)
mod_val++;
- i++;
continue;
}
@@ -1059,99 +1005,431 @@ int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
if (!pim_ifp || !pim_ifp->pim_enable) {
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
- __func__, ifp->name, pim->vrf->name,
- first_ifindex, &src);
+ zlog_debug("%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
+ __func__, ifp->name, pim->vrf->name,
+ nexthop_tab[i].ifindex, &src);
if (i == mod_val)
mod_val++;
- i++;
continue;
}
+
if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
- nbr = nbrs[i];
+ nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true);
if (PIM_DEBUG_PIM_NHT_DETAIL)
- zlog_debug("ifp name: %s(%s), pim nbr: %p",
- ifp->name, pim->vrf->name, nbr);
+ zlog_debug("ifp name: %s(%s), pim nbr: %p", ifp->name,
+ pim->vrf->name, nbr);
if (!nbr && !if_is_loopback(ifp)) {
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)",
+ __func__, &nexthop_tab[i].nexthop_addr,
+ ifp->name, pim->vrf->name, &src);
if (i == mod_val)
mod_val++;
- if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)",
- __func__,
- &nexthop_tab[i].nexthop_addr,
- ifp->name, pim->vrf->name,
- &src);
- i++;
continue;
}
}
if (i == mod_val) {
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d",
- __func__, &nexthop_tab[i].nexthop_addr,
- &src, ifp->name, pim->vrf->name,
- nexthop_tab[i].route_metric,
- nexthop_tab[i].protocol_distance);
+ zlog_debug("%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d",
+ __func__, &nexthop_tab[i].nexthop_addr, &src, ifp->name,
+ pim->vrf->name, nexthop_tab[i].route_metric,
+ nexthop_tab[i].protocol_distance);
/* update nexthop data */
nexthop->interface = ifp;
- nexthop->mrib_nexthop_addr =
- nexthop_tab[i].nexthop_addr;
- nexthop->mrib_metric_preference =
- nexthop_tab[i].protocol_distance;
- nexthop->mrib_route_metric =
- nexthop_tab[i].route_metric;
+ nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr;
+ nexthop->mrib_metric_preference = nexthop_tab[i].protocol_distance;
+ nexthop->mrib_route_metric = nexthop_tab[i].route_metric;
nexthop->last_lookup = src;
nexthop->last_lookup_time = pim_time_monotonic_usec();
nexthop->nbr = nbr;
- found = 1;
+ found = true;
}
- i++;
}
- if (found)
- return 1;
- else
- return 0;
+ return found;
}
-int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src,
- struct prefix *grp)
+bool pim_nht_lookup(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr addr,
+ int neighbor_needed)
+{
+ struct pim_zlookup_nexthop nexthop_tab[router->multipath];
+ struct pim_neighbor *nbr = NULL;
+ int num_ifindex;
+ struct interface *ifp = NULL;
+ ifindex_t first_ifindex = 0;
+ bool found = false;
+ int i = 0;
+ struct pim_interface *pim_ifp;
+
+#if PIM_IPV == 4
+ /*
+ * We should not attempt to lookup a
+ * 255.255.255.255 address, since
+ * it will never work
+ */
+ if (pim_addr_is_any(addr))
+ return false;
+#endif
+
+ if ((!pim_addr_cmp(nexthop->last_lookup, addr)) &&
+ (nexthop->last_lookup_time > pim->last_route_change_time)) {
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: Using last lookup for %pPAs at %lld, %" PRId64 " addr %pPAs",
+ __func__, &addr, nexthop->last_lookup_time,
+ pim->last_route_change_time, &nexthop->mrib_nexthop_addr);
+ pim->nexthop_lookups_avoided++;
+ return true;
+ }
+
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: Looking up: %pPAs, last lookup time: %lld, %" PRId64, __func__,
+ &addr, nexthop->last_lookup_time, pim->last_route_change_time);
+
+ memset(nexthop_tab, 0, sizeof(struct pim_zlookup_nexthop) * router->multipath);
+ num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, addr,
+ PIM_NEXTHOP_LOOKUP_MAX);
+ if (num_ifindex < 1) {
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: could not find nexthop ifindex for address %pPAs", __func__,
+ &addr);
+ return false;
+ }
+
+ while (!found && (i < num_ifindex)) {
+ first_ifindex = nexthop_tab[i].ifindex;
+
+ ifp = if_lookup_by_index(first_ifindex, pim->vrf->vrf_id);
+ if (!ifp) {
+ if (PIM_DEBUG_ZEBRA)
+ zlog_debug("%s: could not find interface for ifindex %d (address %pPAs)",
+ __func__, first_ifindex, &addr);
+ i++;
+ continue;
+ }
+
+ pim_ifp = ifp->info;
+ if (!pim_ifp || !pim_ifp->pim_enable) {
+ if (PIM_DEBUG_ZEBRA)
+ zlog_debug("%s: pim not enabled on input interface %s (ifindex=%d, RPF for source %pPAs)",
+ __func__, ifp->name, first_ifindex, &addr);
+ i++;
+ } else if (neighbor_needed && !pim_if_connected_to_source(ifp, addr)) {
+ nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true);
+ if (PIM_DEBUG_PIM_TRACE_DETAIL)
+ zlog_debug("ifp name: %s, pim nbr: %p", ifp->name, nbr);
+ if (!nbr && !if_is_loopback(ifp))
+ i++;
+ else
+ found = true;
+ } else
+ found = true;
+ }
+
+ if (found) {
+ if (PIM_DEBUG_ZEBRA)
+ zlog_debug("%s: found nexthop %pPAs for address %pPAs: interface %s ifindex=%d metric=%d pref=%d",
+ __func__, &nexthop_tab[i].nexthop_addr, &addr, ifp->name,
+ first_ifindex, nexthop_tab[i].route_metric,
+ nexthop_tab[i].protocol_distance);
+
+ /* update nexthop data */
+ nexthop->interface = ifp;
+ nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr;
+ nexthop->mrib_metric_preference = nexthop_tab[i].protocol_distance;
+ nexthop->mrib_route_metric = nexthop_tab[i].route_metric;
+ nexthop->last_lookup = addr;
+ nexthop->last_lookup_time = pim_time_monotonic_usec();
+ nexthop->nbr = nbr;
+ return true;
+ } else
+ return false;
+}
+
+int pim_nht_lookup_ecmp_if_vif_index(struct pim_instance *pim, pim_addr src, struct prefix *grp)
{
struct pim_nexthop nhop;
int vif_index;
ifindex_t ifindex;
memset(&nhop, 0, sizeof(nhop));
- if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) {
+ if (!pim_nht_lookup_ecmp(pim, &nhop, src, grp, true)) {
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: could not find nexthop ifindex for address %pPA(%s)",
- __func__, &src, pim->vrf->name);
+ zlog_debug("%s: could not find nexthop ifindex for address %pPA(%s)",
+ __func__, &src, pim->vrf->name);
return -1;
}
ifindex = nhop.interface->ifindex;
if (PIM_DEBUG_PIM_NHT)
- zlog_debug(
- "%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA",
- __func__, ifindex,
- ifindex2ifname(ifindex, pim->vrf->vrf_id),
- pim->vrf->name, &src);
+ zlog_debug("%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA",
+ __func__, ifindex, ifindex2ifname(ifindex, pim->vrf->vrf_id),
+ pim->vrf->name, &src);
vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
if (vif_index < 0) {
if (PIM_DEBUG_PIM_NHT) {
- zlog_debug(
- "%s: low vif_index=%d(%s) < 1 nexthop for address %pPA",
- __func__, vif_index, pim->vrf->name, &src);
+ zlog_debug("%s: low vif_index=%d(%s) < 1 nexthop for address %pPA",
+ __func__, vif_index, pim->vrf->name, &src);
}
return -2;
}
return vif_index;
}
+
+/* This API is used to parse Registered address nexthop update coming from Zebra
+ */
+void pim_nexthop_update(struct vrf *vrf, struct prefix *match, struct zapi_route *nhr)
+{
+ struct nexthop *nhlist_head = NULL;
+ struct nexthop *nhlist_tail = NULL;
+ struct pim_nexthop_cache *pnc = NULL;
+ struct pim_nexthop_cache_rib *pnc_rib = NULL;
+ struct interface *ifp = NULL;
+ struct pim_instance *pim;
+ pim_addr addr;
+
+ pim = vrf->info;
+ addr = pim_addr_from_prefix(match);
+ pnc = pim_nexthop_cache_find(pim, addr);
+ if (!pnc) {
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: Skipping NHT update, addr %pPA is not in local cached DB.",
+ __func__, &addr);
+ return;
+ }
+
+ if (nhr->safi == SAFI_UNICAST)
+ pnc_rib = &pnc->urib;
+ else if (nhr->safi == SAFI_MULTICAST)
+ pnc_rib = &pnc->mrib;
+ else
+ return;
+
+ pnc_rib->last_update = pim_time_monotonic_usec();
+ SET_FLAG(pnc_rib->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
+ UNSET_FLAG(pnc_rib->flags, PIM_NEXTHOP_VALID);
+ pnc_rib->nexthop_num = 0;
+ /* Free the existing nexthop list, resets with any valid nexthops from the update */
+ nexthops_free(pnc_rib->nexthop);
+ pnc_rib->nexthop = NULL;
+
+ for (int i = 0; i < nhr->nexthop_num; i++) {
+ struct nexthop *nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
+
+ switch (nexthop->type) {
+ case NEXTHOP_TYPE_IFINDEX:
+ /*
+ * Connected route (i.e. no nexthop), use
+ * RPF address from nexthop cache (i.e.
+ * destination) as PIM nexthop.
+ */
+#if PIM_IPV == 4
+ nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
+ nexthop->gate.ipv4 = pnc->addr;
+#else
+ nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
+ nexthop->gate.ipv6 = pnc->addr;
+#endif
+ break;
+
+#if PIM_IPV == 4
+ /* RFC5549 IPv4-over-IPv6 nexthop handling:
+ * if we get an IPv6 nexthop in IPv4 PIM, hunt down a
+ * PIM neighbor and use that instead.
+ */
+ case NEXTHOP_TYPE_IPV6_IFINDEX: {
+ struct pim_neighbor *nbr = NULL;
+ struct interface *ifp1 = if_lookup_by_index(nexthop->ifindex,
+ pim->vrf->vrf_id);
+
+ if (ifp1)
+ /* FIXME: should really use nbr's
+ * secondary address list here
+ */
+ nbr = pim_neighbor_find_if(ifp1);
+
+ /* Overwrite with Nbr address as NH addr */
+ if (nbr)
+ nexthop->gate.ipv4 = nbr->source_addr;
+ else
+ /* Mark nexthop address to 0 until PIM Nbr is resolved. */
+ nexthop->gate.ipv4 = PIMADDR_ANY;
+
+ break;
+ }
+#else
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+#endif
+ case NEXTHOP_TYPE_IPV6:
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ case NEXTHOP_TYPE_BLACKHOLE:
+ /* nothing to do for the other nexthop types */
+ break;
+ }
+
+ ifp = if_lookup_by_index(nexthop->ifindex, pim->vrf->vrf_id);
+ if (!ifp) {
+ if (PIM_DEBUG_PIM_NHT) {
+ char buf[NEXTHOP_STRLEN];
+ zlog_debug("%s: could not find interface for ifindex %d(%s) (addr %s)",
+ __func__, nexthop->ifindex, pim->vrf->name,
+ nexthop2str(nexthop, buf, sizeof(buf)));
+ }
+ nexthop_free(nexthop);
+ continue;
+ }
+
+ if (PIM_DEBUG_PIM_NHT) {
+#if PIM_IPV == 4
+ pim_addr nhaddr = nexthop->gate.ipv4;
+#else
+ pim_addr nhaddr = nexthop->gate.ipv6;
+#endif
+ zlog_debug("%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ",
+ __func__, match, pim->vrf->name, i + 1, &nhaddr, ifp->name,
+ nexthop->type, nhr->distance, nhr->metric);
+ }
+
+ if (!ifp->info) {
+ /*
+ * Though Multicast is not enabled on this
+ * Interface store it in database otheriwse we
+ * may miss this update and this will not cause
+ * any issue, because while choosing the path we
+ * are ommitting the Interfaces which are not
+ * multicast enabled
+ */
+ if (PIM_DEBUG_PIM_NHT) {
+ char buf[NEXTHOP_STRLEN];
+
+ zlog_debug("%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
+ __func__, ifp->name, pim->vrf->name, nexthop->ifindex,
+ nexthop2str(nexthop, buf, sizeof(buf)));
+ }
+ }
+
+ if (nhlist_tail) {
+ nhlist_tail->next = nexthop;
+ nhlist_tail = nexthop;
+ } else {
+ nhlist_tail = nexthop;
+ nhlist_head = nexthop;
+ }
+
+ /* Keep track of all nexthops, even PIM-disabled ones. */
+ pnc_rib->nexthop_num++;
+ } /* End for nexthops */
+
+ /* Assign the list if there are nexthops */
+ if (pnc_rib->nexthop_num) {
+ SET_FLAG(pnc_rib->flags, PIM_NEXTHOP_VALID);
+ pnc_rib->nexthop = nhlist_head;
+ pnc_rib->distance = nhr->distance;
+ pnc_rib->metric = nhr->metric;
+ pnc_rib->prefix_len = nhr->prefix.prefixlen;
+ }
+
+ if (PIM_DEBUG_PIM_NHT)
+ zlog_debug("%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
+ __func__, match, pim->vrf->name, nhr->nexthop_num, pnc_rib->nexthop_num,
+ vrf->vrf_id, pnc->upstream_hash->count, listcount(pnc->rp_list));
+
+ pim_rpf_set_refresh_time(pim);
+
+ if (listcount(pnc->rp_list))
+ pim_update_rp_nh(pim, pnc);
+ if (pnc->upstream_hash->count)
+ pim_update_upstream_nh(pim, pnc);
+
+ if (pnc->candrp_count)
+ pim_crp_nht_update(pim, pnc);
+}
+
+static int pim_nht_hash_mode_update_helper(struct hash_bucket *bucket, void *arg)
+{
+ struct pim_nexthop_cache *pnc = bucket->data;
+ struct pnc_hash_walk_data *pwd = arg;
+ struct pim_instance *pim = pwd->pim;
+
+ if (listcount(pnc->rp_list))
+ pim_update_rp_nh(pim, pnc);
+
+ if (pnc->upstream_hash->count)
+ pim_update_upstream_nh(pim, pnc);
+
+ if (pnc->candrp_count)
+ pim_crp_nht_update(pim, pnc);
+
+ return HASHWALK_CONTINUE;
+}
+
+void pim_nht_mode_changed(struct pim_instance *pim)
+{
+ struct pnc_hash_walk_data pwd;
+
+ /* Update the refresh time to force new lookups if needed */
+ pim_rpf_set_refresh_time(pim);
+
+ /* Force update the registered RP and upstreams for all cache entries */
+ pwd.pim = pim;
+ hash_walk(pim->nht_hash, pim_nht_hash_mode_update_helper, &pwd);
+}
+
+/* Cleanup pim->nht_hash each node data */
+static void pim_nht_hash_clean(void *data)
+{
+ struct pim_nexthop_cache *pnc = (struct pim_nexthop_cache *)data;
+
+ list_delete(&pnc->rp_list);
+ hash_clean_and_free(&pnc->upstream_hash, NULL);
+
+ if (pnc->mrib.nexthop)
+ nexthops_free(pnc->mrib.nexthop);
+
+ if (pnc->urib.nexthop)
+ nexthops_free(pnc->urib.nexthop);
+
+ XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
+}
+
+static unsigned int pim_nht_hash_key(const void *arg)
+{
+ const struct pim_nexthop_cache *r = arg;
+
+#if PIM_IPV == 4
+ return jhash_1word(r->addr.s_addr, 0);
+#else
+ return jhash2(r->addr.s6_addr32, array_size(r->addr.s6_addr32), 0);
+#endif
+}
+
+static bool pim_nht_equal(const void *arg1, const void *arg2)
+{
+ const struct pim_nexthop_cache *r1 = arg1;
+ const struct pim_nexthop_cache *r2 = arg2;
+
+ return (!pim_addr_cmp(r1->addr, r2->addr));
+}
+
+void pim_nht_init(struct pim_instance *pim)
+{
+ char hash_name[64];
+
+ snprintf(hash_name, sizeof(hash_name), "PIM %s NHT Hash", pim->vrf->name);
+ pim->nht_hash = hash_create_size(256, pim_nht_hash_key, pim_nht_equal, hash_name);
+
+ pim->rpf_mode = MCAST_NO_CONFIG;
+
+ if (PIM_DEBUG_ZEBRA)
+ zlog_debug("%s: NHT hash init: %s ", __func__, hash_name);
+}
+
+void pim_nht_terminate(struct pim_instance *pim)
+{
+ /* Traverse and cleanup nht_hash */
+ hash_clean_and_free(&pim->nht_hash, (void *)pim_nht_hash_clean);
+}