diff options
Diffstat (limited to 'pimd')
| -rw-r--r-- | pimd/pim_bsm.c | 2 | ||||
| -rw-r--r-- | pimd/pim_bsr_rpdb.c | 4 | ||||
| -rw-r--r-- | pimd/pim_cmd.c | 33 | ||||
| -rw-r--r-- | pimd/pim_cmd_common.c | 152 | ||||
| -rw-r--r-- | pimd/pim_iface.c | 29 | ||||
| -rw-r--r-- | pimd/pim_igmp_mtrace.c | 9 | ||||
| -rw-r--r-- | pimd/pim_instance.c | 12 | ||||
| -rw-r--r-- | pimd/pim_instance.h | 3 | ||||
| -rw-r--r-- | pimd/pim_mroute.c | 8 | ||||
| -rw-r--r-- | pimd/pim_msdp.c | 3 | ||||
| -rw-r--r-- | pimd/pim_nb.c | 6 | ||||
| -rw-r--r-- | pimd/pim_nb.h | 2 | ||||
| -rw-r--r-- | pimd/pim_nb_config.c | 38 | ||||
| -rw-r--r-- | pimd/pim_nht.c | 1314 | ||||
| -rw-r--r-- | pimd/pim_nht.h | 104 | ||||
| -rw-r--r-- | pimd/pim_rp.c | 92 | ||||
| -rw-r--r-- | pimd/pim_rp.h | 2 | ||||
| -rw-r--r-- | pimd/pim_rpf.c | 142 | ||||
| -rw-r--r-- | pimd/pim_rpf.h | 17 | ||||
| -rw-r--r-- | pimd/pim_tib.c | 9 | ||||
| -rw-r--r-- | pimd/pim_upstream.c | 2 | ||||
| -rw-r--r-- | pimd/pim_vty.c | 10 | ||||
| -rw-r--r-- | pimd/pim_vxlan.c | 7 | ||||
| -rw-r--r-- | pimd/pim_zlookup.c | 99 | ||||
| -rw-r--r-- | pimd/pim_zlookup.h | 1 | 
25 files changed, 1199 insertions, 901 deletions
diff --git a/pimd/pim_bsm.c b/pimd/pim_bsm.c index 1efdebdee1..6c4d649235 100644 --- a/pimd/pim_bsm.c +++ b/pimd/pim_bsm.c @@ -725,7 +725,7 @@ void pim_bsm_clear(struct pim_instance *pim)  				   __func__, &nht_p);  		} -		pim_delete_tracked_nexthop(pim, nht_p, NULL, rp_info); +		pim_nht_delete_tracked(pim, nht_p, NULL, rp_info);  		pim_get_all_mcast_group(&g_all);  		rp_all = pim_rp_find_match_group(pim, &g_all); diff --git a/pimd/pim_bsr_rpdb.c b/pimd/pim_bsr_rpdb.c index 6e93b65f4b..02e7a69ff1 100644 --- a/pimd/pim_bsr_rpdb.c +++ b/pimd/pim_bsr_rpdb.c @@ -413,11 +413,11 @@ void pim_crp_nht_update(struct pim_instance *pim, struct pim_nexthop_cache *pnc)  	struct bsr_crp_rp *rp, ref;  	bool ok; -	ref.addr = pnc->rpf.rpf_addr; +	ref.addr = pnc->addr;  	rp = bsr_crp_rps_find(scope->ebsr_rps, &ref);  	assertf(rp, "addr=%pPA", &ref.addr); -	ok = CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID); +	ok = pim_nht_pnc_is_valid(pim, pnc);  	if (ok == rp->nht_ok)  		return; diff --git a/pimd/pim_cmd.c b/pimd/pim_cmd.c index 205f1f95ea..a34fb344fe 100644 --- a/pimd/pim_cmd.c +++ b/pimd/pim_cmd.c @@ -2914,7 +2914,7 @@ DEFPY (show_ip_pim_nexthop,  DEFPY (show_ip_pim_nexthop_lookup,         show_ip_pim_nexthop_lookup_cmd, -       "show ip pim [vrf NAME] nexthop-lookup A.B.C.D$source A.B.C.D$group", +       "show ip pim [vrf NAME] nexthop-lookup A.B.C.D$source [A.B.C.D$group]",         SHOW_STR         IP_STR         PIM_STR @@ -2926,6 +2926,14 @@ DEFPY (show_ip_pim_nexthop_lookup,  	return pim_show_nexthop_lookup_cmd_helper(vrf, vty, source, group);  } +ALIAS_DEPRECATED (show_ip_pim_nexthop_lookup, +                  show_ip_rpf_source_cmd, +                  "show ip rpf A.B.C.D$source", +                  SHOW_STR +                  IP_STR +                  "Display RPF information for multicast source\n" +                  "Nexthop lookup for specific source address\n"); +  DEFPY (show_ip_pim_interface_traffic,         show_ip_pim_interface_traffic_cmd,         "show ip pim [vrf NAME] interface traffic [WORD$if_name] [json$json]", @@ -3288,7 +3296,7 @@ DEFUN (show_ip_rib,  		return CMD_WARNING;  	} -	if (!pim_nexthop_lookup(vrf->info, &nexthop, addr, 0)) { +	if (!pim_nht_lookup(vrf->info, &nexthop, addr, 0)) {  		vty_out(vty,  			"Failure querying RIB nexthop for unicast address %s\n",  			addr_str); @@ -8869,6 +8877,24 @@ done:  	return ret;  } +DEFPY_YANG(pim_rpf_lookup_mode, pim_rpf_lookup_mode_cmd, +           "[no] rpf-lookup-mode ![urib-only|mrib-only|mrib-then-urib|lower-distance|longer-prefix]$mode", +           NO_STR +           "RPF lookup behavior\n" +           "Lookup in unicast RIB only\n" +           "Lookup in multicast RIB only\n" +           "Try multicast RIB first, fall back to unicast RIB\n" +           "Lookup both, use entry with lower distance\n" +           "Lookup both, use entry with longer prefix\n") +{ +	if (no) +		nb_cli_enqueue_change(vty, "./mcast-rpf-lookup", NB_OP_DESTROY, NULL); +	else +		nb_cli_enqueue_change(vty, "./mcast-rpf-lookup", NB_OP_MODIFY, mode); + +	return nb_cli_apply_changes(vty, NULL); +} +  struct cmd_node pim_node = {  	.name = "pim",  	.node = PIM_NODE, @@ -9036,6 +9062,8 @@ void pim_cmd_init(void)  	install_element(PIM_NODE, &pim_bsr_candidate_rp_group_cmd);  	install_element(PIM_NODE, &pim_bsr_candidate_bsr_cmd); +	install_element(PIM_NODE, &pim_rpf_lookup_mode_cmd); +  	install_element(INTERFACE_NODE, &interface_ip_igmp_cmd);  	install_element(INTERFACE_NODE, &interface_no_ip_igmp_cmd);  	install_element(INTERFACE_NODE, &interface_ip_igmp_join_cmd); @@ -9159,6 +9187,7 @@ void pim_cmd_init(void)  	install_element(VIEW_NODE, &show_ip_ssmpingd_cmd);  	install_element(VIEW_NODE, &show_ip_pim_nexthop_cmd);  	install_element(VIEW_NODE, &show_ip_pim_nexthop_lookup_cmd); +	install_element(VIEW_NODE, &show_ip_rpf_source_cmd);  	install_element(VIEW_NODE, &show_ip_pim_bsrp_cmd);  	install_element(VIEW_NODE, &show_ip_pim_bsm_db_cmd);  	install_element(VIEW_NODE, &show_ip_pim_bsr_rpinfo_cmd); diff --git a/pimd/pim_cmd_common.c b/pimd/pim_cmd_common.c index 1476845a5d..8aebce7d27 100644 --- a/pimd/pim_cmd_common.c +++ b/pimd/pim_cmd_common.c @@ -2825,31 +2825,39 @@ static int pim_print_vty_pnc_cache_walkcb(struct hash_bucket *bucket, void *arg)  	struct vty *vty = cwd->vty;  	struct pim_instance *pim = cwd->pim;  	struct nexthop *nh_node = NULL; -	ifindex_t first_ifindex;  	struct interface *ifp = NULL;  	struct ttable *tt = NULL;  	char *table = NULL;  	/* Prepare table. */  	tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]); -	ttable_add_row(tt, "Address|Interface|Nexthop"); +	ttable_add_row(tt, "Address|Interface|Nexthop|Table");  	tt->style.cell.rpad = 2;  	tt->style.corner = '+';  	ttable_restyle(tt); -	for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) { -		first_ifindex = nh_node->ifindex; - -		ifp = if_lookup_by_index(first_ifindex, pim->vrf->vrf_id); +	for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) { +		ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); +#if PIM_IPV == 4 +		ttable_add_row(tt, "%pPA|%s|%pI4|%s", &pnc->addr, ifp ? ifp->name : "NULL", +			       &nh_node->gate.ipv4, "MRIB"); +#else +		ttable_add_row(tt, "%pPA|%s|%pI6|%s", &pnc->addr, ifp ? ifp->name : "NULL", +			       &nh_node->gate.ipv6, "MRIB"); +#endif +	} +	for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) { +		ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);  #if PIM_IPV == 4 -		ttable_add_row(tt, "%pPA|%s|%pI4", &pnc->rpf.rpf_addr, -			       ifp ? ifp->name : "NULL", &nh_node->gate.ipv4); +		ttable_add_row(tt, "%pPA|%s|%pI4|%s", &pnc->addr, ifp ? ifp->name : "NULL", +			       &nh_node->gate.ipv4, "URIB");  #else -		ttable_add_row(tt, "%pPA|%s|%pI6", &pnc->rpf.rpf_addr, -			       ifp ? ifp->name : "NULL", &nh_node->gate.ipv6); +		ttable_add_row(tt, "%pPA|%s|%pI6|%s", &pnc->addr, ifp ? ifp->name : "NULL", +			       &nh_node->gate.ipv6, "URIB");  #endif  	} +  	/* Dump the generated table. */  	table = ttable_dump(tt, "\n");  	vty_out(vty, "%s\n", table); @@ -2859,56 +2867,58 @@ static int pim_print_vty_pnc_cache_walkcb(struct hash_bucket *bucket, void *arg)  	return CMD_SUCCESS;  } -static int pim_print_json_pnc_cache_walkcb(struct hash_bucket *backet, -					   void *arg) +static void pim_print_json_nexthop(json_object *json_obj, struct nexthop *nh_node, +				   struct interface *ifp, char *addr_str, const char *type)  { -	struct pim_nexthop_cache *pnc = backet->data; -	struct json_pnc_cache_walk_data *cwd = arg; -	struct pim_instance *pim = cwd->pim; -	struct nexthop *nh_node = NULL; -	ifindex_t first_ifindex; -	struct interface *ifp = NULL; -	char addr_str[PIM_ADDRSTRLEN];  	json_object *json_row = NULL;  	json_object *json_ifp = NULL;  	json_object *json_arr = NULL;  	struct pim_interface *pim_ifp = NULL; -	bool pim_enable = false; - -	for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) { -		first_ifindex = nh_node->ifindex; -		ifp = if_lookup_by_index(first_ifindex, pim->vrf->vrf_id); -		snprintfrr(addr_str, sizeof(addr_str), "%pPA", -			   &pnc->rpf.rpf_addr); -		json_object_object_get_ex(cwd->json_obj, addr_str, &json_row); -		if (!json_row) { -			json_row = json_object_new_object(); -			json_object_string_addf(json_row, "address", "%pPA", -						&pnc->rpf.rpf_addr); -			json_object_object_addf(cwd->json_obj, json_row, "%pPA", -						&pnc->rpf.rpf_addr); -			json_arr = json_object_new_array(); -			json_object_object_add(json_row, "nexthops", json_arr); -		} -		json_ifp = json_object_new_object(); -		json_object_string_add(json_ifp, "interface", -				       ifp ? ifp->name : "NULL"); -		if (ifp) -			pim_ifp = ifp->info; +	if (ifp) +		pim_ifp = ifp->info; -		if (pim_ifp && pim_ifp->pim_enable) -			pim_enable = true; +	json_object_object_get_ex(json_obj, addr_str, &json_row); -		json_object_boolean_add(json_ifp, "pimEnabled", pim_enable); +	if (!json_row) { +		json_row = json_object_new_object(); +		json_object_string_addf(json_row, "address", "%s", addr_str); +		json_object_object_addf(json_obj, json_row, "%s", addr_str); +		json_arr = json_object_new_array(); +		json_object_object_add(json_row, "nexthops", json_arr); +	} + +	json_ifp = json_object_new_object(); +	json_object_string_add(json_ifp, "interface", ifp ? ifp->name : "NULL"); +	json_object_boolean_add(json_ifp, "pimEnabled", (pim_ifp && pim_ifp->pim_enable));  #if PIM_IPV == 4 -		json_object_string_addf(json_ifp, "nexthop", "%pI4", -					&nh_node->gate.ipv4); +	json_object_string_addf(json_ifp, "nexthop", "%pI4", &nh_node->gate.ipv4);  #else -		json_object_string_addf(json_ifp, "nexthop", "%pI6", -					&nh_node->gate.ipv6); +	json_object_string_addf(json_ifp, "nexthop", "%pI6", &nh_node->gate.ipv6);  #endif -		json_object_array_add(json_arr, json_ifp); +	json_object_string_add(json_ifp, "table", type); +	json_object_array_add(json_arr, json_ifp); +} + +static int pim_print_json_pnc_cache_walkcb(struct hash_bucket *backet, void *arg) +{ +	struct pim_nexthop_cache *pnc = backet->data; +	struct json_pnc_cache_walk_data *cwd = arg; +	json_object *json_obj = cwd->json_obj; +	struct pim_instance *pim = cwd->pim; +	char addr_str[PIM_ADDRSTRLEN]; +	struct nexthop *nh_node = NULL; +	struct interface *ifp = NULL; + +	snprintfrr(addr_str, sizeof(addr_str), "%pPA", &pnc->addr); +	for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) { +		ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); +		pim_print_json_nexthop(json_obj, nh_node, ifp, addr_str, "MRIB"); +	} + +	for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) { +		ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); +		pim_print_json_nexthop(json_obj, nh_node, ifp, addr_str, "URIB");  	}  	return CMD_SUCCESS;  } @@ -2916,7 +2926,6 @@ static int pim_print_json_pnc_cache_walkcb(struct hash_bucket *backet,  int pim_show_nexthop_lookup_cmd_helper(const char *vrf, struct vty *vty,  				       pim_addr source, pim_addr group)  { -	int result = 0;  	pim_addr vif_source;  	struct prefix grp;  	struct pim_nexthop nexthop; @@ -2929,34 +2938,36 @@ int pim_show_nexthop_lookup_cmd_helper(const char *vrf, struct vty *vty,  #if PIM_IPV == 4  	if (pim_is_group_224_4(source)) { -		vty_out(vty, -			"Invalid argument. Expected Valid Source Address.\n"); +		vty_out(vty, "Invalid argument. Expected Valid Source Address.\n");  		return CMD_WARNING;  	} - -	if (!pim_is_group_224_4(group)) { -		vty_out(vty, -			"Invalid argument. Expected Valid Multicast Group Address.\n"); +	/* Only require group if source is not provided */ +	if (pim_addr_is_any(source) && !pim_is_group_224_4(group)) { +		vty_out(vty, "Invalid argument. Expected Valid Multicast Group Address.\n");  		return CMD_WARNING;  	}  #endif -	if (!pim_rp_set_upstream_addr(v->info, &vif_source, source, group)) +	/* This call will set vif_source=source, if source is not ANY. Otherwise vif_source +	 * will be set to the RP address according to the group address. If no RP is configured +	 * for the group, then return 0 and set vif_source to ANY +	 */ +	if (!pim_rp_set_upstream_addr(v->info, &vif_source, source, group)) { +		vty_out(vty, "(%pPAs, %pPA) --- Nexthop Lookup failed, no RP.\n", &source, &group);  		return CMD_SUCCESS; +	} +  	pim_addr_to_prefix(&grp, group);  	memset(&nexthop, 0, sizeof(nexthop)); -	result = -		pim_ecmp_nexthop_lookup(v->info, &nexthop, vif_source, &grp, 0); - -	if (!result) { -		vty_out(vty, -			"Nexthop Lookup failed, no usable routes returned.\n"); +	if (!pim_nht_lookup_ecmp(v->info, &nexthop, vif_source, &grp, false)) { +		vty_out(vty, "(%pPAs, %pPA) --- Nexthop Lookup failed, no usable routes returned.\n", +			&source, &group);  		return CMD_SUCCESS;  	} -	vty_out(vty, "Group %pFXh --- Nexthop %pPAs Interface %s\n", &grp, +	vty_out(vty, "(%pPAs, %pPAs) --- Nexthop %pPAs Interface %s\n", &source, &group,  		&nexthop.mrib_nexthop_addr, nexthop.interface->name);  	return CMD_SUCCESS; @@ -2985,19 +2996,16 @@ void pim_show_nexthop(struct pim_instance *pim, struct vty *vty, bool uj)  	cwd.pim = pim;  	jcwd.pim = pim; -	if (uj) { +	if (uj)  		jcwd.json_obj = json_object_new_object(); -	} else { -		vty_out(vty, "Number of registered addresses: %lu\n", -			pim->rpf_hash->count); -	} +	else +		vty_out(vty, "Number of registered addresses: %lu\n", pim->nht_hash->count);  	if (uj) { -		hash_walk(pim->rpf_hash, pim_print_json_pnc_cache_walkcb, -			  &jcwd); +		hash_walk(pim->nht_hash, pim_print_json_pnc_cache_walkcb, &jcwd);  		vty_json(vty, jcwd.json_obj);  	} else -		hash_walk(pim->rpf_hash, pim_print_vty_pnc_cache_walkcb, &cwd); +		hash_walk(pim->nht_hash, pim_print_vty_pnc_cache_walkcb, &cwd);  }  int pim_show_neighbors_cmd_helper(const char *vrf, struct vty *vty, diff --git a/pimd/pim_iface.c b/pimd/pim_iface.c index bd7164c9b9..9316cebc0a 100644 --- a/pimd/pim_iface.c +++ b/pimd/pim_iface.c @@ -601,26 +601,13 @@ void pim_if_addr_add(struct connected *ifc)  						ifp->name);  				}  			} -			struct pim_nexthop_cache *pnc = NULL; -			struct pim_rpf rpf; -			struct zclient *zclient = NULL; - -			zclient = pim_zebra_zclient_get(); -			/* RP config might come prior to (local RP's interface) -			   IF UP event. -			   In this case, pnc would not have pim enabled -			   nexthops. -			   Once Interface is UP and pim info is available, -			   reregister -			   with RNH address to receive update and add the -			   interface as nexthop. */ -			memset(&rpf, 0, sizeof(struct pim_rpf)); -			rpf.rpf_addr = pim_addr_from_prefix(ifc->address); -			pnc = pim_nexthop_cache_find(pim_ifp->pim, &rpf); -			if (pnc) -				pim_sendmsg_zebra_rnh(pim_ifp->pim, zclient, -						      pnc, -						      ZEBRA_NEXTHOP_REGISTER); + +			/* RP config might come prior to local RP's interface IF UP event. +			 * In this case, pnc would not have pim enabled nexthops. Once +			 * Interface is UP and pim info is available, reregister with RNH +			 * address to receive update and add the interface as nexthop. +			 */ +			pim_nht_get(pim_ifp->pim, pim_addr_from_prefix(ifc->address));  		}  	} /* pim */ @@ -2043,7 +2030,7 @@ void pim_pim_interface_delete(struct interface *ifp)  	 * pim_ifp->pim_neighbor_list.  	 */  	pim_sock_delete(ifp, "pim unconfigured on interface"); -	pim_upstream_nh_if_update(pim_ifp->pim, ifp); +	pim_nht_upstream_if_update(pim_ifp->pim, ifp);  	if (!pim_ifp->gm_enable) {  		pim_if_addr_del_all(ifp); diff --git a/pimd/pim_igmp_mtrace.c b/pimd/pim_igmp_mtrace.c index 309da138d2..ad6f265101 100644 --- a/pimd/pim_igmp_mtrace.c +++ b/pimd/pim_igmp_mtrace.c @@ -16,6 +16,7 @@  #include "pim_oil.h"  #include "pim_ifchannel.h"  #include "pim_macro.h" +#include "pim_nht.h"  #include "pim_igmp_mtrace.h"  static struct in_addr mtrace_primary_address(struct interface *ifp) @@ -58,14 +59,14 @@ static bool mtrace_fwd_info_weak(struct pim_instance *pim,  	memset(&nexthop, 0, sizeof(nexthop)); -	if (!pim_nexthop_lookup(pim, &nexthop, mtracep->src_addr, 1)) { +	if (!pim_nht_lookup(pim, &nexthop, mtracep->src_addr, 1)) {  		if (PIM_DEBUG_MTRACE)  			zlog_debug("mtrace not found neighbor");  		return false;  	}  	if (PIM_DEBUG_MTRACE) -		zlog_debug("mtrace pim_nexthop_lookup OK"); +		zlog_debug("mtrace pim_nht_lookup OK");  	if (PIM_DEBUG_MTRACE)  		zlog_debug("mtrace next_hop=%pPAs", &nexthop.mrib_nexthop_addr); @@ -353,7 +354,7 @@ static int mtrace_un_forward_packet(struct pim_instance *pim, struct ip *ip_hdr,  	if (interface == NULL) {  		memset(&nexthop, 0, sizeof(nexthop)); -		if (!pim_nexthop_lookup(pim, &nexthop, ip_hdr->ip_dst, 0)) { +		if (!pim_nht_lookup(pim, &nexthop, ip_hdr->ip_dst, 0)) {  			if (PIM_DEBUG_MTRACE)  				zlog_debug(  					"Dropping mtrace packet, no route to destination"); @@ -535,7 +536,7 @@ static int mtrace_send_response(struct pim_instance *pim,  	} else {  		memset(&nexthop, 0, sizeof(nexthop));  		/* TODO: should use unicast rib lookup */ -		if (!pim_nexthop_lookup(pim, &nexthop, mtracep->rsp_addr, 1)) { +		if (!pim_nht_lookup(pim, &nexthop, mtracep->rsp_addr, 1)) {  			if (PIM_DEBUG_MTRACE)  				zlog_debug(  					"Dropped response qid=%ud, no route to response address", diff --git a/pimd/pim_instance.c b/pimd/pim_instance.c index 5649e49835..3945c5923d 100644 --- a/pimd/pim_instance.c +++ b/pimd/pim_instance.c @@ -15,6 +15,7 @@  #include "pim_ssm.h"  #include "pim_rpf.h"  #include "pim_rp.h" +#include "pim_nht.h"  #include "pim_mroute.h"  #include "pim_oil.h"  #include "pim_static.h" @@ -46,8 +47,7 @@ static void pim_instance_terminate(struct pim_instance *pim)  	pim_bsm_proc_free(pim); -	/* Traverse and cleanup rpf_hash */ -	hash_clean_and_free(&pim->rpf_hash, (void *)pim_rp_list_hash_clean); +	pim_nht_terminate(pim);  	pim_if_terminate(pim); @@ -75,7 +75,6 @@ static void pim_instance_terminate(struct pim_instance *pim)  static struct pim_instance *pim_instance_init(struct vrf *vrf)  {  	struct pim_instance *pim; -	char hash_name[64];  	pim = XCALLOC(MTYPE_PIM_PIM_INSTANCE, sizeof(struct pim_instance)); @@ -98,12 +97,7 @@ static struct pim_instance *pim_instance_init(struct vrf *vrf)  #endif /* PIM_IPV == 4 */  	pim_vxlan_init(pim); -	snprintf(hash_name, sizeof(hash_name), "PIM %s RPF Hash", vrf->name); -	pim->rpf_hash = hash_create_size(256, pim_rpf_hash_key, pim_rpf_equal, -					 hash_name); - -	if (PIM_DEBUG_ZEBRA) -		zlog_debug("%s: NHT rpf hash init ", __func__); +	pim_nht_init(pim);  	pim->ssm_info = pim_ssm_init(); diff --git a/pimd/pim_instance.h b/pimd/pim_instance.h index 93acb5e9fd..7f022111bc 100644 --- a/pimd/pim_instance.h +++ b/pimd/pim_instance.h @@ -115,7 +115,8 @@ struct pim_instance {  	/* The name of the register-accept prefix-list */  	char *register_plist; -	struct hash *rpf_hash; +	struct hash *nht_hash; +	enum pim_rpf_lookup_mode rpf_mode;  	void *ssm_info; /* per-vrf SSM configuration */ diff --git a/pimd/pim_mroute.c b/pimd/pim_mroute.c index 96eb5f48f5..93bdd8dac9 100644 --- a/pimd/pim_mroute.c +++ b/pimd/pim_mroute.c @@ -36,6 +36,7 @@  #include "pim_vxlan.h"  #include "pim_msg.h"  #include "pim_util.h" +#include "pim_nht.h"  static void mroute_read_on(struct pim_instance *pim);  static int pim_upstream_mroute_update(struct channel_oil *c_oil, @@ -566,8 +567,7 @@ int pim_mroute_msg_wrvifwhole(int fd, struct interface *ifp, const char *buf,  			 * setting the SPTBIT to true  			 */  			if (!(pim_addr_is_any(up->upstream_register)) && -			    pim_nexthop_lookup(pim_ifp->pim, &source, -					       up->upstream_register, 0)) { +			    pim_nht_lookup(pim_ifp->pim, &source, up->upstream_register, 0)) {  				pim_register_stop_send(source.interface, &sg,  						       pim_ifp->primary_address,  						       up->upstream_register); @@ -580,9 +580,7 @@ int pim_mroute_msg_wrvifwhole(int fd, struct interface *ifp, const char *buf,  							__func__);  		} else {  			if (I_am_RP(pim_ifp->pim, up->sg.grp)) { -				if (pim_nexthop_lookup(pim_ifp->pim, &source, -						       up->upstream_register, -						       0)) +				if (pim_nht_lookup(pim_ifp->pim, &source, up->upstream_register, 0))  					pim_register_stop_send(  						source.interface, &sg,  						pim_ifp->primary_address, diff --git a/pimd/pim_msdp.c b/pimd/pim_msdp.c index b428520b89..5e5ee5e91f 100644 --- a/pimd/pim_msdp.c +++ b/pimd/pim_msdp.c @@ -26,6 +26,7 @@  #include "pim_time.h"  #include "pim_upstream.h"  #include "pim_oil.h" +#include "pim_nht.h"  #include "pim_msdp.h"  #include "pim_msdp_packet.h" @@ -705,7 +706,7 @@ bool pim_msdp_peer_rpf_check(struct pim_msdp_peer *mp, struct in_addr rp)  	}  	/* check if the MSDP peer is the nexthop for the RP */ -	if (pim_nexthop_lookup(mp->pim, &nexthop, rp, 0) && +	if (pim_nht_lookup(mp->pim, &nexthop, rp, 0) &&  	    nexthop.mrib_nexthop_addr.s_addr == mp->peer.s_addr) {  		return true;  	} diff --git a/pimd/pim_nb.c b/pimd/pim_nb.c index 6b6c0e8779..b55541b810 100644 --- a/pimd/pim_nb.c +++ b/pimd/pim_nb.c @@ -264,6 +264,12 @@ const struct frr_yang_module_info frr_pim_info = {  			}  		},  		{ +			.xpath = "/frr-routing:routing/control-plane-protocols/control-plane-protocol/frr-pim:pim/address-family/mcast-rpf-lookup", +			.cbs = { +				.modify = routing_control_plane_protocols_control_plane_protocol_pim_address_family_mcast_rpf_lookup_modify, +			} +		}, +		{  			.xpath = "/frr-interface:lib/interface/frr-pim:pim/address-family",  			.cbs = {  				.create = lib_interface_pim_address_family_create, diff --git a/pimd/pim_nb.h b/pimd/pim_nb.h index c50fdb2000..a5ef6ad60a 100644 --- a/pimd/pim_nb.h +++ b/pimd/pim_nb.h @@ -102,6 +102,8 @@ int routing_control_plane_protocols_control_plane_protocol_pim_address_family_re  	struct nb_cb_modify_args *args);  int routing_control_plane_protocols_control_plane_protocol_pim_address_family_register_accept_list_destroy(  	struct nb_cb_destroy_args *args); +int routing_control_plane_protocols_control_plane_protocol_pim_address_family_mcast_rpf_lookup_modify( +	struct nb_cb_modify_args *args);  int lib_interface_pim_address_family_dr_priority_modify(  	struct nb_cb_modify_args *args);  int lib_interface_pim_address_family_create(struct nb_cb_create_args *args); diff --git a/pimd/pim_nb_config.c b/pimd/pim_nb_config.c index cf9ae21cc0..b55d08bab9 100644 --- a/pimd/pim_nb_config.c +++ b/pimd/pim_nb_config.c @@ -157,7 +157,7 @@ static int pim_cmd_interface_add(struct interface *ifp)  		pim_ifp->pim_enable = true;  	pim_if_addr_add_all(ifp); -	pim_upstream_nh_if_update(pim_ifp->pim, ifp); +	pim_nht_upstream_if_update(pim_ifp->pim, ifp);  	pim_if_membership_refresh(ifp);  	pim_if_create_pimreg(pim_ifp->pim); @@ -1893,6 +1893,39 @@ int routing_control_plane_protocols_control_plane_protocol_pim_address_family_re  }  /* + * XPath: /frr-routing:routing/control-plane-protocols/control-plane-protocol/frr-pim:pim/address-family/mcast-rpf-lookup + */ +int routing_control_plane_protocols_control_plane_protocol_pim_address_family_mcast_rpf_lookup_modify( +	struct nb_cb_modify_args *args) +{ +	struct vrf *vrf; +	struct pim_instance *pim; +	enum pim_rpf_lookup_mode old_mode; + +	switch (args->event) { +	case NB_EV_VALIDATE: +	case NB_EV_PREPARE: +	case NB_EV_ABORT: +		break; +	case NB_EV_APPLY: +		vrf = nb_running_get_entry(args->dnode, NULL, true); +		pim = vrf->info; +		old_mode = pim->rpf_mode; +		pim->rpf_mode = yang_dnode_get_enum(args->dnode, NULL); + +		if (pim->rpf_mode != old_mode && +		    /* MCAST_MIX_MRIB_FIRST is the default if not configured */ +		    (old_mode != MCAST_NO_CONFIG && pim->rpf_mode != MCAST_MIX_MRIB_FIRST)) { +			pim_nht_mode_changed(pim); +		} + +		break; +	} + +	return NB_OK; +} + +/*   * XPath: /frr-interface:lib/interface/frr-pim:pim/address-family   */  int lib_interface_pim_address_family_create(struct nb_cb_create_args *args) @@ -2712,9 +2745,8 @@ int lib_interface_pim_address_family_mroute_oif_modify(  #ifdef PIM_ENFORCE_LOOPFREE_MFC  		iif = nb_running_get_entry(args->dnode, NULL, false); -		if (!iif) { +		if (!iif)  			return NB_OK; -		}  		pim_iifp = iif->info;  		pim = pim_iifp->pim; diff --git a/pimd/pim_nht.c b/pimd/pim_nht.c index f2dbfa9765..00ab46b4cd 100644 --- a/pimd/pim_nht.c +++ b/pimd/pim_nht.c @@ -38,118 +38,267 @@   * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister   *   command to Zebra.   */ -void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient, -			   struct pim_nexthop_cache *pnc, int command) +static void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient, pim_addr addr, +				  int command)  {  	struct prefix p;  	int ret; -	pim_addr_to_prefix(&p, pnc->rpf.rpf_addr); -	ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false, -			       pim->vrf->vrf_id); +	pim_addr_to_prefix(&p, addr); + +	/* Register to track nexthops from the MRIB */ +	ret = zclient_send_rnh(zclient, command, &p, SAFI_MULTICAST, false, false, pim->vrf->vrf_id); +	if (ret == ZCLIENT_SEND_FAILURE) +		zlog_warn( +			"sendmsg_nexthop: zclient_send_message() failed registering MRIB tracking"); + +	if (PIM_DEBUG_PIM_NHT) +		zlog_debug("%s: MRIB NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", __func__, +			   (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, pim->vrf->name, +			   ret); + +	/* Also register to track nexthops from the URIB */ +	ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false, pim->vrf->vrf_id);  	if (ret == ZCLIENT_SEND_FAILURE) -		zlog_warn("sendmsg_nexthop: zclient_send_message() failed"); +		zlog_warn( +			"sendmsg_nexthop: zclient_send_message() failed registering URIB tracking");  	if (PIM_DEBUG_PIM_NHT) -		zlog_debug( -			"%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", -			__func__, -			(command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, -			pim->vrf->name, ret); +		zlog_debug("%s: URIB NHT %sregistered addr %pFX(%s) with Zebra ret:%d ", __func__, +			   (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p, pim->vrf->name, +			   ret);  	return;  } -struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim, -						 struct pim_rpf *rpf) +static struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim, pim_addr addr)  {  	struct pim_nexthop_cache *pnc = NULL;  	struct pim_nexthop_cache lookup; -	lookup.rpf.rpf_addr = rpf->rpf_addr; -	pnc = hash_lookup(pim->rpf_hash, &lookup); +	lookup.addr = addr; +	pnc = hash_lookup(pim->nht_hash, &lookup);  	return pnc;  } -static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim, -						       struct pim_rpf *rpf_addr) +static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim, pim_addr addr)  {  	struct pim_nexthop_cache *pnc;  	char hash_name[64]; -	pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE, -		      sizeof(struct pim_nexthop_cache)); -	pnc->rpf.rpf_addr = rpf_addr->rpf_addr; +	/* This function is only ever called if we are unable to find an entry, so +	 * the hash_get should always add a new entry +	 */ +	pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE, sizeof(struct pim_nexthop_cache)); +	pnc->addr = addr; -	pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern); +	pnc = hash_get(pim->nht_hash, pnc, hash_alloc_intern);  	pnc->rp_list = list_new();  	pnc->rp_list->cmp = pim_rp_list_cmp; -	snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash", -		   &pnc->rpf.rpf_addr, pim->vrf->name); -	pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key, -					      pim_upstream_equal, hash_name); +	snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash", &pnc->addr, +		   pim->vrf->name); +	pnc->upstream_hash = hash_create_size(32, pim_upstream_hash_key, pim_upstream_equal, +					      hash_name);  	return pnc;  } -static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim, -					     pim_addr addr) +static bool pim_nht_pnc_has_answer(struct pim_instance *pim, struct pim_nexthop_cache *pnc) +{ +	switch (pim->rpf_mode) { +	case MCAST_MRIB_ONLY: +		return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_ANSWER_RECEIVED); + +	case MCAST_URIB_ONLY: +		return CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_ANSWER_RECEIVED); + +	case MCAST_MIX_MRIB_FIRST: +	case MCAST_NO_CONFIG: +	case MCAST_MIX_DISTANCE: +	case MCAST_MIX_PFXLEN: +		/* This check is to determine if we've received an answer necessary to make a NH decision. +		 * For the mixed modes, where we may lookup from MRIB or URIB, let's require an answer +		 * for both tables. +		 */ +		return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_ANSWER_RECEIVED) && +		       CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_ANSWER_RECEIVED); + +	default: +		break; +	} +	return false; +} + +static struct pim_nexthop_cache_rib *pim_pnc_get_rib(struct pim_instance *pim, +						     struct pim_nexthop_cache *pnc) +{ +	struct pim_nexthop_cache_rib *pnc_rib = NULL; + +	if (pim->rpf_mode == MCAST_MRIB_ONLY) +		pnc_rib = &pnc->mrib; +	else if (pim->rpf_mode == MCAST_URIB_ONLY) +		pnc_rib = &pnc->urib; +	else if (pim->rpf_mode == MCAST_MIX_MRIB_FIRST || pim->rpf_mode == MCAST_NO_CONFIG) { +		if (pnc->mrib.nexthop_num > 0) +			pnc_rib = &pnc->mrib; +		else +			pnc_rib = &pnc->urib; +	} else if (pim->rpf_mode == MCAST_MIX_DISTANCE) { +		if (pnc->mrib.distance <= pnc->urib.distance) +			pnc_rib = &pnc->mrib; +		else +			pnc_rib = &pnc->urib; +	} else if (pim->rpf_mode == MCAST_MIX_PFXLEN) { +		if (pnc->mrib.prefix_len >= pnc->urib.prefix_len) +			pnc_rib = &pnc->mrib; +		else +			pnc_rib = &pnc->urib; +	} + +	return pnc_rib; +} + +bool pim_nht_pnc_is_valid(struct pim_instance *pim, struct pim_nexthop_cache *pnc) +{ +	switch (pim->rpf_mode) { +	case MCAST_MRIB_ONLY: +		return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_VALID); + +	case MCAST_URIB_ONLY: +		return CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_VALID); + +	case MCAST_MIX_MRIB_FIRST: +	case MCAST_NO_CONFIG: +	case MCAST_MIX_DISTANCE: +	case MCAST_MIX_PFXLEN: +		/* The valid flag is set if there are nexthops...so when doing mixed, mrib might not have +		 * any nexthops, so consider valid if at least one RIB is valid +		 */ +		return CHECK_FLAG(pnc->mrib.flags, PIM_NEXTHOP_VALID) || +		       CHECK_FLAG(pnc->urib.flags, PIM_NEXTHOP_VALID); + +	default: +		break; +	} +	return false; +} + +struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim, pim_addr addr)  {  	struct pim_nexthop_cache *pnc = NULL; -	struct pim_rpf rpf;  	struct zclient *zclient = NULL;  	zclient = pim_zebra_zclient_get(); -	memset(&rpf, 0, sizeof(rpf)); -	rpf.rpf_addr = addr; +	pnc = pim_nexthop_cache_find(pim, addr); -	pnc = pim_nexthop_cache_find(pim, &rpf); -	if (!pnc) { -		pnc = pim_nexthop_cache_add(pim, &rpf); -		pim_sendmsg_zebra_rnh(pim, zclient, pnc, -				      ZEBRA_NEXTHOP_REGISTER); -		if (PIM_DEBUG_PIM_NHT_DETAIL) -			zlog_debug( -				"%s: NHT cache and zebra notification added for %pPA(%s)", -				__func__, &addr, pim->vrf->name); -	} +	if (pnc) +		return pnc; + +	pnc = pim_nexthop_cache_add(pim, addr); +	pim_sendmsg_zebra_rnh(pim, zclient, pnc->addr, ZEBRA_NEXTHOP_REGISTER); + +	if (PIM_DEBUG_PIM_NHT_DETAIL) +		zlog_debug("%s: NHT cache and zebra notification added for %pPA(%s)", __func__, +			   &addr, pim->vrf->name);  	return pnc;  } -/* TBD: this does several distinct things and should probably be split up. - * (checking state vs. returning pnc vs. adding upstream vs. adding rp) +void pim_nht_set_gateway(struct pim_instance *pim, struct pim_nexthop_cache *pnc, pim_addr addr, +			 struct interface *ifp) +{ +	struct nexthop *nh_node = NULL; +	struct interface *ifp1 = NULL; + +	for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) { +		/* If the gateway is already set, then keep it */ +#if PIM_IPV == 4 +		if (!pim_addr_is_any(nh_node->gate.ipv4)) +			continue; +#else +		if (!pim_addr_is_any(nh_node->gate.ipv6)) +			continue; +#endif + +		/* Only set gateway on the correct interface */ +		ifp1 = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); +		if (ifp != ifp1) +			continue; + +			/* Update the gateway address with the given address */ +#if PIM_IPV == 4 +		nh_node->gate.ipv4 = addr; +#else +		nh_node->gate.ipv6 = addr; +#endif +		if (PIM_DEBUG_PIM_NHT_RP) +			zlog_debug("%s: addr %pPA new MRIB nexthop addr %pPAs interface %s", +				   __func__, &pnc->addr, &addr, ifp1->name); +	} + +	/* Now do the same with URIB nexthop entries */ +	for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) { +#if PIM_IPV == 4 +		if (!pim_addr_is_any(nh_node->gate.ipv4)) +			continue; +#else +		if (!pim_addr_is_any(nh_node->gate.ipv6)) +			continue; +#endif + +		ifp1 = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); + +		if (ifp != ifp1) +			continue; + +#if PIM_IPV == 4 +		nh_node->gate.ipv4 = addr; +#else +		nh_node->gate.ipv6 = addr; +#endif +		if (PIM_DEBUG_PIM_NHT_RP) +			zlog_debug("%s: addr %pPA new URIB nexthop addr %pPAs interface %s", +				   __func__, &pnc->addr, &addr, ifp1->name); +	} +} + +/* Finds the nexthop cache entry for the given address. If no cache, add it for tracking. + * Up and/or rp may be given to add to the nexthop cache entry so that they get updates when the nexthop changes + * If out_pnc is not null, then copy the nexthop cache entry to it. + * Return true if an entry was found and is valid.   */ -int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr, -			      struct pim_upstream *up, struct rp_info *rp, -			      struct pim_nexthop_cache *out_pnc) +bool pim_nht_find_or_track(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up, +			   struct rp_info *rp, struct pim_nexthop_cache *out_pnc)  {  	struct pim_nexthop_cache *pnc;  	struct listnode *ch_node = NULL; +	/* This will find the entry and add it to tracking if not found */  	pnc = pim_nht_get(pim, addr);  	assertf(up || rp, "addr=%pPA", &addr); +	/* Store the RP if provided and not currently in the list */  	if (rp != NULL) {  		ch_node = listnode_lookup(pnc->rp_list, rp);  		if (ch_node == NULL)  			listnode_add_sort(pnc->rp_list, rp);  	} +	/* Store the upstream if provided and not currently in the list */  	if (up != NULL)  		(void)hash_get(pnc->upstream_hash, up, hash_alloc_intern); -	if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) { +	if (pim_nht_pnc_is_valid(pim, pnc)) {  		if (out_pnc)  			memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache)); -		return 1; +		return true;  	} -	return 0; +	return false;  }  void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr) @@ -157,7 +306,6 @@ void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr)  	struct pim_nexthop_cache *pnc;  	pnc = pim_nht_get(pim, addr); -  	pnc->bsr_count++;  } @@ -166,47 +314,47 @@ bool pim_nht_candrp_add(struct pim_instance *pim, pim_addr addr)  	struct pim_nexthop_cache *pnc;  	pnc = pim_nht_get(pim, addr); -  	pnc->candrp_count++; -	return CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID); +	return pim_nht_pnc_is_valid(pim, pnc);  } -static void pim_nht_drop_maybe(struct pim_instance *pim, -			       struct pim_nexthop_cache *pnc) +static void pim_nht_drop_maybe(struct pim_instance *pim, struct pim_nexthop_cache *pnc)  {  	if (PIM_DEBUG_PIM_NHT)  		zlog_debug("%s: NHT %pPA(%s) rp_list count:%d upstream count:%ld BSR count:%u Cand-RP count:%u", -			   __func__, &pnc->rpf.rpf_addr, pim->vrf->name, -			   pnc->rp_list->count, pnc->upstream_hash->count, -			   pnc->bsr_count, pnc->candrp_count); +			   __func__, &pnc->addr, pim->vrf->name, pnc->rp_list->count, +			   pnc->upstream_hash->count, pnc->bsr_count, pnc->candrp_count); -	if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0 && -	    pnc->bsr_count == 0 && pnc->candrp_count == 0) { +	if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0 && pnc->bsr_count == 0 && +	    pnc->candrp_count == 0) {  		struct zclient *zclient = pim_zebra_zclient_get(); -		pim_sendmsg_zebra_rnh(pim, zclient, pnc, -				      ZEBRA_NEXTHOP_UNREGISTER); +		pim_sendmsg_zebra_rnh(pim, zclient, pnc->addr, ZEBRA_NEXTHOP_UNREGISTER);  		list_delete(&pnc->rp_list); +  		hash_free(pnc->upstream_hash); +		hash_release(pim->nht_hash, pnc); + +		if (pnc->urib.nexthop) +			nexthops_free(pnc->urib.nexthop); +		if (pnc->mrib.nexthop) +			nexthops_free(pnc->mrib.nexthop); -		hash_release(pim->rpf_hash, pnc); -		if (pnc->nexthop) -			nexthops_free(pnc->nexthop);  		XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);  	}  } -void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr, -				struct pim_upstream *up, struct rp_info *rp) +void pim_nht_delete_tracked(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up, +			    struct rp_info *rp)  {  	struct pim_nexthop_cache *pnc = NULL;  	struct pim_nexthop_cache lookup;  	struct pim_upstream *upstream = NULL;  	/* Remove from RPF hash if it is the last entry */ -	lookup.rpf.rpf_addr = addr; -	pnc = hash_lookup(pim->rpf_hash, &lookup); +	lookup.addr = addr; +	pnc = hash_lookup(pim->nht_hash, &lookup);  	if (!pnc) {  		zlog_warn("attempting to delete nonexistent NHT entry %pPA",  			  &addr); @@ -251,9 +399,9 @@ void pim_nht_bsr_del(struct pim_instance *pim, pim_addr addr)  	if (pim_addr_is_any(addr))  		return; -	lookup.rpf.rpf_addr = addr; +	lookup.addr = addr; -	pnc = hash_lookup(pim->rpf_hash, &lookup); +	pnc = hash_lookup(pim->nht_hash, &lookup);  	if (!pnc) {  		zlog_warn("attempting to delete nonexistent NHT BSR entry %pPA", @@ -272,9 +420,9 @@ void pim_nht_candrp_del(struct pim_instance *pim, pim_addr addr)  	struct pim_nexthop_cache *pnc = NULL;  	struct pim_nexthop_cache lookup; -	lookup.rpf.rpf_addr = addr; +	lookup.addr = addr; -	pnc = hash_lookup(pim->rpf_hash, &lookup); +	pnc = hash_lookup(pim->nht_hash, &lookup);  	if (!pnc) {  		zlog_warn("attempting to delete nonexistent NHT C-RP entry %pPA", @@ -297,10 +445,10 @@ bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,  	struct nexthop *nh;  	struct interface *ifp; -	lookup.rpf.rpf_addr = bsr_addr; +	lookup.addr = bsr_addr; -	pnc = hash_lookup(pim->rpf_hash, &lookup); -	if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) { +	pnc = hash_lookup(pim->nht_hash, &lookup); +	if (!pnc || !pim_nht_pnc_has_answer(pim, pnc)) {  		/* BSM from a new freshly registered BSR - do a synchronous  		 * zebra query since otherwise we'd drop the first packet,  		 * leading to additional delay in picking up BSM data @@ -359,91 +507,92 @@ bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,  		return false;  	} -	if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) -		return false; - -	/* if we accept BSMs from more than one ECMP nexthop, this will cause -	 * BSM message "multiplication" for each ECMP hop.  i.e. if you have -	 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM -	 * message. -	 * -	 * so...  only accept the first (IPv4) valid nexthop as source. -	 */ +	if (pim_nht_pnc_is_valid(pim, pnc)) { +		/* if we accept BSMs from more than one ECMP nexthop, this will cause +		 * BSM message "multiplication" for each ECMP hop.  i.e. if you have +		 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM +		 * message. +		 * +		 * so...  only accept the first (IPv4) valid nexthop as source. +		 */ +		struct pim_nexthop_cache_rib *rib = pim_pnc_get_rib(pim, pnc); -	for (nh = pnc->nexthop; nh; nh = nh->next) { -		pim_addr nhaddr; +		for (nh = rib->nexthop; nh; nh = nh->next) { +			pim_addr nhaddr; -		switch (nh->type) { +			switch (nh->type) {  #if PIM_IPV == 4 -		case NEXTHOP_TYPE_IPV4: -			if (nh->ifindex == IFINDEX_INTERNAL) -				continue; +			case NEXTHOP_TYPE_IPV4: +				if (nh->ifindex == IFINDEX_INTERNAL) +					continue; -			fallthrough; -		case NEXTHOP_TYPE_IPV4_IFINDEX: -			nhaddr = nh->gate.ipv4; -			break; -		case NEXTHOP_TYPE_IPV6: -		case NEXTHOP_TYPE_IPV6_IFINDEX: -			continue; -#else -		case NEXTHOP_TYPE_IPV6: -			if (nh->ifindex == IFINDEX_INTERNAL) +				fallthrough; +			case NEXTHOP_TYPE_IPV4_IFINDEX: +				nhaddr = nh->gate.ipv4; +				break; + +			case NEXTHOP_TYPE_IPV6: +			case NEXTHOP_TYPE_IPV6_IFINDEX:  				continue; +#else +			case NEXTHOP_TYPE_IPV6: +				if (nh->ifindex == IFINDEX_INTERNAL) +					continue; -			fallthrough; -		case NEXTHOP_TYPE_IPV6_IFINDEX: -			nhaddr = nh->gate.ipv6; -			break; -		case NEXTHOP_TYPE_IPV4: -		case NEXTHOP_TYPE_IPV4_IFINDEX: -			continue; +				fallthrough; +			case NEXTHOP_TYPE_IPV6_IFINDEX: +				nhaddr = nh->gate.ipv6; +				break; + +			case NEXTHOP_TYPE_IPV4: +			case NEXTHOP_TYPE_IPV4_IFINDEX: +				continue;  #endif -		case NEXTHOP_TYPE_IFINDEX: -			nhaddr = bsr_addr; -			break; +			case NEXTHOP_TYPE_IFINDEX: +				nhaddr = bsr_addr; +				break; -		case NEXTHOP_TYPE_BLACKHOLE: -			continue; -		} +			case NEXTHOP_TYPE_BLACKHOLE: +				continue; +			} -		ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id); -		if (!ifp || !ifp->info) -			continue; +			ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id); +			if (!ifp || !ifp->info) +				continue; -		if (if_is_loopback(ifp) && if_is_loopback(src_ifp)) -			return true; +			if (if_is_loopback(ifp) && if_is_loopback(src_ifp)) +				return true; -		/* MRIB (IGP) may be pointing at a router where PIM is down */ -		nbr = pim_neighbor_find(ifp, nhaddr, true); -		if (!nbr) -			continue; +			/* MRIB (IGP) may be pointing at a router where PIM is down */ +			nbr = pim_neighbor_find(ifp, nhaddr, true); +			if (!nbr) +				continue; -		/* Are we on the correct interface? */ -		if (nh->ifindex == src_ifp->ifindex) { -			/* Do we have the correct NH ? */ -			if (!pim_addr_cmp(nhaddr, src_ip)) -				return true; -			/* -			 * check If the packet came from the neighbor, -			 * and the dst is a secondary address on the connected interface -			 */ -			return (!pim_addr_cmp(nbr->source_addr, src_ip) && -				pim_if_connected_to_source(ifp, nhaddr)); +			/* Are we on the correct interface? */ +			if (nh->ifindex == src_ifp->ifindex) { +				/* Do we have the correct NH ? */ +				if (!pim_addr_cmp(nhaddr, src_ip)) +					return true; +				/* +				 * check If the packet came from the neighbor, +				 * and the dst is a secondary address on the connected interface +				 */ +				return (!pim_addr_cmp(nbr->source_addr, src_ip) && +					pim_if_connected_to_source(ifp, nhaddr)); +			} +			return false;  		} -		return false;  	}  	return false;  } -void pim_rp_nexthop_del(struct rp_info *rp_info) +void pim_nht_rp_del(struct rp_info *rp_info)  {  	rp_info->rp.source_nexthop.interface = NULL;  	rp_info->rp.source_nexthop.mrib_nexthop_addr = PIMADDR_ANY;  	rp_info->rp.source_nexthop.mrib_metric_preference =  		router->infinite_assert_metric.metric_preference; -	rp_info->rp.source_nexthop.mrib_route_metric = -		router->infinite_assert_metric.route_metric; +	rp_info->rp.source_nexthop.mrib_route_metric = router->infinite_assert_metric.route_metric;  }  /* Update RP nexthop info based on Nexthop update received from Zebra.*/ @@ -461,10 +610,9 @@ static void pim_update_rp_nh(struct pim_instance *pim,  		ifp = rp_info->rp.source_nexthop.interface;  		// Compute PIM RPF using cached nexthop -		if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, -					     rp_info->rp.rpf_addr, -					     &rp_info->group, 1)) -			pim_rp_nexthop_del(rp_info); +		if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, rp_info->rp.rpf_addr, +					 &rp_info->group, true)) +			pim_nht_rp_del(rp_info);  		/*  		 * If we transition from no path to a path @@ -544,33 +692,43 @@ static int pim_upstream_nh_if_update_helper(struct hash_bucket *bucket,  	struct pim_instance *pim = pwd->pim;  	struct interface *ifp = pwd->ifp;  	struct nexthop *nh_node = NULL; -	ifindex_t first_ifindex; -	for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) { -		first_ifindex = nh_node->ifindex; -		if (ifp != if_lookup_by_index(first_ifindex, pim->vrf->vrf_id)) -			continue; +	/* This update happens when an interface is added to/removed from pim. +	 * So go through both MRIB and URIB and update any upstreams for any +	 * matching nexthop +	 */ +	for (nh_node = pnc->mrib.nexthop; nh_node; nh_node = nh_node->next) { +		if (ifp->ifindex == nh_node->ifindex) { +			if (pnc->upstream_hash->count) { +				pim_update_upstream_nh(pim, pnc); +				break; +			} +		} +	} -		if (pnc->upstream_hash->count) { -			pim_update_upstream_nh(pim, pnc); -			break; +	for (nh_node = pnc->urib.nexthop; nh_node; nh_node = nh_node->next) { +		if (ifp->ifindex == nh_node->ifindex) { +			if (pnc->upstream_hash->count) { +				pim_update_upstream_nh(pim, pnc); +				break; +			}  		}  	}  	return HASHWALK_CONTINUE;  } -void pim_upstream_nh_if_update(struct pim_instance *pim, struct interface *ifp) +void pim_nht_upstream_if_update(struct pim_instance *pim, struct interface *ifp)  {  	struct pnc_hash_walk_data pwd;  	pwd.pim = pim;  	pwd.ifp = ifp; -	hash_walk(pim->rpf_hash, pim_upstream_nh_if_update_helper, &pwd); +	hash_walk(pim->nht_hash, pim_upstream_nh_if_update_helper, &pwd);  } -uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp) +static uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)  {  	uint32_t hash_val; @@ -583,47 +741,42 @@ uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)  	return hash_val;  } -static int pim_ecmp_nexthop_search(struct pim_instance *pim, -				   struct pim_nexthop_cache *pnc, -				   struct pim_nexthop *nexthop, pim_addr src, -				   struct prefix *grp, int neighbor_needed) +static bool pim_ecmp_nexthop_search(struct pim_instance *pim, struct pim_nexthop_cache *pnc, +				    struct pim_nexthop *nexthop, pim_addr src, struct prefix *grp, +				    bool neighbor_needed)  { -	struct pim_neighbor *nbrs[router->multipath], *nbr = NULL; -	struct interface *ifps[router->multipath];  	struct nexthop *nh_node = NULL; -	ifindex_t first_ifindex; -	struct interface *ifp = NULL; -	uint32_t hash_val = 0, mod_val = 0; -	uint16_t nh_iter = 0, found = 0; -	uint32_t i, num_nbrs = 0; -	struct pim_interface *pim_ifp; - -	if (!pnc || !pnc->nexthop_num || !nexthop) -		return 0; - -	pim_addr nh_addr = nexthop->mrib_nexthop_addr; -	pim_addr grp_addr = pim_addr_from_prefix(grp); +	uint32_t hash_val = 0; +	uint32_t mod_val = 0; +	uint16_t nh_iter = 0; +	bool found = false; +	uint32_t num_nbrs = 0; +	pim_addr nh_addr; +	pim_addr grp_addr; +	struct pim_nexthop_cache_rib *rib; -	memset(&nbrs, 0, sizeof(nbrs)); -	memset(&ifps, 0, sizeof(ifps)); +	/* Early return if required parameters aren't provided */ +	if (!pim || !pnc || !pim_nht_pnc_is_valid(pim, pnc) || !nexthop || !grp) +		return false; +	nh_addr = nexthop->mrib_nexthop_addr; +	grp_addr = pim_addr_from_prefix(grp); +	rib = pim_pnc_get_rib(pim, pnc); -	// Current Nexthop is VALID, check to stay on the current path. +	/* Current Nexthop is VALID, check to stay on the current path. */  	if (nexthop->interface && nexthop->interface->info &&  	    (!pim_addr_is_any(nh_addr))) { -		/* User configured knob to explicitly switch -		   to new path is disabled or current path -		   metric is less than nexthop update. +		/* User configured knob to explicitly switch to new path is disabled or +		 * current path metric is less than nexthop update.  		 */ +		if (!pim->ecmp_rebalance_enable) { +			bool curr_route_valid = false; -		if (pim->ecmp_rebalance_enable == 0) { -			uint8_t curr_route_valid = 0; -			// Check if current nexthop is present in new updated -			// Nexthop list. -			// If the current nexthop is not valid, candidate to -			// choose new Nexthop. -			for (nh_node = pnc->nexthop; nh_node; -			     nh_node = nh_node->next) { +			/* Check if current nexthop is present in new updated Nexthop list. +			 * If the current nexthop is not valid, candidate to choose new +			 * Nexthop. +			 */ +			for (nh_node = rib->nexthop; nh_node; nh_node = nh_node->next) {  				curr_route_valid = (nexthop->interface->ifindex  						    == nh_node->ifindex);  				if (curr_route_valid) @@ -633,9 +786,9 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  			if (curr_route_valid &&  			    !pim_if_connected_to_source(nexthop->interface,  							src)) { -				nbr = pim_neighbor_find( -					nexthop->interface, -					nexthop->mrib_nexthop_addr, true); +				struct pim_neighbor *nbr = +					pim_neighbor_find(nexthop->interface, +							  nexthop->mrib_nexthop_addr, true);  				if (!nbr  				    && !if_is_loopback(nexthop->interface)) {  					if (PIM_DEBUG_PIM_NHT) @@ -646,10 +799,8 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  					/* update metric even if the upstream  					 * neighbor stays unchanged  					 */ -					nexthop->mrib_metric_preference = -						pnc->distance; -					nexthop->mrib_route_metric = -						pnc->metric; +					nexthop->mrib_metric_preference = rib->distance; +					nexthop->mrib_route_metric = rib->metric;  					if (PIM_DEBUG_PIM_NHT)  						zlog_debug(  							"%s: (%pPA,%pPA)(%s) current nexthop %s is valid, skipping new path selection", @@ -657,40 +808,39 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  							&grp_addr,  							pim->vrf->name,  							nexthop->interface->name); -					return 1; +					return true;  				}  			}  		}  	} -	/* -	 * Look up all interfaces and neighbors, -	 * store for later usage -	 */ -	for (nh_node = pnc->nexthop, i = 0; nh_node; -	     nh_node = nh_node->next, i++) { -		ifps[i] = -			if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); -		if (ifps[i]) { +	/* Count the number of neighbors for ECMP */ +	for (nh_node = rib->nexthop; nh_node; nh_node = nh_node->next) { +		struct pim_neighbor *nbr; +		struct interface *ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); + +		if (!ifp) +			continue; +  #if PIM_IPV == 4 -			pim_addr nhaddr = nh_node->gate.ipv4; +		pim_addr nhaddr = nh_node->gate.ipv4;  #else -			pim_addr nhaddr = nh_node->gate.ipv6; +		pim_addr nhaddr = nh_node->gate.ipv6;  #endif -			nbrs[i] = pim_neighbor_find(ifps[i], nhaddr, true); -			if (nbrs[i] || pim_if_connected_to_source(ifps[i], src)) -				num_nbrs++; -		} +		nbr = pim_neighbor_find(ifp, nhaddr, true); +		if (nbr || pim_if_connected_to_source(ifp, src)) +			num_nbrs++;  	} +  	if (pim->ecmp_enable) {  		struct prefix src_pfx; -		uint32_t consider = pnc->nexthop_num; +		uint32_t consider = rib->nexthop_num;  		if (neighbor_needed && num_nbrs < consider)  			consider = num_nbrs;  		if (consider == 0) -			return 0; +			return false;  		// PIM ECMP flag is enable then choose ECMP path.  		pim_addr_to_prefix(&src_pfx, src); @@ -698,16 +848,16 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  		mod_val = hash_val % consider;  	} -	for (nh_node = pnc->nexthop; nh_node && (found == 0); -	     nh_node = nh_node->next) { -		first_ifindex = nh_node->ifindex; -		ifp = ifps[nh_iter]; +	for (nh_node = rib->nexthop; nh_node && !found; nh_node = nh_node->next) { +		struct pim_neighbor *nbr = NULL; +		struct pim_interface *pim_ifp; +		struct interface *ifp = if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id); +  		if (!ifp) {  			if (PIM_DEBUG_PIM_NHT) -				zlog_debug( -					"%s %s: could not find interface for ifindex %d (address %pPA(%s))", -					__FILE__, __func__, first_ifindex, &src, -					pim->vrf->name); +				zlog_debug("%s %s: could not find interface for ifindex %d (address %pPA(%s))", +					   __FILE__, __func__, nh_node->ifindex, &src, +					   pim->vrf->name);  			if (nh_iter == mod_val)  				mod_val++; // Select nexthpath  			nh_iter++; @@ -718,10 +868,9 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  		if (!pim_ifp || !pim_ifp->pim_enable) {  			if (PIM_DEBUG_PIM_NHT) -				zlog_debug( -					"%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", -					__func__, ifp->name, pim->vrf->name, -					first_ifindex, &src); +				zlog_debug("%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", +					   __func__, ifp->name, pim->vrf->name, nh_node->ifindex, +					   &src);  			if (nh_iter == mod_val)  				mod_val++; // Select nexthpath  			nh_iter++; @@ -729,7 +878,12 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  		}  		if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) { -			nbr = nbrs[nh_iter]; +#if PIM_IPV == 4 +			nbr = pim_neighbor_find(ifp, nh_node->gate.ipv4, true); +#else +			nbr = pim_neighbor_find(ifp, nh_node->gate.ipv6, true); +#endif +  			if (!nbr && !if_is_loopback(ifp)) {  				if (PIM_DEBUG_PIM_NHT)  					zlog_debug( @@ -750,12 +904,12 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  #else  			nexthop->mrib_nexthop_addr = nh_node->gate.ipv6;  #endif -			nexthop->mrib_metric_preference = pnc->distance; -			nexthop->mrib_route_metric = pnc->metric; +			nexthop->mrib_metric_preference = rib->distance; +			nexthop->mrib_route_metric = rib->metric;  			nexthop->last_lookup = src;  			nexthop->last_lookup_time = pim_time_monotonic_usec();  			nexthop->nbr = nbr; -			found = 1; +			found = true;  			if (PIM_DEBUG_PIM_NHT)  				zlog_debug(  					"%s: (%pPA,%pPA)(%s) selected nhop interface %s addr %pPAs mod_val %u iter %d ecmp %d", @@ -766,260 +920,55 @@ static int pim_ecmp_nexthop_search(struct pim_instance *pim,  		nh_iter++;  	} -	if (found) -		return 1; -	else -		return 0; +	return found;  } -/* This API is used to parse Registered address nexthop update coming from Zebra - */ -void pim_nexthop_update(struct vrf *vrf, struct prefix *match, -			struct zapi_route *nhr) -{ -	struct nexthop *nexthop; -	struct nexthop *nhlist_head = NULL; -	struct nexthop *nhlist_tail = NULL; -	int i; -	struct pim_rpf rpf; -	struct pim_nexthop_cache *pnc = NULL; -	struct interface *ifp = NULL; -	struct pim_instance *pim; - -	pim = vrf->info; - -	rpf.rpf_addr = pim_addr_from_prefix(match); -	pnc = pim_nexthop_cache_find(pim, &rpf); -	if (!pnc) { -		if (PIM_DEBUG_PIM_NHT) -			zlog_debug( -				"%s: Skipping NHT update, addr %pPA is not in local cached DB.", -				__func__, &rpf.rpf_addr); -		return; -	} - -	pnc->last_update = pim_time_monotonic_usec(); - -	if (nhr->nexthop_num) { -		pnc->nexthop_num = 0; - -		for (i = 0; i < nhr->nexthop_num; i++) { -			nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); -			switch (nexthop->type) { -			case NEXTHOP_TYPE_IFINDEX: -				/* -				 * Connected route (i.e. no nexthop), use -				 * RPF address from nexthop cache (i.e. -				 * destination) as PIM nexthop. -				 */ -#if PIM_IPV == 4 -				nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX; -				nexthop->gate.ipv4 = pnc->rpf.rpf_addr; -#else -				nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX; -				nexthop->gate.ipv6 = pnc->rpf.rpf_addr; -#endif -				break; -#if PIM_IPV == 4 -			/* RFC5549 IPv4-over-IPv6 nexthop handling: -			 * if we get an IPv6 nexthop in IPv4 PIM, hunt down a -			 * PIM neighbor and use that instead. -			 */ -			case NEXTHOP_TYPE_IPV6_IFINDEX: { -				struct interface *ifp1 = NULL; -				struct pim_neighbor *nbr = NULL; - -				ifp1 = if_lookup_by_index(nexthop->ifindex, -							  pim->vrf->vrf_id); - -				if (!ifp1) -					nbr = NULL; -				else -					/* FIXME: should really use nbr's -					 * secondary address list here -					 */ -					nbr = pim_neighbor_find_if(ifp1); - -				/* Overwrite with Nbr address as NH addr */ -				if (nbr) -					nexthop->gate.ipv4 = nbr->source_addr; -				else -					// Mark nexthop address to 0 until PIM -					// Nbr is resolved. -					nexthop->gate.ipv4 = PIMADDR_ANY; - -				break; -			} -#else -			case NEXTHOP_TYPE_IPV6_IFINDEX: -#endif -			case NEXTHOP_TYPE_IPV6: -			case NEXTHOP_TYPE_IPV4: -			case NEXTHOP_TYPE_IPV4_IFINDEX: -			case NEXTHOP_TYPE_BLACKHOLE: -				/* nothing to do for the other nexthop types */ -				break; -			} - -			ifp = if_lookup_by_index(nexthop->ifindex, -						 pim->vrf->vrf_id); -			if (!ifp) { -				if (PIM_DEBUG_PIM_NHT) { -					char buf[NEXTHOP_STRLEN]; -					zlog_debug( -						"%s: could not find interface for ifindex %d(%s) (addr %s)", -						__func__, nexthop->ifindex, -						pim->vrf->name, -						nexthop2str(nexthop, buf, -							    sizeof(buf))); -				} -				nexthop_free(nexthop); -				continue; -			} - -			if (PIM_DEBUG_PIM_NHT) { -#if PIM_IPV == 4 -				pim_addr nhaddr = nexthop->gate.ipv4; -#else -				pim_addr nhaddr = nexthop->gate.ipv6; -#endif -				zlog_debug("%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ", -					   __func__, match, pim->vrf->name, -					   i + 1, &nhaddr, ifp->name, -					   nexthop->type, nhr->distance, -					   nhr->metric); -			} - -			if (!ifp->info) { -				/* -				 * Though Multicast is not enabled on this -				 * Interface store it in database otheriwse we -				 * may miss this update and this will not cause -				 * any issue, because while choosing the path we -				 * are ommitting the Interfaces which are not -				 * multicast enabled -				 */ -				if (PIM_DEBUG_PIM_NHT) { -					char buf[NEXTHOP_STRLEN]; - -					zlog_debug( -						"%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)", -						__func__, ifp->name, -						pim->vrf->name, -						nexthop->ifindex, -						nexthop2str(nexthop, buf, -							    sizeof(buf))); -				} -			} - -			if (nhlist_tail) { -				nhlist_tail->next = nexthop; -				nhlist_tail = nexthop; -			} else { -				nhlist_tail = nexthop; -				nhlist_head = nexthop; -			} - -			// Keep track of all nexthops, even PIM-disabled ones. -			pnc->nexthop_num++; -		} -		/* Reset existing pnc->nexthop before assigning new list */ -		nexthops_free(pnc->nexthop); -		pnc->nexthop = nhlist_head; -		if (pnc->nexthop_num) { -			pnc->flags |= PIM_NEXTHOP_VALID; -			pnc->distance = nhr->distance; -			pnc->metric = nhr->metric; -		} -	} else { -		pnc->flags &= ~PIM_NEXTHOP_VALID; -		pnc->nexthop_num = nhr->nexthop_num; -		nexthops_free(pnc->nexthop); -		pnc->nexthop = NULL; -	} -	SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED); - -	if (PIM_DEBUG_PIM_NHT) -		zlog_debug("%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d", -			   __func__, match, pim->vrf->name, nhr->nexthop_num, -			   pnc->nexthop_num, vrf->vrf_id, -			   pnc->upstream_hash->count, listcount(pnc->rp_list)); - -	pim_rpf_set_refresh_time(pim); - -	if (listcount(pnc->rp_list)) -		pim_update_rp_nh(pim, pnc); -	if (pnc->upstream_hash->count) -		pim_update_upstream_nh(pim, pnc); - -	if (pnc->candrp_count) -		pim_crp_nht_update(pim, pnc); -} - -int pim_ecmp_nexthop_lookup(struct pim_instance *pim, -			    struct pim_nexthop *nexthop, pim_addr src, -			    struct prefix *grp, int neighbor_needed) +bool pim_nht_lookup_ecmp(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr src, +			 struct prefix *grp, bool neighbor_needed)  {  	struct pim_nexthop_cache *pnc;  	struct pim_zlookup_nexthop nexthop_tab[router->multipath]; -	struct pim_neighbor *nbrs[router->multipath], *nbr = NULL; -	struct pim_rpf rpf;  	int num_ifindex; -	struct interface *ifps[router->multipath], *ifp; -	int first_ifindex; -	int found = 0; +	bool found = false;  	uint16_t i = 0; -	uint32_t hash_val = 0, mod_val = 0; +	uint32_t hash_val = 0; +	uint32_t mod_val = 0;  	uint32_t num_nbrs = 0; -	struct pim_interface *pim_ifp;  	if (PIM_DEBUG_PIM_NHT_DETAIL) -		zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld", -			   __func__, &src, pim->vrf->name, -			   nexthop->last_lookup_time); - -	rpf.rpf_addr = src; +		zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld", __func__, &src, +			   pim->vrf->name, nexthop->last_lookup_time); -	pnc = pim_nexthop_cache_find(pim, &rpf); +	pnc = pim_nexthop_cache_find(pim, src);  	if (pnc) { -		if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) -		    return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp, -						   neighbor_needed); +		if (pim_nht_pnc_has_answer(pim, pnc)) +			return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp, neighbor_needed);  	} -	memset(nexthop_tab, 0, -	       sizeof(struct pim_zlookup_nexthop) * router->multipath); -	num_ifindex = -		zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src, -				       PIM_NEXTHOP_LOOKUP_MAX); +	memset(nexthop_tab, 0, sizeof(struct pim_zlookup_nexthop) * router->multipath); +	num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src, +					     PIM_NEXTHOP_LOOKUP_MAX);  	if (num_ifindex < 1) {  		if (PIM_DEBUG_PIM_NHT) -			zlog_warn( -				"%s: could not find nexthop ifindex for address %pPA(%s)", -				__func__, &src, pim->vrf->name); -		return 0; +			zlog_warn("%s: could not find nexthop ifindex for address %pPA(%s)", +				  __func__, &src, pim->vrf->name); +		return false;  	} -	memset(&nbrs, 0, sizeof(nbrs)); -	memset(&ifps, 0, sizeof(ifps)); - -	/* -	 * Look up all interfaces and neighbors, -	 * store for later usage -	 */ +	/* Count the number of neighbors for ECMP computation */  	for (i = 0; i < num_ifindex; i++) { -		ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex, -					     pim->vrf->vrf_id); -		if (ifps[i]) { -			nbrs[i] = pim_neighbor_find( -				ifps[i], nexthop_tab[i].nexthop_addr, true); - -			if (nbrs[i] || pim_if_connected_to_source(ifps[i], src)) -				num_nbrs++; -		} +		struct pim_neighbor *nbr; +		struct interface *ifp = if_lookup_by_index(nexthop_tab[i].ifindex, pim->vrf->vrf_id); + +		if (!ifp) +			continue; + +		nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true); +		if (nbr || pim_if_connected_to_source(ifp, src)) +			num_nbrs++;  	} -	// If PIM ECMP enable then choose ECMP path. +	/* If PIM ECMP enable then choose ECMP path. */  	if (pim->ecmp_enable) {  		struct prefix src_pfx;  		uint32_t consider = num_ifindex; @@ -1028,30 +977,27 @@ int pim_ecmp_nexthop_lookup(struct pim_instance *pim,  			consider = num_nbrs;  		if (consider == 0) -			return 0; +			return false;  		pim_addr_to_prefix(&src_pfx, src);  		hash_val = pim_compute_ecmp_hash(&src_pfx, grp);  		mod_val = hash_val % consider;  		if (PIM_DEBUG_PIM_NHT_DETAIL) -			zlog_debug("%s: hash_val %u mod_val %u", __func__, -				   hash_val, mod_val); +			zlog_debug("%s: hash_val %u mod_val %u", __func__, hash_val, mod_val);  	} -	i = 0; -	while (!found && (i < num_ifindex)) { -		first_ifindex = nexthop_tab[i].ifindex; +	for (i = 0; i < num_ifindex && !found; i++) { +		struct pim_neighbor *nbr = NULL; +		struct pim_interface *pim_ifp; +		struct interface *ifp = if_lookup_by_index(nexthop_tab[i].ifindex, pim->vrf->vrf_id); -		ifp = ifps[i];  		if (!ifp) {  			if (PIM_DEBUG_PIM_NHT) -				zlog_debug( -					"%s %s: could not find interface for ifindex %d (address %pPA(%s))", -					__FILE__, __func__, first_ifindex, &src, -					pim->vrf->name); +				zlog_debug("%s %s: could not find interface for ifindex %d (address %pPA(%s))", +					   __FILE__, __func__, nexthop_tab[i].ifindex, &src, +					   pim->vrf->name);  			if (i == mod_val)  				mod_val++; -			i++;  			continue;  		} @@ -1059,99 +1005,431 @@ int pim_ecmp_nexthop_lookup(struct pim_instance *pim,  		if (!pim_ifp || !pim_ifp->pim_enable) {  			if (PIM_DEBUG_PIM_NHT) -				zlog_debug( -					"%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", -					__func__, ifp->name, pim->vrf->name, -					first_ifindex, &src); +				zlog_debug("%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)", +					   __func__, ifp->name, pim->vrf->name, +					   nexthop_tab[i].ifindex, &src);  			if (i == mod_val)  				mod_val++; -			i++;  			continue;  		} +  		if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) { -			nbr = nbrs[i]; +			nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true);  			if (PIM_DEBUG_PIM_NHT_DETAIL) -				zlog_debug("ifp name: %s(%s), pim nbr: %p", -					   ifp->name, pim->vrf->name, nbr); +				zlog_debug("ifp name: %s(%s), pim nbr: %p", ifp->name, +					   pim->vrf->name, nbr);  			if (!nbr && !if_is_loopback(ifp)) { +				if (PIM_DEBUG_PIM_NHT) +					zlog_debug("%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)", +						   __func__, &nexthop_tab[i].nexthop_addr, +						   ifp->name, pim->vrf->name, &src);  				if (i == mod_val)  					mod_val++; -				if (PIM_DEBUG_PIM_NHT) -					zlog_debug( -						"%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)", -						__func__, -						&nexthop_tab[i].nexthop_addr, -						ifp->name, pim->vrf->name, -						&src); -				i++;  				continue;  			}  		}  		if (i == mod_val) {  			if (PIM_DEBUG_PIM_NHT) -				zlog_debug( -					"%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d", -					__func__, &nexthop_tab[i].nexthop_addr, -					&src, ifp->name, pim->vrf->name, -					nexthop_tab[i].route_metric, -					nexthop_tab[i].protocol_distance); +				zlog_debug("%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d", +					   __func__, &nexthop_tab[i].nexthop_addr, &src, ifp->name, +					   pim->vrf->name, nexthop_tab[i].route_metric, +					   nexthop_tab[i].protocol_distance);  			/* update nexthop data */  			nexthop->interface = ifp; -			nexthop->mrib_nexthop_addr = -				nexthop_tab[i].nexthop_addr; -			nexthop->mrib_metric_preference = -				nexthop_tab[i].protocol_distance; -			nexthop->mrib_route_metric = -				nexthop_tab[i].route_metric; +			nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr; +			nexthop->mrib_metric_preference = nexthop_tab[i].protocol_distance; +			nexthop->mrib_route_metric = nexthop_tab[i].route_metric;  			nexthop->last_lookup = src;  			nexthop->last_lookup_time = pim_time_monotonic_usec();  			nexthop->nbr = nbr; -			found = 1; +			found = true;  		} -		i++;  	} -	if (found) -		return 1; -	else -		return 0; +	return found;  } -int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src, -				     struct prefix *grp) +bool pim_nht_lookup(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr addr, +		    int neighbor_needed) +{ +	struct pim_zlookup_nexthop nexthop_tab[router->multipath]; +	struct pim_neighbor *nbr = NULL; +	int num_ifindex; +	struct interface *ifp = NULL; +	ifindex_t first_ifindex = 0; +	bool found = false; +	int i = 0; +	struct pim_interface *pim_ifp; + +#if PIM_IPV == 4 +	/* +	 * We should not attempt to lookup a +	 * 255.255.255.255 address, since +	 * it will never work +	 */ +	if (pim_addr_is_any(addr)) +		return false; +#endif + +	if ((!pim_addr_cmp(nexthop->last_lookup, addr)) && +	    (nexthop->last_lookup_time > pim->last_route_change_time)) { +		if (PIM_DEBUG_PIM_NHT) +			zlog_debug("%s: Using last lookup for %pPAs at %lld, %" PRId64 " addr %pPAs", +				   __func__, &addr, nexthop->last_lookup_time, +				   pim->last_route_change_time, &nexthop->mrib_nexthop_addr); +		pim->nexthop_lookups_avoided++; +		return true; +	} + +	if (PIM_DEBUG_PIM_NHT) +		zlog_debug("%s: Looking up: %pPAs, last lookup time: %lld, %" PRId64, __func__, +			   &addr, nexthop->last_lookup_time, pim->last_route_change_time); + +	memset(nexthop_tab, 0, sizeof(struct pim_zlookup_nexthop) * router->multipath); +	num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, addr, +					     PIM_NEXTHOP_LOOKUP_MAX); +	if (num_ifindex < 1) { +		if (PIM_DEBUG_PIM_NHT) +			zlog_debug("%s: could not find nexthop ifindex for address %pPAs", __func__, +				   &addr); +		return false; +	} + +	while (!found && (i < num_ifindex)) { +		first_ifindex = nexthop_tab[i].ifindex; + +		ifp = if_lookup_by_index(first_ifindex, pim->vrf->vrf_id); +		if (!ifp) { +			if (PIM_DEBUG_ZEBRA) +				zlog_debug("%s: could not find interface for ifindex %d (address %pPAs)", +					   __func__, first_ifindex, &addr); +			i++; +			continue; +		} + +		pim_ifp = ifp->info; +		if (!pim_ifp || !pim_ifp->pim_enable) { +			if (PIM_DEBUG_ZEBRA) +				zlog_debug("%s: pim not enabled on input interface %s (ifindex=%d, RPF for source %pPAs)", +					   __func__, ifp->name, first_ifindex, &addr); +			i++; +		} else if (neighbor_needed && !pim_if_connected_to_source(ifp, addr)) { +			nbr = pim_neighbor_find(ifp, nexthop_tab[i].nexthop_addr, true); +			if (PIM_DEBUG_PIM_TRACE_DETAIL) +				zlog_debug("ifp name: %s, pim nbr: %p", ifp->name, nbr); +			if (!nbr && !if_is_loopback(ifp)) +				i++; +			else +				found = true; +		} else +			found = true; +	} + +	if (found) { +		if (PIM_DEBUG_ZEBRA) +			zlog_debug("%s: found nexthop %pPAs for address %pPAs: interface %s ifindex=%d metric=%d pref=%d", +				   __func__, &nexthop_tab[i].nexthop_addr, &addr, ifp->name, +				   first_ifindex, nexthop_tab[i].route_metric, +				   nexthop_tab[i].protocol_distance); + +		/* update nexthop data */ +		nexthop->interface = ifp; +		nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr; +		nexthop->mrib_metric_preference = nexthop_tab[i].protocol_distance; +		nexthop->mrib_route_metric = nexthop_tab[i].route_metric; +		nexthop->last_lookup = addr; +		nexthop->last_lookup_time = pim_time_monotonic_usec(); +		nexthop->nbr = nbr; +		return true; +	} else +		return false; +} + +int pim_nht_lookup_ecmp_if_vif_index(struct pim_instance *pim, pim_addr src, struct prefix *grp)  {  	struct pim_nexthop nhop;  	int vif_index;  	ifindex_t ifindex;  	memset(&nhop, 0, sizeof(nhop)); -	if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) { +	if (!pim_nht_lookup_ecmp(pim, &nhop, src, grp, true)) {  		if (PIM_DEBUG_PIM_NHT) -			zlog_debug( -				"%s: could not find nexthop ifindex for address %pPA(%s)", -				__func__, &src, pim->vrf->name); +			zlog_debug("%s: could not find nexthop ifindex for address %pPA(%s)", +				   __func__, &src, pim->vrf->name);  		return -1;  	}  	ifindex = nhop.interface->ifindex;  	if (PIM_DEBUG_PIM_NHT) -		zlog_debug( -			"%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA", -			__func__, ifindex, -			ifindex2ifname(ifindex, pim->vrf->vrf_id), -			pim->vrf->name, &src); +		zlog_debug("%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA", +			   __func__, ifindex, ifindex2ifname(ifindex, pim->vrf->vrf_id), +			   pim->vrf->name, &src);  	vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);  	if (vif_index < 0) {  		if (PIM_DEBUG_PIM_NHT) { -			zlog_debug( -				"%s: low vif_index=%d(%s) < 1 nexthop for address %pPA", -				__func__, vif_index, pim->vrf->name, &src); +			zlog_debug("%s: low vif_index=%d(%s) < 1 nexthop for address %pPA", +				   __func__, vif_index, pim->vrf->name, &src);  		}  		return -2;  	}  	return vif_index;  } + +/* This API is used to parse Registered address nexthop update coming from Zebra + */ +void pim_nexthop_update(struct vrf *vrf, struct prefix *match, struct zapi_route *nhr) +{ +	struct nexthop *nhlist_head = NULL; +	struct nexthop *nhlist_tail = NULL; +	struct pim_nexthop_cache *pnc = NULL; +	struct pim_nexthop_cache_rib *pnc_rib = NULL; +	struct interface *ifp = NULL; +	struct pim_instance *pim; +	pim_addr addr; + +	pim = vrf->info; +	addr = pim_addr_from_prefix(match); +	pnc = pim_nexthop_cache_find(pim, addr); +	if (!pnc) { +		if (PIM_DEBUG_PIM_NHT) +			zlog_debug("%s: Skipping NHT update, addr %pPA is not in local cached DB.", +				   __func__, &addr); +		return; +	} + +	if (nhr->safi == SAFI_UNICAST) +		pnc_rib = &pnc->urib; +	else if (nhr->safi == SAFI_MULTICAST) +		pnc_rib = &pnc->mrib; +	else +		return; + +	pnc_rib->last_update = pim_time_monotonic_usec(); +	SET_FLAG(pnc_rib->flags, PIM_NEXTHOP_ANSWER_RECEIVED); +	UNSET_FLAG(pnc_rib->flags, PIM_NEXTHOP_VALID); +	pnc_rib->nexthop_num = 0; +	/* Free the existing nexthop list, resets with any valid nexthops from the update */ +	nexthops_free(pnc_rib->nexthop); +	pnc_rib->nexthop = NULL; + +	for (int i = 0; i < nhr->nexthop_num; i++) { +		struct nexthop *nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); + +		switch (nexthop->type) { +		case NEXTHOP_TYPE_IFINDEX: +			/* +			 * Connected route (i.e. no nexthop), use +			 * RPF address from nexthop cache (i.e. +			 * destination) as PIM nexthop. +			 */ +#if PIM_IPV == 4 +			nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX; +			nexthop->gate.ipv4 = pnc->addr; +#else +			nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX; +			nexthop->gate.ipv6 = pnc->addr; +#endif +			break; + +#if PIM_IPV == 4 +		/* RFC5549 IPv4-over-IPv6 nexthop handling: +		 * if we get an IPv6 nexthop in IPv4 PIM, hunt down a +		 * PIM neighbor and use that instead. +		 */ +		case NEXTHOP_TYPE_IPV6_IFINDEX: { +			struct pim_neighbor *nbr = NULL; +			struct interface *ifp1 = if_lookup_by_index(nexthop->ifindex, +								    pim->vrf->vrf_id); + +			if (ifp1) +				/* FIXME: should really use nbr's +				 * secondary address list here +				 */ +				nbr = pim_neighbor_find_if(ifp1); + +			/* Overwrite with Nbr address as NH addr */ +			if (nbr) +				nexthop->gate.ipv4 = nbr->source_addr; +			else +				/* Mark nexthop address to 0 until PIM Nbr is resolved. */ +				nexthop->gate.ipv4 = PIMADDR_ANY; + +			break; +		} +#else +		case NEXTHOP_TYPE_IPV6_IFINDEX: +#endif +		case NEXTHOP_TYPE_IPV6: +		case NEXTHOP_TYPE_IPV4: +		case NEXTHOP_TYPE_IPV4_IFINDEX: +		case NEXTHOP_TYPE_BLACKHOLE: +			/* nothing to do for the other nexthop types */ +			break; +		} + +		ifp = if_lookup_by_index(nexthop->ifindex, pim->vrf->vrf_id); +		if (!ifp) { +			if (PIM_DEBUG_PIM_NHT) { +				char buf[NEXTHOP_STRLEN]; +				zlog_debug("%s: could not find interface for ifindex %d(%s) (addr %s)", +					   __func__, nexthop->ifindex, pim->vrf->name, +					   nexthop2str(nexthop, buf, sizeof(buf))); +			} +			nexthop_free(nexthop); +			continue; +		} + +		if (PIM_DEBUG_PIM_NHT) { +#if PIM_IPV == 4 +			pim_addr nhaddr = nexthop->gate.ipv4; +#else +			pim_addr nhaddr = nexthop->gate.ipv6; +#endif +			zlog_debug("%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ", +				   __func__, match, pim->vrf->name, i + 1, &nhaddr, ifp->name, +				   nexthop->type, nhr->distance, nhr->metric); +		} + +		if (!ifp->info) { +			/* +			 * Though Multicast is not enabled on this +			 * Interface store it in database otheriwse we +			 * may miss this update and this will not cause +			 * any issue, because while choosing the path we +			 * are ommitting the Interfaces which are not +			 * multicast enabled +			 */ +			if (PIM_DEBUG_PIM_NHT) { +				char buf[NEXTHOP_STRLEN]; + +				zlog_debug("%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)", +					   __func__, ifp->name, pim->vrf->name, nexthop->ifindex, +					   nexthop2str(nexthop, buf, sizeof(buf))); +			} +		} + +		if (nhlist_tail) { +			nhlist_tail->next = nexthop; +			nhlist_tail = nexthop; +		} else { +			nhlist_tail = nexthop; +			nhlist_head = nexthop; +		} + +		/* Keep track of all nexthops, even PIM-disabled ones. */ +		pnc_rib->nexthop_num++; +	} /* End for nexthops */ + +	/* Assign the list if there are nexthops */ +	if (pnc_rib->nexthop_num) { +		SET_FLAG(pnc_rib->flags, PIM_NEXTHOP_VALID); +		pnc_rib->nexthop = nhlist_head; +		pnc_rib->distance = nhr->distance; +		pnc_rib->metric = nhr->metric; +		pnc_rib->prefix_len = nhr->prefix.prefixlen; +	} + +	if (PIM_DEBUG_PIM_NHT) +		zlog_debug("%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d", +			   __func__, match, pim->vrf->name, nhr->nexthop_num, pnc_rib->nexthop_num, +			   vrf->vrf_id, pnc->upstream_hash->count, listcount(pnc->rp_list)); + +	pim_rpf_set_refresh_time(pim); + +	if (listcount(pnc->rp_list)) +		pim_update_rp_nh(pim, pnc); +	if (pnc->upstream_hash->count) +		pim_update_upstream_nh(pim, pnc); + +	if (pnc->candrp_count) +		pim_crp_nht_update(pim, pnc); +} + +static int pim_nht_hash_mode_update_helper(struct hash_bucket *bucket, void *arg) +{ +	struct pim_nexthop_cache *pnc = bucket->data; +	struct pnc_hash_walk_data *pwd = arg; +	struct pim_instance *pim = pwd->pim; + +	if (listcount(pnc->rp_list)) +		pim_update_rp_nh(pim, pnc); + +	if (pnc->upstream_hash->count) +		pim_update_upstream_nh(pim, pnc); + +	if (pnc->candrp_count) +		pim_crp_nht_update(pim, pnc); + +	return HASHWALK_CONTINUE; +} + +void pim_nht_mode_changed(struct pim_instance *pim) +{ +	struct pnc_hash_walk_data pwd; + +	/* Update the refresh time to force new lookups if needed */ +	pim_rpf_set_refresh_time(pim); + +	/* Force update the registered RP and upstreams for all cache entries */ +	pwd.pim = pim; +	hash_walk(pim->nht_hash, pim_nht_hash_mode_update_helper, &pwd); +} + +/* Cleanup pim->nht_hash each node data */ +static void pim_nht_hash_clean(void *data) +{ +	struct pim_nexthop_cache *pnc = (struct pim_nexthop_cache *)data; + +	list_delete(&pnc->rp_list); +	hash_clean_and_free(&pnc->upstream_hash, NULL); + +	if (pnc->mrib.nexthop) +		nexthops_free(pnc->mrib.nexthop); + +	if (pnc->urib.nexthop) +		nexthops_free(pnc->urib.nexthop); + +	XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc); +} + +static unsigned int pim_nht_hash_key(const void *arg) +{ +	const struct pim_nexthop_cache *r = arg; + +#if PIM_IPV == 4 +	return jhash_1word(r->addr.s_addr, 0); +#else +	return jhash2(r->addr.s6_addr32, array_size(r->addr.s6_addr32), 0); +#endif +} + +static bool pim_nht_equal(const void *arg1, const void *arg2) +{ +	const struct pim_nexthop_cache *r1 = arg1; +	const struct pim_nexthop_cache *r2 = arg2; + +	return (!pim_addr_cmp(r1->addr, r2->addr)); +} + +void pim_nht_init(struct pim_instance *pim) +{ +	char hash_name[64]; + +	snprintf(hash_name, sizeof(hash_name), "PIM %s NHT Hash", pim->vrf->name); +	pim->nht_hash = hash_create_size(256, pim_nht_hash_key, pim_nht_equal, hash_name); + +	pim->rpf_mode = MCAST_NO_CONFIG; + +	if (PIM_DEBUG_ZEBRA) +		zlog_debug("%s: NHT hash init: %s ", __func__, hash_name); +} + +void pim_nht_terminate(struct pim_instance *pim) +{ +	/* Traverse and cleanup nht_hash */ +	hash_clean_and_free(&pim->nht_hash, (void *)pim_nht_hash_clean); +} diff --git a/pimd/pim_nht.h b/pimd/pim_nht.h index d064f714a5..144139f406 100644 --- a/pimd/pim_nht.h +++ b/pimd/pim_nht.h @@ -17,11 +17,12 @@  #include "pim_rpf.h"  /* PIM nexthop cache value structure. */ -struct pim_nexthop_cache { -	struct pim_rpf rpf; +struct pim_nexthop_cache_rib {  	/* IGP route's metric. */  	uint32_t metric;  	uint32_t distance; +	uint16_t prefix_len; +  	/* Nexthop number and nexthop linked list. */  	uint16_t nexthop_num;  	struct nexthop *nexthop; @@ -29,6 +30,13 @@ struct pim_nexthop_cache {  	uint16_t flags;  #define PIM_NEXTHOP_VALID             (1 << 0)  #define PIM_NEXTHOP_ANSWER_RECEIVED   (1 << 1) +}; + +struct pim_nexthop_cache { +	pim_addr addr; + +	struct pim_nexthop_cache_rib mrib; +	struct pim_nexthop_cache_rib urib;  	struct list *rp_list;  	struct hash *upstream_hash; @@ -46,36 +54,74 @@ struct pnc_hash_walk_data {  	struct interface *ifp;  }; -void pim_nexthop_update(struct vrf *vrf, struct prefix *match, -			struct zapi_route *nhr); -int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr, -			      struct pim_upstream *up, struct rp_info *rp, -			      struct pim_nexthop_cache *out_pnc); -void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr, -				struct pim_upstream *up, struct rp_info *rp); -struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim, -						 struct pim_rpf *rpf); -uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp); -int pim_ecmp_nexthop_lookup(struct pim_instance *pim, -			    struct pim_nexthop *nexthop, pim_addr src, -			    struct prefix *grp, int neighbor_needed); -void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient, -			   struct pim_nexthop_cache *pnc, int command); -int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src, -				     struct prefix *grp); -void pim_rp_nexthop_del(struct rp_info *rp_info); - -/* for RPF check on BSM message receipt */ +/* Verify that we have nexthop information in the cache entry */ +bool pim_nht_pnc_is_valid(struct pim_instance *pim, struct pim_nexthop_cache *pnc); + +/* Get (or add) the NH cache entry for the given address */ +struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim, pim_addr addr); + +/* Set the gateway address for all nexthops in the given cache entry to the given address + * unless the gateway is already set, and only if the nexthop is through the given interface. + */ +void pim_nht_set_gateway(struct pim_instance *pim, struct pim_nexthop_cache *pnc, pim_addr addr, +			 struct interface *ifp); + +/* Track a new addr, registers an upstream or RP for updates */ +bool pim_nht_find_or_track(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up, +			   struct rp_info *rp, struct pim_nexthop_cache *out_pnc); + +/* Track a new addr, increments BSR count */  void pim_nht_bsr_add(struct pim_instance *pim, pim_addr bsr_addr); -void pim_nht_bsr_del(struct pim_instance *pim, pim_addr bsr_addr); -/* RPF(bsr_addr) == src_ip%src_ifp? */ -bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr, -			   struct interface *src_ifp, pim_addr src_ip); -void pim_upstream_nh_if_update(struct pim_instance *pim, struct interface *ifp); -/* wrappers for usage with Candidate RPs in BSMs */ +/* Track a new addr, increments Cand RP count */  bool pim_nht_candrp_add(struct pim_instance *pim, pim_addr addr); + +/* Delete a tracked addr with registered upstream or RP, if no-one else is interested, stop tracking */ +void pim_nht_delete_tracked(struct pim_instance *pim, pim_addr addr, struct pim_upstream *up, +			    struct rp_info *rp); + +/* Delete a tracked addr and decrement BSR count, if no-one else is interested, stop tracking */ +void pim_nht_bsr_del(struct pim_instance *pim, pim_addr bsr_addr); + +/* Delete a tracked addr and decrement Cand RP count, if no-one else is interested, stop tracking */  void pim_nht_candrp_del(struct pim_instance *pim, pim_addr addr); -void pim_crp_nht_update(struct pim_instance *pim, struct pim_nexthop_cache *pnc); + +/* RPF(bsr_addr) == src_ip%src_ifp? */ +bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr, struct interface *src_ifp, +			   pim_addr src_ip); + +/* Reset the rp.source_nexthop of the given RP */ +void pim_nht_rp_del(struct rp_info *rp_info); + +/* Walk the NH cache and update every nexthop that uses the given interface */ +void pim_nht_upstream_if_update(struct pim_instance *pim, struct interface *ifp); + +/* Lookup nexthop information for src, returned in nexthop when function returns true. + * Tries to find in cache first and does a synchronous lookup if not found in the cache. + * If neighbor_needed is true, then nexthop is only considered valid if it's to a pim + * neighbor. + * Providing the group only effects the ECMP decision, if enabled + */ +bool pim_nht_lookup_ecmp(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr src, +			 struct prefix *grp, bool neighbor_needed); + +/* Very similar to pim_nht_lookup_ecmp, but does not check the nht cache and only does + * a synchronous lookup. No ECMP decision is made. + */ +bool pim_nht_lookup(struct pim_instance *pim, struct pim_nexthop *nexthop, pim_addr addr, +		    int neighbor_needed); + +/* Performs a pim_nht_lookup_ecmp and returns the mroute VIF index of the nexthop interface */ +int pim_nht_lookup_ecmp_if_vif_index(struct pim_instance *pim, pim_addr src, struct prefix *grp); + +/* Tracked nexthop update from zebra */ +void pim_nexthop_update(struct vrf *vrf, struct prefix *match, struct zapi_route *nhr); + +/* RPF lookup mode changed via configuration */ +void pim_nht_mode_changed(struct pim_instance *pim); + +/* NHT init and finish funcitons */ +void pim_nht_init(struct pim_instance *pim); +void pim_nht_terminate(struct pim_instance *pim);  #endif diff --git a/pimd/pim_rp.c b/pimd/pim_rp.c index 44cc006226..17e9c3f268 100644 --- a/pimd/pim_rp.c +++ b/pimd/pim_rp.c @@ -40,20 +40,6 @@  #include "pim_ssm.h"  #include "termtable.h" -/* Cleanup pim->rpf_hash each node data */ -void pim_rp_list_hash_clean(void *data) -{ -	struct pim_nexthop_cache *pnc = (struct pim_nexthop_cache *)data; - -	list_delete(&pnc->rp_list); - -	hash_clean_and_free(&pnc->upstream_hash, NULL); -	if (pnc->nexthop) -		nexthops_free(pnc->nexthop); - -	XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc); -} -  static void pim_rp_info_free(struct rp_info *rp_info)  {  	XFREE(MTYPE_PIM_FILTER_NAME, rp_info->plist); @@ -432,7 +418,7 @@ void pim_upstream_update(struct pim_instance *pim, struct pim_upstream *up)  			zlog_debug(  				"%s: Deregister upstream %s addr %pPA with Zebra NHT",  				__func__, up->sg_str, &old_upstream_addr); -		pim_delete_tracked_nexthop(pim, old_upstream_addr, up, NULL); +		pim_nht_delete_tracked(pim, old_upstream_addr, up, NULL);  	}  	/* Update the upstream address */ @@ -583,12 +569,10 @@ int pim_rp_new(struct pim_instance *pim, pim_addr rp_addr, struct prefix group,  				zlog_debug("new RP %pPA for %pFX is ourselves",  					   &rp_all->rp.rpf_addr, &rp_all->group);  			pim_rp_refresh_group_to_rp_mapping(pim); -			pim_find_or_track_nexthop(pim, nht_p, NULL, rp_all, -						  NULL); +			pim_nht_find_or_track(pim, nht_p, NULL, rp_all, NULL); -			if (!pim_ecmp_nexthop_lookup(pim, -						     &rp_all->rp.source_nexthop, -						     nht_p, &rp_all->group, 1)) +			if (!pim_nht_lookup_ecmp(pim, &rp_all->rp.source_nexthop, nht_p, +						 &rp_all->group, true))  				return PIM_RP_NO_PATH;  			return PIM_SUCCESS;  		} @@ -683,9 +667,8 @@ int pim_rp_new(struct pim_instance *pim, pim_addr rp_addr, struct prefix group,  	if (PIM_DEBUG_PIM_NHT_RP)  		zlog_debug("%s: NHT Register RP addr %pPA grp %pFX with Zebra ",  			   __func__, &nht_p, &rp_info->group); -	pim_find_or_track_nexthop(pim, nht_p, NULL, rp_info, NULL); -	if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, nht_p, -				     &rp_info->group, 1)) +	pim_nht_find_or_track(pim, nht_p, NULL, rp_info, NULL); +	if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, nht_p, &rp_info->group, true))  		return PIM_RP_NO_PATH;  	return PIM_SUCCESS; @@ -777,7 +760,7 @@ int pim_rp_del(struct pim_instance *pim, pim_addr rp_addr, struct prefix group,  	if (PIM_DEBUG_PIM_NHT_RP)  		zlog_debug("%s: Deregister RP addr %pPA with Zebra ", __func__,  			   &nht_p); -	pim_delete_tracked_nexthop(pim, nht_p, NULL, rp_info); +	pim_nht_delete_tracked(pim, nht_p, NULL, rp_info);  	pim_get_all_mcast_group(&g_all);  	rp_all = pim_rp_find_match_group(pim, &g_all); @@ -909,10 +892,10 @@ int pim_rp_change(struct pim_instance *pim, pim_addr new_rp_addr,  		if (PIM_DEBUG_PIM_NHT_RP)  			zlog_debug("%s: Deregister RP addr %pPA with Zebra ",  				   __func__, &nht_p); -		pim_delete_tracked_nexthop(pim, nht_p, NULL, rp_info); +		pim_nht_delete_tracked(pim, nht_p, NULL, rp_info);  	} -	pim_rp_nexthop_del(rp_info); +	pim_nht_rp_del(rp_info);  	listnode_delete(pim->rp_list, rp_info);  	/* Update the new RP address*/ @@ -946,9 +929,8 @@ int pim_rp_change(struct pim_instance *pim, pim_addr new_rp_addr,  		zlog_debug("%s: NHT Register RP addr %pPA grp %pFX with Zebra ",  			   __func__, &nht_p, &rp_info->group); -	pim_find_or_track_nexthop(pim, nht_p, NULL, rp_info, NULL); -	if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, nht_p, -				     &rp_info->group, 1)) { +	pim_nht_find_or_track(pim, nht_p, NULL, rp_info, NULL); +	if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, nht_p, &rp_info->group, true)) {  		route_unlock_node(rn);  		return PIM_RP_NO_PATH;  	} @@ -974,13 +956,13 @@ void pim_rp_setup(struct pim_instance *pim)  		nht_p = rp_info->rp.rpf_addr; -		pim_find_or_track_nexthop(pim, nht_p, NULL, rp_info, NULL); -		if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, -					     nht_p, &rp_info->group, 1)) { +		pim_nht_find_or_track(pim, nht_p, NULL, rp_info, NULL); +		if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, nht_p, &rp_info->group, +					 true)) {  			if (PIM_DEBUG_PIM_NHT_RP)  				zlog_debug(  					"Unable to lookup nexthop for rp specified"); -			pim_rp_nexthop_del(rp_info); +			pim_nht_rp_del(rp_info);  		}  	}  } @@ -1123,10 +1105,9 @@ struct pim_rpf *pim_rp_g(struct pim_instance *pim, pim_addr group)  			zlog_debug(  				"%s: NHT Register RP addr %pPA grp %pFX with Zebra",  				__func__, &nht_p, &rp_info->group); -		pim_find_or_track_nexthop(pim, nht_p, NULL, rp_info, NULL); +		pim_nht_find_or_track(pim, nht_p, NULL, rp_info, NULL);  		pim_rpf_set_refresh_time(pim); -		(void)pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, -					      nht_p, &rp_info->group, 1); +		pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, nht_p, &rp_info->group, true);  		return (&rp_info->rp);  	} @@ -1331,7 +1312,6 @@ void pim_resolve_rp_nh(struct pim_instance *pim, struct pim_neighbor *nbr)  {  	struct listnode *node = NULL;  	struct rp_info *rp_info = NULL; -	struct nexthop *nh_node = NULL;  	pim_addr nht_p;  	struct pim_nexthop_cache pnc; @@ -1341,35 +1321,11 @@ void pim_resolve_rp_nh(struct pim_instance *pim, struct pim_neighbor *nbr)  		nht_p = rp_info->rp.rpf_addr;  		memset(&pnc, 0, sizeof(struct pim_nexthop_cache)); -		if (!pim_find_or_track_nexthop(pim, nht_p, NULL, rp_info, &pnc)) -			continue; - -		for (nh_node = pnc.nexthop; nh_node; nh_node = nh_node->next) { -#if PIM_IPV == 4 -			if (!pim_addr_is_any(nh_node->gate.ipv4)) -				continue; -#else -			if (!pim_addr_is_any(nh_node->gate.ipv6)) -				continue; -#endif - -			struct interface *ifp1 = if_lookup_by_index( -				nh_node->ifindex, pim->vrf->vrf_id); -			if (nbr->interface != ifp1) -				continue; +		if (!pim_nht_find_or_track(pim, nht_p, NULL, rp_info, &pnc)) +			continue; -#if PIM_IPV == 4 -			nh_node->gate.ipv4 = nbr->source_addr; -#else -			nh_node->gate.ipv6 = nbr->source_addr; -#endif -			if (PIM_DEBUG_PIM_NHT_RP) -				zlog_debug( -					"%s: addr %pPA new nexthop addr %pPAs interface %s", -					__func__, &nht_p, &nbr->source_addr, -					ifp1->name); -		} +		pim_nht_set_gateway(pim, &pnc, nbr->source_addr, nbr->interface);  	}  } @@ -1534,9 +1490,9 @@ void pim_embedded_rp_new(struct pim_instance *pim, const pim_addr *group, const  		zlog_debug("%s: NHT Register RP addr %pPA grp %pFX with Zebra", __func__,  			   &rp_info->rp.rpf_addr, &rp_info->group); -	pim_find_or_track_nexthop(pim, rp_info->rp.rpf_addr, NULL, rp_info, NULL); -	if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop, rp_info->rp.rpf_addr, -				     &rp_info->group, 1)) { +	pim_nht_find_or_track(pim, rp_info->rp.rpf_addr, NULL, rp_info, NULL); +	if (!pim_nht_lookup_ecmp(pim, &rp_info->rp.source_nexthop, rp_info->rp.rpf_addr, +				 &rp_info->group, 1)) {  		if (PIM_DEBUG_PIM_NHT_RP)  			zlog_debug("%s: Embedded RP %pPA learned but no next hop", __func__,  				   &rp_info->rp.rpf_addr); @@ -1576,7 +1532,7 @@ void pim_embedded_rp_free(struct pim_instance *pim, struct rp_info *rp_info)  	if (PIM_DEBUG_TRACE)  		zlog_debug("delete embedded RP %pPA", &rp_info->rp.rpf_addr); -	pim_delete_tracked_nexthop(pim, rp_info->rp.rpf_addr, NULL, rp_info); +	pim_nht_delete_tracked(pim, rp_info->rp.rpf_addr, NULL, rp_info);  	listnode_delete(pim->rp_list, rp_info);  	XFREE(MTYPE_PIM_EMBEDDED_RP_ENTRY, rp_info);  } diff --git a/pimd/pim_rp.h b/pimd/pim_rp.h index 9da059f8be..a7818a9d39 100644 --- a/pimd/pim_rp.h +++ b/pimd/pim_rp.h @@ -42,8 +42,6 @@ struct rp_info {  void pim_rp_init(struct pim_instance *pim);  void pim_rp_free(struct pim_instance *pim); -void pim_rp_list_hash_clean(void *data); -  int pim_rp_new(struct pim_instance *pim, pim_addr rp_addr, struct prefix group,  	       const char *plist, enum rp_source rp_src_flag);  void pim_rp_del_config(struct pim_instance *pim, pim_addr rp_addr, diff --git a/pimd/pim_rpf.c b/pimd/pim_rpf.c index d18ec4943a..75e9213825 100644 --- a/pimd/pim_rpf.c +++ b/pimd/pim_rpf.c @@ -38,120 +38,6 @@ void pim_rpf_set_refresh_time(struct pim_instance *pim)  			   pim->last_route_change_time);  } -bool pim_nexthop_lookup(struct pim_instance *pim, struct pim_nexthop *nexthop, -			pim_addr addr, int neighbor_needed) -{ -	struct pim_zlookup_nexthop nexthop_tab[router->multipath]; -	struct pim_neighbor *nbr = NULL; -	int num_ifindex; -	struct interface *ifp = NULL; -	ifindex_t first_ifindex = 0; -	int found = 0; -	int i = 0; -	struct pim_interface *pim_ifp; - -#if PIM_IPV == 4 -	/* -	 * We should not attempt to lookup a -	 * 255.255.255.255 address, since -	 * it will never work -	 */ -	if (pim_addr_is_any(addr)) -		return false; -#endif - -	if ((!pim_addr_cmp(nexthop->last_lookup, addr)) && -	    (nexthop->last_lookup_time > pim->last_route_change_time)) { -		if (PIM_DEBUG_PIM_NHT) -			zlog_debug( -				"%s: Using last lookup for %pPAs at %lld, %" PRId64 -				" addr %pPAs", -				__func__, &addr, nexthop->last_lookup_time, -				pim->last_route_change_time, -				&nexthop->mrib_nexthop_addr); -		pim->nexthop_lookups_avoided++; -		return true; -	} else { -		if (PIM_DEBUG_PIM_NHT) -			zlog_debug( -				"%s: Looking up: %pPAs, last lookup time: %lld, %" PRId64, -				__func__, &addr, nexthop->last_lookup_time, -				pim->last_route_change_time); -	} - -	memset(nexthop_tab, 0, -	       sizeof(struct pim_zlookup_nexthop) * router->multipath); -	num_ifindex = -		zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, -				       addr, PIM_NEXTHOP_LOOKUP_MAX); -	if (num_ifindex < 1) { -		if (PIM_DEBUG_PIM_NHT) -			zlog_debug( -				"%s %s: could not find nexthop ifindex for address %pPAs", -				__FILE__, __func__, &addr); -		return false; -	} - -	while (!found && (i < num_ifindex)) { -		first_ifindex = nexthop_tab[i].ifindex; - -		ifp = if_lookup_by_index(first_ifindex, pim->vrf->vrf_id); -		if (!ifp) { -			if (PIM_DEBUG_ZEBRA) -				zlog_debug( -					"%s %s: could not find interface for ifindex %d (address %pPAs)", -					__FILE__, __func__, first_ifindex, -					&addr); -			i++; -			continue; -		} - -		pim_ifp = ifp->info; -		if (!pim_ifp || !pim_ifp->pim_enable) { -			if (PIM_DEBUG_ZEBRA) -				zlog_debug( -					"%s: pim not enabled on input interface %s (ifindex=%d, RPF for source %pPAs)", -					__func__, ifp->name, first_ifindex, -					&addr); -			i++; -		} else if (neighbor_needed && -			   !pim_if_connected_to_source(ifp, addr)) { -			nbr = pim_neighbor_find( -				ifp, nexthop_tab[i].nexthop_addr, true); -			if (PIM_DEBUG_PIM_TRACE_DETAIL) -				zlog_debug("ifp name: %s, pim nbr: %p", -					   ifp->name, nbr); -			if (!nbr && !if_is_loopback(ifp)) -				i++; -			else -				found = 1; -		} else -			found = 1; -	} - -	if (found) { -		if (PIM_DEBUG_ZEBRA) -			zlog_debug( -				"%s %s: found nexthop %pPAs for address %pPAs: interface %s ifindex=%d metric=%d pref=%d", -				__FILE__, __func__, -				&nexthop_tab[i].nexthop_addr, &addr, ifp->name, -				first_ifindex, nexthop_tab[i].route_metric, -				nexthop_tab[i].protocol_distance); - -		/* update nexthop data */ -		nexthop->interface = ifp; -		nexthop->mrib_nexthop_addr = nexthop_tab[i].nexthop_addr; -		nexthop->mrib_metric_preference = -			nexthop_tab[i].protocol_distance; -		nexthop->mrib_route_metric = nexthop_tab[i].route_metric; -		nexthop->last_lookup = addr; -		nexthop->last_lookup_time = pim_time_monotonic_usec(); -		nexthop->nbr = nbr; -		return true; -	} else -		return false; -} -  static int nexthop_mismatch(const struct pim_nexthop *nh1,  			    const struct pim_nexthop *nh2)  { @@ -221,9 +107,9 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,  	if ((pim_addr_is_any(up->sg.src) && I_am_RP(pim, up->sg.grp)) ||  	    PIM_UPSTREAM_FLAG_TEST_FHR(up->flags))  		neigh_needed = false; -	pim_find_or_track_nexthop(pim, up->upstream_addr, up, NULL, NULL); -	if (!pim_ecmp_nexthop_lookup(pim, &rpf->source_nexthop, src, &grp, -				     neigh_needed)) { + +	pim_nht_find_or_track(pim, up->upstream_addr, up, NULL, NULL); +	if (!pim_nht_lookup_ecmp(pim, &rpf->source_nexthop, src, &grp, neigh_needed)) {  		/* Route is Deleted in Zebra, reset the stored NH data */  		pim_upstream_rpf_clear(pim, up);  		pim_rpf_cost_change(pim, up, saved_mrib_route_metric); @@ -371,25 +257,3 @@ int pim_rpf_is_same(struct pim_rpf *rpf1, struct pim_rpf *rpf2)  	return 0;  } - -unsigned int pim_rpf_hash_key(const void *arg) -{ -	const struct pim_nexthop_cache *r = arg; - -#if PIM_IPV == 4 -	return jhash_1word(r->rpf.rpf_addr.s_addr, 0); -#else -	return jhash2(r->rpf.rpf_addr.s6_addr32, -		      array_size(r->rpf.rpf_addr.s6_addr32), 0); -#endif -} - -bool pim_rpf_equal(const void *arg1, const void *arg2) -{ -	const struct pim_nexthop_cache *r1 = -		(const struct pim_nexthop_cache *)arg1; -	const struct pim_nexthop_cache *r2 = -		(const struct pim_nexthop_cache *)arg2; - -	return (!pim_addr_cmp(r1->rpf.rpf_addr, r2->rpf.rpf_addr)); -} diff --git a/pimd/pim_rpf.h b/pimd/pim_rpf.h index 7dae53f8fc..84d6b7f6c2 100644 --- a/pimd/pim_rpf.h +++ b/pimd/pim_rpf.h @@ -11,6 +11,7 @@  #include "pim_str.h"  struct pim_instance; +struct pim_upstream;  /*    RFC 4601: @@ -41,13 +42,17 @@ struct pim_rpf {  enum pim_rpf_result { PIM_RPF_OK = 0, PIM_RPF_CHANGED, PIM_RPF_FAILURE }; -struct pim_upstream; - -unsigned int pim_rpf_hash_key(const void *arg); -bool pim_rpf_equal(const void *arg1, const void *arg2); +/* RPF lookup behaviour */ +enum pim_rpf_lookup_mode { +	MCAST_NO_CONFIG = 0,  /* MIX_MRIB_FIRST, but no show in config write */ +	MCAST_MRIB_ONLY,      /* MRIB only */ +	MCAST_URIB_ONLY,      /* URIB only */ +	MCAST_MIX_MRIB_FIRST, /* MRIB, if nothing at all then URIB */ +	MCAST_MIX_DISTANCE,   /* MRIB & URIB, lower distance wins */ +	MCAST_MIX_PFXLEN,     /* MRIB & URIB, longer prefix wins */ +			      /* on equal value, MRIB wins for last 2 */ +}; -bool pim_nexthop_lookup(struct pim_instance *pim, struct pim_nexthop *nexthop, -			pim_addr addr, int neighbor_needed);  enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,  				   struct pim_upstream *up,  				   struct pim_rpf *old, const char *caller); diff --git a/pimd/pim_tib.c b/pimd/pim_tib.c index 2786ba440d..e9992691a5 100644 --- a/pimd/pim_tib.c +++ b/pimd/pim_tib.c @@ -34,16 +34,13 @@ tib_sg_oil_setup(struct pim_instance *pim, pim_sgaddr sg, struct interface *oif)  	up = pim_upstream_find(pim, &sg);  	if (up) { -		memcpy(&nexthop, &up->rpf.source_nexthop, -		       sizeof(struct pim_nexthop)); -		(void)pim_ecmp_nexthop_lookup(pim, &nexthop, vif_source, &grp, -					      0); +		memcpy(&nexthop, &up->rpf.source_nexthop, sizeof(struct pim_nexthop)); +		pim_nht_lookup_ecmp(pim, &nexthop, vif_source, &grp, false);  		if (nexthop.interface)  			input_iface_vif_index = pim_if_find_vifindex_by_ifindex(  				pim, nexthop.interface->ifindex);  	} else -		input_iface_vif_index = -			pim_ecmp_fib_lookup_if_vif_index(pim, vif_source, &grp); +		input_iface_vif_index = pim_nht_lookup_ecmp_if_vif_index(pim, vif_source, &grp);  	if (PIM_DEBUG_ZEBRA)  		zlog_debug("%s: NHT %pSG vif_source %pPAs vif_index:%d", diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index 8aa61b687d..c52119e43a 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -265,7 +265,7 @@ struct pim_upstream *pim_upstream_del(struct pim_instance *pim,  			zlog_debug(  				"%s: Deregister upstream %s addr %pPA with Zebra NHT",  				__func__, up->sg_str, &up->upstream_addr); -		pim_delete_tracked_nexthop(pim, up->upstream_addr, up, NULL); +		pim_nht_delete_tracked(pim, up->upstream_addr, up, NULL);  	}  	XFREE(MTYPE_PIM_UPSTREAM, up); diff --git a/pimd/pim_vty.c b/pimd/pim_vty.c index fc9781b239..974cf30cf1 100644 --- a/pimd/pim_vty.c +++ b/pimd/pim_vty.c @@ -275,6 +275,16 @@ int pim_global_config_write_worker(struct pim_instance *pim, struct vty *vty)  		}  	} +	if (pim->rpf_mode != MCAST_NO_CONFIG) { +		++writes; +		vty_out(vty, " rpf-lookup-mode %s\n", +			pim->rpf_mode == MCAST_URIB_ONLY	? "urib-only" +			: pim->rpf_mode == MCAST_MRIB_ONLY	? "mrib-only" +			: pim->rpf_mode == MCAST_MIX_MRIB_FIRST ? "mrib-then-urib" +			: pim->rpf_mode == MCAST_MIX_DISTANCE	? "lower-distance" +								: "longer-prefix"); +	} +  	return writes;  } diff --git a/pimd/pim_vxlan.c b/pimd/pim_vxlan.c index f1f315cc98..511d35bf76 100644 --- a/pimd/pim_vxlan.c +++ b/pimd/pim_vxlan.c @@ -411,10 +411,9 @@ static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg)  		 * we must dereg the old nexthop and force to new "static"  		 * iif  		 */ -		if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) { -			pim_delete_tracked_nexthop(vxlan_sg->pim, -						   up->upstream_addr, up, NULL); -		} +		if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) +			pim_nht_delete_tracked(vxlan_sg->pim, up->upstream_addr, up, NULL); +  		/* We are acting FHR; clear out use_rpt setting if any */  		pim_upstream_update_use_rpt(up, false /*update_mroute*/);  		pim_upstream_ref(up, flags, __func__); diff --git a/pimd/pim_zlookup.c b/pimd/pim_zlookup.c index 5d344f1f66..febc595ad4 100644 --- a/pimd/pim_zlookup.c +++ b/pimd/pim_zlookup.c @@ -153,6 +153,7 @@ static int zclient_read_nexthop(struct pim_instance *pim,  	struct ipaddr raddr;  	uint8_t distance;  	uint32_t metric; +	uint16_t prefix_len;  	int nexthop_num;  	int i, err; @@ -162,7 +163,7 @@ static int zclient_read_nexthop(struct pim_instance *pim,  	s = zlookup->ibuf; -	while (command != ZEBRA_NEXTHOP_LOOKUP_MRIB) { +	while (command != ZEBRA_NEXTHOP_LOOKUP) {  		stream_reset(s);  		err = zclient_read_header(s, zlookup->sock, &length, &marker,  					  &version, &vrf_id, &command); @@ -193,8 +194,14 @@ static int zclient_read_nexthop(struct pim_instance *pim,  	distance = stream_getc(s);  	metric = stream_getl(s); +	prefix_len = stream_getw(s);  	nexthop_num = stream_getw(s); +	if (PIM_DEBUG_PIM_NHT_DETAIL) +		zlog_debug("%s: addr=%pPAs(%s), distance=%d, metric=%d, prefix_len=%d, nexthop_num=%d", +			   __func__, &addr, pim->vrf->name, distance, metric, prefix_len, +			   nexthop_num); +  	if (nexthop_num < 1 || nexthop_num > router->multipath) {  		if (PIM_DEBUG_PIM_NHT_DETAIL)  			zlog_debug("%s: socket %d bad nexthop_num=%d", __func__, @@ -220,6 +227,7 @@ static int zclient_read_nexthop(struct pim_instance *pim,  		}  		nexthop_tab[num_ifindex].protocol_distance = distance;  		nexthop_tab[num_ifindex].route_metric = metric; +		nexthop_tab[num_ifindex].prefix_len = prefix_len;  		nexthop_tab[num_ifindex].vrf_id = nexthop_vrf_id;  		switch (nexthop_type) {  		case NEXTHOP_TYPE_IFINDEX: @@ -301,20 +309,23 @@ static int zclient_read_nexthop(struct pim_instance *pim,  		}  	} +	if (PIM_DEBUG_PIM_NHT_DETAIL) +		zlog_debug("%s: addr=%pPAs(%s), num_ifindex=%d", __func__, &addr, pim->vrf->name, +			   num_ifindex); +  	return num_ifindex;  } -static int zclient_lookup_nexthop_once(struct pim_instance *pim, -				       struct pim_zlookup_nexthop nexthop_tab[], -				       const int tab_size, pim_addr addr) +static int zclient_rib_lookup(struct pim_instance *pim, struct pim_zlookup_nexthop nexthop_tab[], +			      const int tab_size, pim_addr addr, safi_t safi)  {  	struct stream *s;  	int ret;  	struct ipaddr ipaddr;  	if (PIM_DEBUG_PIM_NHT_DETAIL) -		zlog_debug("%s: addr=%pPAs(%s)", __func__, &addr, -			   pim->vrf->name); +		zlog_debug("%s: addr=%pPAs(%s), %sRIB", __func__, &addr, pim->vrf->name, +			   (safi == SAFI_MULTICAST ? "M" : "U"));  	/* Check socket. */  	if (zlookup->sock < 0) { @@ -337,8 +348,9 @@ static int zclient_lookup_nexthop_once(struct pim_instance *pim,  	s = zlookup->obuf;  	stream_reset(s); -	zclient_create_header(s, ZEBRA_NEXTHOP_LOOKUP_MRIB, pim->vrf->vrf_id); +	zclient_create_header(s, ZEBRA_NEXTHOP_LOOKUP, pim->vrf->vrf_id);  	stream_put_ipaddr(s, &ipaddr); +	stream_putc(s, safi);  	stream_putw_at(s, 0, stream_get_endp(s));  	ret = writen(zlookup->sock, s->data, stream_get_endp(s)); @@ -361,6 +373,79 @@ static int zclient_lookup_nexthop_once(struct pim_instance *pim,  	return zclient_read_nexthop(pim, zlookup, nexthop_tab, tab_size, addr);  } +static int zclient_lookup_nexthop_once(struct pim_instance *pim, +				       struct pim_zlookup_nexthop nexthop_tab[], const int tab_size, +				       pim_addr addr) +{ +	if (pim->rpf_mode == MCAST_MRIB_ONLY) +		return zclient_rib_lookup(pim, nexthop_tab, tab_size, addr, SAFI_MULTICAST); + +	if (pim->rpf_mode == MCAST_URIB_ONLY) +		return zclient_rib_lookup(pim, nexthop_tab, tab_size, addr, SAFI_UNICAST); + +	/* All other modes require looking up both tables and making a choice */ +	struct pim_zlookup_nexthop mrib_tab[tab_size]; +	struct pim_zlookup_nexthop urib_tab[tab_size]; +	int mrib_num; +	int urib_num; + +	memset(mrib_tab, 0, sizeof(struct pim_zlookup_nexthop) * tab_size); +	memset(urib_tab, 0, sizeof(struct pim_zlookup_nexthop) * tab_size); + +	if (PIM_DEBUG_PIM_NHT_DETAIL) +		zlog_debug("%s: addr=%pPAs(%s), looking up both MRIB and URIB", __func__, &addr, +			   pim->vrf->name); + +	mrib_num = zclient_rib_lookup(pim, mrib_tab, tab_size, addr, SAFI_MULTICAST); +	urib_num = zclient_rib_lookup(pim, urib_tab, tab_size, addr, SAFI_UNICAST); + +	if (PIM_DEBUG_PIM_NHT_DETAIL) +		zlog_debug("%s: addr=%pPAs(%s), MRIB nexthops=%d, URIB nexthops=%d", __func__, +			   &addr, pim->vrf->name, mrib_num, urib_num); + +	/* If only one table has results, use that always */ +	if (mrib_num < 1) { +		if (urib_num > 0) +			memcpy(nexthop_tab, urib_tab, sizeof(struct pim_zlookup_nexthop) * tab_size); +		return urib_num; +	} + +	if (urib_num < 1) { +		if (mrib_num > 0) +			memcpy(nexthop_tab, mrib_tab, sizeof(struct pim_zlookup_nexthop) * tab_size); +		return mrib_num; +	} + +	/* See if we should use the URIB based on configured lookup mode */ +	/* Both tables have results, so compare them. Distance and prefix length are the same for all +	 * nexthops, so only compare the first in the list +	 */ +	if (pim->rpf_mode == MCAST_MIX_DISTANCE && +	    mrib_tab[0].protocol_distance > urib_tab[0].protocol_distance) { +		if (PIM_DEBUG_PIM_NHT_DETAIL) +			zlog_debug("%s: addr=%pPAs(%s), URIB has shortest distance", __func__, +				   &addr, pim->vrf->name); +		memcpy(nexthop_tab, urib_tab, sizeof(struct pim_zlookup_nexthop) * tab_size); +		return urib_num; +	} else if (pim->rpf_mode == MCAST_MIX_PFXLEN && +		   mrib_tab[0].prefix_len < urib_tab[0].prefix_len) { +		if (PIM_DEBUG_PIM_NHT_DETAIL) +			zlog_debug("%s: addr=%pPAs(%s), URIB has lengthest prefix length", __func__, +				   &addr, pim->vrf->name); +		memcpy(nexthop_tab, urib_tab, sizeof(struct pim_zlookup_nexthop) * tab_size); +		return urib_num; +	} + +	/* All others use the MRIB */ +	/* For MCAST_MIX_MRIB_FIRST (and by extension, MCAST_NO_CONFIG), +	 * always return mrib if both have results +	 */ +	if (PIM_DEBUG_PIM_NHT_DETAIL) +		zlog_debug("%s: addr=%pPAs(%s), MRIB has nexthops", __func__, &addr, pim->vrf->name); +	memcpy(nexthop_tab, mrib_tab, sizeof(struct pim_zlookup_nexthop) * tab_size); +	return mrib_num; +} +  void zclient_lookup_read_pipe(struct event *thread)  {  	struct zclient *zlookup = EVENT_ARG(thread); diff --git a/pimd/pim_zlookup.h b/pimd/pim_zlookup.h index ee2dd20113..c9461eb7e3 100644 --- a/pimd/pim_zlookup.h +++ b/pimd/pim_zlookup.h @@ -21,6 +21,7 @@ struct pim_zlookup_nexthop {  	ifindex_t ifindex;  	uint32_t route_metric;  	uint8_t protocol_distance; +	uint16_t prefix_len;  };  void zclient_lookup_new(void);  | 
