diff options
| author | Donatas Abraitis <donatas@opensourcerouting.org> | 2023-05-02 18:36:45 +0300 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-02 18:36:45 +0300 | 
| commit | 99a1ab0b2150ae82f0e831dc37d1b91da763318d (patch) | |
| tree | 7d7b940739d53db7d37205b6a6782f703635c510 /bgpd | |
| parent | d0053da26b49166353c5f48c11fb11f6bea691d0 (diff) | |
| parent | dab2df8dc0d8a83edb796c2205491d7e6664c2dc (diff) | |
Merge pull request #12646 from pguibert6WIND/mpls_alloc_per_nh
MPLS allocation mode per next hop
Diffstat (limited to 'bgpd')
| -rw-r--r-- | bgpd/bgp_labelpool.c | 185 | ||||
| -rw-r--r-- | bgpd/bgp_labelpool.h | 52 | ||||
| -rw-r--r-- | bgpd/bgp_mplsvpn.c | 303 | ||||
| -rw-r--r-- | bgpd/bgp_mplsvpn.h | 1 | ||||
| -rw-r--r-- | bgpd/bgp_nexthop.c | 3 | ||||
| -rw-r--r-- | bgpd/bgp_nht.c | 55 | ||||
| -rw-r--r-- | bgpd/bgp_route.c | 6 | ||||
| -rw-r--r-- | bgpd/bgp_route.h | 6 | ||||
| -rw-r--r-- | bgpd/bgp_vty.c | 67 | ||||
| -rw-r--r-- | bgpd/bgp_zebra.c | 29 | ||||
| -rw-r--r-- | bgpd/bgp_zebra.h | 4 | ||||
| -rw-r--r-- | bgpd/bgpd.c | 7 | ||||
| -rw-r--r-- | bgpd/bgpd.h | 5 | 
13 files changed, 708 insertions, 15 deletions
diff --git a/bgpd/bgp_labelpool.c b/bgpd/bgp_labelpool.c index 9943f57fb3..faddfc995f 100644 --- a/bgpd/bgp_labelpool.c +++ b/bgpd/bgp_labelpool.c @@ -23,6 +23,9 @@  #include "bgpd/bgp_debug.h"  #include "bgpd/bgp_errors.h"  #include "bgpd/bgp_route.h" +#include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_vty.h" +#include "bgpd/bgp_rd.h"  #define BGP_LABELPOOL_ENABLE_TESTS 0 @@ -830,6 +833,16 @@ DEFUN(show_bgp_labelpool_ledger, show_bgp_labelpool_ledger_cmd,  					lcb->label);  			break; +		case LP_TYPE_NEXTHOP: +			if (uj) { +				json_object_string_add(json_elem, "prefix", +						       "nexthop"); +				json_object_int_add(json_elem, "label", +						    lcb->label); +			} else +				vty_out(vty, "%-18s         %u\n", "nexthop", +					lcb->label); +			break;  		}  	}  	if (uj) @@ -919,6 +932,15 @@ DEFUN(show_bgp_labelpool_inuse, show_bgp_labelpool_inuse_cmd,  				vty_out(vty, "%-18s         %u\n", "VRF",  					label);  			break; +		case LP_TYPE_NEXTHOP: +			if (uj) { +				json_object_string_add(json_elem, "prefix", +						       "nexthop"); +				json_object_int_add(json_elem, "label", label); +			} else +				vty_out(vty, "%-18s         %u\n", "nexthop", +					label); +			break;  		}  	}  	if (uj) @@ -991,6 +1013,13 @@ DEFUN(show_bgp_labelpool_requests, show_bgp_labelpool_requests_cmd,  			else  				vty_out(vty, "VRF\n");  			break; +		case LP_TYPE_NEXTHOP: +			if (uj) +				json_object_string_add(json_elem, "prefix", +						       "nexthop"); +			else +				vty_out(vty, "Nexthop\n"); +			break;  		}  	}  	if (uj) @@ -1053,6 +1082,99 @@ DEFUN(show_bgp_labelpool_chunks, show_bgp_labelpool_chunks_cmd,  	return CMD_SUCCESS;  } +static void show_bgp_nexthop_label_afi(struct vty *vty, afi_t afi, +				       struct bgp *bgp, bool detail) +{ +	struct bgp_label_per_nexthop_cache_head *tree; +	struct bgp_label_per_nexthop_cache *iter; +	safi_t safi; +	void *src; +	char buf[PREFIX2STR_BUFFER]; +	char labelstr[MPLS_LABEL_STRLEN]; +	struct bgp_dest *dest; +	struct bgp_path_info *path; +	struct bgp *bgp_path; +	struct bgp_table *table; +	time_t tbuf; + +	vty_out(vty, "Current BGP label nexthop cache for %s, VRF %s\n", +		afi2str(afi), bgp->name_pretty); + +	tree = &bgp->mpls_labels_per_nexthop[afi]; +	frr_each (bgp_label_per_nexthop_cache, tree, iter) { +		if (afi2family(afi) == AF_INET) +			src = (void *)&iter->nexthop.u.prefix4; +		else +			src = (void *)&iter->nexthop.u.prefix6; + +		vty_out(vty, " %s, label %s #paths %u\n", +			inet_ntop(afi2family(afi), src, buf, sizeof(buf)), +			mpls_label2str(1, &iter->label, labelstr, +				       sizeof(labelstr), 0, true), +			iter->path_count); +		if (iter->nh) +			vty_out(vty, "  if %s\n", +				ifindex2ifname(iter->nh->ifindex, +					       iter->nh->vrf_id)); +		tbuf = time(NULL) - (monotime(NULL) - iter->last_update); +		vty_out(vty, "  Last update: %s", ctime(&tbuf)); +		if (!detail) +			continue; +		vty_out(vty, "  Paths:\n"); +		LIST_FOREACH (path, &(iter->paths), label_nh_thread) { +			dest = path->net; +			table = bgp_dest_table(dest); +			assert(dest && table); +			afi = family2afi(bgp_dest_get_prefix(dest)->family); +			safi = table->safi; +			bgp_path = table->bgp; + +			if (dest->pdest) { +				vty_out(vty, "    %d/%d %pBD RD ", afi, safi, +					dest); + +				vty_out(vty, BGP_RD_AS_FORMAT(bgp->asnotation), +					(struct prefix_rd *)bgp_dest_get_prefix( +						dest->pdest)); +				vty_out(vty, " %s flags 0x%x\n", +					bgp_path->name_pretty, path->flags); +			} else +				vty_out(vty, "    %d/%d %pBD %s flags 0x%x\n", +					afi, safi, dest, bgp_path->name_pretty, +					path->flags); +		} +	} +} + +DEFPY(show_bgp_nexthop_label, show_bgp_nexthop_label_cmd, +      "show bgp [<view|vrf> VIEWVRFNAME] label-nexthop [detail]", +      SHOW_STR BGP_STR BGP_INSTANCE_HELP_STR +      "BGP label per-nexthop table\n" +      "Show detailed information\n") +{ +	int idx = 0; +	char *vrf = NULL; +	struct bgp *bgp; +	bool detail = false; +	int afi; + +	if (argv_find(argv, argc, "vrf", &idx)) { +		vrf = argv[++idx]->arg; +		bgp = bgp_lookup_by_name(vrf); +	} else +		bgp = bgp_get_default(); + +	if (!bgp) +		return CMD_SUCCESS; + +	if (argv_find(argv, argc, "detail", &idx)) +		detail = true; + +	for (afi = AFI_IP; afi <= AFI_IP6; afi++) +		show_bgp_nexthop_label_afi(vty, afi, bgp, detail); +	return CMD_SUCCESS; +} +  #if BGP_LABELPOOL_ENABLE_TESTS  /*------------------------------------------------------------------------   *			Testing code start @@ -1532,3 +1654,66 @@ void bgp_lp_vty_init(void)  	install_element(ENABLE_NODE, &clear_labelpool_perf_test_cmd);  #endif /* BGP_LABELPOOL_ENABLE_TESTS */  } + +DEFINE_MTYPE_STATIC(BGPD, LABEL_PER_NEXTHOP_CACHE, +		    "BGP Label Per Nexthop entry"); + +/* The nexthops values are compared to + * find in the tree the appropriate cache entry + */ +int bgp_label_per_nexthop_cache_cmp(const struct bgp_label_per_nexthop_cache *a, +				    const struct bgp_label_per_nexthop_cache *b) +{ +	return prefix_cmp(&a->nexthop, &b->nexthop); +} + +struct bgp_label_per_nexthop_cache * +bgp_label_per_nexthop_new(struct bgp_label_per_nexthop_cache_head *tree, +			  struct prefix *nexthop) +{ +	struct bgp_label_per_nexthop_cache *blnc; + +	blnc = XCALLOC(MTYPE_LABEL_PER_NEXTHOP_CACHE, +		       sizeof(struct bgp_label_per_nexthop_cache)); +	blnc->tree = tree; +	blnc->label = MPLS_INVALID_LABEL; +	prefix_copy(&blnc->nexthop, nexthop); +	LIST_INIT(&(blnc->paths)); +	bgp_label_per_nexthop_cache_add(tree, blnc); + +	return blnc; +} + +struct bgp_label_per_nexthop_cache * +bgp_label_per_nexthop_find(struct bgp_label_per_nexthop_cache_head *tree, +			   struct prefix *nexthop) +{ +	struct bgp_label_per_nexthop_cache blnc = {}; + +	if (!tree) +		return NULL; + +	memcpy(&blnc.nexthop, nexthop, sizeof(struct prefix)); +	return bgp_label_per_nexthop_cache_find(tree, &blnc); +} + +void bgp_label_per_nexthop_free(struct bgp_label_per_nexthop_cache *blnc) +{ +	if (blnc->label != MPLS_INVALID_LABEL) { +		bgp_zebra_send_nexthop_label(ZEBRA_MPLS_LABELS_DELETE, +					     blnc->label, blnc->nh->ifindex, +					     blnc->nh->vrf_id, ZEBRA_LSP_BGP, +					     &blnc->nexthop); +		bgp_lp_release(LP_TYPE_NEXTHOP, blnc, blnc->label); +	} +	bgp_label_per_nexthop_cache_del(blnc->tree, blnc); +	if (blnc->nh) +		nexthop_free(blnc->nh); +	blnc->nh = NULL; +	XFREE(MTYPE_LABEL_PER_NEXTHOP_CACHE, blnc); +} + +void bgp_label_per_nexthop_init(void) +{ +	install_element(VIEW_NODE, &show_bgp_nexthop_label_cmd); +} diff --git a/bgpd/bgp_labelpool.h b/bgpd/bgp_labelpool.h index 9526cba0ce..b33527186e 100644 --- a/bgpd/bgp_labelpool.h +++ b/bgpd/bgp_labelpool.h @@ -17,6 +17,7 @@   */  #define LP_TYPE_VRF	0x00000001  #define LP_TYPE_BGP_LU	0x00000002 +#define LP_TYPE_NEXTHOP 0x00000003  PREDECL_LIST(lp_fifo); @@ -41,4 +42,55 @@ extern void bgp_lp_event_zebra_down(void);  extern void bgp_lp_event_zebra_up(void);  extern void bgp_lp_vty_init(void); +struct bgp_label_per_nexthop_cache; +PREDECL_RBTREE_UNIQ(bgp_label_per_nexthop_cache); + +extern int +bgp_label_per_nexthop_cache_cmp(const struct bgp_label_per_nexthop_cache *a, +				const struct bgp_label_per_nexthop_cache *b); + +struct bgp_label_per_nexthop_cache { + +	/* RB-tree entry. */ +	struct bgp_label_per_nexthop_cache_item entry; + +	/* the nexthop is the key of the list */ +	struct prefix nexthop; + +	/* calculated label */ +	mpls_label_t label; + +	/* number of path_vrfs */ +	unsigned int path_count; + +	/* back pointer to bgp instance */ +	struct bgp *to_bgp; + +	/* copy a nexthop resolution from bgp nexthop tracking +	 * used to extract the interface nexthop +	 */ +	struct nexthop *nh; + +	/* list of path_vrfs using it */ +	LIST_HEAD(path_lists, bgp_path_info) paths; + +	time_t last_update; + +	/* Back pointer to the cache tree this entry belongs to. */ +	struct bgp_label_per_nexthop_cache_head *tree; +}; + +DECLARE_RBTREE_UNIQ(bgp_label_per_nexthop_cache, +		    struct bgp_label_per_nexthop_cache, entry, +		    bgp_label_per_nexthop_cache_cmp); + +void bgp_label_per_nexthop_free(struct bgp_label_per_nexthop_cache *blnc); + +struct bgp_label_per_nexthop_cache * +bgp_label_per_nexthop_new(struct bgp_label_per_nexthop_cache_head *tree, +			  struct prefix *nexthop); +struct bgp_label_per_nexthop_cache * +bgp_label_per_nexthop_find(struct bgp_label_per_nexthop_cache_head *tree, +			   struct prefix *nexthop); +void bgp_label_per_nexthop_init(void);  #endif /* _FRR_BGP_LABELPOOL_H */ diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c index 63168f1e7a..ecc84533b0 100644 --- a/bgpd/bgp_mplsvpn.c +++ b/bgpd/bgp_mplsvpn.c @@ -1116,12 +1116,14 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,  	/*  	 * Routes that are redistributed into BGP from zebra do not get -	 * nexthop tracking. However, if those routes are subsequently -	 * imported to other RIBs within BGP, the leaked routes do not -	 * carry the original BGP_ROUTE_REDISTRIBUTE sub_type. Therefore, -	 * in order to determine if the route we are currently leaking -	 * should have nexthop tracking, we must find the ultimate -	 * parent so we can check its sub_type. +	 * nexthop tracking, unless MPLS allocation per nexthop is +	 * performed. In the default case nexthop tracking does not apply, +	 * if those routes are subsequently imported to other RIBs within +	 * BGP, the leaked routes do not carry the original +	 * BGP_ROUTE_REDISTRIBUTE sub_type. Therefore, in order to determine +	 * if the route we are currently leaking should have nexthop +	 * tracking, we must find the ultimate parent so we can check its +	 * sub_type.  	 *  	 * As of now, source_bpi may at most be a second-generation route  	 * (only one hop back to ultimate parent for vrf-vpn-vrf scheme). @@ -1336,6 +1338,265 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,  	return new;  } +void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi) +{ +	struct bgp_label_per_nexthop_cache *blnc; + +	if (!pi) +		return; + +	blnc = pi->label_nexthop_cache; + +	if (!blnc) +		return; + +	LIST_REMOVE(pi, label_nh_thread); +	pi->label_nexthop_cache->path_count--; +	pi->label_nexthop_cache = NULL; + +	if (LIST_EMPTY(&(blnc->paths))) +		bgp_label_per_nexthop_free(blnc); +} + +/* Called upon reception of a ZAPI Message from zebra, about + * a new available label. + */ +static int bgp_mplsvpn_get_label_per_nexthop_cb(mpls_label_t label, +						void *context, bool allocated) +{ +	struct bgp_label_per_nexthop_cache *blnc = context; +	mpls_label_t old_label; +	int debug = BGP_DEBUG(vpn, VPN_LEAK_LABEL); +	struct bgp_path_info *pi; +	struct bgp_table *table; + +	old_label = blnc->label; + +	if (debug) +		zlog_debug("%s: label=%u, allocated=%d, nexthop=%pFX", __func__, +			   label, allocated, &blnc->nexthop); +	if (allocated) +		/* update the entry with the new label */ +		blnc->label = label; +	else +		/* +		 * previously-allocated label is now invalid +		 * eg: zebra deallocated the labels and notifies it +		 */ +		blnc->label = MPLS_INVALID_LABEL; + +	if (old_label == blnc->label) +		return 0; /* no change */ + +	/* update paths */ +	if (blnc->label != MPLS_INVALID_LABEL) +		bgp_zebra_send_nexthop_label( +			ZEBRA_MPLS_LABELS_ADD, blnc->label, blnc->nh->ifindex, +			blnc->nh->vrf_id, ZEBRA_LSP_BGP, &blnc->nexthop); + +	LIST_FOREACH (pi, &(blnc->paths), label_nh_thread) { +		if (!pi->net) +			continue; +		table = bgp_dest_table(pi->net); +		if (!table) +			continue; +		vpn_leak_from_vrf_update(blnc->to_bgp, table->bgp, pi); +	} + +	return 0; +} + +/* Get a per label nexthop value: + *  - Find and return a per label nexthop from the cache + *  - else allocate a new per label nexthop cache entry and request a + *    label to zebra. Return MPLS_INVALID_LABEL + */ +static mpls_label_t _vpn_leak_from_vrf_get_per_nexthop_label( +	struct bgp_path_info *pi, struct bgp *to_bgp, struct bgp *from_bgp, +	afi_t afi, safi_t safi) +{ +	struct bgp_nexthop_cache *bnc = pi->nexthop; +	struct bgp_label_per_nexthop_cache *blnc; +	struct bgp_label_per_nexthop_cache_head *tree; +	struct prefix *nh_pfx = NULL; +	struct prefix nh_gate = {0}; + +	/* extract the nexthop from the BNC nexthop cache */ +	switch (bnc->nexthop->type) { +	case NEXTHOP_TYPE_IPV4: +	case NEXTHOP_TYPE_IPV4_IFINDEX: +		/* the nexthop is recursive */ +		nh_gate.family = AF_INET; +		nh_gate.prefixlen = IPV4_MAX_BITLEN; +		IPV4_ADDR_COPY(&nh_gate.u.prefix4, &bnc->nexthop->gate.ipv4); +		nh_pfx = &nh_gate; +		break; +	case NEXTHOP_TYPE_IPV6: +	case NEXTHOP_TYPE_IPV6_IFINDEX: +		/* the nexthop is recursive */ +		nh_gate.family = AF_INET6; +		nh_gate.prefixlen = IPV6_MAX_BITLEN; +		IPV6_ADDR_COPY(&nh_gate.u.prefix6, &bnc->nexthop->gate.ipv6); +		nh_pfx = &nh_gate; +		break; +	case NEXTHOP_TYPE_IFINDEX: +		/* the nexthop is direcly connected */ +		nh_pfx = &bnc->prefix; +		break; +	case NEXTHOP_TYPE_BLACKHOLE: +		assert(!"Blackhole nexthop. Already checked by the caller."); +	} + +	/* find or allocate a nexthop label cache entry */ +	tree = &from_bgp->mpls_labels_per_nexthop[family2afi(nh_pfx->family)]; +	blnc = bgp_label_per_nexthop_find(tree, nh_pfx); +	if (!blnc) { +		blnc = bgp_label_per_nexthop_new(tree, nh_pfx); +		blnc->to_bgp = to_bgp; +		/* request a label to zebra for this nexthop +		 * the response from zebra will trigger the callback +		 */ +		bgp_lp_get(LP_TYPE_NEXTHOP, blnc, +			   bgp_mplsvpn_get_label_per_nexthop_cb); +	} + +	if (pi->label_nexthop_cache == blnc) +		/* no change */ +		return blnc->label; + +	/* Unlink from any existing nexthop cache. Free the entry if unused. +	 */ +	bgp_mplsvpn_path_nh_label_unlink(pi); +	if (blnc) { +		/* updates NHT pi list reference */ +		LIST_INSERT_HEAD(&(blnc->paths), pi, label_nh_thread); +		pi->label_nexthop_cache = blnc; +		pi->label_nexthop_cache->path_count++; +		blnc->last_update = monotime(NULL); +	} + +	/* then add or update the selected nexthop */ +	if (!blnc->nh) +		blnc->nh = nexthop_dup(bnc->nexthop, NULL); +	else if (!nexthop_same(bnc->nexthop, blnc->nh)) { +		nexthop_free(blnc->nh); +		blnc->nh = nexthop_dup(bnc->nexthop, NULL); +		if (blnc->label != MPLS_INVALID_LABEL) { +			bgp_zebra_send_nexthop_label( +				ZEBRA_MPLS_LABELS_REPLACE, blnc->label, +				bnc->nexthop->ifindex, bnc->nexthop->vrf_id, +				ZEBRA_LSP_BGP, &blnc->nexthop); +		} +	} + +	return blnc->label; +} + +/* Filter out all the cases where a per nexthop label is not possible: + * - return an invalid label when the nexthop is invalid + * - return the per VRF label when the per nexthop label is not supported + * Otherwise, find or request a per label nexthop. + */ +static mpls_label_t vpn_leak_from_vrf_get_per_nexthop_label( +	afi_t afi, safi_t safi, struct bgp_path_info *pi, struct bgp *from_bgp, +	struct bgp *to_bgp) +{ +	struct bgp_path_info *bpi_ultimate = bgp_get_imported_bpi_ultimate(pi); +	struct bgp *bgp_nexthop = NULL; +	bool nh_valid; +	afi_t nh_afi; +	bool is_bgp_static_route; + +	is_bgp_static_route = bpi_ultimate->sub_type == BGP_ROUTE_STATIC && +			      bpi_ultimate->type == ZEBRA_ROUTE_BGP; + +	if (is_bgp_static_route == false && afi == AFI_IP && +	    CHECK_FLAG(pi->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)) && +	    (pi->attr->nexthop.s_addr == INADDR_ANY || +	     !ipv4_unicast_valid(&pi->attr->nexthop))) { +		/* IPv4 nexthop in standard BGP encoding format. +		 * Format of address is not valid (not any, not unicast). +		 * Fallback to the per VRF label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	if (is_bgp_static_route == false && afi == AFI_IP && +	    pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4 && +	    (pi->attr->mp_nexthop_global_in.s_addr == INADDR_ANY || +	     !ipv4_unicast_valid(&pi->attr->mp_nexthop_global_in))) { +		/* IPv4 nexthop is in MP-BGP encoding format. +		 * Format of address is not valid (not any, not unicast). +		 * Fallback to the per VRF label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	if (is_bgp_static_route == false && afi == AFI_IP6 && +	    (pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL || +	     pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) && +	    (IN6_IS_ADDR_UNSPECIFIED(&pi->attr->mp_nexthop_global) || +	     IN6_IS_ADDR_LOOPBACK(&pi->attr->mp_nexthop_global) || +	     IN6_IS_ADDR_MULTICAST(&pi->attr->mp_nexthop_global))) { +		/* IPv6 nexthop is in MP-BGP encoding format. +		 * Format of address is not valid +		 * Fallback to the per VRF label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	/* Check the next-hop reachability. +	 * Get the bgp instance where the bgp_path_info originates. +	 */ +	if (pi->extra && pi->extra->bgp_orig) +		bgp_nexthop = pi->extra->bgp_orig; +	else +		bgp_nexthop = from_bgp; + +	nh_afi = BGP_ATTR_NH_AFI(afi, pi->attr); +	nh_valid = bgp_find_or_add_nexthop(from_bgp, bgp_nexthop, nh_afi, safi, +					   pi, NULL, 0, NULL); + +	if (!nh_valid && is_bgp_static_route && +	    !CHECK_FLAG(from_bgp->flags, BGP_FLAG_IMPORT_CHECK)) { +		/* "network" prefixes not routable, but since 'no bgp network +		 * import-check' is configured, they are always valid in the BGP +		 * table. Fallback to the per-vrf label +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	if (!nh_valid || !pi->nexthop || pi->nexthop->nexthop_num == 0 || +	    !pi->nexthop->nexthop) { +		/* invalid next-hop: +		 * do not send the per-vrf label +		 * otherwise, when the next-hop becomes valid, +		 * we will have 2 BGP updates: +		 * - one with the per-vrf label +		 * - the second with the per-nexthop label +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return MPLS_INVALID_LABEL; +	} + +	if (pi->nexthop->nexthop_num > 1 || +	    pi->nexthop->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { +		/* Blackhole or ECMP routes +		 * is not compatible with per-nexthop label. +		 * Fallback to per-vrf label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	return _vpn_leak_from_vrf_get_per_nexthop_label(pi, to_bgp, from_bgp, +							afi, safi); +} +  /* cf vnc_import_bgp_add_route_mode_nvegroup() and add_vnc_route() */  void vpn_leak_from_vrf_update(struct bgp *to_bgp,	     /* to */  			      struct bgp *from_bgp,	   /* from */ @@ -1528,12 +1789,32 @@ void vpn_leak_from_vrf_update(struct bgp *to_bgp,	     /* to */  		nexthop_self_flag = 1;  	} -	label_val = from_bgp->vpn_policy[afi].tovpn_label; -	if (label_val == MPLS_LABEL_NONE) { +	if (CHECK_FLAG(from_bgp->vpn_policy[afi].flags, +		       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) +		/* per nexthop label mode */ +		label_val = vpn_leak_from_vrf_get_per_nexthop_label( +			afi, safi, path_vrf, from_bgp, to_bgp); +	else +		/* per VRF label mode */ +		label_val = from_bgp->vpn_policy[afi].tovpn_label; + +	if (label_val == MPLS_INVALID_LABEL && +	    CHECK_FLAG(from_bgp->vpn_policy[afi].flags, +		       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) { +		/* no valid label for the moment +		 * when the 'bgp_mplsvpn_get_label_per_nexthop_cb' callback gets +		 * a valid label value, it will call the current function again. +		 */ +		if (debug) +			zlog_debug( +				"%s: %s skipping: waiting for a valid per-label nexthop.", +				__func__, from_bgp->name_pretty); +		return; +	} +	if (label_val == MPLS_LABEL_NONE)  		encode_label(MPLS_LABEL_IMPLICIT_NULL, &label); -	} else { +	else  		encode_label(label_val, &label); -	}  	/* Set originator ID to "me" */  	SET_FLAG(static_attr.flag, ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)); @@ -1770,6 +2051,8 @@ void vpn_leak_from_vrf_withdraw_all(struct bgp *to_bgp, struct bgp *from_bgp,  						bpi, afi, safi);  					bgp_path_info_delete(bn, bpi);  					bgp_process(to_bgp, bn, afi, safi); +					bgp_mplsvpn_path_nh_label_unlink( +						bpi->extra->parent);  				}  			}  		} diff --git a/bgpd/bgp_mplsvpn.h b/bgpd/bgp_mplsvpn.h index c832b4abd4..75758edcc2 100644 --- a/bgpd/bgp_mplsvpn.h +++ b/bgpd/bgp_mplsvpn.h @@ -31,6 +31,7 @@  #define BGP_PREFIX_SID_SRV6_MAX_FUNCTION_LENGTH 20  extern void bgp_mplsvpn_init(void); +extern void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi);  extern int bgp_nlri_parse_vpn(struct peer *, struct attr *, struct bgp_nlri *);  extern uint32_t decode_label(mpls_label_t *);  extern void encode_label(mpls_label_t, mpls_label_t *); diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 1c79d7d03b..c878512389 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -31,6 +31,7 @@  #include "bgpd/bgp_fsm.h"  #include "bgpd/bgp_vty.h"  #include "bgpd/bgp_rd.h" +#include "bgpd/bgp_mplsvpn.h"  DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Addr Intf String"); @@ -119,6 +120,8 @@ static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree)  		while (!LIST_EMPTY(&(bnc->paths))) {  			struct bgp_path_info *path = LIST_FIRST(&(bnc->paths)); +			bgp_mplsvpn_path_nh_label_unlink(path); +  			path_nh_map(path, bnc, false);  		} diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index a294ebcc63..bda163d7a5 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -31,6 +31,7 @@  #include "bgpd/bgp_flowspec_util.h"  #include "bgpd/bgp_evpn.h"  #include "bgpd/bgp_rd.h" +#include "bgpd/bgp_mplsvpn.h"  extern struct zclient *zclient; @@ -149,6 +150,8 @@ void bgp_unlink_nexthop(struct bgp_path_info *path)  {  	struct bgp_nexthop_cache *bnc = path->nexthop; +	bgp_mplsvpn_path_nh_label_unlink(path); +  	if (!bnc)  		return; @@ -1134,10 +1137,21 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)  	}  	LIST_FOREACH (path, &(bnc->paths), nh_thread) { -		if (!(path->type == ZEBRA_ROUTE_BGP -		      && ((path->sub_type == BGP_ROUTE_NORMAL) -			  || (path->sub_type == BGP_ROUTE_STATIC) -			  || (path->sub_type == BGP_ROUTE_IMPORTED)))) +		if (path->type == ZEBRA_ROUTE_BGP && +		    (path->sub_type == BGP_ROUTE_NORMAL || +		     path->sub_type == BGP_ROUTE_STATIC || +		     path->sub_type == BGP_ROUTE_IMPORTED)) +			/* evaluate the path */ +			; +		else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE) { +			/* evaluate the path for redistributed routes +			 * except those from VNC +			 */ +			if ((path->type == ZEBRA_ROUTE_VNC) || +			    (path->type == ZEBRA_ROUTE_VNC_DIRECT)) +				continue; +		} else +			/* don't evaluate the path */  			continue;  		dest = path->net; @@ -1230,7 +1244,26 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)  			SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);  		path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID); -		if (path_valid != bnc_is_valid_nexthop) { +		if (path->type == ZEBRA_ROUTE_BGP && +		    path->sub_type == BGP_ROUTE_STATIC && +		    !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK)) +			/* static routes with 'no bgp network import-check' are +			 * always valid. if nht is called with static routes, +			 * the vpn exportation needs to be triggered +			 */ +			vpn_leak_from_vrf_update(bgp_get_default(), bgp_path, +						 path); +		else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE && +			 safi == SAFI_UNICAST && +			 (bgp_path->inst_type == BGP_INSTANCE_TYPE_VRF || +			  bgp_path->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) +			/* redistribute routes are always valid +			 * if nht is called with redistribute routes, the vpn +			 * exportation needs to be triggered +			 */ +			vpn_leak_from_vrf_update(bgp_get_default(), bgp_path, +						 path); +		else if (path_valid != bnc_is_valid_nexthop) {  			if (path_valid) {  				/* No longer valid, clear flag; also for EVPN  				 * routes, unimport from VRFs if needed. @@ -1243,6 +1276,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)  				    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))  					bgp_evpn_unimport_route(bgp_path,  						afi, safi, bgp_dest_get_prefix(dest), path); +				if (safi == SAFI_UNICAST && +				    (bgp_path->inst_type != +				     BGP_INSTANCE_TYPE_VIEW)) +					vpn_leak_from_vrf_withdraw( +						bgp_get_default(), bgp_path, +						path);  			} else {  				/* Path becomes valid, set flag; also for EVPN  				 * routes, import from VRFs if needed. @@ -1255,6 +1294,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)  				    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))  					bgp_evpn_import_route(bgp_path,  						afi, safi, bgp_dest_get_prefix(dest), path); +				if (safi == SAFI_UNICAST && +				    (bgp_path->inst_type != +				     BGP_INSTANCE_TYPE_VIEW)) +					vpn_leak_from_vrf_update( +						bgp_get_default(), bgp_path, +						path);  			}  		} diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index b51396c8d1..f5ead66f25 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -8676,12 +8676,16 @@ void bgp_redistribute_add(struct bgp *bgp, struct prefix *p,  	 */  	assert(attr.aspath); +	if (p->family == AF_INET6) +		UNSET_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)); +  	switch (nhtype) {  	case NEXTHOP_TYPE_IFINDEX:  		switch (p->family) {  		case AF_INET:  			attr.nexthop.s_addr = INADDR_ANY;  			attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; +			attr.mp_nexthop_global_in.s_addr = INADDR_ANY;  			break;  		case AF_INET6:  			memset(&attr.mp_nexthop_global, 0, @@ -8694,6 +8698,7 @@ void bgp_redistribute_add(struct bgp *bgp, struct prefix *p,  	case NEXTHOP_TYPE_IPV4_IFINDEX:  		attr.nexthop = nexthop->ipv4;  		attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; +		attr.mp_nexthop_global_in = nexthop->ipv4;  		break;  	case NEXTHOP_TYPE_IPV6:  	case NEXTHOP_TYPE_IPV6_IFINDEX: @@ -8705,6 +8710,7 @@ void bgp_redistribute_add(struct bgp *bgp, struct prefix *p,  		case AF_INET:  			attr.nexthop.s_addr = INADDR_ANY;  			attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; +			attr.mp_nexthop_global_in.s_addr = INADDR_ANY;  			break;  		case AF_INET6:  			memset(&attr.mp_nexthop_global, 0, diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index a64144b625..fbdd5fae7d 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -319,6 +319,12 @@ struct bgp_path_info {  	/* Addpath identifiers */  	uint32_t addpath_rx_id;  	struct bgp_addpath_info_data tx_addpath; + +	/* For nexthop per label linked list */ +	LIST_ENTRY(bgp_path_info) label_nh_thread; + +	/* Back pointer to the bgp label per nexthop structure */ +	struct bgp_label_per_nexthop_cache *label_nexthop_cache;  };  /* Structure used in BGP path selection */ diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index ccf198c392..1be44adde8 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -9183,6 +9183,63 @@ ALIAS (af_rd_vpn_export,         "Between current address-family and vpn\n"         "For routes leaked from current address-family to vpn\n") +DEFPY(af_label_vpn_export_allocation_mode, +      af_label_vpn_export_allocation_mode_cmd, +      "[no$no] label vpn export allocation-mode <per-vrf$label_per_vrf|per-nexthop$label_per_nh>", +      NO_STR +      "label value for VRF\n" +      "Between current address-family and vpn\n" +      "For routes leaked from current address-family to vpn\n" +      "Label allocation mode\n" +      "Allocate one label for all BGP updates of the VRF\n" +      "Allocate a label per connected next-hop in the VRF\n") +{ +	VTY_DECLVAR_CONTEXT(bgp, bgp); +	afi_t afi; +	bool old_per_nexthop, new_per_nexthop; + +	afi = vpn_policy_getafi(vty, bgp, false); + +	old_per_nexthop = !!CHECK_FLAG(bgp->vpn_policy[afi].flags, +				       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP); +	if (no) { +		if (old_per_nexthop == false && label_per_nh) +			return CMD_ERR_NO_MATCH; +		if (old_per_nexthop == true && label_per_vrf) +			return CMD_ERR_NO_MATCH; +		new_per_nexthop = false; +	} else { +		if (label_per_nh) +			new_per_nexthop = true; +		else +			new_per_nexthop = false; +	} + +	/* no change */ +	if (old_per_nexthop == new_per_nexthop) +		return CMD_SUCCESS; + +	/* +	 * pre-change: un-export vpn routes (vpn->vrf routes unaffected) +	 */ +	vpn_leak_prechange(BGP_VPN_POLICY_DIR_TOVPN, afi, bgp_get_default(), +			   bgp); + +	if (new_per_nexthop) +		SET_FLAG(bgp->vpn_policy[afi].flags, +			 BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP); +	else +		UNSET_FLAG(bgp->vpn_policy[afi].flags, +			   BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP); + +	/* post-change: re-export vpn routes */ +	vpn_leak_postchange(BGP_VPN_POLICY_DIR_TOVPN, afi, bgp_get_default(), +			    bgp); + +	hook_call(bgp_snmp_update_last_changed, bgp); +	return CMD_SUCCESS; +} +  DEFPY (af_label_vpn_export,         af_label_vpn_export_cmd,         "[no] label vpn export <(0-1048575)$label_val|auto$label_auto>", @@ -17300,6 +17357,12 @@ static void bgp_vpn_policy_config_write_afi(struct vty *vty, struct bgp *bgp,  		}  	} +	if (CHECK_FLAG(bgp->vpn_policy[afi].flags, +		       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) +		vty_out(vty, +			"%*slabel vpn export allocation-mode per-nexthop\n", +			indent, ""); +  	tovpn_sid_index = bgp->vpn_policy[afi].tovpn_sid_index;  	if (CHECK_FLAG(bgp->vpn_policy[afi].flags,  		       BGP_VPN_POLICY_TOVPN_SID_AUTO)) { @@ -20473,6 +20536,10 @@ void bgp_vty_init(void)  	install_element(BGP_IPV6_NODE, &af_rd_vpn_export_cmd);  	install_element(BGP_IPV4_NODE, &af_label_vpn_export_cmd);  	install_element(BGP_IPV6_NODE, &af_label_vpn_export_cmd); +	install_element(BGP_IPV4_NODE, +			&af_label_vpn_export_allocation_mode_cmd); +	install_element(BGP_IPV6_NODE, +			&af_label_vpn_export_allocation_mode_cmd);  	install_element(BGP_IPV4_NODE, &af_nexthop_vpn_export_cmd);  	install_element(BGP_IPV6_NODE, &af_nexthop_vpn_export_cmd);  	install_element(BGP_IPV4_NODE, &af_rt_vpn_imexport_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 96b1f3e00f..1965cd2704 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -3911,3 +3911,32 @@ int bgp_zebra_srv6_manager_release_locator_chunk(const char *name)  {  	return srv6_manager_release_locator_chunk(zclient, name);  } + +void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label, +				  ifindex_t ifindex, vrf_id_t vrf_id, +				  enum lsp_types_t ltype, struct prefix *p) +{ +	struct zapi_labels zl = {}; +	struct zapi_nexthop *znh; + +	zl.type = ltype; +	zl.local_label = label; +	zl.nexthop_num = 1; +	znh = &zl.nexthops[0]; +	if (p->family == AF_INET) +		IPV4_ADDR_COPY(&znh->gate.ipv4, &p->u.prefix4); +	else +		IPV6_ADDR_COPY(&znh->gate.ipv6, &p->u.prefix6); +	if (ifindex == IFINDEX_INTERNAL) +		znh->type = (p->family == AF_INET) ? NEXTHOP_TYPE_IPV4 +						   : NEXTHOP_TYPE_IPV6; +	else +		znh->type = (p->family == AF_INET) ? NEXTHOP_TYPE_IPV4_IFINDEX +						   : NEXTHOP_TYPE_IPV6_IFINDEX; +	znh->ifindex = ifindex; +	znh->vrf_id = vrf_id; +	znh->label_num = 0; + +	/* vrf_id is DEFAULT_VRF */ +	zebra_send_mpls_labels(zclient, cmd, &zl); +} diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index b09be890e5..7c85d86b31 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -118,4 +118,8 @@ extern int bgp_zebra_update(struct bgp *bgp, afi_t afi, safi_t safi,  extern int bgp_zebra_stale_timer_update(struct bgp *bgp);  extern int bgp_zebra_srv6_manager_get_locator_chunk(const char *name);  extern int bgp_zebra_srv6_manager_release_locator_chunk(const char *name); +extern void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label, +					 ifindex_t index, vrf_id_t vrfid, +					 enum lsp_types_t ltype, +					 struct prefix *p);  #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 9d7a1f967e..42ad8a5632 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -3354,6 +3354,11 @@ static struct bgp *bgp_create(as_t *as, const char *name,  		SET_FLAG(bgp->af_flags[afi][SAFI_MPLS_VPN],  			 BGP_VPNVX_RETAIN_ROUTE_TARGET_ALL);  	} + +	for (afi = AFI_IP; afi < AFI_MAX; afi++) +		bgp_label_per_nexthop_cache_init( +			&bgp->mpls_labels_per_nexthop[afi]); +  	if (name)  		bgp->name = XSTRDUP(MTYPE_BGP, name); @@ -8251,6 +8256,8 @@ void bgp_init(unsigned short instance)  	bgp_lp_vty_init(); +	bgp_label_per_nexthop_init(); +  	cmd_variable_handler_register(bgp_viewvrf_var_handlers);  } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index c3cb6ba91e..68b32b5945 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -211,6 +211,7 @@ struct vpn_policy {  #define BGP_VPN_POLICY_TOVPN_RD_SET            (1 << 1)  #define BGP_VPN_POLICY_TOVPN_NEXTHOP_SET       (1 << 2)  #define BGP_VPN_POLICY_TOVPN_SID_AUTO          (1 << 3) +#define BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP (1 << 4)  	/*  	 * If we are importing another vrf into us keep a list of @@ -573,6 +574,10 @@ struct bgp {  	/* Allocate MPLS labels */  	uint8_t allocate_mpls_labels[AFI_MAX][SAFI_MAX]; +	/* Tree for next-hop lookup cache. */ +	struct bgp_label_per_nexthop_cache_head +		mpls_labels_per_nexthop[AFI_MAX]; +  	/* Allocate hash entries to store policy routing information  	 * The hash are used to host pbr rules somewhere.  	 * Actually, pbr will only be used by flowspec  | 
