diff options
Diffstat (limited to 'bgpd/bgp_mplsvpn.c')
| -rw-r--r-- | bgpd/bgp_mplsvpn.c | 303 | 
1 files changed, 293 insertions, 10 deletions
diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c index 63168f1e7a..ecc84533b0 100644 --- a/bgpd/bgp_mplsvpn.c +++ b/bgpd/bgp_mplsvpn.c @@ -1116,12 +1116,14 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,  	/*  	 * Routes that are redistributed into BGP from zebra do not get -	 * nexthop tracking. However, if those routes are subsequently -	 * imported to other RIBs within BGP, the leaked routes do not -	 * carry the original BGP_ROUTE_REDISTRIBUTE sub_type. Therefore, -	 * in order to determine if the route we are currently leaking -	 * should have nexthop tracking, we must find the ultimate -	 * parent so we can check its sub_type. +	 * nexthop tracking, unless MPLS allocation per nexthop is +	 * performed. In the default case nexthop tracking does not apply, +	 * if those routes are subsequently imported to other RIBs within +	 * BGP, the leaked routes do not carry the original +	 * BGP_ROUTE_REDISTRIBUTE sub_type. Therefore, in order to determine +	 * if the route we are currently leaking should have nexthop +	 * tracking, we must find the ultimate parent so we can check its +	 * sub_type.  	 *  	 * As of now, source_bpi may at most be a second-generation route  	 * (only one hop back to ultimate parent for vrf-vpn-vrf scheme). @@ -1336,6 +1338,265 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,  	return new;  } +void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi) +{ +	struct bgp_label_per_nexthop_cache *blnc; + +	if (!pi) +		return; + +	blnc = pi->label_nexthop_cache; + +	if (!blnc) +		return; + +	LIST_REMOVE(pi, label_nh_thread); +	pi->label_nexthop_cache->path_count--; +	pi->label_nexthop_cache = NULL; + +	if (LIST_EMPTY(&(blnc->paths))) +		bgp_label_per_nexthop_free(blnc); +} + +/* Called upon reception of a ZAPI Message from zebra, about + * a new available label. + */ +static int bgp_mplsvpn_get_label_per_nexthop_cb(mpls_label_t label, +						void *context, bool allocated) +{ +	struct bgp_label_per_nexthop_cache *blnc = context; +	mpls_label_t old_label; +	int debug = BGP_DEBUG(vpn, VPN_LEAK_LABEL); +	struct bgp_path_info *pi; +	struct bgp_table *table; + +	old_label = blnc->label; + +	if (debug) +		zlog_debug("%s: label=%u, allocated=%d, nexthop=%pFX", __func__, +			   label, allocated, &blnc->nexthop); +	if (allocated) +		/* update the entry with the new label */ +		blnc->label = label; +	else +		/* +		 * previously-allocated label is now invalid +		 * eg: zebra deallocated the labels and notifies it +		 */ +		blnc->label = MPLS_INVALID_LABEL; + +	if (old_label == blnc->label) +		return 0; /* no change */ + +	/* update paths */ +	if (blnc->label != MPLS_INVALID_LABEL) +		bgp_zebra_send_nexthop_label( +			ZEBRA_MPLS_LABELS_ADD, blnc->label, blnc->nh->ifindex, +			blnc->nh->vrf_id, ZEBRA_LSP_BGP, &blnc->nexthop); + +	LIST_FOREACH (pi, &(blnc->paths), label_nh_thread) { +		if (!pi->net) +			continue; +		table = bgp_dest_table(pi->net); +		if (!table) +			continue; +		vpn_leak_from_vrf_update(blnc->to_bgp, table->bgp, pi); +	} + +	return 0; +} + +/* Get a per label nexthop value: + *  - Find and return a per label nexthop from the cache + *  - else allocate a new per label nexthop cache entry and request a + *    label to zebra. Return MPLS_INVALID_LABEL + */ +static mpls_label_t _vpn_leak_from_vrf_get_per_nexthop_label( +	struct bgp_path_info *pi, struct bgp *to_bgp, struct bgp *from_bgp, +	afi_t afi, safi_t safi) +{ +	struct bgp_nexthop_cache *bnc = pi->nexthop; +	struct bgp_label_per_nexthop_cache *blnc; +	struct bgp_label_per_nexthop_cache_head *tree; +	struct prefix *nh_pfx = NULL; +	struct prefix nh_gate = {0}; + +	/* extract the nexthop from the BNC nexthop cache */ +	switch (bnc->nexthop->type) { +	case NEXTHOP_TYPE_IPV4: +	case NEXTHOP_TYPE_IPV4_IFINDEX: +		/* the nexthop is recursive */ +		nh_gate.family = AF_INET; +		nh_gate.prefixlen = IPV4_MAX_BITLEN; +		IPV4_ADDR_COPY(&nh_gate.u.prefix4, &bnc->nexthop->gate.ipv4); +		nh_pfx = &nh_gate; +		break; +	case NEXTHOP_TYPE_IPV6: +	case NEXTHOP_TYPE_IPV6_IFINDEX: +		/* the nexthop is recursive */ +		nh_gate.family = AF_INET6; +		nh_gate.prefixlen = IPV6_MAX_BITLEN; +		IPV6_ADDR_COPY(&nh_gate.u.prefix6, &bnc->nexthop->gate.ipv6); +		nh_pfx = &nh_gate; +		break; +	case NEXTHOP_TYPE_IFINDEX: +		/* the nexthop is direcly connected */ +		nh_pfx = &bnc->prefix; +		break; +	case NEXTHOP_TYPE_BLACKHOLE: +		assert(!"Blackhole nexthop. Already checked by the caller."); +	} + +	/* find or allocate a nexthop label cache entry */ +	tree = &from_bgp->mpls_labels_per_nexthop[family2afi(nh_pfx->family)]; +	blnc = bgp_label_per_nexthop_find(tree, nh_pfx); +	if (!blnc) { +		blnc = bgp_label_per_nexthop_new(tree, nh_pfx); +		blnc->to_bgp = to_bgp; +		/* request a label to zebra for this nexthop +		 * the response from zebra will trigger the callback +		 */ +		bgp_lp_get(LP_TYPE_NEXTHOP, blnc, +			   bgp_mplsvpn_get_label_per_nexthop_cb); +	} + +	if (pi->label_nexthop_cache == blnc) +		/* no change */ +		return blnc->label; + +	/* Unlink from any existing nexthop cache. Free the entry if unused. +	 */ +	bgp_mplsvpn_path_nh_label_unlink(pi); +	if (blnc) { +		/* updates NHT pi list reference */ +		LIST_INSERT_HEAD(&(blnc->paths), pi, label_nh_thread); +		pi->label_nexthop_cache = blnc; +		pi->label_nexthop_cache->path_count++; +		blnc->last_update = monotime(NULL); +	} + +	/* then add or update the selected nexthop */ +	if (!blnc->nh) +		blnc->nh = nexthop_dup(bnc->nexthop, NULL); +	else if (!nexthop_same(bnc->nexthop, blnc->nh)) { +		nexthop_free(blnc->nh); +		blnc->nh = nexthop_dup(bnc->nexthop, NULL); +		if (blnc->label != MPLS_INVALID_LABEL) { +			bgp_zebra_send_nexthop_label( +				ZEBRA_MPLS_LABELS_REPLACE, blnc->label, +				bnc->nexthop->ifindex, bnc->nexthop->vrf_id, +				ZEBRA_LSP_BGP, &blnc->nexthop); +		} +	} + +	return blnc->label; +} + +/* Filter out all the cases where a per nexthop label is not possible: + * - return an invalid label when the nexthop is invalid + * - return the per VRF label when the per nexthop label is not supported + * Otherwise, find or request a per label nexthop. + */ +static mpls_label_t vpn_leak_from_vrf_get_per_nexthop_label( +	afi_t afi, safi_t safi, struct bgp_path_info *pi, struct bgp *from_bgp, +	struct bgp *to_bgp) +{ +	struct bgp_path_info *bpi_ultimate = bgp_get_imported_bpi_ultimate(pi); +	struct bgp *bgp_nexthop = NULL; +	bool nh_valid; +	afi_t nh_afi; +	bool is_bgp_static_route; + +	is_bgp_static_route = bpi_ultimate->sub_type == BGP_ROUTE_STATIC && +			      bpi_ultimate->type == ZEBRA_ROUTE_BGP; + +	if (is_bgp_static_route == false && afi == AFI_IP && +	    CHECK_FLAG(pi->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)) && +	    (pi->attr->nexthop.s_addr == INADDR_ANY || +	     !ipv4_unicast_valid(&pi->attr->nexthop))) { +		/* IPv4 nexthop in standard BGP encoding format. +		 * Format of address is not valid (not any, not unicast). +		 * Fallback to the per VRF label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	if (is_bgp_static_route == false && afi == AFI_IP && +	    pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4 && +	    (pi->attr->mp_nexthop_global_in.s_addr == INADDR_ANY || +	     !ipv4_unicast_valid(&pi->attr->mp_nexthop_global_in))) { +		/* IPv4 nexthop is in MP-BGP encoding format. +		 * Format of address is not valid (not any, not unicast). +		 * Fallback to the per VRF label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	if (is_bgp_static_route == false && afi == AFI_IP6 && +	    (pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL || +	     pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) && +	    (IN6_IS_ADDR_UNSPECIFIED(&pi->attr->mp_nexthop_global) || +	     IN6_IS_ADDR_LOOPBACK(&pi->attr->mp_nexthop_global) || +	     IN6_IS_ADDR_MULTICAST(&pi->attr->mp_nexthop_global))) { +		/* IPv6 nexthop is in MP-BGP encoding format. +		 * Format of address is not valid +		 * Fallback to the per VRF label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	/* Check the next-hop reachability. +	 * Get the bgp instance where the bgp_path_info originates. +	 */ +	if (pi->extra && pi->extra->bgp_orig) +		bgp_nexthop = pi->extra->bgp_orig; +	else +		bgp_nexthop = from_bgp; + +	nh_afi = BGP_ATTR_NH_AFI(afi, pi->attr); +	nh_valid = bgp_find_or_add_nexthop(from_bgp, bgp_nexthop, nh_afi, safi, +					   pi, NULL, 0, NULL); + +	if (!nh_valid && is_bgp_static_route && +	    !CHECK_FLAG(from_bgp->flags, BGP_FLAG_IMPORT_CHECK)) { +		/* "network" prefixes not routable, but since 'no bgp network +		 * import-check' is configured, they are always valid in the BGP +		 * table. Fallback to the per-vrf label +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	if (!nh_valid || !pi->nexthop || pi->nexthop->nexthop_num == 0 || +	    !pi->nexthop->nexthop) { +		/* invalid next-hop: +		 * do not send the per-vrf label +		 * otherwise, when the next-hop becomes valid, +		 * we will have 2 BGP updates: +		 * - one with the per-vrf label +		 * - the second with the per-nexthop label +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return MPLS_INVALID_LABEL; +	} + +	if (pi->nexthop->nexthop_num > 1 || +	    pi->nexthop->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { +		/* Blackhole or ECMP routes +		 * is not compatible with per-nexthop label. +		 * Fallback to per-vrf label. +		 */ +		bgp_mplsvpn_path_nh_label_unlink(pi); +		return from_bgp->vpn_policy[afi].tovpn_label; +	} + +	return _vpn_leak_from_vrf_get_per_nexthop_label(pi, to_bgp, from_bgp, +							afi, safi); +} +  /* cf vnc_import_bgp_add_route_mode_nvegroup() and add_vnc_route() */  void vpn_leak_from_vrf_update(struct bgp *to_bgp,	     /* to */  			      struct bgp *from_bgp,	   /* from */ @@ -1528,12 +1789,32 @@ void vpn_leak_from_vrf_update(struct bgp *to_bgp,	     /* to */  		nexthop_self_flag = 1;  	} -	label_val = from_bgp->vpn_policy[afi].tovpn_label; -	if (label_val == MPLS_LABEL_NONE) { +	if (CHECK_FLAG(from_bgp->vpn_policy[afi].flags, +		       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) +		/* per nexthop label mode */ +		label_val = vpn_leak_from_vrf_get_per_nexthop_label( +			afi, safi, path_vrf, from_bgp, to_bgp); +	else +		/* per VRF label mode */ +		label_val = from_bgp->vpn_policy[afi].tovpn_label; + +	if (label_val == MPLS_INVALID_LABEL && +	    CHECK_FLAG(from_bgp->vpn_policy[afi].flags, +		       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) { +		/* no valid label for the moment +		 * when the 'bgp_mplsvpn_get_label_per_nexthop_cb' callback gets +		 * a valid label value, it will call the current function again. +		 */ +		if (debug) +			zlog_debug( +				"%s: %s skipping: waiting for a valid per-label nexthop.", +				__func__, from_bgp->name_pretty); +		return; +	} +	if (label_val == MPLS_LABEL_NONE)  		encode_label(MPLS_LABEL_IMPLICIT_NULL, &label); -	} else { +	else  		encode_label(label_val, &label); -	}  	/* Set originator ID to "me" */  	SET_FLAG(static_attr.flag, ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)); @@ -1770,6 +2051,8 @@ void vpn_leak_from_vrf_withdraw_all(struct bgp *to_bgp, struct bgp *from_bgp,  						bpi, afi, safi);  					bgp_path_info_delete(bn, bpi);  					bgp_process(to_bgp, bn, afi, safi); +					bgp_mplsvpn_path_nh_label_unlink( +						bpi->extra->parent);  				}  			}  		}  | 
