From 92d5e31acee8381b7841e9554d82fb845e31c48f Mon Sep 17 00:00:00 2001
From: Philippe Guibert <philippe.guibert@6wind.com>
Date: Tue, 28 Feb 2023 14:25:02 +0100
Subject: bgpd: add support for l3vpn per-nexthop label

This commit introduces a new method to associate a label to
prefixes to export to a VPNv4 backbone. All the methods to
associate a label to a BGP update is documented in rfc4364,
chapter 4.3.2. Initially, the "single label for an entire
VRF" method was available. This commit adds "single label
for each attachment circuit" method.

The change impacts the control-plane, because each BGP update
is checked to know if the nexthop has reachability in the VRF
or not. If this is the case, then a unique label for a given
destination IP in the VRF will be picked up. This label will
be reused for an other BGP update that will have the same
nexthop IP address.

The change impacts the data-plane, because the MPLs pop
mechanism applied to incoming labelled packets changes: the
MPLS label is popped, and the packet is directly sent to the
connected nexthop described in the previous outgoing BGP VPN
update.

By default per-vrf mode is done, but the user may choose
the per-nexthop mode, by using the vty command from the
previous commit. In the latter case, a per-vrf label
will however be allocated to handle networks that are not directly
connected. This is the case for local traffic for instance.

The change also include the following:

-  ECMP case
In case a route is learnt in a given VRF, and is resolved via an
ECMP nexthop. This implies that when exporting the route as a BGP
update, if label allocation per nexthop is used, then two possible
MPLS values could be picked up, which is not possible with the
current implementation. Actually, the NLRI for VPNv4 stores one
prefix, and one single label value, not two. Today, RFC8277 with
multiple label capability is not yet available.
To avoid this corner case, when a route is resolved via more than one
nexthop, the label allocation per nexthop will not apply, and the
default per-vrf label will be chosen.
Let us imagine BGP redistributes a static route using the `172.31.0.20`
nexthop. The nexthop resolution will find two different nexthops fo a
unique BGP update.

 > r1# show running-config
 > [..]
 > vrf vrf1
 >  ip route 172.31.0.30/32 172.31.0.20
 > r1# show bgp vrf vrf1 nexthop
 > [..]
 > 172.31.0.20 valid [IGP metric 0], #paths 1
 >  gate 192.0.2.11
 >  gate 192.0.2.12
 >  Last update: Mon Jan 16 09:27:09 2023
 >  Paths:
 >    1/1 172.31.0.30/32 VRF vrf1 flags 0x20018

To avoid this situation, BGP updates that resolve over multiple
nexthops are using the unique per-vrf label.

- recursive route case

Prefixes that need a recursive route to be resolved can
also be eligible for mpls allocation per nexthop. In that
case, the nexthop will be the recursive nexthop calculated.

To achieve this, all nexthop types in bnc contexts are valid,
except for the blackhole nexthops.

- network declared prefixes

Nexthop tracking is used to look for the reachability of the
prefixes. When the the 'no bgp network import-check' command
is used, network declared prefixes are maintained active,
even if there is no active nexthop.

Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
---
 bgpd/bgp_nht.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'bgpd/bgp_nht.c')

diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c
index 473c95071c..f3d9bd1a82 100644
--- a/bgpd/bgp_nht.c
+++ b/bgpd/bgp_nht.c
@@ -31,6 +31,7 @@
 #include "bgpd/bgp_flowspec_util.h"
 #include "bgpd/bgp_evpn.h"
 #include "bgpd/bgp_rd.h"
+#include "bgpd/bgp_mplsvpn.h"
 
 extern struct zclient *zclient;
 
@@ -149,6 +150,8 @@ void bgp_unlink_nexthop(struct bgp_path_info *path)
 {
 	struct bgp_nexthop_cache *bnc = path->nexthop;
 
+	bgp_mplsvpn_path_nh_label_unlink(path);
+
 	if (!bnc)
 		return;
 
@@ -1230,7 +1233,16 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
 			SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
 
 		path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
-		if (path_valid != bnc_is_valid_nexthop) {
+		if (path->type == ZEBRA_ROUTE_BGP &&
+		    path->sub_type == BGP_ROUTE_STATIC &&
+		    !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
+			/* static routes with 'no bgp network import-check' are
+			 * always valid. if nht is called with static routes,
+			 * the vpn exportation needs to be triggered
+			 */
+			vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
+						 path);
+		else if (path_valid != bnc_is_valid_nexthop) {
 			if (path_valid) {
 				/* No longer valid, clear flag; also for EVPN
 				 * routes, unimport from VRFs if needed.
@@ -1243,6 +1255,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
 				    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
 					bgp_evpn_unimport_route(bgp_path,
 						afi, safi, bgp_dest_get_prefix(dest), path);
+				if (safi == SAFI_UNICAST &&
+				    (bgp_path->inst_type !=
+				     BGP_INSTANCE_TYPE_VIEW))
+					vpn_leak_from_vrf_withdraw(
+						bgp_get_default(), bgp_path,
+						path);
 			} else {
 				/* Path becomes valid, set flag; also for EVPN
 				 * routes, import from VRFs if needed.
@@ -1255,6 +1273,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
 				    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
 					bgp_evpn_import_route(bgp_path,
 						afi, safi, bgp_dest_get_prefix(dest), path);
+				if (safi == SAFI_UNICAST &&
+				    (bgp_path->inst_type !=
+				     BGP_INSTANCE_TYPE_VIEW))
+					vpn_leak_from_vrf_update(
+						bgp_get_default(), bgp_path,
+						path);
 			}
 		}
 
-- 
cgit v1.2.3