summaryrefslogtreecommitdiff
path: root/bgpd
diff options
context:
space:
mode:
author: Donatas Abraitis <donatas@opensourcerouting.org> 2023-05-02 18:36:45 +0300
committer: GitHub <noreply@github.com> 2023-05-02 18:36:45 +0300
commit: 99a1ab0b2150ae82f0e831dc37d1b91da763318d (patch)
tree: 7d7b940739d53db7d37205b6a6782f703635c510 /bgpd
parent: d0053da26b49166353c5f48c11fb11f6bea691d0 (diff)
parent: dab2df8dc0d8a83edb796c2205491d7e6664c2dc (diff)
Merge pull request #12646 from pguibert6WIND/mpls_alloc_per_nh
MPLS allocation mode per next hop
Diffstat (limited to 'bgpd')
-rw-r--r-- bgpd/bgp_labelpool.c 185
-rw-r--r-- bgpd/bgp_labelpool.h 52
-rw-r--r-- bgpd/bgp_mplsvpn.c 303
-rw-r--r-- bgpd/bgp_mplsvpn.h 1
-rw-r--r-- bgpd/bgp_nexthop.c 3
-rw-r--r-- bgpd/bgp_nht.c 55
-rw-r--r-- bgpd/bgp_route.c 6
-rw-r--r-- bgpd/bgp_route.h 6
-rw-r--r-- bgpd/bgp_vty.c 67
-rw-r--r-- bgpd/bgp_zebra.c 29
-rw-r--r-- bgpd/bgp_zebra.h 4
-rw-r--r-- bgpd/bgpd.c 7
-rw-r--r-- bgpd/bgpd.h 5
13 files changed, 708 insertions, 15 deletions
diff --git a/bgpd/bgp_labelpool.c b/bgpd/bgp_labelpool.c
index 9943f57fb3..faddfc995f 100644
--- a/bgpd/bgp_labelpool.c
+++ b/bgpd/bgp_labelpool.c
@@ -23,6 +23,9 @@
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_errors.h"
#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_zebra.h"
+#include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_rd.h"
#define BGP_LABELPOOL_ENABLE_TESTS 0
@@ -830,6 +833,16 @@ DEFUN(show_bgp_labelpool_ledger, show_bgp_labelpool_ledger_cmd,
lcb->label);
break;
+ case LP_TYPE_NEXTHOP:
+ if (uj) {
+ json_object_string_add(json_elem, "prefix",
+ "nexthop");
+ json_object_int_add(json_elem, "label",
+ lcb->label);
+ } else
+ vty_out(vty, "%-18s %u\n", "nexthop",
+ lcb->label);
+ break;
}
}
if (uj)
@@ -919,6 +932,15 @@ DEFUN(show_bgp_labelpool_inuse, show_bgp_labelpool_inuse_cmd,
vty_out(vty, "%-18s %u\n", "VRF",
label);
break;
+ case LP_TYPE_NEXTHOP:
+ if (uj) {
+ json_object_string_add(json_elem, "prefix",
+ "nexthop");
+ json_object_int_add(json_elem, "label", label);
+ } else
+ vty_out(vty, "%-18s %u\n", "nexthop",
+ label);
+ break;
}
}
if (uj)
@@ -991,6 +1013,13 @@ DEFUN(show_bgp_labelpool_requests, show_bgp_labelpool_requests_cmd,
else
vty_out(vty, "VRF\n");
break;
+ case LP_TYPE_NEXTHOP:
+ if (uj)
+ json_object_string_add(json_elem, "prefix",
+ "nexthop");
+ else
+ vty_out(vty, "Nexthop\n");
+ break;
}
}
if (uj)
@@ -1053,6 +1082,99 @@ DEFUN(show_bgp_labelpool_chunks, show_bgp_labelpool_chunks_cmd,
return CMD_SUCCESS;
}
+/* Display the label-per-nexthop cache of one address family of a BGP
+ * instance.
+ *
+ * For every cache entry, print the nexthop address, its label, the number
+ * of attached paths, the interface of the stored nexthop copy (when there
+ * is one) and the time of the last update. When 'detail' is set, the paths
+ * attached to the entry are listed as well.
+ *
+ * vty:    terminal to write to
+ * afi:    address family of the cache tree to walk
+ * bgp:    BGP instance owning the cache
+ * detail: also list the paths linked to each cache entry
+ */
+static void show_bgp_nexthop_label_afi(struct vty *vty, afi_t afi,
+				       struct bgp *bgp, bool detail)
+{
+	struct bgp_label_per_nexthop_cache_head *tree;
+	struct bgp_label_per_nexthop_cache *iter;
+	/* Address family of the nexthops stored in this tree. It is computed
+	 * once so that the per-path address family below cannot alter how
+	 * the following cache entries are formatted.
+	 */
+	int family = afi2family(afi);
+	afi_t path_afi;
+	safi_t safi;
+	void *src;
+	char buf[PREFIX2STR_BUFFER];
+	char labelstr[MPLS_LABEL_STRLEN];
+	struct bgp_dest *dest;
+	struct bgp_path_info *path;
+	struct bgp *bgp_path;
+	struct bgp_table *table;
+	time_t tbuf;
+
+	vty_out(vty, "Current BGP label nexthop cache for %s, VRF %s\n",
+		afi2str(afi), bgp->name_pretty);
+
+	tree = &bgp->mpls_labels_per_nexthop[afi];
+	frr_each (bgp_label_per_nexthop_cache, tree, iter) {
+		if (family == AF_INET)
+			src = (void *)&iter->nexthop.u.prefix4;
+		else
+			src = (void *)&iter->nexthop.u.prefix6;
+
+		vty_out(vty, " %s, label %s #paths %u\n",
+			inet_ntop(family, src, buf, sizeof(buf)),
+			mpls_label2str(1, &iter->label, labelstr,
+				       sizeof(labelstr), 0, true),
+			iter->path_count);
+		if (iter->nh)
+			vty_out(vty, " if %s\n",
+				ifindex2ifname(iter->nh->ifindex,
+					       iter->nh->vrf_id));
+		/* last_update is a monotonic timestamp: convert it to wall
+		 * clock time before formatting it.
+		 */
+		tbuf = time(NULL) - (monotime(NULL) - iter->last_update);
+		vty_out(vty, " Last update: %s", ctime(&tbuf));
+		if (!detail)
+			continue;
+		vty_out(vty, " Paths:\n");
+		LIST_FOREACH (path, &(iter->paths), label_nh_thread) {
+			dest = path->net;
+			/* check dest before it is handed to any accessor */
+			assert(dest);
+			table = bgp_dest_table(dest);
+			assert(table);
+			path_afi =
+				family2afi(bgp_dest_get_prefix(dest)->family);
+			safi = table->safi;
+			bgp_path = table->bgp;
+
+			if (dest->pdest) {
+				vty_out(vty, " %d/%d %pBD RD ", path_afi, safi,
+					dest);
+
+				vty_out(vty, BGP_RD_AS_FORMAT(bgp->asnotation),
+					(struct prefix_rd *)bgp_dest_get_prefix(
+						dest->pdest));
+				vty_out(vty, " %s flags 0x%x\n",
+					bgp_path->name_pretty, path->flags);
+			} else
+				vty_out(vty, " %d/%d %pBD %s flags 0x%x\n",
+					path_afi, safi, dest,
+					bgp_path->name_pretty, path->flags);
+		}
+	}
+}
+
+/* "show bgp [<view|vrf> VIEWVRFNAME] label-nexthop [detail]"
+ *
+ * Dump the label-per-nexthop cache of the selected BGP instance (the
+ * default instance when none is named), for IPv4 then IPv6.
+ * Returns CMD_WARNING when the requested instance does not exist.
+ */
+DEFPY(show_bgp_nexthop_label, show_bgp_nexthop_label_cmd,
+      "show bgp [<view|vrf> VIEWVRFNAME] label-nexthop [detail]",
+      SHOW_STR BGP_STR BGP_INSTANCE_HELP_STR
+      "BGP label per-nexthop table\n"
+      "Show detailed information\n")
+{
+	int idx = 0;
+	struct bgp *bgp;
+	bool detail = false;
+	int afi;
+
+	/* The grammar accepts both "view" and "vrf" as instance keyword; in
+	 * either case the instance name is the token that follows it.
+	 */
+	if (argv_find(argv, argc, "view", &idx) ||
+	    argv_find(argv, argc, "vrf", &idx))
+		bgp = bgp_lookup_by_name(argv[++idx]->arg);
+	else
+		bgp = bgp_get_default();
+
+	if (!bgp) {
+		vty_out(vty, "%% BGP instance not found\n");
+		return CMD_WARNING;
+	}
+
+	if (argv_find(argv, argc, "detail", &idx))
+		detail = true;
+
+	for (afi = AFI_IP; afi <= AFI_IP6; afi++)
+		show_bgp_nexthop_label_afi(vty, afi, bgp, detail);
+	return CMD_SUCCESS;
+}
+
#if BGP_LABELPOOL_ENABLE_TESTS
/*------------------------------------------------------------------------
* Testing code start
@@ -1532,3 +1654,66 @@ void bgp_lp_vty_init(void)
install_element(ENABLE_NODE, &clear_labelpool_perf_test_cmd);
#endif /* BGP_LABELPOOL_ENABLE_TESTS */
}
+
+DEFINE_MTYPE_STATIC(BGPD, LABEL_PER_NEXTHOP_CACHE,
+ "BGP Label Per Nexthop entry");
+
+/* Ordering callback of the label-per-nexthop cache tree: two entries are
+ * compared through their nexthop prefix only.
+ */
+int bgp_label_per_nexthop_cache_cmp(const struct bgp_label_per_nexthop_cache *a,
+				    const struct bgp_label_per_nexthop_cache *b)
+{
+	const struct prefix *lhs = &a->nexthop;
+	const struct prefix *rhs = &b->nexthop;
+
+	return prefix_cmp(lhs, rhs);
+}
+
+/* Allocate a cache entry for 'nexthop', with no label and no path attached
+ * yet, insert it in 'tree' and return it.
+ */
+struct bgp_label_per_nexthop_cache *
+bgp_label_per_nexthop_new(struct bgp_label_per_nexthop_cache_head *tree,
+			  struct prefix *nexthop)
+{
+	struct bgp_label_per_nexthop_cache *entry =
+		XCALLOC(MTYPE_LABEL_PER_NEXTHOP_CACHE,
+			sizeof(struct bgp_label_per_nexthop_cache));
+
+	prefix_copy(&entry->nexthop, nexthop);
+	entry->label = MPLS_INVALID_LABEL;
+	entry->tree = tree;
+	LIST_INIT(&(entry->paths));
+
+	bgp_label_per_nexthop_cache_add(tree, entry);
+	return entry;
+}
+
+/* Look up the cache entry of 'nexthop' in 'tree'.
+ * Returns NULL when 'tree' is NULL; otherwise returns the result of the
+ * tree lookup.
+ */
+struct bgp_label_per_nexthop_cache *
+bgp_label_per_nexthop_find(struct bgp_label_per_nexthop_cache_head *tree,
+			   struct prefix *nexthop)
+{
+	/* zeroed search key: only its nexthop field is filled in */
+	struct bgp_label_per_nexthop_cache key = {};
+
+	if (tree == NULL)
+		return NULL;
+
+	memcpy(&key.nexthop, nexthop, sizeof(struct prefix));
+
+	return bgp_label_per_nexthop_cache_find(tree, &key);
+}
+
+/* Release a cache entry: withdraw its label (if one was assigned), remove
+ * the entry from its tree, then free the nexthop copy and the entry itself.
+ */
+void bgp_label_per_nexthop_free(struct bgp_label_per_nexthop_cache *blnc)
+{
+	mpls_label_t label = blnc->label;
+
+	if (label != MPLS_INVALID_LABEL) {
+		/* NOTE(review): blnc->nh is dereferenced here without a NULL
+		 * check, whereas it is tested before nexthop_free() below -
+		 * confirm that an entry holding a label always has a nexthop.
+		 */
+		bgp_zebra_send_nexthop_label(ZEBRA_MPLS_LABELS_DELETE, label,
+					     blnc->nh->ifindex,
+					     blnc->nh->vrf_id, ZEBRA_LSP_BGP,
+					     &blnc->nexthop);
+		bgp_lp_release(LP_TYPE_NEXTHOP, blnc, label);
+	}
+
+	bgp_label_per_nexthop_cache_del(blnc->tree, blnc);
+
+	if (blnc->nh) {
+		nexthop_free(blnc->nh);
+		blnc->nh = NULL;
+	}
+
+	XFREE(MTYPE_LABEL_PER_NEXTHOP_CACHE, blnc);
+}
+
+void bgp_label_per_nexthop_init(void)
+{
+ install_element(VIEW_NODE, &show_bgp_nexthop_label_cmd); /* register the label-nexthop show command under VIEW_NODE */
+}
diff --git a/bgpd/bgp_labelpool.h b/bgpd/bgp_labelpool.h
index 9526cba0ce..b33527186e 100644
--- a/bgpd/bgp_labelpool.h
+++ b/bgpd/bgp_labelpool.h
@@ -17,6 +17,7 @@
*/
#define LP_TYPE_VRF 0x00000001
#define LP_TYPE_BGP_LU 0x00000002
+#define LP_TYPE_NEXTHOP 0x00000003
PREDECL_LIST(lp_fifo);
@@ -41,4 +42,55 @@ extern void bgp_lp_event_zebra_down(void);
extern void bgp_lp_event_zebra_up(void);
extern void bgp_lp_vty_init(void);
+struct bgp_label_per_nexthop_cache;
+PREDECL_RBTREE_UNIQ(bgp_label_per_nexthop_cache);
+
+extern int
+bgp_label_per_nexthop_cache_cmp(const struct bgp_label_per_nexthop_cache *a,
+ const struct bgp_label_per_nexthop_cache *b);
+
+struct bgp_label_per_nexthop_cache {
+
+ /* RB-tree entry. */
+ struct bgp_label_per_nexthop_cache_item entry;
+
+ /* the nexthop is the key of the RB-tree: entries are ordered by
+ * bgp_label_per_nexthop_cache_cmp(), which compares this field
+ */
+ struct prefix nexthop;
+
+ /* label assigned to this nexthop; MPLS_INVALID_LABEL as long as no
+ * label has been obtained
+ */
+ mpls_label_t label;
+
+ /* number of paths currently linked in the 'paths' list below */
+ unsigned int path_count;
+
+ /* back pointer to the bgp instance recorded when the entry was created;
+ * it is passed as 'to_bgp' when the linked paths are re-exported
+ */
+ struct bgp *to_bgp;
+
+ /* copy a nexthop resolution from bgp nexthop tracking
+ * used to extract the interface nexthop
+ */
+ struct nexthop *nh;
+
+ /* list of path_vrfs using it */
+ LIST_HEAD(path_lists, bgp_path_info) paths;
+
+ time_t last_update; /* monotime() value taken when a path was last linked to this entry */
+
+ /* Back pointer to the cache tree this entry belongs to. */
+ struct bgp_label_per_nexthop_cache_head *tree;
+};
+
+DECLARE_RBTREE_UNIQ(bgp_label_per_nexthop_cache,
+ struct bgp_label_per_nexthop_cache, entry,
+ bgp_label_per_nexthop_cache_cmp);
+
+void bgp_label_per_nexthop_free(struct bgp_label_per_nexthop_cache *blnc);
+
+struct bgp_label_per_nexthop_cache *
+bgp_label_per_nexthop_new(struct bgp_label_per_nexthop_cache_head *tree,
+ struct prefix *nexthop);
+struct bgp_label_per_nexthop_cache *
+bgp_label_per_nexthop_find(struct bgp_label_per_nexthop_cache_head *tree,
+ struct prefix *nexthop);
+void bgp_label_per_nexthop_init(void);
#endif /* _FRR_BGP_LABELPOOL_H */
diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c
index 63168f1e7a..ecc84533b0 100644
--- a/bgpd/bgp_mplsvpn.c
+++ b/bgpd/bgp_mplsvpn.c
@@ -1116,12 +1116,14 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,
/*
* Routes that are redistributed into BGP from zebra do not get
- * nexthop tracking. However, if those routes are subsequently
- * imported to other RIBs within BGP, the leaked routes do not
- * carry the original BGP_ROUTE_REDISTRIBUTE sub_type. Therefore,
- * in order to determine if the route we are currently leaking
- * should have nexthop tracking, we must find the ultimate
- * parent so we can check its sub_type.
+ * nexthop tracking, unless MPLS label allocation per nexthop is
+ * enabled. In the default case, nexthop tracking does not apply.
+ * However, if those routes are subsequently imported to other RIBs
+ * within BGP, the leaked routes do not carry the original
+ * BGP_ROUTE_REDISTRIBUTE sub_type. Therefore, in order to determine
+ * if the route we are currently leaking should have nexthop
+ * tracking, we must find the ultimate parent so we can check its
+ * sub_type.
*
* As of now, source_bpi may at most be a second-generation route
* (only one hop back to ultimate parent for vrf-vpn-vrf scheme).
@@ -1336,6 +1338,265 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,
return new;
}
+/* Detach 'pi' from the label-per-nexthop cache entry it is linked to, if
+ * any. The cache entry is freed once its last path has been detached.
+ * A NULL 'pi', or a path that is not linked, is a no-op.
+ */
+void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi)
+{
+	struct bgp_label_per_nexthop_cache *cache;
+
+	if (pi == NULL || pi->label_nexthop_cache == NULL)
+		return;
+
+	cache = pi->label_nexthop_cache;
+
+	LIST_REMOVE(pi, label_nh_thread);
+	cache->path_count--;
+	pi->label_nexthop_cache = NULL;
+
+	/* other paths still rely on this entry: keep it */
+	if (!LIST_EMPTY(&(cache->paths)))
+		return;
+
+	bgp_label_per_nexthop_free(cache);
+}
+
+/* Label pool callback for a label-per-nexthop cache entry ('context').
+ *
+ * 'allocated' tells whether 'label' has been granted; when it is false the
+ * entry falls back to MPLS_INVALID_LABEL. When the label of the entry
+ * changes, a newly valid label is sent to zebra for the nexthop and every
+ * path linked to the entry is exported again.
+ * Always returns 0.
+ */
+static int bgp_mplsvpn_get_label_per_nexthop_cb(mpls_label_t label,
+						void *context, bool allocated)
+{
+	struct bgp_label_per_nexthop_cache *blnc = context;
+	mpls_label_t previous = blnc->label;
+	struct bgp_path_info *pi;
+	struct bgp_table *table;
+
+	if (BGP_DEBUG(vpn, VPN_LEAK_LABEL))
+		zlog_debug("%s: label=%u, allocated=%d, nexthop=%pFX", __func__,
+			   label, allocated, &blnc->nexthop);
+
+	/* Either record the new label, or invalidate the previous one
+	 * (eg: zebra deallocated the labels and notifies it).
+	 */
+	blnc->label = allocated ? label : MPLS_INVALID_LABEL;
+
+	/* no change */
+	if (blnc->label == previous)
+		return 0;
+
+	if (blnc->label != MPLS_INVALID_LABEL)
+		bgp_zebra_send_nexthop_label(
+			ZEBRA_MPLS_LABELS_ADD, blnc->label, blnc->nh->ifindex,
+			blnc->nh->vrf_id, ZEBRA_LSP_BGP, &blnc->nexthop);
+
+	/* update paths */
+	LIST_FOREACH (pi, &(blnc->paths), label_nh_thread) {
+		table = pi->net ? bgp_dest_table(pi->net) : NULL;
+		if (table)
+			vpn_leak_from_vrf_update(blnc->to_bgp, table->bgp,
+						 pi);
+	}
+
+	return 0;
+}
+
+/* Get the per-nexthop label of a path:
+ * - find the cache entry matching the nexthop that resolves 'pi', or
+ *   create it and request a label for it; the label arrives later through
+ *   bgp_mplsvpn_get_label_per_nexthop_cb()
+ * - link 'pi' to that entry (unlinking it from any previous one)
+ * - keep the nexthop copy of the entry in sync with the resolved nexthop
+ *
+ * The caller must have checked that pi->nexthop holds a resolved,
+ * non-blackhole nexthop. 'afi' and 'safi' are not used by this function.
+ *
+ * Returns the label of the cache entry, which is MPLS_INVALID_LABEL as
+ * long as no label has been assigned to it.
+ */
+static mpls_label_t _vpn_leak_from_vrf_get_per_nexthop_label(
+	struct bgp_path_info *pi, struct bgp *to_bgp, struct bgp *from_bgp,
+	afi_t afi, safi_t safi)
+{
+	struct bgp_nexthop_cache *bnc = pi->nexthop;
+	struct bgp_label_per_nexthop_cache *blnc;
+	struct bgp_label_per_nexthop_cache_head *tree;
+	struct prefix *nh_pfx = NULL;
+	struct prefix nh_gate = {0};
+
+	/* extract the nexthop from the BNC nexthop cache */
+	switch (bnc->nexthop->type) {
+	case NEXTHOP_TYPE_IPV4:
+	case NEXTHOP_TYPE_IPV4_IFINDEX:
+		/* the nexthop is recursive */
+		nh_gate.family = AF_INET;
+		nh_gate.prefixlen = IPV4_MAX_BITLEN;
+		IPV4_ADDR_COPY(&nh_gate.u.prefix4, &bnc->nexthop->gate.ipv4);
+		nh_pfx = &nh_gate;
+		break;
+	case NEXTHOP_TYPE_IPV6:
+	case NEXTHOP_TYPE_IPV6_IFINDEX:
+		/* the nexthop is recursive */
+		nh_gate.family = AF_INET6;
+		nh_gate.prefixlen = IPV6_MAX_BITLEN;
+		IPV6_ADDR_COPY(&nh_gate.u.prefix6, &bnc->nexthop->gate.ipv6);
+		nh_pfx = &nh_gate;
+		break;
+	case NEXTHOP_TYPE_IFINDEX:
+		/* the nexthop is directly connected */
+		nh_pfx = &bnc->prefix;
+		break;
+	case NEXTHOP_TYPE_BLACKHOLE:
+		assert(!"Blackhole nexthop. Already checked by the caller.");
+	}
+
+	/* find or allocate a nexthop label cache entry */
+	tree = &from_bgp->mpls_labels_per_nexthop[family2afi(nh_pfx->family)];
+	blnc = bgp_label_per_nexthop_find(tree, nh_pfx);
+	if (!blnc) {
+		blnc = bgp_label_per_nexthop_new(tree, nh_pfx);
+		blnc->to_bgp = to_bgp;
+		/* request a label to zebra for this nexthop
+		 * the response from zebra will trigger the callback
+		 */
+		bgp_lp_get(LP_TYPE_NEXTHOP, blnc,
+			   bgp_mplsvpn_get_label_per_nexthop_cb);
+	}
+
+	if (pi->label_nexthop_cache == blnc)
+		/* no change */
+		return blnc->label;
+
+	/* Unlink from any existing nexthop cache. Free the entry if unused.
+	 */
+	bgp_mplsvpn_path_nh_label_unlink(pi);
+
+	/* Link the path to its cache entry. blnc cannot be NULL at this
+	 * point: it has either been found or just been created above.
+	 */
+	LIST_INSERT_HEAD(&(blnc->paths), pi, label_nh_thread);
+	pi->label_nexthop_cache = blnc;
+	blnc->path_count++;
+	blnc->last_update = monotime(NULL);
+
+	/* then add or update the selected nexthop */
+	if (!blnc->nh)
+		blnc->nh = nexthop_dup(bnc->nexthop, NULL);
+	else if (!nexthop_same(bnc->nexthop, blnc->nh)) {
+		nexthop_free(blnc->nh);
+		blnc->nh = nexthop_dup(bnc->nexthop, NULL);
+		/* the resolution changed: refresh the label entry in zebra */
+		if (blnc->label != MPLS_INVALID_LABEL) {
+			bgp_zebra_send_nexthop_label(
+				ZEBRA_MPLS_LABELS_REPLACE, blnc->label,
+				bnc->nexthop->ifindex, bnc->nexthop->vrf_id,
+				ZEBRA_LSP_BGP, &blnc->nexthop);
+		}
+	}
+
+	return blnc->label;
+}
+
+/* Filter out all the cases where a per-nexthop label is not possible:
+ * - return MPLS_INVALID_LABEL when the nexthop is not valid/resolved
+ * - return the per-VRF label (from_bgp->vpn_policy[afi].tovpn_label) when
+ * the per-nexthop label is not supported for this path
+ * In both cases the path is first unlinked from its label cache entry.
+ * Otherwise, find or request a per-nexthop label.
+ */
+static mpls_label_t vpn_leak_from_vrf_get_per_nexthop_label(
+ afi_t afi, safi_t safi, struct bgp_path_info *pi, struct bgp *from_bgp,
+ struct bgp *to_bgp)
+{
+ struct bgp_path_info *bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
+ struct bgp *bgp_nexthop = NULL;
+ bool nh_valid;
+ afi_t nh_afi;
+ bool is_bgp_static_route;
+
+ is_bgp_static_route = bpi_ultimate->sub_type == BGP_ROUTE_STATIC &&
+ bpi_ultimate->type == ZEBRA_ROUTE_BGP;
+
+ if (is_bgp_static_route == false && afi == AFI_IP &&
+ CHECK_FLAG(pi->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)) &&
+ (pi->attr->nexthop.s_addr == INADDR_ANY ||
+ !ipv4_unicast_valid(&pi->attr->nexthop))) {
+ /* IPv4 nexthop in standard BGP encoding format.
+ * The address is not usable as a nexthop: it is either
+ * unspecified (INADDR_ANY) or rejected by ipv4_unicast_valid().
+ * Fallback to the per VRF label.
+ */
+ bgp_mplsvpn_path_nh_label_unlink(pi);
+ return from_bgp->vpn_policy[afi].tovpn_label;
+ }
+
+ if (is_bgp_static_route == false && afi == AFI_IP &&
+ pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4 &&
+ (pi->attr->mp_nexthop_global_in.s_addr == INADDR_ANY ||
+ !ipv4_unicast_valid(&pi->attr->mp_nexthop_global_in))) {
+ /* IPv4 nexthop is in MP-BGP encoding format.
+ * The address is not usable as a nexthop: it is either
+ * unspecified (INADDR_ANY) or rejected by ipv4_unicast_valid().
+ * Fallback to the per VRF label.
+ */
+ bgp_mplsvpn_path_nh_label_unlink(pi);
+ return from_bgp->vpn_policy[afi].tovpn_label;
+ }
+
+ if (is_bgp_static_route == false && afi == AFI_IP6 &&
+ (pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL ||
+ pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) &&
+ (IN6_IS_ADDR_UNSPECIFIED(&pi->attr->mp_nexthop_global) ||
+ IN6_IS_ADDR_LOOPBACK(&pi->attr->mp_nexthop_global) ||
+ IN6_IS_ADDR_MULTICAST(&pi->attr->mp_nexthop_global))) {
+ /* IPv6 nexthop is in MP-BGP encoding format.
+ * The address is not usable as a nexthop: it is unspecified,
+ * loopback or multicast.
+ * Fallback to the per VRF label.
+ */
+ bgp_mplsvpn_path_nh_label_unlink(pi);
+ return from_bgp->vpn_policy[afi].tovpn_label;
+ }
+
+ /* Check the next-hop reachability.
+ * Get the bgp instance where the bgp_path_info originates.
+ */
+ if (pi->extra && pi->extra->bgp_orig)
+ bgp_nexthop = pi->extra->bgp_orig;
+ else
+ bgp_nexthop = from_bgp;
+
+ nh_afi = BGP_ATTR_NH_AFI(afi, pi->attr);
+ nh_valid = bgp_find_or_add_nexthop(from_bgp, bgp_nexthop, nh_afi, safi,
+ pi, NULL, 0, NULL);
+
+ if (!nh_valid && is_bgp_static_route &&
+ !CHECK_FLAG(from_bgp->flags, BGP_FLAG_IMPORT_CHECK)) {
+ /* "network" prefixes not routable, but since 'no bgp network
+ * import-check' is configured, they are always valid in the BGP
+ * table. Fallback to the per-vrf label
+ */
+ bgp_mplsvpn_path_nh_label_unlink(pi);
+ return from_bgp->vpn_policy[afi].tovpn_label;
+ }
+
+ if (!nh_valid || !pi->nexthop || pi->nexthop->nexthop_num == 0 ||
+ !pi->nexthop->nexthop) {
+ /* invalid next-hop:
+ * do not send the per-vrf label
+ * otherwise, when the next-hop becomes valid,
+ * we will have 2 BGP updates:
+ * - one with the per-vrf label
+ * - the second with the per-nexthop label
+ */
+ bgp_mplsvpn_path_nh_label_unlink(pi);
+ return MPLS_INVALID_LABEL;
+ }
+
+ if (pi->nexthop->nexthop_num > 1 ||
+ pi->nexthop->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
+ /* Blackhole or ECMP routes
+ * are not compatible with per-nexthop label.
+ * Fallback to per-vrf label.
+ */
+ bgp_mplsvpn_path_nh_label_unlink(pi);
+ return from_bgp->vpn_policy[afi].tovpn_label;
+ }
+
+ return _vpn_leak_from_vrf_get_per_nexthop_label(pi, to_bgp, from_bgp,
+ afi, safi);
+}
+
/* cf vnc_import_bgp_add_route_mode_nvegroup() and add_vnc_route() */
void vpn_leak_from_vrf_update(struct bgp *to_bgp, /* to */
struct bgp *from_bgp, /* from */
@@ -1528,12 +1789,32 @@ void vpn_leak_from_vrf_update(struct bgp *to_bgp, /* to */
nexthop_self_flag = 1;
}
- label_val = from_bgp->vpn_policy[afi].tovpn_label;
- if (label_val == MPLS_LABEL_NONE) {
+ if (CHECK_FLAG(from_bgp->vpn_policy[afi].flags,
+ BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP))
+ /* per nexthop label mode */
+ label_val = vpn_leak_from_vrf_get_per_nexthop_label(
+ afi, safi, path_vrf, from_bgp, to_bgp);
+ else
+ /* per VRF label mode */
+ label_val = from_bgp->vpn_policy[afi].tovpn_label;
+
+ if (label_val == MPLS_INVALID_LABEL &&
+ CHECK_FLAG(from_bgp->vpn_policy[afi].flags,
+ BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) {
+ /* no valid label for the moment
+ * when the 'bgp_mplsvpn_get_label_per_nexthop_cb' callback gets
+ * a valid label value, it will call the current function again.
+ */
+ if (debug)
+ zlog_debug(
+ "%s: %s skipping: waiting for a valid per-label nexthop.",
+ __func__, from_bgp->name_pretty);
+ return;
+ }
+ if (label_val == MPLS_LABEL_NONE)
encode_label(MPLS_LABEL_IMPLICIT_NULL, &label);
- } else {
+ else
encode_label(label_val, &label);
- }
/* Set originator ID to "me" */
SET_FLAG(static_attr.flag, ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID));
@@ -1770,6 +2051,8 @@ void vpn_leak_from_vrf_withdraw_all(struct bgp *to_bgp, struct bgp *from_bgp,
bpi, afi, safi);
bgp_path_info_delete(bn, bpi);
bgp_process(to_bgp, bn, afi, safi);
+ bgp_mplsvpn_path_nh_label_unlink(
+ bpi->extra->parent);
}
}
}
diff --git a/bgpd/bgp_mplsvpn.h b/bgpd/bgp_mplsvpn.h
index c832b4abd4..75758edcc2 100644
--- a/bgpd/bgp_mplsvpn.h
+++ b/bgpd/bgp_mplsvpn.h
@@ -31,6 +31,7 @@
#define BGP_PREFIX_SID_SRV6_MAX_FUNCTION_LENGTH 20
extern void bgp_mplsvpn_init(void);
+extern void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi);
extern int bgp_nlri_parse_vpn(struct peer *, struct attr *, struct bgp_nlri *);
extern uint32_t decode_label(mpls_label_t *);
extern void encode_label(mpls_label_t, mpls_label_t *);
diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c
index 1c79d7d03b..c878512389 100644
--- a/bgpd/bgp_nexthop.c
+++ b/bgpd/bgp_nexthop.c
@@ -31,6 +31,7 @@
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_vty.h"
#include "bgpd/bgp_rd.h"
+#include "bgpd/bgp_mplsvpn.h"
DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Addr Intf String");
@@ -119,6 +120,8 @@ static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree)
while (!LIST_EMPTY(&(bnc->paths))) {
struct bgp_path_info *path = LIST_FIRST(&(bnc->paths));
+ bgp_mplsvpn_path_nh_label_unlink(path);
+
path_nh_map(path, bnc, false);
}
diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c
index a294ebcc63..bda163d7a5 100644
--- a/bgpd/bgp_nht.c
+++ b/bgpd/bgp_nht.c
@@ -31,6 +31,7 @@
#include "bgpd/bgp_flowspec_util.h"
#include "bgpd/bgp_evpn.h"
#include "bgpd/bgp_rd.h"
+#include "bgpd/bgp_mplsvpn.h"
extern struct zclient *zclient;
@@ -149,6 +150,8 @@ void bgp_unlink_nexthop(struct bgp_path_info *path)
{
struct bgp_nexthop_cache *bnc = path->nexthop;
+ bgp_mplsvpn_path_nh_label_unlink(path);
+
if (!bnc)
return;
@@ -1134,10 +1137,21 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
}
LIST_FOREACH (path, &(bnc->paths), nh_thread) {
- if (!(path->type == ZEBRA_ROUTE_BGP
- && ((path->sub_type == BGP_ROUTE_NORMAL)
- || (path->sub_type == BGP_ROUTE_STATIC)
- || (path->sub_type == BGP_ROUTE_IMPORTED))))
+ if (path->type == ZEBRA_ROUTE_BGP &&
+ (path->sub_type == BGP_ROUTE_NORMAL ||
+ path->sub_type == BGP_ROUTE_STATIC ||
+ path->sub_type == BGP_ROUTE_IMPORTED))
+ /* evaluate the path */
+ ;
+ else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE) {
+ /* evaluate the path for redistributed routes
+ * except those from VNC
+ */
+ if ((path->type == ZEBRA_ROUTE_VNC) ||
+ (path->type == ZEBRA_ROUTE_VNC_DIRECT))
+ continue;
+ } else
+ /* don't evaluate the path */
continue;
dest = path->net;
@@ -1230,7 +1244,26 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
- if (path_valid != bnc_is_valid_nexthop) {
+ if (path->type == ZEBRA_ROUTE_BGP &&
+ path->sub_type == BGP_ROUTE_STATIC &&
+ !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
+ /* static routes with 'no bgp network import-check' are
+ * always valid. if nht is called with static routes,
+ * the vpn exportation needs to be triggered
+ */
+ vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
+ path);
+ else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE &&
+ safi == SAFI_UNICAST &&
+ (bgp_path->inst_type == BGP_INSTANCE_TYPE_VRF ||
+ bgp_path->inst_type == BGP_INSTANCE_TYPE_DEFAULT))
+ /* redistribute routes are always valid
+ * if nht is called with redistribute routes, the vpn
+ * exportation needs to be triggered
+ */
+ vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
+ path);
+ else if (path_valid != bnc_is_valid_nexthop) {
if (path_valid) {
/* No longer valid, clear flag; also for EVPN
* routes, unimport from VRFs if needed.
@@ -1243,6 +1276,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
bgp_evpn_unimport_route(bgp_path,
afi, safi, bgp_dest_get_prefix(dest), path);
+ if (safi == SAFI_UNICAST &&
+ (bgp_path->inst_type !=
+ BGP_INSTANCE_TYPE_VIEW))
+ vpn_leak_from_vrf_withdraw(
+ bgp_get_default(), bgp_path,
+ path);
} else {
/* Path becomes valid, set flag; also for EVPN
* routes, import from VRFs if needed.
@@ -1255,6 +1294,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
bgp_evpn_import_route(bgp_path,
afi, safi, bgp_dest_get_prefix(dest), path);
+ if (safi == SAFI_UNICAST &&
+ (bgp_path->inst_type !=
+ BGP_INSTANCE_TYPE_VIEW))
+ vpn_leak_from_vrf_update(
+ bgp_get_default(), bgp_path,
+ path);
}
}
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index b51396c8d1..f5ead66f25 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -8676,12 +8676,16 @@ void bgp_redistribute_add(struct bgp *bgp, struct prefix *p,
*/
assert(attr.aspath);
+ if (p->family == AF_INET6)
+ UNSET_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP));
+
switch (nhtype) {
case NEXTHOP_TYPE_IFINDEX:
switch (p->family) {
case AF_INET:
attr.nexthop.s_addr = INADDR_ANY;
attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+ attr.mp_nexthop_global_in.s_addr = INADDR_ANY;
break;
case AF_INET6:
memset(&attr.mp_nexthop_global, 0,
@@ -8694,6 +8698,7 @@ void bgp_redistribute_add(struct bgp *bgp, struct prefix *p,
case NEXTHOP_TYPE_IPV4_IFINDEX:
attr.nexthop = nexthop->ipv4;
attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+ attr.mp_nexthop_global_in = nexthop->ipv4;
break;
case NEXTHOP_TYPE_IPV6:
case NEXTHOP_TYPE_IPV6_IFINDEX:
@@ -8705,6 +8710,7 @@ void bgp_redistribute_add(struct bgp *bgp, struct prefix *p,
case AF_INET:
attr.nexthop.s_addr = INADDR_ANY;
attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
+ attr.mp_nexthop_global_in.s_addr = INADDR_ANY;
break;
case AF_INET6:
memset(&attr.mp_nexthop_global, 0,
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index a64144b625..fbdd5fae7d 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -319,6 +319,12 @@ struct bgp_path_info {
/* Addpath identifiers */
uint32_t addpath_rx_id;
struct bgp_addpath_info_data tx_addpath;
+
+ /* For nexthop per label linked list */
+ LIST_ENTRY(bgp_path_info) label_nh_thread;
+
+ /* Back pointer to the bgp label per nexthop structure */
+ struct bgp_label_per_nexthop_cache *label_nexthop_cache;
};
/* Structure used in BGP path selection */
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index ccf198c392..1be44adde8 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -9183,6 +9183,63 @@ ALIAS (af_rd_vpn_export,
"Between current address-family and vpn\n"
"For routes leaked from current address-family to vpn\n")
+/* "[no] label vpn export allocation-mode <per-vrf|per-nexthop>"
+ *
+ * Select how labels are chosen for routes exported from the current
+ * address-family to vpn. The "no" form resets the mode to per-vrf and is
+ * rejected with CMD_ERR_NO_MATCH when the mode it names is not the active
+ * one. When the mode actually changes, the exported routes are withdrawn
+ * before the flag is updated and exported again afterwards.
+ */
+DEFPY(af_label_vpn_export_allocation_mode,
+      af_label_vpn_export_allocation_mode_cmd,
+      "[no$no] label vpn export allocation-mode <per-vrf$label_per_vrf|per-nexthop$label_per_nh>",
+      NO_STR
+      "label value for VRF\n"
+      "Between current address-family and vpn\n"
+      "For routes leaked from current address-family to vpn\n"
+      "Label allocation mode\n"
+      "Allocate one label for all BGP updates of the VRF\n"
+      "Allocate a label per connected next-hop in the VRF\n")
+{
+	VTY_DECLVAR_CONTEXT(bgp, bgp);
+	afi_t afi;
+	bool was_per_nexthop, want_per_nexthop;
+
+	afi = vpn_policy_getafi(vty, bgp, false);
+	was_per_nexthop = !!CHECK_FLAG(bgp->vpn_policy[afi].flags,
+				       BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP);
+
+	if (!no) {
+		want_per_nexthop = label_per_nh ? true : false;
+	} else {
+		/* the negated mode has to be the one currently in use */
+		if ((label_per_nh && !was_per_nexthop) ||
+		    (label_per_vrf && was_per_nexthop))
+			return CMD_ERR_NO_MATCH;
+		want_per_nexthop = false;
+	}
+
+	/* no change */
+	if (want_per_nexthop == was_per_nexthop)
+		return CMD_SUCCESS;
+
+	/*
+	 * pre-change: un-export vpn routes (vpn->vrf routes unaffected)
+	 */
+	vpn_leak_prechange(BGP_VPN_POLICY_DIR_TOVPN, afi, bgp_get_default(),
+			   bgp);
+
+	if (want_per_nexthop)
+		SET_FLAG(bgp->vpn_policy[afi].flags,
+			 BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP);
+	else
+		UNSET_FLAG(bgp->vpn_policy[afi].flags,
+			   BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP);
+
+	/* post-change: re-export vpn routes */
+	vpn_leak_postchange(BGP_VPN_POLICY_DIR_TOVPN, afi, bgp_get_default(),
+			    bgp);
+
+	hook_call(bgp_snmp_update_last_changed, bgp);
+	return CMD_SUCCESS;
+}
+
DEFPY (af_label_vpn_export,
af_label_vpn_export_cmd,
"[no] label vpn export <(0-1048575)$label_val|auto$label_auto>",
@@ -17300,6 +17357,12 @@ static void bgp_vpn_policy_config_write_afi(struct vty *vty, struct bgp *bgp,
}
}
+ if (CHECK_FLAG(bgp->vpn_policy[afi].flags,
+ BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP))
+ vty_out(vty,
+ "%*slabel vpn export allocation-mode per-nexthop\n",
+ indent, "");
+
tovpn_sid_index = bgp->vpn_policy[afi].tovpn_sid_index;
if (CHECK_FLAG(bgp->vpn_policy[afi].flags,
BGP_VPN_POLICY_TOVPN_SID_AUTO)) {
@@ -20473,6 +20536,10 @@ void bgp_vty_init(void)
install_element(BGP_IPV6_NODE, &af_rd_vpn_export_cmd);
install_element(BGP_IPV4_NODE, &af_label_vpn_export_cmd);
install_element(BGP_IPV6_NODE, &af_label_vpn_export_cmd);
+ install_element(BGP_IPV4_NODE,
+ &af_label_vpn_export_allocation_mode_cmd);
+ install_element(BGP_IPV6_NODE,
+ &af_label_vpn_export_allocation_mode_cmd);
install_element(BGP_IPV4_NODE, &af_nexthop_vpn_export_cmd);
install_element(BGP_IPV6_NODE, &af_nexthop_vpn_export_cmd);
install_element(BGP_IPV4_NODE, &af_rt_vpn_imexport_cmd);
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 96b1f3e00f..1965cd2704 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -3911,3 +3911,32 @@ int bgp_zebra_srv6_manager_release_locator_chunk(const char *name)
{
return srv6_manager_release_locator_chunk(zclient, name);
}
+
+/* Send an MPLS label message ('cmd') to zebra for local label 'label',
+ * with a single nexthop built from the address held in 'p'.
+ *
+ * The nexthop carries no outgoing label. Its type is the plain IPv4/IPv6
+ * gateway type when 'ifindex' is IFINDEX_INTERNAL, and the gateway plus
+ * interface type otherwise. Any family other than AF_INET is handled as
+ * IPv6.
+ */
+void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label,
+				  ifindex_t ifindex, vrf_id_t vrf_id,
+				  enum lsp_types_t ltype, struct prefix *p)
+{
+	struct zapi_labels zl = {};
+	struct zapi_nexthop *znh = &zl.nexthops[0];
+	bool is_ipv4 = (p->family == AF_INET);
+	bool has_ifindex = (ifindex != IFINDEX_INTERNAL);
+
+	zl.type = ltype;
+	zl.local_label = label;
+	zl.nexthop_num = 1;
+
+	if (is_ipv4) {
+		IPV4_ADDR_COPY(&znh->gate.ipv4, &p->u.prefix4);
+		znh->type = has_ifindex ? NEXTHOP_TYPE_IPV4_IFINDEX
+					: NEXTHOP_TYPE_IPV4;
+	} else {
+		IPV6_ADDR_COPY(&znh->gate.ipv6, &p->u.prefix6);
+		znh->type = has_ifindex ? NEXTHOP_TYPE_IPV6_IFINDEX
+					: NEXTHOP_TYPE_IPV6;
+	}
+	znh->ifindex = ifindex;
+	znh->vrf_id = vrf_id;
+	znh->label_num = 0;
+
+	/* NOTE(review): the original comment here reads "vrf_id is
+	 * DEFAULT_VRF" - presumably about the VRF used for the message
+	 * itself; confirm against zebra_send_mpls_labels().
+	 */
+	zebra_send_mpls_labels(zclient, cmd, &zl);
+}
diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h
index b09be890e5..7c85d86b31 100644
--- a/bgpd/bgp_zebra.h
+++ b/bgpd/bgp_zebra.h
@@ -118,4 +118,8 @@ extern int bgp_zebra_update(struct bgp *bgp, afi_t afi, safi_t safi,
extern int bgp_zebra_stale_timer_update(struct bgp *bgp);
extern int bgp_zebra_srv6_manager_get_locator_chunk(const char *name);
extern int bgp_zebra_srv6_manager_release_locator_chunk(const char *name);
+extern void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label,
+ ifindex_t index, vrf_id_t vrfid,
+ enum lsp_types_t ltype,
+ struct prefix *p);
#endif /* _QUAGGA_BGP_ZEBRA_H */
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 9d7a1f967e..42ad8a5632 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -3354,6 +3354,11 @@ static struct bgp *bgp_create(as_t *as, const char *name,
SET_FLAG(bgp->af_flags[afi][SAFI_MPLS_VPN],
BGP_VPNVX_RETAIN_ROUTE_TARGET_ALL);
}
+
+ for (afi = AFI_IP; afi < AFI_MAX; afi++)
+ bgp_label_per_nexthop_cache_init(
+ &bgp->mpls_labels_per_nexthop[afi]);
+
if (name)
bgp->name = XSTRDUP(MTYPE_BGP, name);
@@ -8251,6 +8256,8 @@ void bgp_init(unsigned short instance)
bgp_lp_vty_init();
+ bgp_label_per_nexthop_init();
+
cmd_variable_handler_register(bgp_viewvrf_var_handlers);
}
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index c3cb6ba91e..68b32b5945 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -211,6 +211,7 @@ struct vpn_policy {
#define BGP_VPN_POLICY_TOVPN_RD_SET (1 << 1)
#define BGP_VPN_POLICY_TOVPN_NEXTHOP_SET (1 << 2)
#define BGP_VPN_POLICY_TOVPN_SID_AUTO (1 << 3)
+#define BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP (1 << 4)
/*
* If we are importing another vrf into us keep a list of
@@ -573,6 +574,10 @@ struct bgp {
/* Allocate MPLS labels */
uint8_t allocate_mpls_labels[AFI_MAX][SAFI_MAX];
+ /* Tree for next-hop lookup cache. */
+ struct bgp_label_per_nexthop_cache_head
+ mpls_labels_per_nexthop[AFI_MAX];
+
/* Allocate hash entries to store policy routing information
* The hash are used to host pbr rules somewhere.
* Actually, pbr will only be used by flowspec