]> git.puffer.fish Git - mirror/frr.git/commitdiff
zebra: add tc netlink and dplane ops
authorSiger Yang <siger.yang@outlook.com>
Fri, 29 Jul 2022 13:59:19 +0000 (21:59 +0800)
committerSiger Yang <siger.yang@outlook.com>
Wed, 10 Aug 2022 18:32:43 +0000 (02:32 +0800)
This commit implements necessary netlink encoders for traffic control
including QDISC, TCLASS and TFILTER, and adds basic dplane operations.

Co-authored-by: Stephen Worley <sworley@nvidia.com>
Signed-off-by: Siger Yang <siger.yang@outlook.com>
12 files changed:
zebra/debug_nl.c
zebra/dplane_fpm_nl.c
zebra/interface.c
zebra/kernel_netlink.c
zebra/rt.h
zebra/subdir.am
zebra/tc_netlink.c [new file with mode: 0644]
zebra/tc_netlink.h [new file with mode: 0644]
zebra/zebra_dplane.c
zebra/zebra_dplane.h
zebra/zebra_nhg.c
zebra/zebra_rib.c

index a16d442521b185d0c39945c88908fbb706ba276c..afefab66746d4543b5fafe326cd7f0cc3aedfaea 100644 (file)
@@ -1536,6 +1536,24 @@ next_rta:
        goto next_rta;
 }
 
+/*
+ * Map a traffic control netlink message type to the short name of the
+ * object it operates on, for debug output.
+ */
+static const char *tcm_nltype2str(int nltype)
+{
+       if (nltype == RTM_NEWQDISC || nltype == RTM_DELQDISC)
+               return "qdisc";
+
+       if (nltype == RTM_NEWTCLASS || nltype == RTM_DELTCLASS)
+               return "tclass";
+
+       if (nltype == RTM_NEWTFILTER || nltype == RTM_DELTFILTER)
+               return "tfilter";
+
+       /* should never hit */
+       return "unknown";
+}
+
 static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen)
 {
        const struct rtattr *rta;
@@ -1595,6 +1613,8 @@ void nl_dump(void *msg, size_t msglen)
        struct ifinfomsg *ifi;
        struct tunnel_msg *tnlm;
        struct fib_rule_hdr *frh;
+       struct tcmsg *tcm;
+
        char fbuf[128];
        char ibuf[128];
 
@@ -1730,6 +1750,21 @@ next_header:
                nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm)));
                break;
 
+       case RTM_NEWQDISC:
+       case RTM_DELQDISC:
+       case RTM_NEWTCLASS:
+       case RTM_DELTCLASS:
+       case RTM_NEWTFILTER:
+       case RTM_DELTFILTER:
+               tcm = NLMSG_DATA(nlmsg);
+               zlog_debug(
+                       " tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]",
+                       tcm_nltype2str(nlmsg->nlmsg_type),
+                       af_type2str(tcm->tcm_family), tcm->tcm_family,
+                       tcm->tcm_ifindex, tcm->tcm_handle >> 16,
+                       tcm->tcm_handle & 0xffff);
+               break;
+
        default:
                break;
        }
index ec4ea372f1c3950c2140e190fcdc66424225b7df..d07c4c63324b9985acd4725e097cdd20677dbf96 100644 (file)
@@ -815,6 +815,9 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx)
        case DPLANE_OP_INTF_INSTALL:
        case DPLANE_OP_INTF_UPDATE:
        case DPLANE_OP_INTF_DELETE:
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
        case DPLANE_OP_NONE:
                break;
 
index 205fa8829314f48937efd62d480650238502f98b..c674b499ac82ad6ec4e30506300262ab653630ec 100644 (file)
@@ -1573,6 +1573,9 @@ void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx)
        case DPLANE_OP_IPSET_ENTRY_DELETE:
        case DPLANE_OP_NEIGH_TABLE_UPDATE:
        case DPLANE_OP_GRE_SET:
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
                break; /* should never hit here */
        }
 }
index 396ccb34bdbbcd3a1b0cc626d14206a4a3719e72..45a372f88c9e5e78ec5a4177e64a251009079948 100644 (file)
@@ -47,6 +47,7 @@
 #include "zebra/rt_netlink.h"
 #include "zebra/if_netlink.h"
 #include "zebra/rule_netlink.h"
+#include "zebra/tc_netlink.h"
 #include "zebra/netconf_netlink.h"
 #include "zebra/zebra_errors.h"
 
@@ -114,6 +115,15 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
                                           {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
                                           {RTM_DELTUNNEL, "RTM_DELTUNNEL"},
                                           {RTM_GETTUNNEL, "RTM_GETTUNNEL"},
+                                          {RTM_NEWQDISC, "RTM_NEWQDISC"},
+                                          {RTM_DELQDISC, "RTM_DELQDISC"},
+                                          {RTM_GETQDISC, "RTM_GETQDISC"},
+                                          {RTM_NEWTCLASS, "RTM_NEWTCLASS"},
+                                          {RTM_DELTCLASS, "RTM_DELTCLASS"},
+                                          {RTM_GETTCLASS, "RTM_GETTCLASS"},
+                                          {RTM_NEWTFILTER, "RTM_NEWTFILTER"},
+                                          {RTM_DELTFILTER, "RTM_DELTFILTER"},
+                                          {RTM_GETTFILTER, "RTM_GETTFILTER"},
                                           {0}};
 
 static const struct message rtproto_str[] = {
@@ -1623,6 +1633,11 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
        case DPLANE_OP_INTF_UPDATE:
        case DPLANE_OP_INTF_DELETE:
                return netlink_put_intf_update_msg(bth, ctx);
+
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
+               return netlink_put_tc_update_msg(bth, ctx);
        }
 
        return FRR_NETLINK_ERROR;
index 0a86a2897cb8086e3c0e28b3ffba7cbc206e2dcc..d8a22d2cfcd82e8b751a9fef96ee7e196c7af206 100644 (file)
@@ -71,6 +71,7 @@ kernel_intf_update(struct zebra_dplane_ctx *ctx);
 
 extern enum zebra_dplane_result
 kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx);
+extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx);
 
 #endif /* !HAVE_NETLINK */
 
index a926c14adf0585b87a1647f97f70b968a4b05f57..6fd3d20d372dfdc7a2845c93a2c82dfc3bb5a0e0 100644 (file)
@@ -82,6 +82,7 @@ zebra_zebra_SOURCES = \
        zebra/rule_netlink.c \
        zebra/rule_socket.c \
        zebra/table_manager.c \
+       zebra/tc_netlink.c \
        zebra/zapi_msg.c \
        zebra/zebra_dplane.c \
        zebra/zebra_errors.c \
@@ -163,6 +164,7 @@ noinst_HEADERS += \
        zebra/rtadv.h \
        zebra/rule_netlink.h \
        zebra/table_manager.h \
+       zebra/tc_netlink.h \
        zebra/zapi_msg.h \
        zebra/zebra_dplane.h \
        zebra/zebra_errors.h \
diff --git a/zebra/tc_netlink.c b/zebra/tc_netlink.c
new file mode 100644 (file)
index 0000000..89ce075
--- /dev/null
@@ -0,0 +1,468 @@
+/*
+ * Zebra Traffic Control (TC) interaction with the kernel using netlink.
+ *
+ * Copyright (C) 2022 Shichu Yang
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FRR; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#ifdef HAVE_NETLINK
+
+#include <linux/if_ether.h>
+#include <sys/socket.h>
+
+#include "if.h"
+#include "prefix.h"
+#include "vrf.h"
+
+#include <linux/fib_rules.h>
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include "zebra/zserv.h"
+#include "zebra/zebra_ns.h"
+#include "zebra/zebra_vrf.h"
+#include "zebra/rt.h"
+#include "zebra/interface.h"
+#include "zebra/debug.h"
+#include "zebra/rtadv.h"
+#include "zebra/kernel_netlink.h"
+#include "zebra/tc_netlink.h"
+#include "zebra/zebra_errors.h"
+#include "zebra/zebra_dplane.h"
+#include "zebra/zebra_trace.h"
+
+/* TODO: move these bitflags to zebra_tc.h */
+/* Bits tested against the filter bitmap when encoding a flower filter */
+#define TC_FILTER_SRC_IP (1 << 0)
+#define TC_FILTER_DST_IP (1 << 1)
+#define TC_FILTER_IP_PROTOCOL (1 << 9)
+
+/* Value tc_get_freq() falls back to when it cannot determine one */
+#define TC_FREQ_DEFAULT (100)
+
+/* tc_get_handle() adds the ifindex to TC_MAJOR_BASE to form the major */
+#define TC_MAJOR_BASE (0x1000u)
+/* Used as the htb qdisc's default class ("defcls") */
+#define TC_MINOR_NOCLASS (0xffffu)
+
+/* NOTE(review): no user of TC_FILTER_MASK is visible in this file */
+#define TC_FILTER_MASK (0x8000u)
+
+/*
+ * xmittime(r, s): the ratio s / r scaled by TIME_UNITS_PER_SEC, computed
+ * in double precision. r must be non-zero.
+ */
+#define TIME_UNITS_PER_SEC (1000000)
+#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
+
+/*
+ * Read the packet scheduler frequency from /proc/net/psched.
+ *
+ * The first two hex fields of the file are skipped; the fourth field is
+ * used as the frequency only when the third field equals 1000000. When
+ * the file cannot be opened or parsed, or the value read is zero,
+ * TC_FREQ_DEFAULT is returned instead.
+ */
+static uint32_t tc_get_freq(void)
+{
+       uint32_t freq = 0;
+       FILE *fp = fopen("/proc/net/psched", "r");
+
+       if (fp) {
+               /* "%x" stores through a pointer to unsigned int */
+               unsigned int nom = 0, denom = 0;
+
+               if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2
+                   && nom == 1000000)
+                       freq = denom;
+
+               fclose(fp);
+       }
+
+       return freq == 0 ? TC_FREQ_DEFAULT : freq;
+}
+
+/*
+ * Combine a 16-bit major and a 16-bit minor number into one 32-bit
+ * handle: major in the upper half, minor in the lower half.
+ */
+static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
+{
+       /*
+        * Widen before shifting: a uint16_t promotes to signed int, and
+        * shifting a value >= 0x8000 left by 16 would overflow it.
+        */
+       return ((uint32_t)major << 16) | (uint32_t)minor;
+}
+
+/*
+ * Build a handle for the interface of a dplane context: the major number
+ * is TC_MAJOR_BASE plus the ifindex (both truncated to 16 bits), the
+ * minor number is supplied by the caller.
+ */
+static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx,
+                                    uint16_t minor)
+{
+       const uint16_t ifidx = (uint16_t)dplane_ctx_get_ifindex(ctx);
+
+       return tc_make_handle(TC_MAJOR_BASE + ifidx, minor);
+}
+
+/*
+ * Fill a 256-entry rate table for a rate spec and set the spec's cell
+ * parameters.
+ *
+ * Entry i holds xmittime() of (i + 1) << cell_log bytes at ratespec->rate,
+ * where cell_log is the smallest shift for which mtu >> cell_log fits in
+ * 0..255. An mtu of 0 selects the default of 2047.
+ *
+ * Values that do not fit in 32 bits, including the case of a zero rate,
+ * are stored as UINT32_MAX: converting an out-of-range or infinite
+ * double to an integer is undefined.
+ */
+static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table,
+                              uint32_t mtu)
+{
+       int cell_log = 0;
+
+       if (mtu == 0)
+               mtu = 2047;
+
+       while ((mtu >> cell_log) > 255)
+               cell_log++;
+
+       for (int i = 0; i < 256; i++) {
+               /* 64-bit so that large cell_log values cannot overflow */
+               uint64_t size = (uint64_t)(i + 1) << cell_log;
+               double t;
+
+               if (ratespec->rate == 0) {
+                       table[i] = UINT32_MAX;
+                       continue;
+               }
+
+               t = xmittime(ratespec->rate, size);
+               table[i] = (t >= (double)UINT32_MAX) ? UINT32_MAX
+                                                    : (uint32_t)t;
+       }
+
+       ratespec->cell_align = -1;
+       ratespec->cell_log = cell_log;
+       ratespec->linklayer = TC_LINKLAYER_ETHERNET;
+}
+
+/*
+ * Convert a prefix into the inet_prefix form used for flower filter keys.
+ *
+ * Returns 0 on success, or -1 when the address family is neither AF_INET
+ * nor AF_INET6 (only addr->family has been written in that case).
+ */
+static int tc_flower_get_inet_prefix(const struct prefix *prefix,
+                                    struct inet_prefix *addr)
+{
+       addr->family = prefix->family;
+
+       switch (addr->family) {
+       case AF_INET:
+               addr->bytelen = 4;
+               memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32));
+               break;
+       case AF_INET6:
+               addr->bytelen = 16;
+               memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val));
+               break;
+       default:
+               return -1;
+       }
+
+       addr->bitlen = prefix->prefixlen;
+       addr->flags = PREFIXLEN_SPECIFIED | ADDRTYPE_INET;
+
+       return 0;
+}
+
+/*
+ * Build the network mask that corresponds to a prefix's length, in the
+ * inet_prefix form used for flower filter keys.
+ *
+ * Returns 0 on success, or -1 when the address family is neither AF_INET
+ * nor AF_INET6.
+ */
+static int tc_flower_get_inet_mask(const struct prefix *prefix,
+                                  struct inet_prefix *addr)
+{
+       int remaining;
+       int nwords;
+
+       addr->family = prefix->family;
+
+       switch (addr->family) {
+       case AF_INET:
+               addr->bytelen = 4;
+               break;
+       case AF_INET6:
+               addr->bytelen = 16;
+               break;
+       default:
+               return -1;
+       }
+
+       addr->bitlen = prefix->prefixlen;
+       addr->flags = PREFIXLEN_SPECIFIED | ADDRTYPE_INET;
+
+       /* Start from all ones, then clear the host bits word by word */
+       memset(addr->data, 0xff, addr->bytelen);
+
+       remaining = prefix->prefixlen;
+       nwords = addr->bytelen / 4;
+
+       for (int w = 0; w < nwords; w++) {
+               if (remaining >= 32) {
+                       /* word lies entirely inside the prefix */
+                       remaining -= 32;
+               } else if (remaining > 0) {
+                       /* partial word: keep the top bits, network order */
+                       addr->data[w] <<= 32 - remaining;
+                       addr->data[w] = htonl(addr->data[w]);
+                       remaining = 0;
+               } else {
+                       /* word lies entirely outside the prefix */
+                       addr->data[w] = 0;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Traffic control queue discipline encoding (only "htb" supported)
+ *
+ * Encodes a qdisc message of type "cmd" for the context's interface into
+ * "data". The qdisc is attached at the root with handle minor 0.
+ *
+ * Returns the aligned message length, or 0 when the message does not fit
+ * into "datalen" bytes.
+ */
+static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+                                       void *data, size_t datalen)
+{
+       struct nlsock *nl;
+
+       const char *kind = "htb";
+
+       struct tc_htb_glob htb_glob = {
+               .rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS};
+
+       struct rtattr *nest;
+
+       struct {
+               struct nlmsghdr n;
+               struct tcmsg t;
+               char buf[0];
+       } *req = (void *)data;
+
+       if (datalen < sizeof(*req))
+               return 0;
+
+       nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+       memset(req, 0, sizeof(*req));
+
+       req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+       req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST | NLM_F_REPLACE;
+       req->n.nlmsg_type = cmd;
+       req->n.nlmsg_pid = nl->snl.nl_pid;
+
+       req->t.tcm_family = AF_UNSPEC;
+       req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+       req->t.tcm_handle = tc_get_handle(ctx, 0);
+       req->t.tcm_parent = TC_H_ROOT;
+
+       /*
+        * Every attribute helper reports failure when the attribute does
+        * not fit; a partially built message must never be returned.
+        */
+       if (!nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1))
+               return 0;
+
+       nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+       if (!nest)
+               return 0;
+
+       if (!nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob,
+                        sizeof(htb_glob)))
+               return 0;
+
+       nl_attr_nest_end(&req->n, nest);
+
+       return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Traffic control class encoding
+ *
+ * Encodes an htb class message of type "cmd" for the context's interface
+ * into "data". The class uses handle minor 1 and is parented to the qdisc
+ * with handle minor 0. A ceil below the rate is raised to the rate.
+ *
+ * Rates that do not fit in 32 bits are saturated in the htb option block
+ * and carried in full by the TCA_HTB_RATE64 / TCA_HTB_CEIL64 attributes.
+ *
+ * Returns the aligned message length, or 0 when the message does not fit
+ * into "datalen" bytes.
+ */
+static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+                                        void *data, size_t datalen)
+{
+       struct nlsock *nl;
+       struct tc_htb_opt htb_opt = {};
+
+       uint64_t rate, ceil;
+       uint32_t freq;
+
+       /* TODO: fetch mtu from interface */
+       uint32_t mtu = 0;
+
+       uint32_t rtab[256];
+       uint32_t ctab[256];
+
+       struct rtattr *nest;
+
+       struct {
+               struct nlmsghdr n;
+               struct tcmsg t;
+               char buf[0];
+       } *req = (void *)data;
+
+       if (datalen < sizeof(*req))
+               return 0;
+
+       nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+       memset(req, 0, sizeof(*req));
+
+       req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+       req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+       req->n.nlmsg_type = cmd;
+       req->n.nlmsg_pid = nl->snl.nl_pid;
+
+       req->t.tcm_family = AF_UNSPEC;
+       req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+       req->t.tcm_handle = tc_get_handle(ctx, 1);
+       req->t.tcm_parent = tc_get_handle(ctx, 0);
+
+       rate = dplane_ctx_tc_get_rate(ctx);
+       ceil = dplane_ctx_tc_get_ceil(ctx);
+
+       ceil = ceil < rate ? rate : ceil;
+
+       htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate;
+       htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil;
+
+       /* Read the frequency once; each call reads /proc/net/psched */
+       freq = tc_get_freq();
+
+       htb_opt.buffer = rate / freq;
+       htb_opt.cbuffer = ceil / freq;
+
+       /* The rate and the ceil each get their own table */
+       tc_calc_rate_table(&htb_opt.rate, rtab, mtu);
+       tc_calc_rate_table(&htb_opt.ceil, ctab, mtu);
+
+       htb_opt.ceil.mpu = htb_opt.rate.mpu = 0;
+       htb_opt.ceil.overhead = htb_opt.rate.overhead = 0;
+
+       nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+       if (!nest)
+               return 0;
+
+       if (rate >> 32 != 0) {
+               if (!nl_attr_put(&req->n, datalen, TCA_HTB_RATE64, &rate,
+                                sizeof(rate)))
+                       return 0;
+       }
+
+       if (ceil >> 32 != 0) {
+               if (!nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil,
+                                sizeof(ceil)))
+                       return 0;
+       }
+
+       if (!nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt,
+                        sizeof(htb_opt)))
+               return 0;
+
+       if (!nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab)))
+               return 0;
+
+       if (!nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab)))
+               return 0;
+
+       nl_attr_nest_end(&req->n, nest);
+
+       return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Append the key and mask attributes of one flower address match.
+ *
+ * Returns 1 on success, 0 when an attribute does not fit into the buffer,
+ * and -1 when the prefix has an unsupported address family.
+ */
+static int netlink_tfilter_put_prefix(struct nlmsghdr *n, size_t datalen,
+                                     const struct prefix *p, int key_v4,
+                                     int key_v6, int mask_v4, int mask_v6)
+{
+       struct inet_prefix addr;
+
+       if (tc_flower_get_inet_prefix(p, &addr) != 0)
+               return -1;
+
+       if (!nl_attr_put(n, datalen,
+                        (addr.family == AF_INET) ? key_v4 : key_v6,
+                        addr.data, addr.bytelen))
+               return 0;
+
+       if (tc_flower_get_inet_mask(p, &addr) != 0)
+               return -1;
+
+       if (!nl_attr_put(n, datalen,
+                        (addr.family == AF_INET) ? mask_v4 : mask_v6,
+                        addr.data, addr.bytelen))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Traffic control filter encoding (only "flower" supported)
+ *
+ * Encodes a flower filter message of type "cmd" for the context's
+ * interface into "data". The filter is parented to the qdisc with handle
+ * minor 0 and directs matches to the class with handle minor 1. Which
+ * match keys are emitted is selected by the context's filter bitmap.
+ *
+ * Returns the aligned message length, 0 when the message does not fit
+ * into "datalen" bytes, or -1 when a source/destination prefix has an
+ * unsupported address family. A negative result is reserved for failures
+ * that a larger buffer cannot cure, so the batching layer can tell them
+ * apart from a full buffer.
+ */
+static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+                                         void *data, size_t datalen)
+{
+       struct nlsock *nl;
+       struct rtattr *nest;
+
+       const char *kind = "flower";
+
+       uint16_t priority;
+       uint16_t protocol;
+       uint32_t classid;
+       uint32_t filter_bm;
+       uint32_t flags = 0;
+
+       int rc;
+
+       struct {
+               struct nlmsghdr n;
+               struct tcmsg t;
+               char buf[0];
+       } *req = (void *)data;
+
+       if (datalen < sizeof(*req))
+               return 0;
+
+       nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+       memset(req, 0, sizeof(*req));
+
+       req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+       req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST | NLM_F_EXCL;
+       req->n.nlmsg_type = cmd;
+       req->n.nlmsg_pid = nl->snl.nl_pid;
+
+       req->t.tcm_family = AF_UNSPEC;
+       req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+
+       /* TODO: priority and layer-3 protocol support */
+       priority = 0;
+       protocol = htons(ETH_P_IP);
+       classid = tc_get_handle(ctx, 1);
+       filter_bm = dplane_ctx_tc_get_filter_bm(ctx);
+
+       req->t.tcm_info = tc_make_handle(priority, protocol);
+
+       req->t.tcm_handle = 1;
+       req->t.tcm_parent = tc_get_handle(ctx, 0);
+
+       if (!nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1))
+               return 0;
+
+       nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+       if (!nest)
+               return 0;
+
+       if (!nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid,
+                        sizeof(classid)))
+               return 0;
+
+       if (filter_bm & TC_FILTER_SRC_IP) {
+               rc = netlink_tfilter_put_prefix(
+                       &req->n, datalen, dplane_ctx_tc_get_src_ip(ctx),
+                       TCA_FLOWER_KEY_IPV4_SRC, TCA_FLOWER_KEY_IPV6_SRC,
+                       TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                       TCA_FLOWER_KEY_IPV6_SRC_MASK);
+               if (rc <= 0)
+                       return rc;
+       }
+
+       if (filter_bm & TC_FILTER_DST_IP) {
+               rc = netlink_tfilter_put_prefix(
+                       &req->n, datalen, dplane_ctx_tc_get_dst_ip(ctx),
+                       TCA_FLOWER_KEY_IPV4_DST, TCA_FLOWER_KEY_IPV6_DST,
+                       TCA_FLOWER_KEY_IPV4_DST_MASK,
+                       TCA_FLOWER_KEY_IPV6_DST_MASK);
+               if (rc <= 0)
+                       return rc;
+       }
+
+       if (filter_bm & TC_FILTER_IP_PROTOCOL) {
+               if (!nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO,
+                                 dplane_ctx_tc_get_ip_proto(ctx)))
+                       return 0;
+       }
+
+       if (!nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags))
+               return 0;
+
+       if (!nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE,
+                          protocol))
+               return 0;
+
+       nl_attr_nest_end(&req->n, nest);
+
+       return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/* Batch encoder callback: build an RTM_NEWQDISC message for the context. */
+static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx,
+                                           void *buf, size_t buflen)
+{
+       const int cmd = RTM_NEWQDISC;
+
+       return netlink_qdisc_msg_encode(cmd, ctx, buf, buflen);
+}
+
+/* Batch encoder callback: build an RTM_NEWTCLASS message for the context. */
+static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx,
+                                            void *buf, size_t buflen)
+{
+       const int cmd = RTM_NEWTCLASS;
+
+       return netlink_tclass_msg_encode(cmd, ctx, buf, buflen);
+}
+
+/* Batch encoder callback: build an RTM_NEWTFILTER message for the context. */
+static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx,
+                                             void *buf, size_t buflen)
+{
+       const int cmd = RTM_NEWTFILTER;
+
+       return netlink_tfilter_msg_encode(cmd, ctx, buf, buflen);
+}
+
+/*
+ * Add the qdisc, class and filter messages for a traffic control context
+ * to the batch, in that order.
+ *
+ * The class and the filter both refer to the qdisc, so the sequence stops
+ * at the first message that cannot be added and FRR_NETLINK_ERROR is
+ * returned. Otherwise the status of the final (filter) message is
+ * returned.
+ */
+enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth,
+                                                 struct zebra_dplane_ctx *ctx)
+{
+       enum netlink_msg_status status;
+
+       /* TODO: other actions (delete, replace, ...) */
+
+       status = netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder,
+                                      false);
+       if (status == FRR_NETLINK_ERROR)
+               return status;
+
+       status = netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder,
+                                      false);
+       if (status == FRR_NETLINK_ERROR)
+               return status;
+
+       return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder,
+                                    false);
+}
+
+#endif /* HAVE_NETLINK */
diff --git a/zebra/tc_netlink.h b/zebra/tc_netlink.h
new file mode 100644 (file)
index 0000000..2190bca
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Zebra Traffic Control (TC) interaction with the kernel using netlink.
+ *
+ * Copyright (C) 2022 Shichu Yang
+ *
+ * This file is part of FRR.
+ *
+ * FRR is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * FRR is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FRR; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _ZEBRA_TC_NETLINK_H
+#define _ZEBRA_TC_NETLINK_H
+
+#ifdef HAVE_NETLINK
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Represent a prefixed address in flower filter */
+
+struct inet_prefix {
+       /* Combination of the PREFIXLEN_SPECIFIED / ADDRTYPE_* bits */
+       uint16_t flags;
+       /* Address length in bytes: 4 for AF_INET, 16 for AF_INET6 */
+       uint16_t bytelen;
+       /* Prefix length in bits */
+       uint16_t bitlen;
+       /* Address family (AF_INET or AF_INET6) */
+       uint16_t family;
+       /* Address (or mask) bytes; the first bytelen bytes are meaningful */
+       uint32_t data[64];
+};
+
+/*
+ * Bits for inet_prefix.flags. The last two values are convenience
+ * combinations of the single-bit flags above them.
+ */
+enum {
+       PREFIXLEN_SPECIFIED = (1 << 0),
+       ADDRTYPE_INET = (1 << 1),
+       ADDRTYPE_UNSPEC = (1 << 2),
+       ADDRTYPE_MULTI = (1 << 3),
+
+       ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC,
+       ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI
+};
+
+extern enum netlink_msg_status
+netlink_put_tc_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HAVE_NETLINK */
+
+#endif /* _ZEBRA_TC_NETLINK_H */
index 4c7838198e3764b87348a91ebe323223e4244077..2fc1fad8ba1730e409e1912593e343f1ac87490d 100644 (file)
@@ -313,6 +313,25 @@ struct dplane_netconf_info {
        enum dplane_netconf_status_e linkdown_val;
 };
 
+/*
+ * Traffic control contexts for the dplane
+ *
+ * Carried in the context's per-operation union and read through the
+ * dplane_ctx_tc_get_*() accessors.
+ */
+struct dplane_tc_info {
+       /* Rate spec (unit: Bytes/s) */
+       uint64_t rate;
+       uint64_t ceil;
+
+       /* TODO: custom burst */
+
+       /* Filter components for "tfilter" */
+       /* Bitmap selecting which of the components below are in use */
+       uint32_t filter_bm;
+       /* Source and destination prefixes to match */
+       struct prefix src_ip;
+       struct prefix dst_ip;
+       /* IP protocol number to match */
+       uint8_t ip_proto;
+
+       /* TODO: more filter components */
+};
+
 /*
  * The context block used to exchange info about route updates across
  * the boundary between the zebra main context (and pthread) and the
@@ -362,6 +381,7 @@ struct zebra_dplane_ctx {
                struct dplane_mac_info macinfo;
                struct dplane_neigh_info neigh;
                struct dplane_rule_info rule;
+               struct dplane_tc_info tc;
                struct zebra_pbr_iptable iptable;
                struct zebra_pbr_ipset ipset;
                struct {
@@ -540,6 +560,9 @@ static struct zebra_dplane_globals {
        _Atomic uint32_t dg_intfs_in;
        _Atomic uint32_t dg_intf_errors;
 
+       _Atomic uint32_t dg_tcs_in;
+       _Atomic uint32_t dg_tcs_errors;
+
        /* Dataplane pthread */
        struct frr_pthread *dg_pthread;
 
@@ -777,6 +800,9 @@ static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx)
        case DPLANE_OP_INTF_INSTALL:
        case DPLANE_OP_INTF_UPDATE:
        case DPLANE_OP_INTF_DELETE:
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
                break;
 
        case DPLANE_OP_IPSET_ENTRY_ADD:
@@ -1100,6 +1126,16 @@ const char *dplane_op2str(enum dplane_op_e op)
        case DPLANE_OP_INTF_DELETE:
                ret = "INTF_DELETE";
                break;
+
+       case DPLANE_OP_TC_INSTALL:
+               ret = "TC_INSTALL";
+               break;
+       case DPLANE_OP_TC_UPDATE:
+               ret = "TC_UPDATE";
+               break;
+       case DPLANE_OP_TC_DELETE:
+               ret = "TC_DELETE";
+               break;
        }
 
        return ret;
@@ -1419,6 +1455,50 @@ uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx)
        return ctx->u.rinfo.zd_old_distance;
 }
 
+/* Return the rate stored in a traffic control context. */
+uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx)
+{
+       const struct dplane_tc_info *tc;
+
+       DPLANE_CTX_VALID(ctx);
+       tc = &ctx->u.tc;
+
+       return tc->rate;
+}
+
+/* Return the ceil stored in a traffic control context. */
+uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx)
+{
+       const struct dplane_tc_info *tc;
+
+       DPLANE_CTX_VALID(ctx);
+       tc = &ctx->u.tc;
+
+       return tc->ceil;
+}
+
+/* Return the filter bitmap stored in a traffic control context. */
+uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx)
+{
+       const struct dplane_tc_info *tc;
+
+       DPLANE_CTX_VALID(ctx);
+       tc = &ctx->u.tc;
+
+       return tc->filter_bm;
+}
+
+/* Return a pointer to the source prefix held inside the context. */
+const struct prefix *
+dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx)
+{
+       const struct dplane_tc_info *tc;
+
+       DPLANE_CTX_VALID(ctx);
+       tc = &ctx->u.tc;
+
+       return &tc->src_ip;
+}
+
+/* Return a pointer to the destination prefix held inside the context. */
+const struct prefix *
+dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx)
+{
+       const struct dplane_tc_info *tc;
+
+       DPLANE_CTX_VALID(ctx);
+       tc = &ctx->u.tc;
+
+       return &tc->dst_ip;
+}
+
+/* Return the IP protocol number stored in a traffic control context. */
+uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx)
+{
+       const struct dplane_tc_info *tc;
+
+       DPLANE_CTX_VALID(ctx);
+       tc = &ctx->u.tc;
+
+       return tc->ip_proto;
+}
+
 /*
  * Set the nexthops associated with a context: note that processing code
  * may well expect that nexthops are in canonical (sorted) order, so we
@@ -2691,6 +2771,26 @@ done:
        return ret;
 }
 
+/*
+ * Prepare a context block for a traffic control operation: record the
+ * operation, mark the request as successful so far, and attach the
+ * default namespace. Always returns AOK.
+ */
+int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op)
+{
+       ctx->zd_op = op;
+       ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
+
+       /* TODO: init traffic control qdisc */
+
+       /* No vrf is associated yet, so the default namespace is used */
+       dplane_ctx_ns_init(ctx, zebra_ns_lookup(NS_DEFAULT), true);
+
+       return AOK;
+}
+
 /**
  * dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update
  *
@@ -3410,6 +3510,47 @@ dplane_route_update_internal(struct route_node *rn,
        return result;
 }
 
+/*
+ * Allocate a context for a traffic control operation, initialise it and
+ * hand it to the dataplane queue.
+ *
+ * The incoming counter is bumped for every call; the error counter is
+ * bumped, and the context released, when any step fails.
+ */
+static enum zebra_dplane_result dplane_tc_update_internal(enum dplane_op_e op)
+{
+       struct zebra_dplane_ctx *ctx = dplane_ctx_alloc();
+       int rc;
+
+       if (ctx == NULL) {
+               rc = ENOMEM;
+       } else {
+               /* Init context with info from zebra data structs */
+               rc = dplane_ctx_tc_init(ctx, op);
+
+               if (rc == AOK)
+                       rc = dplane_update_enqueue(ctx);
+       }
+
+       /* Update counter */
+       atomic_fetch_add_explicit(&zdplane_info.dg_tcs_in, 1,
+                                 memory_order_relaxed);
+
+       if (rc == AOK)
+               return ZEBRA_DPLANE_REQUEST_QUEUED;
+
+       atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, 1,
+                                 memory_order_relaxed);
+
+       /* The context was never queued, so it is still ours to free */
+       if (ctx)
+               dplane_ctx_free(&ctx);
+
+       return ZEBRA_DPLANE_REQUEST_FAILURE;
+}
+
+/* Enqueue a DPLANE_OP_TC_UPDATE operation for the dataplane. */
+enum zebra_dplane_result dplane_tc_update(void)
+{
+       const enum dplane_op_e op = DPLANE_OP_TC_UPDATE;
+
+       return dplane_tc_update_internal(op);
+}
+
 /**
  * dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes
  *
@@ -5591,6 +5732,13 @@ static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx)
                           dplane_ctx_get_ifindex(ctx),
                           dplane_ctx_intf_is_protodown(ctx));
                break;
+
+       /* TODO: more detailed log */
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
+               zlog_debug("Dplane tc ifidx %u", dplane_ctx_get_ifindex(ctx));
+               break;
        }
 }
 
@@ -5734,6 +5882,14 @@ static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx)
                                                  1, memory_order_relaxed);
                break;
 
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
+               if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
+                       atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors,
+                                                 1, memory_order_relaxed);
+               break;
+
        /* Ignore 'notifications' - no-op */
        case DPLANE_OP_SYS_ROUTE_ADD:
        case DPLANE_OP_SYS_ROUTE_DELETE:
index c96ea400946aa7889121a714abace8f877e60cc1..8b239a9ba19f8ebfe61b908b6e5e5fe8b449ae5d 100644 (file)
@@ -193,6 +193,11 @@ enum dplane_op_e {
        DPLANE_OP_INTF_INSTALL,
        DPLANE_OP_INTF_UPDATE,
        DPLANE_OP_INTF_DELETE,
+
+       /* Traffic control */
+       DPLANE_OP_TC_INSTALL,
+       DPLANE_OP_TC_UPDATE,
+       DPLANE_OP_TC_DELETE,
 };
 
 /*
@@ -378,6 +383,16 @@ uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx);
 void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance);
 uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx);
 
+/* Accessors for traffic control context */
+uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx);
+uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx);
+uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx);
+const struct prefix *
+dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx);
+const struct prefix *
+dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx);
+uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx);
+
 void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh);
 void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx,
                               const struct nexthop_group *nhg);
@@ -707,6 +722,13 @@ enum zebra_dplane_result dplane_intf_add(const struct interface *ifp);
 enum zebra_dplane_result dplane_intf_update(const struct interface *ifp);
 enum zebra_dplane_result dplane_intf_delete(const struct interface *ifp);
 
+/*
+ * Enqueue traffic control changes for the dataplane.
+ */
+enum zebra_dplane_result dplane_tc_add(void);
+enum zebra_dplane_result dplane_tc_update(void);
+enum zebra_dplane_result dplane_tc_delete(void);
+
 /*
  * Link layer operations for the dataplane.
  */
@@ -849,6 +871,9 @@ int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
 int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
                         const struct interface *ifp);
 
+/* Encode traffic control information into data plane context. */
+int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op);
+
 /* Retrieve the limit on the number of pending, unprocessed updates. */
 uint32_t dplane_get_in_queue_limit(void);
 
index c5b533fc22a7000bbbec55e8742a469df825e56f..1964c763c579ad6e51c3bceebc0e51239c6f52b7 100644 (file)
@@ -3125,6 +3125,9 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
        case DPLANE_OP_INTF_INSTALL:
        case DPLANE_OP_INTF_UPDATE:
        case DPLANE_OP_INTF_DELETE:
+       case DPLANE_OP_TC_INSTALL:
+       case DPLANE_OP_TC_UPDATE:
+       case DPLANE_OP_TC_DELETE:
                break;
        }
 }
index 79eb99ddf9448252641e678af8d9cb5b54dc73b4..03bda8cc33e580d6d6689c2c01b876ca8dfd4be8 100644 (file)
@@ -4391,6 +4391,11 @@ static void rib_process_dplane_results(struct thread *thread)
                                zebra_if_dplane_result(ctx);
                                break;
 
+                       case DPLANE_OP_TC_INSTALL:
+                       case DPLANE_OP_TC_UPDATE:
+                       case DPLANE_OP_TC_DELETE:
+                               break;
+
                        /* Some op codes not handled here */
                        case DPLANE_OP_ADDR_INSTALL:
                        case DPLANE_OP_ADDR_UNINSTALL: