summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- zebra/kernel_netlink.c 65
-rw-r--r-- zebra/kernel_socket.c 20
-rw-r--r-- zebra/main.c 12
-rw-r--r-- zebra/rt.h 2
-rw-r--r-- zebra/zebra_dplane.c 895
-rw-r--r-- zebra/zebra_dplane.h 114
-rw-r--r-- zebra/zebra_ns.c 39
-rw-r--r-- zebra/zebra_ns.h 8
-rw-r--r-- zebra/zebra_rib.c 7
-rw-r--r-- zebra/zebra_vty.c 18
-rw-r--r-- zebra/zebra_vxlan.c 132
-rw-r--r-- zebra/zebra_vxlan.h 4
12 files changed, 1141 insertions, 175 deletions
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
index 0772c59b92..afc3985854 100644
--- a/zebra/kernel_netlink.c
+++ b/zebra/kernel_netlink.c
@@ -396,7 +396,7 @@ static int kernel_read(struct thread *thread)
/*
* Filter out messages from self that occur on listener socket,
- * caused by our actions on the command socket
+ * caused by our actions on the command socket(s)
*
* When we add new Netlink message types we probably
* do not need to add them here as that we are filtering
@@ -407,7 +407,7 @@ static int kernel_read(struct thread *thread)
* so that we only had to write one way to handle incoming
* address add/delete changes.
*/
-static void netlink_install_filter(int sock, __u32 pid)
+static void netlink_install_filter(int sock, __u32 pid, __u32 dplane_pid)
{
/*
* BPF_JUMP instructions and where you jump to are based upon
@@ -418,7 +418,8 @@ static void netlink_install_filter(int sock, __u32 pid)
struct sock_filter filter[] = {
/*
* Logic:
- * if (nlmsg_pid == pid) {
+ * if (nlmsg_pid == pid ||
+ * nlmsg_pid == dplane_pid) {
* if (the incoming nlmsg_type ==
* RTM_NEWADDR | RTM_DELADDR)
* keep this message
@@ -435,26 +436,30 @@ static void netlink_install_filter(int sock, __u32 pid)
/*
* 1: Compare to pid
*/
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 0, 4),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
/*
- * 2: Load the nlmsg_type into BPF register
+ * 2: Compare to dplane pid
+ */
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 4),
+ /*
+ * 3: Load the nlmsg_type into BPF register
*/
BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
offsetof(struct nlmsghdr, nlmsg_type)),
/*
- * 3: Compare to RTM_NEWADDR
+ * 4: Compare to RTM_NEWADDR
*/
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 2, 0),
/*
- * 4: Compare to RTM_DELADDR
+ * 5: Compare to RTM_DELADDR
*/
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 1, 0),
/*
- * 5: This is the end state of we want to skip the
+ * 6: This is the end state if we want to skip the
* message
*/
BPF_STMT(BPF_RET | BPF_K, 0),
- /* 6: This is the end state of we want to keep
+ /* 7: This is the end state if we want to keep
* the message
*/
BPF_STMT(BPF_RET | BPF_K, 0xffff),
@@ -1102,6 +1107,15 @@ void kernel_init(struct zebra_ns *zns)
exit(-1);
}
+ snprintf(zns->netlink_dplane.name, sizeof(zns->netlink_dplane.name),
+ "netlink-dp (NS %u)", zns->ns_id);
+ zns->netlink_dplane.sock = -1;
+ if (netlink_socket(&zns->netlink_dplane, 0, zns->ns_id) < 0) {
+ zlog_err("Failure to create %s socket",
+ zns->netlink_dplane.name);
+ exit(-1);
+ }
+
/*
* SOL_NETLINK is not available on all platforms yet
* apparently. It's in bits/socket.h which I am not
@@ -1110,14 +1124,22 @@ void kernel_init(struct zebra_ns *zns)
#if defined SOL_NETLINK
/*
* Let's tell the kernel that we want to receive extended
- * ACKS over our command socket
+ * ACKS over our command socket(s)
*/
one = 1;
ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK,
&one, sizeof(one));
if (ret < 0)
- zlog_notice("Registration for extended ACK failed : %d %s",
+ zlog_notice("Registration for extended cmd ACK failed : %d %s",
+ errno, safe_strerror(errno));
+
+ one = 1;
+ ret = setsockopt(zns->netlink_dplane.sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+
+ if (ret < 0)
+ zlog_notice("Registration for extended dp ACK failed : %d %s",
errno, safe_strerror(errno));
#endif
@@ -1130,12 +1152,18 @@ void kernel_init(struct zebra_ns *zns)
zlog_err("Can't set %s socket error: %s(%d)",
zns->netlink_cmd.name, safe_strerror(errno), errno);
+ if (fcntl(zns->netlink_dplane.sock, F_SETFL, O_NONBLOCK) < 0)
+ zlog_err("Can't set %s socket error: %s(%d)",
+ zns->netlink_dplane.name, safe_strerror(errno), errno);
+
/* Set receive buffer size if it's set from command line */
if (nl_rcvbufsize)
netlink_recvbuf(&zns->netlink, nl_rcvbufsize);
netlink_install_filter(zns->netlink.sock,
- zns->netlink_cmd.snl.nl_pid);
+ zns->netlink_cmd.snl.nl_pid,
+ zns->netlink_dplane.snl.nl_pid);
+
zns->t_netlink = NULL;
thread_add_read(zebrad.master, kernel_read, zns,
@@ -1144,7 +1172,7 @@ void kernel_init(struct zebra_ns *zns)
rt_netlink_init();
}
-void kernel_terminate(struct zebra_ns *zns)
+void kernel_terminate(struct zebra_ns *zns, bool complete)
{
THREAD_READ_OFF(zns->t_netlink);
@@ -1157,6 +1185,15 @@ void kernel_terminate(struct zebra_ns *zns)
close(zns->netlink_cmd.sock);
zns->netlink_cmd.sock = -1;
}
-}
+ /* During zebra shutdown, we need to leave the dataplane socket
+ * around until all work is done.
+ */
+ if (complete) {
+ if (zns->netlink_dplane.sock >= 0) {
+ close(zns->netlink_dplane.sock);
+ zns->netlink_dplane.sock = -1;
+ }
+ }
+}
#endif /* HAVE_NETLINK */
diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c
index 7af3083fd2..dcc22d2162 100644
--- a/zebra/kernel_socket.c
+++ b/zebra/kernel_socket.c
@@ -278,6 +278,11 @@ static const struct message rtm_flag_str[] = {{RTF_UP, "UP"},
/* Kernel routing update socket. */
int routing_sock = -1;
+/* Kernel dataplane routing update socket, used in the dataplane pthread
+ * context.
+ */
+int dplane_routing_sock = -1;
+
/* Yes I'm checking ugly routing socket behavior. */
/* #define DEBUG */
@@ -1136,7 +1141,7 @@ int rtm_write(int message, union sockunion *dest, union sockunion *mask,
char buf[512];
} msg;
- if (routing_sock < 0)
+ if (dplane_routing_sock < 0)
return ZEBRA_ERR_EPERM;
/* Clear and set rt_msghdr values */
@@ -1243,7 +1248,7 @@ int rtm_write(int message, union sockunion *dest, union sockunion *mask,
msg.rtm.rtm_msglen = pnt - (caddr_t)&msg;
- ret = write(routing_sock, &msg, msg.rtm.rtm_msglen);
+ ret = write(dplane_routing_sock, &msg, msg.rtm.rtm_msglen);
if (ret != msg.rtm.rtm_msglen) {
if (errno == EEXIST)
@@ -1390,6 +1395,9 @@ static void routing_socket(struct zebra_ns *zns)
{
frr_elevate_privs(&zserv_privs) {
routing_sock = ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id);
+
+ dplane_routing_sock =
+ ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id);
}
if (routing_sock < 0) {
@@ -1397,6 +1405,12 @@ static void routing_socket(struct zebra_ns *zns)
return;
}
+ if (dplane_routing_sock < 0) {
+ flog_err_sys(EC_LIB_SOCKET,
+ "Can't init kernel dataplane routing socket");
+ return;
+ }
+
/* XXX: Socket should be NONBLOCK, however as we currently
* discard failed writes, this will lead to inconsistencies.
* For now, socket must be blocking.
@@ -1415,7 +1429,7 @@ void kernel_init(struct zebra_ns *zns)
routing_socket(zns);
}
-void kernel_terminate(struct zebra_ns *zns)
+void kernel_terminate(struct zebra_ns *zns, bool complete)
{
return;
}
diff --git a/zebra/main.c b/zebra/main.c
index 5628b5e022..5b5ee8259a 100644
--- a/zebra/main.c
+++ b/zebra/main.c
@@ -172,7 +172,7 @@ static void sigint(void)
work_queue_free_and_null(&zebrad.lsp_process_q);
vrf_terminate();
- ns_walk_func(zebra_ns_disabled);
+ ns_walk_func(zebra_ns_early_shutdown);
zebra_ns_notify_close();
access_list_reset();
@@ -196,6 +196,9 @@ int zebra_finalize(struct thread *dummy)
{
zlog_info("Zebra final shutdown");
+ /* Final shutdown of ns resources */
+ ns_walk_func(zebra_ns_final_shutdown);
+
/* Stop dplane thread and finish any cleanup */
zebra_dplane_shutdown();
@@ -390,6 +393,9 @@ int main(int argc, char **argv)
vty_config_lockless();
zebrad.master = frr_init();
+ /* Initialize pthread library */
+ frr_pthread_init();
+
/* Zebra related initialize. */
zebra_router_init();
zserv_init();
@@ -445,8 +451,8 @@ int main(int argc, char **argv)
/* Needed for BSD routing socket. */
pid = getpid();
- /* Intialize pthread library */
- frr_pthread_init();
+ /* Start dataplane system */
+ zebra_dplane_start();
/* Start Zebra API server */
zserv_start(zserv_path);
diff --git a/zebra/rt.h b/zebra/rt.h
index 70ac6f635c..0317dc85ba 100644
--- a/zebra/rt.h
+++ b/zebra/rt.h
@@ -86,7 +86,7 @@ extern int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip);
*/
extern void interface_list(struct zebra_ns *zns);
extern void kernel_init(struct zebra_ns *zns);
-extern void kernel_terminate(struct zebra_ns *zns);
+extern void kernel_terminate(struct zebra_ns *zns, bool complete);
extern void macfdb_read(struct zebra_ns *zns);
extern void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
struct interface *br_if);
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index 3e61418b64..ba0f1b41aa 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -38,9 +38,14 @@ DEFINE_MTYPE(ZEBRA, DP_PROV, "Zebra DPlane Provider")
# define AOK 0
#endif
+/* Enable test dataplane provider */
+/*#define DPLANE_TEST_PROVIDER 1 */
+
/* Default value for max queued incoming updates */
const uint32_t DPLANE_DEFAULT_MAX_QUEUED = 200;
+/* Default value for new work per cycle */
+const uint32_t DPLANE_DEFAULT_NEW_WORK = 100;
/* Validation check macro for context blocks */
/* #define DPLANE_DEBUG 1 */
@@ -69,6 +74,12 @@ struct zebra_dplane_ctx {
/* Status on return */
enum zebra_dplane_result zd_status;
+ /* Dplane provider id */
+ uint32_t zd_provider;
+
+ /* Flags - used by providers, e.g. DPLANE_CTX_FLAG_NO_KERNEL */
+ int zd_flags;
+
/* TODO -- internal/sub-operation status? */
enum zebra_dplane_result zd_remote_status;
enum zebra_dplane_result zd_kernel_status;
@@ -118,6 +129,12 @@ struct zebra_dplane_ctx {
TAILQ_ENTRY(zebra_dplane_ctx) zd_q_entries;
};
+/* Flag that can be set by a pre-kernel provider as a signal that an update
+ * should bypass the kernel.
+ */
+#define DPLANE_CTX_FLAG_NO_KERNEL 0x01
+
+
/*
* Registration block for one dataplane provider.
*/
@@ -131,16 +148,37 @@ struct zebra_dplane_provider {
/* Id value */
uint32_t dp_id;
+ /* Mutex */
+ pthread_mutex_t dp_mutex;
+
+ /* Plugin-provided extra data */
+ void *dp_data;
+
+ /* Flags */
+ int dp_flags;
+
dplane_provider_process_fp dp_fp;
dplane_provider_fini_fp dp_fini;
_Atomic uint32_t dp_in_counter;
+ _Atomic uint32_t dp_in_queued;
+ _Atomic uint32_t dp_in_max;
+ _Atomic uint32_t dp_out_counter;
+ _Atomic uint32_t dp_out_queued;
+ _Atomic uint32_t dp_out_max;
_Atomic uint32_t dp_error_counter;
- /* Embedded list linkage */
- TAILQ_ENTRY(zebra_dplane_provider) dp_q_providers;
+ /* Queue of contexts inbound to the provider */
+ struct dplane_ctx_q dp_ctx_in_q;
+ /* Queue of completed contexts outbound from the provider back
+ * towards the dataplane module.
+ */
+ struct dplane_ctx_q dp_ctx_out_q;
+
+ /* Embedded list linkage for provider objects */
+ TAILQ_ENTRY(zebra_dplane_provider) dp_prov_link;
};
/*
@@ -171,10 +209,19 @@ static struct zebra_dplane_globals {
/* Limit number of pending, unprocessed updates */
_Atomic uint32_t dg_max_queued_updates;
+ /* Limit number of new updates dequeued at once, to pace an
+ * incoming burst.
+ */
+ uint32_t dg_updates_per_cycle;
+
_Atomic uint32_t dg_routes_in;
_Atomic uint32_t dg_routes_queued;
_Atomic uint32_t dg_routes_queued_max;
_Atomic uint32_t dg_route_errors;
+ _Atomic uint32_t dg_update_yields;
+
+ /* Dataplane pthread */
+ struct frr_pthread *dg_pthread;
/* Event-delivery context 'master' for the dplane */
struct thread_master *dg_master;
@@ -188,19 +235,33 @@ static struct zebra_dplane_globals {
} zdplane_info;
/*
- * Lock and unlock for interactions with the zebra 'core'
+ * Lock and unlock for interactions with the zebra 'core' pthread
*/
#define DPLANE_LOCK() pthread_mutex_lock(&zdplane_info.dg_mutex)
-
#define DPLANE_UNLOCK() pthread_mutex_unlock(&zdplane_info.dg_mutex)
+
+/*
+ * Lock and unlock for individual providers
+ */
+#define DPLANE_PROV_LOCK(p) pthread_mutex_lock(&((p)->dp_mutex))
+#define DPLANE_PROV_UNLOCK(p) pthread_mutex_unlock(&((p)->dp_mutex))
+
/* Prototypes */
-static int dplane_route_process(struct thread *event);
+static int dplane_thread_loop(struct thread *event);
+static void dplane_info_from_zns(struct zebra_dplane_info *ns_info,
+ struct zebra_ns *zns);
/*
* Public APIs
*/
+/* Obtain thread_master for dataplane thread */
+struct thread_master *dplane_get_thread_master(void)
+{
+ return zdplane_info.dg_master;
+}
+
/*
* Allocate a dataplane update context
*/
@@ -249,7 +310,7 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx)
*/
void dplane_ctx_fini(struct zebra_dplane_ctx **pctx)
{
- /* TODO -- enqueue for next provider; for now, just free */
+ /* TODO -- maintain pool; for now, just free */
dplane_ctx_free(pctx);
}
@@ -260,15 +321,27 @@ void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q,
TAILQ_INSERT_TAIL(q, (struct zebra_dplane_ctx *)ctx, zd_q_entries);
}
+/* Append a list of context blocks to another list */
+void dplane_ctx_list_append(struct dplane_ctx_q *to_list,
+ struct dplane_ctx_q *from_list)
+{
+ if (TAILQ_FIRST(from_list)) {
+ TAILQ_CONCAT(to_list, from_list, zd_q_entries);
+
+ /* And clear 'from' list */
+ TAILQ_INIT(from_list);
+ }
+}
+
/* Dequeue a context block from the head of a list */
-void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp)
+struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q)
{
struct zebra_dplane_ctx *ctx = TAILQ_FIRST(q);
if (ctx)
TAILQ_REMOVE(q, ctx, zd_q_entries);
- *ctxp = ctx;
+ return ctx;
}
/*
@@ -282,6 +355,38 @@ enum zebra_dplane_result dplane_ctx_get_status(
return ctx->zd_status;
}
+void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx,
+ enum zebra_dplane_result status)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ ctx->zd_status = status;
+}
+
+/* Retrieve last/current provider id */
+uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+ return ctx->zd_provider;
+}
+
+/* Providers run before the kernel can control whether a kernel
+ * update should be done.
+ */
+void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ SET_FLAG(ctx->zd_flags, DPLANE_CTX_FLAG_NO_KERNEL);
+}
+
+bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return CHECK_FLAG(ctx->zd_flags, DPLANE_CTX_FLAG_NO_KERNEL);
+}
+
enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
@@ -441,7 +546,7 @@ uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
- return ctx->zd_instance;
+ return ctx->zd_old_instance;
}
uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx *ctx)
@@ -514,6 +619,7 @@ const struct zebra_dplane_info *dplane_ctx_get_ns(
* End of dplane context accessors
*/
+
/*
* Retrieve the limit on the number of pending, unprocessed updates.
*/
@@ -565,6 +671,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
goto done;
ctx->zd_op = op;
+ ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;
ctx->zd_type = re->type;
ctx->zd_old_type = re->type;
@@ -601,16 +708,17 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
zvrf = vrf_info_lookup(re->vrf_id);
zns = zvrf->zns;
- zebra_dplane_info_from_zns(&(ctx->zd_ns_info), zns, true /*is_cmd*/);
+ /* Internal copy helper */
+ dplane_info_from_zns(&(ctx->zd_ns_info), zns);
#if defined(HAVE_NETLINK)
/* Increment message counter after copying to context struct - may need
* two messages in some 'update' cases.
*/
if (op == DPLANE_OP_ROUTE_UPDATE)
- zns->netlink_cmd.seq += 2;
+ zns->netlink_dplane.seq += 2;
else
- zns->netlink_cmd.seq++;
+ zns->netlink_dplane.seq++;
#endif /* NETLINK*/
/* Copy nexthops; recursive info is included too */
@@ -618,7 +726,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
/* TODO -- maybe use array of nexthops to avoid allocs? */
- /* Ensure that the dplane's nexthop flag is clear. */
+ /* Ensure that the dplane's nexthops flags are clear. */
for (ALL_NEXTHOPS(ctx->zd_ng, nexthop))
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
@@ -675,35 +783,12 @@ static int dplane_route_enqueue(struct zebra_dplane_ctx *ctx)
}
/* Ensure that an event for the dataplane thread is active */
- thread_add_event(zdplane_info.dg_master, dplane_route_process, NULL, 0,
- &zdplane_info.dg_t_update);
-
- ret = AOK;
+ ret = dplane_provider_work_ready();
return ret;
}
/*
- * Attempt to dequeue a route-update block
- */
-static struct zebra_dplane_ctx *dplane_route_dequeue(void)
-{
- struct zebra_dplane_ctx *ctx = NULL;
-
- DPLANE_LOCK();
- {
- ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q);
- if (ctx) {
- TAILQ_REMOVE(&zdplane_info.dg_route_ctx_q,
- ctx, zd_q_entries);
- }
- }
- DPLANE_UNLOCK();
-
- return ctx;
-}
-
-/*
* Utility that prepares a route update and enqueues it for processing
*/
static enum zebra_dplane_result
@@ -826,67 +911,14 @@ done:
}
/*
- * Event handler function for routing updates
- */
-static int dplane_route_process(struct thread *event)
-{
- enum zebra_dplane_result res;
- struct zebra_dplane_ctx *ctx;
-
- while (1) {
- /* Check for shutdown */
- if (!zdplane_info.dg_run)
- break;
-
- /* TODO -- limit number of updates per cycle? */
- ctx = dplane_route_dequeue();
- if (ctx == NULL)
- break;
-
- /* Update counter */
- atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, 1,
- memory_order_relaxed);
-
- if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
- char dest_str[PREFIX_STRLEN];
-
- prefix2str(dplane_ctx_get_dest(ctx),
- dest_str, sizeof(dest_str));
-
- zlog_debug("%u:%s Dplane route update ctx %p op %s",
- dplane_ctx_get_vrf(ctx), dest_str,
- ctx, dplane_op2str(dplane_ctx_get_op(ctx)));
- }
-
- /* TODO -- support series of providers */
-
- /* Initially, just doing kernel-facing update here */
- res = kernel_route_update(ctx);
-
- if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
- atomic_fetch_add_explicit(&zdplane_info.dg_route_errors,
- 1, memory_order_relaxed);
-
- ctx->zd_status = res;
-
- /* Enqueue result to zebra main context */
- zdplane_info.dg_results_cb(ctx);
-
- ctx = NULL;
- }
-
- return 0;
-}
-
-/*
* Handler for 'show dplane'
*/
int dplane_show_helper(struct vty *vty, bool detailed)
{
- uint64_t queued, limit, queue_max, errs, incoming;
+ uint64_t queued, queue_max, limit, errs, incoming, yields;
/* Using atomics because counters are being changed in different
- * contexts.
+ * pthread contexts.
*/
incoming = atomic_load_explicit(&zdplane_info.dg_routes_in,
memory_order_relaxed);
@@ -898,12 +930,16 @@ int dplane_show_helper(struct vty *vty, bool detailed)
memory_order_relaxed);
errs = atomic_load_explicit(&zdplane_info.dg_route_errors,
memory_order_relaxed);
+ yields = atomic_load_explicit(&zdplane_info.dg_update_yields,
+ memory_order_relaxed);
- vty_out(vty, "Route updates: %"PRIu64"\n", incoming);
+ vty_out(vty, "Zebra dataplane:\nRoute updates: %"PRIu64"\n",
+ incoming);
vty_out(vty, "Route update errors: %"PRIu64"\n", errs);
vty_out(vty, "Route update queue limit: %"PRIu64"\n", limit);
vty_out(vty, "Route update queue depth: %"PRIu64"\n", queued);
vty_out(vty, "Route update queue max: %"PRIu64"\n", queue_max);
+ vty_out(vty, "Route update yields: %"PRIu64"\n", yields);
return CMD_SUCCESS;
}
@@ -913,8 +949,35 @@ int dplane_show_helper(struct vty *vty, bool detailed)
*/
int dplane_show_provs_helper(struct vty *vty, bool detailed)
{
- vty_out(vty, "Zebra dataplane providers:%s\n",
- (detailed ? " (detailed)" : ""));
+ struct zebra_dplane_provider *prov;
+ uint64_t in, in_max, out, out_max;
+
+ vty_out(vty, "Zebra dataplane providers:\n");
+
+ DPLANE_LOCK();
+ prov = TAILQ_FIRST(&zdplane_info.dg_providers_q);
+ DPLANE_UNLOCK();
+
+ /* Show counters, useful info from each registered provider */
+ while (prov) {
+
+ in = atomic_load_explicit(&prov->dp_in_counter,
+ memory_order_relaxed);
+ in_max = atomic_load_explicit(&prov->dp_in_max,
+ memory_order_relaxed);
+ out = atomic_load_explicit(&prov->dp_out_counter,
+ memory_order_relaxed);
+ out_max = atomic_load_explicit(&prov->dp_out_max,
+ memory_order_relaxed);
+
+ vty_out(vty, "%s (%u): in: %"PRIu64", q_max: %"PRIu64", "
+ "out: %"PRIu64", q_max: %"PRIu64"\n",
+ prov->dp_name, prov->dp_id, in, in_max, out, out_max);
+
+ DPLANE_LOCK();
+ prov = TAILQ_NEXT(prov, dp_prov_link);
+ DPLANE_UNLOCK();
+ }
return CMD_SUCCESS;
}
@@ -923,9 +986,11 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed)
* Provider registration
*/
int dplane_provider_register(const char *name,
- enum dplane_provider_prio_e prio,
+ enum dplane_provider_prio prio,
+ int flags,
dplane_provider_process_fp fp,
- dplane_provider_fini_fp fini_fp)
+ dplane_provider_fini_fp fini_fp,
+ void *data)
{
int ret = 0;
struct zebra_dplane_provider *p, *last;
@@ -949,37 +1014,201 @@ int dplane_provider_register(const char *name,
goto done;
}
- strncpy(p->dp_name, name, DPLANE_PROVIDER_NAMELEN);
- p->dp_name[DPLANE_PROVIDER_NAMELEN] = '\0'; /* Belt-and-suspenders */
+ pthread_mutex_init(&(p->dp_mutex), NULL);
+ TAILQ_INIT(&(p->dp_ctx_in_q));
+ TAILQ_INIT(&(p->dp_ctx_out_q));
p->dp_priority = prio;
p->dp_fp = fp;
p->dp_fini = fini_fp;
+ p->dp_data = data;
- /* Lock the lock - the dplane pthread may be running */
+ /* Lock - the dplane pthread may be running */
DPLANE_LOCK();
p->dp_id = ++zdplane_info.dg_provider_id;
+ if (name)
+ strlcpy(p->dp_name, name, DPLANE_PROVIDER_NAMELEN);
+ else
+ snprintf(p->dp_name, DPLANE_PROVIDER_NAMELEN,
+ "provider-%u", p->dp_id);
+
/* Insert into list ordered by priority */
- TAILQ_FOREACH(last, &zdplane_info.dg_providers_q, dp_q_providers) {
+ TAILQ_FOREACH(last, &zdplane_info.dg_providers_q, dp_prov_link) {
if (last->dp_priority > p->dp_priority)
break;
}
if (last)
- TAILQ_INSERT_BEFORE(last, p, dp_q_providers);
+ TAILQ_INSERT_BEFORE(last, p, dp_prov_link);
else
TAILQ_INSERT_TAIL(&zdplane_info.dg_providers_q, p,
- dp_q_providers);
+ dp_prov_link);
/* And unlock */
DPLANE_UNLOCK();
+ if (IS_ZEBRA_DEBUG_DPLANE)
+ zlog_debug("dplane: registered new provider '%s' (%u), prio %d",
+ p->dp_name, p->dp_id, p->dp_priority);
+
done:
return ret;
}
+/* Accessors for provider attributes */
+const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov)
+{
+ return prov->dp_name;
+}
+
+uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov)
+{
+ return prov->dp_id;
+}
+
+void *dplane_provider_get_data(const struct zebra_dplane_provider *prov)
+{
+ return prov->dp_data;
+}
+
+int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov)
+{
+ return zdplane_info.dg_updates_per_cycle;
+}
+
+/* Lock/unlock a provider's mutex - iff the provider was registered with
+ * the THREADED flag.
+ */
+void dplane_provider_lock(struct zebra_dplane_provider *prov)
+{
+ if (dplane_provider_is_threaded(prov))
+ DPLANE_PROV_LOCK(prov);
+}
+
+void dplane_provider_unlock(struct zebra_dplane_provider *prov)
+{
+ if (dplane_provider_is_threaded(prov))
+ DPLANE_PROV_UNLOCK(prov);
+}
+
+/*
+ * Dequeue and maintain associated counter
+ */
+struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx(
+ struct zebra_dplane_provider *prov)
+{
+ struct zebra_dplane_ctx *ctx = NULL;
+
+ dplane_provider_lock(prov);
+
+ ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q));
+ if (ctx) {
+ TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries);
+
+ atomic_fetch_sub_explicit(&prov->dp_in_queued, 1,
+ memory_order_relaxed);
+ }
+
+ dplane_provider_unlock(prov);
+
+ return ctx;
+}
+
+/*
+ * Dequeue work to a list, return count
+ */
+int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov,
+ struct dplane_ctx_q *listp)
+{
+ int limit, ret;
+ struct zebra_dplane_ctx *ctx;
+
+ limit = zdplane_info.dg_updates_per_cycle;
+
+ dplane_provider_lock(prov);
+
+ for (ret = 0; ret < limit; ret++) {
+ ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q));
+ if (ctx) {
+ TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries);
+
+ TAILQ_INSERT_TAIL(listp, ctx, zd_q_entries);
+ } else {
+ break;
+ }
+ }
+
+ if (ret > 0)
+ atomic_fetch_sub_explicit(&prov->dp_in_queued, ret,
+ memory_order_relaxed);
+
+ dplane_provider_unlock(prov);
+
+ return ret;
+}
+
+/*
+ * Enqueue and maintain associated counter
+ */
+void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov,
+ struct zebra_dplane_ctx *ctx)
+{
+ dplane_provider_lock(prov);
+
+ TAILQ_INSERT_TAIL(&(prov->dp_ctx_out_q), ctx,
+ zd_q_entries);
+
+ dplane_provider_unlock(prov);
+
+ atomic_fetch_add_explicit(&(prov->dp_out_counter), 1,
+ memory_order_relaxed);
+}
+
+/*
+ * Accessor for provider object
+ */
+bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov)
+{
+ return (prov->dp_flags & DPLANE_PROV_FLAG_THREADED);
+}
+
+/*
+ * Internal helper that copies information from a zebra ns object; this is
+ * called in the zebra main pthread context as part of dplane ctx init.
+ */
+static void dplane_info_from_zns(struct zebra_dplane_info *ns_info,
+ struct zebra_ns *zns)
+{
+ ns_info->ns_id = zns->ns_id;
+
+#if defined(HAVE_NETLINK)
+ ns_info->is_cmd = true;
+ ns_info->nls = zns->netlink_dplane;
+#endif /* NETLINK */
+}
+
+/*
+ * Provider api to signal that work/events are available
+ * for the dataplane pthread.
+ */
+int dplane_provider_work_ready(void)
+{
+ /* Note that during zebra startup, we may be offered work before
+ * the dataplane pthread (and thread-master) are ready. We want to
+ * enqueue the work, but the event-scheduling machinery may not be
+ * available.
+ */
+ if (zdplane_info.dg_run) {
+ thread_add_event(zdplane_info.dg_master,
+ dplane_thread_loop, NULL, 0,
+ &zdplane_info.dg_t_update);
+ }
+
+ return AOK;
+}
+
/*
* Zebra registers a results callback with the dataplane system
*/
@@ -990,27 +1219,163 @@ int dplane_results_register(dplane_results_fp fp)
}
/*
- * Initialize the dataplane module during startup, internal/private version
+ * Kernel dataplane provider
*/
-static void zebra_dplane_init_internal(struct zebra_t *zebra)
+
+/*
+ * Kernel provider callback
+ */
+static int kernel_dplane_process_func(struct zebra_dplane_provider *prov)
{
- memset(&zdplane_info, 0, sizeof(zdplane_info));
+ enum zebra_dplane_result res;
+ struct zebra_dplane_ctx *ctx;
+ int counter, limit;
- pthread_mutex_init(&zdplane_info.dg_mutex, NULL);
+ limit = dplane_provider_get_work_limit(prov);
- TAILQ_INIT(&zdplane_info.dg_route_ctx_q);
- TAILQ_INIT(&zdplane_info.dg_providers_q);
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane provider '%s': processing",
+ dplane_provider_get_name(prov));
- zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED;
+ for (counter = 0; counter < limit; counter++) {
+
+ ctx = dplane_provider_dequeue_in_ctx(prov);
+ if (ctx == NULL)
+ break;
- /* TODO -- register default kernel 'provider' during init */
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
+ char dest_str[PREFIX_STRLEN];
- zdplane_info.dg_run = true;
+ prefix2str(dplane_ctx_get_dest(ctx),
+ dest_str, sizeof(dest_str));
+
+ zlog_debug("%u:%s Dplane route update ctx %p op %s",
+ dplane_ctx_get_vrf(ctx), dest_str,
+ ctx, dplane_op2str(dplane_ctx_get_op(ctx)));
+ }
+
+ /* Call into the synchronous kernel-facing code here */
+ res = kernel_route_update(ctx);
+
+ if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
+ atomic_fetch_add_explicit(
+ &zdplane_info.dg_route_errors, 1,
+ memory_order_relaxed);
+
+ dplane_ctx_set_status(ctx, res);
+
+ dplane_provider_enqueue_out_ctx(prov, ctx);
+ }
+
+ /* Ensure that we'll run the work loop again if there's still
+ * more work to do.
+ */
+ if (counter >= limit) {
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane provider '%s' reached max updates %d",
+ dplane_provider_get_name(prov), counter);
+
+ atomic_fetch_add_explicit(&zdplane_info.dg_update_yields,
+ 1, memory_order_relaxed);
+
+ dplane_provider_work_ready();
+ }
+
+ return 0;
+}
+
+#if DPLANE_TEST_PROVIDER
+
+/*
+ * Test dataplane provider plugin
+ */
+
+/*
+ * Test provider process callback
+ */
+static int test_dplane_process_func(struct zebra_dplane_provider *prov)
+{
+ struct zebra_dplane_ctx *ctx;
+ int counter, limit;
+
+ /* Just moving from 'in' queue to 'out' queue */
+
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane provider '%s': processing",
+ dplane_provider_get_name(prov));
+
+ limit = dplane_provider_get_work_limit(prov);
+
+ for (counter = 0; counter < limit; counter++) {
+
+ ctx = dplane_provider_dequeue_in_ctx(prov);
+ if (ctx == NULL)
+ break;
- /* TODO -- start dataplane pthread. We're using the zebra
- * core/main thread temporarily
+ dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
+
+ dplane_provider_enqueue_out_ctx(prov, ctx);
+ }
+
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane provider '%s': processed %d",
+ dplane_provider_get_name(prov), counter);
+
+ /* Ensure that we'll run the work loop again if there's still
+ * more work to do.
*/
- zdplane_info.dg_master = zebra->master;
+ if (counter >= limit)
+ dplane_provider_work_ready();
+
+ return 0;
+}
+
+/*
+ * Test provider shutdown/fini callback
+ */
+static int test_dplane_shutdown_func(struct zebra_dplane_provider *prov,
+ bool early)
+{
+ if (IS_ZEBRA_DEBUG_DPLANE)
+ zlog_debug("dplane provider '%s': %sshutdown",
+ dplane_provider_get_name(prov),
+ early ? "early " : "");
+
+ return 0;
+}
+#endif /* DPLANE_TEST_PROVIDER */
+
+/*
+ * Register default kernel provider
+ */
+static void dplane_provider_init(void)
+{
+ int ret;
+
+ ret = dplane_provider_register("Kernel",
+ DPLANE_PRIO_KERNEL,
+ DPLANE_PROV_FLAGS_DEFAULT,
+ kernel_dplane_process_func,
+ NULL,
+ NULL);
+
+ if (ret != AOK)
+ zlog_err("Unable to register kernel dplane provider: %d",
+ ret);
+
+#if DPLANE_TEST_PROVIDER
+ /* Optional test provider ... */
+ ret = dplane_provider_register("Test",
+ DPLANE_PRIO_PRE_KERNEL,
+ DPLANE_PROV_FLAGS_DEFAULT,
+ test_dplane_process_func,
+ test_dplane_shutdown_func,
+ NULL /* data */);
+
+ if (ret != AOK)
+ zlog_err("Unable to register test dplane provider: %d",
+ ret);
+#endif /* DPLANE_TEST_PROVIDER */
}
/* Indicates zebra shutdown/exit is in progress. Some operations may be
@@ -1026,7 +1391,7 @@ bool dplane_is_in_shutdown(void)
* early during zebra shutdown, as a signal to stop new work and prepare
* for updates generated by shutdown/cleanup activity, as zebra tries to
* remove everything it's responsible for.
- * NB: This runs in the main zebra thread context.
+ * NB: This runs in the main zebra pthread context.
*/
void zebra_dplane_pre_finish(void)
{
@@ -1035,7 +1400,7 @@ void zebra_dplane_pre_finish(void)
zdplane_info.dg_is_shutdown = true;
- /* Notify provider(s) of pending shutdown */
+ /* TODO -- Notify provider(s) of pending shutdown */
}
/*
@@ -1044,16 +1409,48 @@ void zebra_dplane_pre_finish(void)
*/
static bool dplane_work_pending(void)
{
+ bool ret = false;
struct zebra_dplane_ctx *ctx;
+ struct zebra_dplane_provider *prov;
- /* TODO -- just checking incoming/pending work for now */
+ /* Check the incoming/pending work queue first, then each
+ * provider's inbound and outbound queues.
+ */
DPLANE_LOCK();
{
ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q);
+ prov = TAILQ_FIRST(&zdplane_info.dg_providers_q);
}
DPLANE_UNLOCK();
- return (ctx != NULL);
+ if (ctx != NULL) {
+ ret = true;
+ goto done;
+ }
+
+ while (prov) {
+
+ dplane_provider_lock(prov);
+
+ ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q));
+ if (ctx == NULL)
+ ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q));
+
+ dplane_provider_unlock(prov);
+
+ if (ctx != NULL)
+ break;
+
+ DPLANE_LOCK();
+ prov = TAILQ_NEXT(prov, dp_prov_link);
+ DPLANE_UNLOCK();
+ }
+
+ if (ctx != NULL)
+ ret = true;
+
+done:
+ return ret;
}
/*
@@ -1108,6 +1505,205 @@ void zebra_dplane_finish(void)
}
/*
+ * Main dataplane pthread event loop. The thread takes new incoming work
+ * and offers it to the first provider. It then iterates through the
+ * providers, taking completed work from each one and offering it
+ * to the next in order. At each step, at most 'dg_updates_per_cycle'
+ * updates are moved, in order to provide some fairness.
+ *
+ * The providers are traversed only once per call, so that the dataplane
+ * pthread can look for other pending work - such as i/o work on behalf of
+ * providers. The 'event' argument is unused; the return value is always 0.
+ */
+static int dplane_thread_loop(struct thread *event)
+{
+ struct dplane_ctx_q work_list;
+ struct dplane_ctx_q error_list;
+ struct zebra_dplane_provider *prov;
+ struct zebra_dplane_ctx *ctx, *tctx;
+ int limit, counter, error_counter;
+ uint64_t curr, high;
+
+ /* Capture work limit per cycle */
+ limit = zdplane_info.dg_updates_per_cycle;
+
+ /* Init temporary lists used to move contexts among providers */
+ TAILQ_INIT(&work_list);
+ TAILQ_INIT(&error_list);
+ error_counter = 0;
+
+ /* Check for zebra shutdown */
+ if (!zdplane_info.dg_run)
+ goto done;
+
+ /* Dequeue some incoming work from zebra (if any) onto the temporary
+ * working list.
+ */
+ DPLANE_LOCK();
+
+ /* Locate initial registered provider */
+ prov = TAILQ_FIRST(&zdplane_info.dg_providers_q);
+
+ /* Move new work from incoming list to temp list */
+ for (counter = 0; counter < limit; counter++) {
+ ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q);
+ if (ctx) {
+ TAILQ_REMOVE(&zdplane_info.dg_route_ctx_q, ctx,
+ zd_q_entries);
+
+ ctx->zd_provider = prov->dp_id; /* NOTE(review): 'prov' is not NULL-checked here */
+
+ TAILQ_INSERT_TAIL(&work_list, ctx, zd_q_entries);
+ } else {
+ break;
+ }
+ }
+
+ DPLANE_UNLOCK();
+
+ atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter,
+ memory_order_relaxed);
+
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane: incoming new work counter: %d", counter);
+
+ /* Iterate through the registered providers, offering new incoming
+ * work. If the provider has outgoing work in its queue, take that
+ * work for the next provider
+ */
+ while (prov) {
+
+ /* At each iteration, the temporary work list has 'counter'
+ * items.
+ */
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane enqueues %d new work to provider '%s'",
+ counter, dplane_provider_get_name(prov));
+
+ /* Capture current provider id in each context; check for
+ * error status.
+ */
+ TAILQ_FOREACH_SAFE(ctx, &work_list, zd_q_entries, tctx) {
+ if (dplane_ctx_get_status(ctx) ==
+ ZEBRA_DPLANE_REQUEST_SUCCESS) {
+ ctx->zd_provider = prov->dp_id;
+ } else {
+ /*
+ * TODO -- improve error-handling: recirc
+ * errors backwards so that providers can
+ * 'undo' their work (if they want to)
+ */
+
+ /* Move to error list; will be returned
+ * to zebra main.
+ */
+ TAILQ_REMOVE(&work_list, ctx, zd_q_entries);
+ TAILQ_INSERT_TAIL(&error_list,
+ ctx, zd_q_entries);
+ error_counter++;
+ }
+ }
+
+ /* Enqueue new work to the provider */
+ dplane_provider_lock(prov);
+
+ if (TAILQ_FIRST(&work_list))
+ TAILQ_CONCAT(&(prov->dp_ctx_in_q), &work_list,
+ zd_q_entries);
+
+ atomic_fetch_add_explicit(&prov->dp_in_counter, counter,
+ memory_order_relaxed); /* NOTE(review): counter still includes errored ctxs */
+ atomic_fetch_add_explicit(&prov->dp_in_queued, counter,
+ memory_order_relaxed);
+ curr = atomic_load_explicit(&prov->dp_in_queued,
+ memory_order_relaxed);
+ high = atomic_load_explicit(&prov->dp_in_max,
+ memory_order_relaxed);
+ if (curr > high) /* record a new high-water mark */
+ atomic_store_explicit(&prov->dp_in_max, curr,
+ memory_order_relaxed);
+
+ dplane_provider_unlock(prov);
+
+ /* Reset the temp list (though the 'concat' may have done this
+ * already), and the counter
+ */
+ TAILQ_INIT(&work_list);
+ counter = 0; /* from here on: count of completed work dequeued */
+
+ /* Call into the provider code. Note that this is
+ * unconditional: we offer to do work even if we don't enqueue
+ * any _new_ work.
+ */
+ (*prov->dp_fp)(prov);
+
+ /* Check for zebra shutdown */
+ if (!zdplane_info.dg_run)
+ break;
+
+ /* Dequeue completed work from the provider */
+ dplane_provider_lock(prov);
+
+ while (counter < limit) {
+ ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q));
+ if (ctx) {
+ TAILQ_REMOVE(&(prov->dp_ctx_out_q), ctx,
+ zd_q_entries);
+
+ TAILQ_INSERT_TAIL(&work_list,
+ ctx, zd_q_entries);
+ counter++;
+ } else
+ break;
+ }
+
+ dplane_provider_unlock(prov);
+
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane dequeues %d completed work from provider %s",
+ counter, dplane_provider_get_name(prov));
+
+ /* Locate next provider */
+ DPLANE_LOCK();
+ prov = TAILQ_NEXT(prov, dp_prov_link);
+ DPLANE_UNLOCK();
+ }
+
+ /* After all providers have been serviced, enqueue any completed
+ * work and any errors back to zebra so it can process the results.
+ */
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("dplane has %d completed, %d errors, for zebra main",
+ counter, error_counter);
+
+ /*
+ * TODO -- I'd rather hand lists through the api to zebra main,
+ * to reduce the number of lock/unlock cycles
+ */
+ for (ctx = TAILQ_FIRST(&error_list); ctx; ) {
+ TAILQ_REMOVE(&error_list, ctx, zd_q_entries);
+
+ /* Call through to zebra main */
+ (*zdplane_info.dg_results_cb)(ctx);
+
+ ctx = TAILQ_FIRST(&error_list);
+ }
+
+
+ for (ctx = TAILQ_FIRST(&work_list); ctx; ) {
+ TAILQ_REMOVE(&work_list, ctx, zd_q_entries);
+
+ /* Call through to zebra main */
+ (*zdplane_info.dg_results_cb)(ctx);
+
+ ctx = TAILQ_FIRST(&work_list);
+ }
+
+done:
+ return 0;
+}
+
+/*
* Final phase of shutdown, after all work enqueued to dplane has been
* processed. This is called from the zebra main pthread context.
*/
@@ -1122,14 +1718,65 @@ void zebra_dplane_shutdown(void)
THREAD_OFF(zdplane_info.dg_t_update);
- /* TODO */
- /* frr_pthread_stop(...) */
+ frr_pthread_stop(zdplane_info.dg_pthread, NULL);
+
+ /* Destroy pthread */
+ frr_pthread_destroy(zdplane_info.dg_pthread);
+ zdplane_info.dg_pthread = NULL;
+ zdplane_info.dg_master = NULL;
+
+ /* TODO -- Notify provider(s) of final shutdown */
+
+ /* TODO -- Clean-up provider objects */
+
+ /* TODO -- Clean queue(s), free memory */
+}
+
+/*
+ * Internal dataplane init at startup; the 'zebra' argument is not used.
+ */
+static void zebra_dplane_init_internal(struct zebra_t *zebra)
+{
+ memset(&zdplane_info, 0, sizeof(zdplane_info)); /* start from all-zero global state */
+
+ pthread_mutex_init(&zdplane_info.dg_mutex, NULL);
+
+ TAILQ_INIT(&zdplane_info.dg_route_ctx_q);
+ TAILQ_INIT(&zdplane_info.dg_providers_q);
+
+ zdplane_info.dg_updates_per_cycle = DPLANE_DEFAULT_NEW_WORK; /* read as the per-cycle limit in dplane_thread_loop() */
+
+ zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED;
+
+ /* Register default kernel 'provider' during init */
+ dplane_provider_init();
+}
+
+/*
+ * Start the dataplane pthread. This step needs to be run later than the
+ * 'init' step, in case zebra has forked.
+ */
+void zebra_dplane_start(void)
+{
+ /* Create the pthread using the default start/stop handlers */
+
+ struct frr_pthread_attr pattr = {
+ .start = frr_pthread_attr_default.start,
+ .stop = frr_pthread_attr_default.stop
+ };
+
+ zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread",
+ "Zebra dplane");
- /* Notify provider(s) of final shutdown */
+ zdplane_info.dg_master = zdplane_info.dg_pthread->master; /* NOTE(review): dg_pthread is used without a NULL check */
- /* Clean-up provider objects */
+ zdplane_info.dg_run = true; /* dplane_thread_loop() does no work while this is false */
+
+ /* Enqueue an initial event for the dataplane pthread */
+ thread_add_event(zdplane_info.dg_master, dplane_thread_loop, NULL, 0,
+ &zdplane_info.dg_t_update);
- /* Clean queue(s) */
+ frr_pthread_run(zdplane_info.dg_pthread, NULL);
}
/*
diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
index 999e0f39e4..b6b2e64600 100644
--- a/zebra/zebra_dplane.h
+++ b/zebra/zebra_dplane.h
@@ -29,7 +29,6 @@
#include "zebra/rib.h"
#include "zebra/zserv.h"
-
/* Key netlink info from zebra ns */
struct zebra_dplane_info {
ns_id_t ns_id;
@@ -121,20 +120,28 @@ TAILQ_HEAD(dplane_ctx_q, zebra_dplane_ctx);
*/
void dplane_ctx_fini(struct zebra_dplane_ctx **pctx);
-/* Enqueue a context block to caller's tailq. This just exists so that the
+/* Enqueue a context block to caller's tailq. This exists so that the
* context struct can remain opaque.
*/
void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q,
const struct zebra_dplane_ctx *ctx);
+/* Append a list of context blocks to another list - again, just keeping
+ * the context struct opaque.
+ */
+void dplane_ctx_list_append(struct dplane_ctx_q *to_list,
+ struct dplane_ctx_q *from_list);
+
/* Dequeue a context block from the head of caller's tailq */
-void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp);
+struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q);
/*
* Accessors for information from the context object
*/
enum zebra_dplane_result dplane_ctx_get_status(
const struct zebra_dplane_ctx *ctx);
+void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx,
+ enum zebra_dplane_result status);
const char *dplane_res2str(enum zebra_dplane_result res);
enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx);
@@ -142,6 +149,15 @@ const char *dplane_op2str(enum dplane_op_e op);
const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx);
+/* Retrieve last/current provider id */
+uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx);
+
+/* Providers running before the kernel can control whether a kernel
+ * update should be done.
+ */
+void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx);
+bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx);
+
/* Source prefix is a little special - use convention to return NULL
* to mean "no src prefix"
*/
@@ -212,9 +228,11 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed);
/*
- * Dataplane providers: modules that consume dataplane events.
+ * Dataplane providers: modules that process or consume dataplane events.
*/
+struct zebra_dplane_provider;
+
/* Support string name for a dataplane provider */
#define DPLANE_PROVIDER_NAMELEN 64
@@ -223,7 +241,7 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed);
* followed by the kernel, followed by some post-processing step (such as
* the fpm output stream.)
*/
-enum dplane_provider_prio_e {
+enum dplane_provider_prio {
DPLANE_PRIO_NONE = 0,
DPLANE_PRIO_PREPROCESS,
DPLANE_PRIO_PRE_KERNEL,
@@ -232,28 +250,81 @@ enum dplane_provider_prio_e {
DPLANE_PRIO_LAST
};
-/* Provider's entry-point to process a context block */
-typedef int (*dplane_provider_process_fp)(struct zebra_dplane_ctx *ctx);
+/* Provider's entry-point for incoming work, called in the context of the
+ * dataplane pthread. The dataplane pthread enqueues any new work to the
+ * provider's 'inbound' queue, then calls the callback. The dataplane
+ * then checks the provider's outbound queue.
+ */
+typedef int (*dplane_provider_process_fp)(struct zebra_dplane_provider *prov);
+
+/* Provider's entry-point for shutdown and cleanup. Called with 'early'
+ * during shutdown, to indicate that the dataplane subsystem is allowing
+ * work to move through the providers and finish. When called without 'early',
+ * the provider should release all resources (if it has any allocated).
+ */
+typedef int (*dplane_provider_fini_fp)(struct zebra_dplane_provider *prov,
+ bool early);
-/* Provider's entry-point for shutdown and cleanup */
-typedef int (*dplane_provider_fini_fp)(void);
+/* Flags values used during provider registration. */
+#define DPLANE_PROV_FLAGS_DEFAULT 0x0
-/* Provider registration */
+/* Provider will be spawning its own worker thread */
+#define DPLANE_PROV_FLAG_THREADED 0x1
+
+
+/* Provider registration: ordering or priority value, callbacks, and optional
+ * opaque data value.
+ */
int dplane_provider_register(const char *name,
- enum dplane_provider_prio_e prio,
+ enum dplane_provider_prio prio,
+ int flags,
dplane_provider_process_fp fp,
- dplane_provider_fini_fp fini_fp);
+ dplane_provider_fini_fp fini_fp,
+ void *data);
-/*
- * Results are returned to zebra core via a callback
+/* Accessors for provider attributes */
+const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov);
+uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov);
+void *dplane_provider_get_data(const struct zebra_dplane_provider *prov);
+bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov);
+
+/* Lock/unlock a provider's mutex - iff the provider was registered with
+ * the THREADED flag.
*/
-typedef int (*dplane_results_fp)(const struct zebra_dplane_ctx *ctx);
+void dplane_provider_lock(struct zebra_dplane_provider *prov);
+void dplane_provider_unlock(struct zebra_dplane_provider *prov);
+
+/* Obtain thread_master for dataplane thread */
+struct thread_master *dplane_get_thread_master(void);
+
+/* Providers should (generally) limit number of updates per work cycle */
+int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov);
+
+/* Provider api to signal that work/events are available
+ * for the dataplane pthread.
+ */
+int dplane_provider_work_ready(void);
+
+/* Dequeue, maintain associated counter and locking */
+struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx(
+ struct zebra_dplane_provider *prov);
+
+/* Dequeue work to a list, maintain counter and locking, return count */
+int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov,
+ struct dplane_ctx_q *listp);
+
+/* Enqueue, maintain associated counter and locking */
+void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov,
+ struct zebra_dplane_ctx *ctx);
/*
* Zebra registers a results callback with the dataplane. The callback is
- * called in the dataplane thread context, so the expectation is that the
- * context is queued (or that processing is very limited).
+ * called in the dataplane pthread context, so the expectation is that the
+ * context is queued for the zebra main pthread or that processing
+ * is very limited.
*/
+typedef int (*dplane_results_fp)(struct zebra_dplane_ctx *ctx);
+
int dplane_results_register(dplane_results_fp fp);
/*
@@ -262,9 +333,16 @@ int dplane_results_register(dplane_results_fp fp);
*/
void zebra_dplane_init(void);
+/*
+ * Start the dataplane pthread. This step needs to be run later than the
+ * 'init' step, in case zebra has fork-ed.
+ */
+void zebra_dplane_start(void);
+
/* Finalize/cleanup apis, one called early as shutdown is starting,
* one called late at the end of zebra shutdown, and then one called
- * from the zebra main thread to stop the dplane thread free all resources.
+ * from the zebra main pthread to stop the dplane pthread and
+ * free all resources.
*
* Zebra expects to try to clean up all vrfs and all routes during
* shutdown, so the dplane must be available until very late.
diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c
index e65f23dc8a..965c8c206c 100644
--- a/zebra/zebra_ns.c
+++ b/zebra/zebra_ns.c
@@ -47,6 +47,7 @@ DEFINE_MTYPE(ZEBRA, ZEBRA_NS, "Zebra Name Space")
static struct zebra_ns *dzns;
static int logicalrouter_config_write(struct vty *vty);
+static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete);
struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id)
{
@@ -111,7 +112,7 @@ int zebra_ns_disabled(struct ns *ns)
zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id);
if (!zns)
return 0;
- return zebra_ns_disable(ns->ns_id, (void **)&zns);
+ return zebra_ns_disable_internal(zns, true);
}
/* Do global enable actions - open sockets, read kernel config etc. */
@@ -135,17 +136,18 @@ int zebra_ns_enable(ns_id_t ns_id, void **info)
return 0;
}
-int zebra_ns_disable(ns_id_t ns_id, void **info)
+/* Common handler for ns disable - this can be called during ns config,
+ * or during zebra shutdown.
+ */
+static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete)
{
- struct zebra_ns *zns = (struct zebra_ns *)(*info);
-
route_table_finish(zns->if_table);
zebra_vxlan_ns_disable(zns);
#if defined(HAVE_RTADV)
rtadv_terminate(zns);
#endif
- kernel_terminate(zns);
+ kernel_terminate(zns, complete);
table_manager_disable(zns->ns_id);
@@ -154,6 +156,33 @@ int zebra_ns_disable(ns_id_t ns_id, void **info)
return 0;
}
+/* During zebra shutdown, do partial cleanup while the async dataplane
+ * is still running. Returns 0 when the ns has no zebra_ns attached.
+ */
+int zebra_ns_early_shutdown(struct ns *ns)
+{
+ struct zebra_ns *zns = ns->info;
+
+ if (zns == NULL) /* no zebra_ns attached to this ns */
+ return 0;
+
+ return zebra_ns_disable_internal(zns, false); /* false is handed on to kernel_terminate() as 'complete' */
+}
+
+/* During zebra shutdown, do final cleanup
+ * after all dataplane work is complete. Always returns 0.
+ */
+int zebra_ns_final_shutdown(struct ns *ns)
+{
+ struct zebra_ns *zns = ns->info;
+
+ if (zns == NULL) /* no zebra_ns attached to this ns */
+ return 0;
+
+ kernel_terminate(zns, true); /* same call zebra_ns_disable_internal() makes when complete == true */
+
+ return 0;
+}
int zebra_ns_init(void)
{
diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
index c1a9b41b8d..d3592f8f30 100644
--- a/zebra/zebra_ns.h
+++ b/zebra/zebra_ns.h
@@ -46,8 +46,9 @@ struct zebra_ns {
ns_id_t ns_id;
#ifdef HAVE_NETLINK
- struct nlsock netlink; /* kernel messages */
- struct nlsock netlink_cmd; /* command channel */
+ struct nlsock netlink; /* kernel messages */
+ struct nlsock netlink_cmd; /* command channel */
+ struct nlsock netlink_dplane; /* dataplane channel */
struct thread *t_netlink;
#endif
@@ -62,7 +63,8 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id);
int zebra_ns_init(void);
int zebra_ns_enable(ns_id_t ns_id, void **info);
int zebra_ns_disabled(struct ns *ns);
-int zebra_ns_disable(ns_id_t ns_id, void **info);
+int zebra_ns_early_shutdown(struct ns *ns);
+int zebra_ns_final_shutdown(struct ns *ns);
int zebra_ns_config_write(struct vty *vty, struct ns *ns);
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 8285392527..f2d07310ee 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -1932,11 +1932,10 @@ static void rib_process_after(struct zebra_dplane_ctx *ctx)
op = dplane_ctx_get_op(ctx);
status = dplane_ctx_get_status(ctx);
- if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%u:%s Processing dplane ctx %p, op %s result %s",
dplane_ctx_get_vrf(ctx), dest_str, ctx,
dplane_op2str(op), dplane_res2str(status));
- }
if (op == DPLANE_OP_ROUTE_DELETE) {
/*
@@ -3267,7 +3266,7 @@ static int rib_process_dplane_results(struct thread *thread)
pthread_mutex_lock(&dplane_mutex);
{
/* Dequeue context block */
- dplane_ctx_dequeue(&rib_dplane_q, &ctx);
+ ctx = dplane_ctx_dequeue(&rib_dplane_q);
}
pthread_mutex_unlock(&dplane_mutex);
@@ -3289,7 +3288,7 @@ static int rib_process_dplane_results(struct thread *thread)
* the dataplane pthread. We enqueue the results here for processing by
* the main thread later.
*/
-static int rib_dplane_results(const struct zebra_dplane_ctx *ctx)
+static int rib_dplane_results(struct zebra_dplane_ctx *ctx)
{
/* Take lock controlling queue of results */
pthread_mutex_lock(&dplane_mutex);
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 536fdabcbc..ad6bfe5751 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -2181,6 +2181,23 @@ DEFUN (show_evpn_neigh_vni_all,
return CMD_SUCCESS;
}
+DEFUN (show_evpn_neigh_vni_all_detail, show_evpn_neigh_vni_all_detail_cmd,
+ "show evpn arp-cache vni all detail [json]",
+ SHOW_STR
+ "EVPN\n"
+ "ARP and ND cache\n"
+ "VxLAN Network Identifier\n"
+ "All VNIs\n"
+ "Neighbor details for all vnis in detail\n" JSON_STR)
+{
+ struct zebra_vrf *zvrf;
+ bool uj = use_json(argc, argv);
+
+ zvrf = vrf_info_lookup(VRF_DEFAULT); /* always looks up the default VRF */
+ zebra_vxlan_print_neigh_all_vni_detail(vty, zvrf, false, uj); /* false == print_dup off */
+ return CMD_SUCCESS;
+}
+
DEFUN (show_evpn_neigh_vni_neigh,
show_evpn_neigh_vni_neigh_cmd,
"show evpn arp-cache vni " CMD_VNI_RANGE " ip WORD [json]",
@@ -2892,6 +2909,7 @@ void zebra_vty_init(void)
install_element(VIEW_NODE, &show_evpn_mac_vni_all_dad_cmd);
install_element(VIEW_NODE, &show_evpn_neigh_vni_cmd);
install_element(VIEW_NODE, &show_evpn_neigh_vni_all_cmd);
+ install_element(VIEW_NODE, &show_evpn_neigh_vni_all_detail_cmd);
install_element(VIEW_NODE, &show_evpn_neigh_vni_neigh_cmd);
install_element(VIEW_NODE, &show_evpn_neigh_vni_vtep_cmd);
install_element(VIEW_NODE, &show_evpn_neigh_vni_dad_cmd);
diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c
index 86f6961118..a4c09b6f41 100644
--- a/zebra/zebra_vxlan.c
+++ b/zebra/zebra_vxlan.c
@@ -872,6 +872,33 @@ static void zvni_print_neigh_hash(struct hash_backet *backet, void *ctxt)
}
/*
+ * Print one neighbor hash entry in detail; used as a hash_iterate() callback
+ * whose 'ctxt' is a struct neigh_walk_ctx.
+ */
+static void zvni_print_neigh_hash_detail(struct hash_backet *backet, void *ctxt)
+{
+ struct vty *vty;
+ json_object *json_vni = NULL, *json_row = NULL;
+ zebra_neigh_t *n;
+ char buf[INET6_ADDRSTRLEN];
+ struct neigh_walk_ctx *wctx = ctxt;
+
+ vty = wctx->vty;
+ json_vni = wctx->json;
+ n = (zebra_neigh_t *)backet->data;
+ if (!n)
+ return;
+
+ ipaddr2str(&n->ip, buf, sizeof(buf)); /* 'buf' is only consumed on the JSON path below */
+ if (json_vni)
+ json_row = json_object_new_object();
+
+ zvni_print_neigh(n, vty, json_row);
+
+ if (json_vni)
+ json_object_object_add(json_vni, buf, json_row); /* keyed by the neighbor's IP string */
+}
+
+/*
* Print neighbors for all VNI.
*/
static void zvni_print_neigh_hash_all_vni(struct hash_backet *backet,
@@ -950,6 +977,80 @@ static void zvni_print_dad_neigh_hash(struct hash_backet *backet, void *ctxt)
zvni_print_neigh_hash(backet, ctxt);
}
+static void zvni_print_dad_neigh_hash_detail(struct hash_backet *backet,
+ void *ctxt) /* 'ctxt' is passed through unchanged */
+{
+ zebra_neigh_t *nbr;
+
+ nbr = (zebra_neigh_t *)backet->data;
+ if (!nbr)
+ return;
+
+ if (CHECK_FLAG(nbr->flags, ZEBRA_NEIGH_DUPLICATE)) /* only duplicate-flagged neighbors are printed */
+ zvni_print_neigh_hash_detail(backet, ctxt);
+}
+
+/*
+ * Print neighbors for all VNIs in detail; args = { vty, json, print_dup }.
+ */
+static void zvni_print_neigh_hash_all_vni_detail(struct hash_backet *backet,
+ void **args)
+{
+ struct vty *vty;
+ json_object *json = NULL, *json_vni = NULL;
+ zebra_vni_t *zvni;
+ uint32_t num_neigh;
+ struct neigh_walk_ctx wctx;
+ char vni_str[VNI_STR_LEN];
+ uint32_t print_dup;
+
+ vty = (struct vty *)args[0];
+ json = (json_object *)args[1];
+ print_dup = (uint32_t)(uintptr_t)args[2];
+
+ zvni = (zebra_vni_t *)backet->data;
+ if (!zvni) {
+ if (json)
+ vty_out(vty, "{}\n");
+ return;
+ }
+ num_neigh = hashcount(zvni->neigh_table);
+
+ if (print_dup && num_dup_detected_neighs(zvni) == 0) /* dup-only view: skip when the count is zero */
+ return;
+
+ if (json == NULL) {
+ vty_out(vty,
+ "\nVNI %u #ARP (IPv4 and IPv6, local and remote) %u\n\n",
+ zvni->vni, num_neigh);
+ } else {
+ json_vni = json_object_new_object();
+ json_object_int_add(json_vni, "numArpNd", num_neigh);
+ snprintf(vni_str, VNI_STR_LEN, "%u", zvni->vni); /* vni_str is set, and read, only when json != NULL */
+ }
+ if (!num_neigh) {
+ if (json)
+ json_object_object_add(json, vni_str, json_vni);
+ return;
+ }
+
+ memset(&wctx, 0, sizeof(struct neigh_walk_ctx));
+ wctx.zvni = zvni;
+ wctx.vty = vty;
+ wctx.addr_width = 15;
+ wctx.json = json_vni;
+
+ if (print_dup)
+ hash_iterate(zvni->neigh_table,
+ zvni_print_dad_neigh_hash_detail, &wctx);
+ else
+ hash_iterate(zvni->neigh_table, zvni_print_neigh_hash_detail,
+ &wctx);
+
+ if (json)
+ json_object_object_add(json, vni_str, json_vni); /* per-VNI object keyed by the VNI number */
+}
+
/* print a specific next hop for an l3vni */
static void zl3vni_print_nh(zebra_neigh_t *n, struct vty *vty,
json_object *json)
@@ -5667,6 +5768,37 @@ void zebra_vxlan_print_neigh_all_vni(struct vty *vty, struct zebra_vrf *zvrf,
}
/*
+ * Display neighbors across all VNIs in detail (VTY command handler).
+ */
+void zebra_vxlan_print_neigh_all_vni_detail(struct vty *vty,
+ struct zebra_vrf *zvrf,
+ bool print_dup, bool use_json)
+{
+ json_object *json = NULL;
+ void *args[3];
+
+ if (!is_evpn_enabled())
+ return; /* NOTE(review): nothing is emitted here, even when use_json is set */
+
+ if (use_json)
+ json = json_object_new_object();
+
+ args[0] = vty;
+ args[1] = json;
+ args[2] = (void *)(ptrdiff_t)print_dup; /* unpacked with a uintptr_t cast in the callback */
+
+ hash_iterate(zvrf->vni_table,
+ (void (*)(struct hash_backet *,
+ void *))zvni_print_neigh_hash_all_vni_detail,
+ args);
+ if (use_json) {
+ vty_out(vty, "%s\n", json_object_to_json_string_ext(
+ json, JSON_C_TO_STRING_PRETTY));
+ json_object_free(json);
+ }
+}
+
+/*
* Display specific neighbor for a VNI, if present (VTY command handler).
*/
void zebra_vxlan_print_specific_neigh_vni(struct vty *vty,
diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h
index bf6e4290dc..de120ae0eb 100644
--- a/zebra/zebra_vxlan.h
+++ b/zebra/zebra_vxlan.h
@@ -111,6 +111,10 @@ extern void zebra_vxlan_print_neigh_all_vni(struct vty *vty,
struct zebra_vrf *zvrf,
bool print_dup,
bool use_json);
+extern void zebra_vxlan_print_neigh_all_vni_detail(struct vty *vty,
+ struct zebra_vrf *zvrf,
+ bool print_dup,
+ bool use_json);
extern void zebra_vxlan_print_specific_neigh_vni(struct vty *vty,
struct zebra_vrf *zvrf,
vni_t vni, struct ipaddr *ip,