]> git.puffer.fish Git - matthieu/frr.git/commitdiff
zebra: add netlink message batching infrastructure
authorJakub Urbańczyk <xthaid@gmail.com>
Wed, 15 Jul 2020 13:13:18 +0000 (15:13 +0200)
committerJakub Urbańczyk <xthaid@gmail.com>
Mon, 10 Aug 2020 19:33:00 +0000 (21:33 +0200)
Add helpers to batch several messages into one single netlink datagram.

Signed-off-by: Jakub Urbańczyk <xthaid@gmail.com>
zebra/kernel_netlink.c
zebra/kernel_netlink.h

index b35ed9df4312dec027c02fc4a99d0848dad9b78b..56b58ef8af49d56b959f9306b9de24f1c1ae9d95 100644 (file)
 #define RTPROT_MROUTED 17
 #endif
 
+#define NL_BATCH_TX_BUFSIZE (16 * NL_PKT_BUF_SIZE)
+
+/*
+ * For every request sent to the kernel that has failed we get an error message,
+ * which contains a standard netlink message header and the payload consisting
+ * of an error code and the original netlink mesage. So the receiving buffer
+ * must be at least as big as the transmitting buffer increased by some space
+ * for headers.
+ */
+#define NL_BATCH_RX_BUFSIZE (NL_BATCH_TX_BUFSIZE + NL_PKT_BUF_SIZE)
+
+/*
+ * We limit the batch's size to a number smaller than the length of the
+ * underlying buffer since the last message that wouldn't fit the batch would go
+ * over the upper boundary and then it would have to be encoded again into a new
+ * buffer. If the difference between the limit and the length of the buffer is
+ * big enough (bigger than the biggest Netlink message) then this situation
+ * won't occur.
+ */
+#define NL_BATCH_SEND_THRESHOLD (NL_BATCH_TX_BUFSIZE - NL_PKT_BUF_SIZE)
+
 static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
                                           {RTM_DELROUTE, "RTM_DELROUTE"},
                                           {RTM_GETROUTE, "RTM_GETROUTE"},
@@ -151,6 +172,36 @@ extern uint32_t nl_rcvbufsize;
 
 extern struct zebra_privs_t zserv_privs;
 
+char nl_batch_tx_buf[NL_BATCH_TX_BUFSIZE];
+char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
+
+DEFINE_MTYPE_STATIC(ZEBRA, NL_BATCH_ITEM, "Netlink batch items")
+
+PREDECL_LIST(nl_batch_list)
+
+struct nl_batch {
+       void *buf;
+       size_t bufsiz;
+       size_t limit;
+
+       void *buf_head;
+       size_t curlen;
+       size_t msgcnt;
+
+       const struct zebra_dplane_info *zns;
+       struct nl_batch_list_head items;
+};
+
+struct nl_msg_batch_item {
+       int seq;
+       struct zebra_dplane_ctx *ctx;
+       bool ignore_res;
+       bool failure;
+
+       struct nl_batch_list_item item;
+};
+
+DECLARE_LIST(nl_batch_list, struct nl_msg_batch_item, item)
 
 int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup)
 {
@@ -1080,6 +1131,205 @@ int netlink_request(struct nlsock *nl, void *req)
        return 0;
 }
 
+static int nl_batch_read_resp(struct nl_batch *bth)
+{
+       struct nlmsghdr *h;
+       struct sockaddr_nl snl;
+       struct msghdr msg;
+       int status;
+       const struct nlsock *nl;
+       struct nl_msg_batch_item *item, *from_item;
+
+       nl = &(bth->zns->nls);
+
+       msg.msg_name = (void *)&snl;
+       msg.msg_namelen = sizeof(snl);
+
+       from_item = nl_batch_list_first(&(bth->items));
+
+       status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
+                                 sizeof(nl_batch_rx_buf));
+       if (status == -1 || status == 0)
+               return status;
+
+       for (h = (struct nlmsghdr *)nl_batch_rx_buf;
+            (status >= 0 && NLMSG_OK(h, (unsigned int)status));
+            h = NLMSG_NEXT(h, status)) {
+
+               /*
+                * Find the corresponding batch item. Received responses are in
+                * the same order as requests we sent, so we can simply iterate
+                * over the batch item list and match responses with requests
+                * at same time.
+                */
+               frr_each_from(nl_batch_list, &(bth->items), item, from_item) {
+                       if (item->seq == (int)h->nlmsg_seq)
+                               break;
+               }
+
+               /*
+                * We received a message with the sequence number that isn't
+                * associated with any dplane context object.
+                */
+               if (item == NULL) {
+                       zlog_debug(
+                               "%s: skipping unassociated response, seq number %d NS %u",
+                               __func__, h->nlmsg_seq, bth->zns->ns_id);
+                       from_item = nl_batch_list_first(&(bth->items));
+                       continue;
+               }
+
+               if (h->nlmsg_type == NLMSG_ERROR) {
+                       int err = netlink_parse_error(nl, h, bth->zns, 0);
+
+                       if (err == -1)
+                               item->failure = true;
+
+                       zlog_debug("%s: netlink error message seq=%d ",
+                                  __func__, h->nlmsg_seq);
+                       continue;
+               }
+
+               /*
+                * If we get here then we did not receive neither the ack nor
+                * the error and instead received some other message in an
+                * unexpected way.
+                */
+               zlog_debug("%s: ignoring message type 0x%04x(%s) NS %u",
+                          __func__, h->nlmsg_type,
+                          nl_msg_type_to_str(h->nlmsg_type), bth->zns->ns_id);
+       }
+
+       return 0;
+}
+
+static void nl_batch_reset(struct nl_batch *bth)
+{
+       bth->buf = nl_batch_tx_buf;
+       bth->bufsiz = sizeof(nl_batch_tx_buf);
+       bth->limit = NL_BATCH_SEND_THRESHOLD;
+
+       bth->buf_head = bth->buf;
+       bth->curlen = 0;
+       bth->msgcnt = 0;
+       bth->zns = NULL;
+
+       nl_batch_list_init(&(bth->items));
+}
+
+static void nl_batch_send(struct nl_batch *bth)
+{
+       struct nl_msg_batch_item *item;
+       bool err = false;
+
+       if (bth->curlen == 0 || bth->zns == NULL)
+               return;
+
+       if (IS_ZEBRA_DEBUG_KERNEL)
+               zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu", __func__,
+                          bth->zns->nls.name, bth->curlen, bth->msgcnt);
+
+       if (netlink_send_msg(&(bth->zns->nls), bth->buf, bth->curlen) == -1)
+               err = true;
+
+       if (!err) {
+               if (nl_batch_read_resp(bth) == -1)
+                       err = true;
+       }
+
+       frr_each_safe(nl_batch_list, &(bth->items), item) {
+               enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS;
+
+               /*
+                * If either sending or receiving a message batch has ended with
+                * the error, mark all dplane requests as failed.
+                */
+               if (item->failure || err)
+                       res = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+               if (!item->ignore_res)
+                       dplane_ctx_set_status(item->ctx, res);
+
+               nl_batch_list_del(&(bth->items), item);
+               XFREE(MTYPE_NL_BATCH_ITEM, item);
+       }
+
+       nl_batch_reset(bth);
+}
+
+static void nl_batch_add_item(struct nl_batch *bth, int seq,
+                             struct zebra_dplane_ctx *ctx, bool ignore_res)
+{
+       struct nl_msg_batch_item *item =
+               XCALLOC(MTYPE_NL_BATCH_ITEM, sizeof(*item));
+
+       item->seq = seq;
+       item->ctx = ctx;
+       item->ignore_res = ignore_res;
+       item->failure = false;
+
+       nl_batch_list_add_tail(&(bth->items), item);
+}
+
+enum netlink_msg_status netlink_batch_add_msg(
+       struct nl_batch *bth, struct zebra_dplane_ctx *ctx,
+       ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t),
+       bool extra_msg)
+{
+       int seq;
+       ssize_t size;
+       struct nlmsghdr *msgh;
+
+       if (bth->zns != NULL
+           && bth->zns->ns_id != dplane_ctx_get_ns(ctx)->ns_id)
+               nl_batch_send(bth);
+
+       size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen);
+
+       /*
+        * If there was an error while encoding the message (other than buffer
+        * overflow) then return an error.
+        */
+       if (size < 0)
+               return FRR_NETLINK_ERROR;
+
+       /*
+        * If the message doesn't fit entirely in the buffer then send the batch
+        * and retry.
+        */
+       if (size == 0) {
+               nl_batch_send(bth);
+               size = (*msg_encoder)(ctx, bth->buf_head,
+                                     bth->bufsiz - bth->curlen);
+               /*
+                * If the message doesn't fit in the empty buffer then just
+                * return an error.
+                */
+               if (size <= 0)
+                       return FRR_NETLINK_ERROR;
+       }
+
+       seq = dplane_ctx_get_ns(ctx)->nls.seq;
+       if (extra_msg)
+               seq++;
+
+       msgh = (struct nlmsghdr *)bth->buf_head;
+       msgh->nlmsg_seq = seq;
+       msgh->nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
+
+       nl_batch_add_item(bth, seq, ctx, extra_msg);
+
+       bth->zns = dplane_ctx_get_ns(ctx);
+       bth->buf_head = ((char *)bth->buf_head) + size;
+       bth->curlen += size;
+       bth->msgcnt++;
+
+       if (bth->curlen > bth->limit)
+               nl_batch_send(bth);
+
+       return FRR_NETLINK_QUEUED;
+}
+
 void kernel_update_multi(struct dplane_ctx_q *ctx_list)
 {
        /* no-op */
index bd8159faf33439ba5eba6c1a7cdece42adcf0063..0df0cb2dde2346244d19ef93b543d2579270cccf 100644 (file)
@@ -105,6 +105,36 @@ int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
 
 extern int netlink_request(struct nlsock *nl, void *req);
 
+enum netlink_msg_status {
+       FRR_NETLINK_SUCCESS,
+       FRR_NETLINK_ERROR,
+       FRR_NETLINK_QUEUED,
+};
+
+struct nl_batch;
+
+/*
+ * netlink_batch_add_msg - add message to the netlink batch using dplane
+ * context object.
+ *
+ * @ctx:         Dataplane context
+ * @msg_encoder: A function that encodes dplane context object into
+ *               netlink message. Should take dplane context object,
+ *               pointer to a buffer and buffer's length as parameters
+ *               and should return -1 on error, 0 on buffer overflow or
+ *               size of the encoded message.
+ * @extra_msg:   In some cases there are two netlink messages for single
+ *               context object that need to be handled differently. This flag
+ *               distinguishes those.
+ *
+ * Return:             Status of the message.
+ */
+extern enum netlink_msg_status netlink_batch_add_msg(
+       struct nl_batch *bth, struct zebra_dplane_ctx *ctx,
+       ssize_t (*msg_encoder)(struct zebra_dplane_ctx *, void *, size_t),
+       bool extra_msg);
+
+
 #endif /* HAVE_NETLINK */
 
 #ifdef __cplusplus