 -rw-r--r-- | debian/frr.install          |    1
 -rw-r--r-- | lib/nexthop.h               |   11
 -rw-r--r-- | redhat/frr.spec.in          |    1
 -rw-r--r-- | zebra/dplane_fpm_nl.c       | 1127
 -rw-r--r-- | zebra/rt_netlink.c          |  426
 -rw-r--r-- | zebra/rt_netlink.h          |    6
 -rw-r--r-- | zebra/subdir.am             |   10
 -rw-r--r-- | zebra/zebra_dplane.c        |  202
 -rw-r--r-- | zebra/zebra_dplane.h        |   24
 -rw-r--r-- | zebra/zebra_vxlan_private.h |    1
10 files changed, 1543 insertions(+), 266 deletions(-)
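The new zebra/dplane_fpm_nl.c plugin (full diff below) streams netlink-encoded updates to an external FPM server, prepending a 4-byte FPM header to every message: one byte protocol version (always 1), one byte payload type (1 = netlink, 2 = protobuf) and a 16-bit total length in network byte order that includes the header itself. A minimal, self-contained sketch of that framing follows; fpm_frame_msg() and its buffer handling are illustrative only and are not part of the patch -- the plugin writes the same header with stream_putc()/stream_putw() inside fpm_nl_enqueue().

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>   /* htons() */

#define FPM_HEADER_SIZE      4
#define FPM_MSG_TYPE_NETLINK 1

/*
 * Illustrative helper (not in the patch): frame one already-encoded
 * netlink message into an FPM message inside "out".  Returns the total
 * number of bytes written, or 0 if the buffer is too small or the
 * framed message would not fit the 16-bit length field.
 */
static size_t fpm_frame_msg(uint8_t *out, size_t outlen,
			    const uint8_t *nl_msg, size_t nl_len)
{
	size_t total = nl_len + FPM_HEADER_SIZE;
	uint16_t len_n;

	if (total > UINT16_MAX || total > outlen)
		return 0;

	out[0] = 1;                       /* FPM protocol version */
	out[1] = FPM_MSG_TYPE_NETLINK;    /* payload is a netlink message */

	len_n = htons((uint16_t)total);   /* length covers header + payload */
	memcpy(&out[2], &len_n, sizeof(len_n));
	memcpy(&out[FPM_HEADER_SIZE], nl_msg, nl_len);

	return total;
}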
diff --git a/debian/frr.install b/debian/frr.install index 5917c0da84..e2485fe8b8 100644 --- a/debian/frr.install +++ b/debian/frr.install @@ -9,6 +9,7 @@ usr/lib/frr/*d usr/lib/frr/watchfrr usr/lib/frr/zebra usr/lib/*/frr/modules/zebra_cumulus_mlag.so +usr/lib/*/frr/modules/dplane_fpm_nl.so usr/lib/*/frr/modules/zebra_irdp.so usr/lib/*/frr/modules/zebra_fpm.so usr/lib/*/frr/modules/bgpd_bmp.so diff --git a/lib/nexthop.h b/lib/nexthop.h index c4e88dd844..9b71262589 100644 --- a/lib/nexthop.h +++ b/lib/nexthop.h @@ -25,6 +25,7 @@ #include "prefix.h" #include "mpls.h" +#include "vxlan.h" #ifdef __cplusplus extern "C" { @@ -60,6 +61,10 @@ enum blackhole_type { ? (type) \ : ((type) | 1) +enum nh_encap_type { + NET_VXLAN = 100, /* value copied from FPM_NH_ENCAP_VXLAN. */ +}; + /* Nexthop structure. */ struct nexthop { struct nexthop *next; @@ -123,6 +128,12 @@ struct nexthop { * only meaningful if the HAS_BACKUP flag is set. */ uint8_t backup_idx; + + /* Encapsulation information. */ + enum nh_encap_type nh_encap_type; + union { + vni_t vni; + } nh_encap; }; /* Backup index value is limited */ diff --git a/redhat/frr.spec.in b/redhat/frr.spec.in index db465f2b00..929214a142 100644 --- a/redhat/frr.spec.in +++ b/redhat/frr.spec.in @@ -675,6 +675,7 @@ fi %{_libdir}/frr/modules/bgpd_rpki.so %endif %{_libdir}/frr/modules/zebra_cumulus_mlag.so +%{_libdir}/frr/modules/dplane_fpm_nl.so %{_libdir}/frr/modules/zebra_irdp.so %{_libdir}/frr/modules/bgpd_bmp.so %{_bindir}/* diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c new file mode 100644 index 0000000000..a697a306bf --- /dev/null +++ b/zebra/dplane_fpm_nl.c @@ -0,0 +1,1127 @@ +/* + * Zebra dataplane plugin for Forwarding Plane Manager (FPM) using netlink. + * + * Copyright (C) 2019 Network Device Education Foundation, Inc. ("NetDEF") + * Rafael Zalamena + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <arpa/inet.h> + +#include <sys/types.h> +#include <sys/socket.h> + +#include <errno.h> +#include <string.h> + +#include "config.h" /* Include this explicitly */ +#include "lib/zebra.h" +#include "lib/json.h" +#include "lib/libfrr.h" +#include "lib/frratomic.h" +#include "lib/command.h" +#include "lib/memory.h" +#include "lib/network.h" +#include "lib/ns.h" +#include "lib/frr_pthread.h" +#include "zebra/debug.h" +#include "zebra/interface.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_router.h" +#include "zebra/zebra_vxlan_private.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/debug.h" + +#define SOUTHBOUND_DEFAULT_ADDR INADDR_LOOPBACK +#define SOUTHBOUND_DEFAULT_PORT 2620 + +/** + * FPM header: + * { + * version: 1 byte (always 1), + * type: 1 byte (1 for netlink, 2 protobuf), + * len: 2 bytes (network order), + * } + * + * This header is used with any format to tell the users how many bytes to + * expect. + */ +#define FPM_HEADER_SIZE 4 + +static const char *prov_name = "dplane_fpm_nl"; + +struct fpm_nl_ctx { + /* data plane connection. */ + int socket; + bool disabled; + bool connecting; + bool rib_complete; + bool rmac_complete; + struct sockaddr_storage addr; + + /* data plane buffers. */ + struct stream *ibuf; + struct stream *obuf; + pthread_mutex_t obuf_mutex; + + /* + * data plane context queue: + * When a FPM server connection becomes a bottleneck, we must keep the + * data plane contexts until we get a chance to process them. + */ + struct dplane_ctx_q ctxqueue; + pthread_mutex_t ctxqueue_mutex; + + /* data plane events. */ + struct zebra_dplane_provider *prov; + struct frr_pthread *fthread; + struct thread *t_connect; + struct thread *t_read; + struct thread *t_write; + struct thread *t_event; + struct thread *t_dequeue; + + /* zebra events. */ + struct thread *t_ribreset; + struct thread *t_ribwalk; + struct thread *t_rmacreset; + struct thread *t_rmacwalk; + + /* Statistic counters. */ + struct { + /* Amount of bytes read into ibuf. */ + _Atomic uint32_t bytes_read; + /* Amount of bytes written from obuf. */ + _Atomic uint32_t bytes_sent; + /* Output buffer current usage. */ + _Atomic uint32_t obuf_bytes; + /* Output buffer peak usage. */ + _Atomic uint32_t obuf_peak; + + /* Amount of connection closes. */ + _Atomic uint32_t connection_closes; + /* Amount of connection errors. */ + _Atomic uint32_t connection_errors; + + /* Amount of user configurations: FNE_RECONNECT. */ + _Atomic uint32_t user_configures; + /* Amount of user disable requests: FNE_DISABLE. */ + _Atomic uint32_t user_disables; + + /* Amount of data plane context processed. */ + _Atomic uint32_t dplane_contexts; + /* Amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len; + /* Peak amount of data plane contexts enqueued. */ + _Atomic uint32_t ctxqueue_len_peak; + + /* Amount of buffer full events. */ + _Atomic uint32_t buffer_full; + } counters; +} *gfnc; + +enum fpm_nl_events { + /* Ask for FPM to reconnect the external server. */ + FNE_RECONNECT, + /* Disable FPM. */ + FNE_DISABLE, + /* Reset counters. */ + FNE_RESET_COUNTERS, +}; + +/* + * Prototypes. 
+ */ +static int fpm_process_event(struct thread *t); +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx); +static int fpm_rib_send(struct thread *t); +static int fpm_rib_reset(struct thread *t); +static int fpm_rmac_send(struct thread *t); +static int fpm_rmac_reset(struct thread *t); + +/* + * Helper functions. + */ + +/** + * Reorganizes the data on the buffer so it can fit more data. + * + * @param s stream pointer. + */ +static void stream_pulldown(struct stream *s) +{ + size_t rlen = STREAM_READABLE(s); + + /* No more data, so just move the pointers. */ + if (rlen == 0) { + stream_reset(s); + return; + } + + /* Move the available data to the beginning. */ + memmove(s->data, &s->data[s->getp], rlen); + s->getp = 0; + s->endp = rlen; +} + +/* + * CLI. + */ +#define FPM_STR "Forwarding Plane Manager configuration\n" + +DEFUN(fpm_set_address, fpm_set_address_cmd, + "fpm address <A.B.C.D|X:X::X:X> [port (1-65535)]", + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + uint16_t port = 0; + uint8_t naddr[INET6_BUFSIZ]; + + if (argc == 5) + port = strtol(argv[4]->arg, NULL, 10); + + /* Handle IPv4 addresses. */ + if (inet_pton(AF_INET, argv[2]->arg, naddr) == 1) { + sin = (struct sockaddr_in *)&gfnc->addr; + + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_port = + port ? htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin->sin_len = sizeof(*sin); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin->sin_addr, naddr, sizeof(sin->sin_addr)); + + goto ask_reconnect; + } + + /* Handle IPv6 addresses. */ + if (inet_pton(AF_INET6, argv[2]->arg, naddr) != 1) { + vty_out(vty, "%% Invalid address: %s\n", argv[2]->arg); + return CMD_WARNING; + } + + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = port ? 
htons(port) : htons(SOUTHBOUND_DEFAULT_PORT); +#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN + sin6->sin6_len = sizeof(*sin6); +#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */ + memcpy(&sin6->sin6_addr, naddr, sizeof(sin6->sin6_addr)); + +ask_reconnect: + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RECONNECT, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(no_fpm_set_address, no_fpm_set_address_cmd, + "no fpm address [<A.B.C.D|X:X::X:X> [port <1-65535>]]", + NO_STR + FPM_STR + "FPM remote listening server address\n" + "Remote IPv4 FPM server\n" + "Remote IPv6 FPM server\n" + "FPM remote listening server port\n" + "Remote FPM server port\n") +{ + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_DISABLE, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_reset_counters, fpm_reset_counters_cmd, + "clear fpm counters", + CLEAR_STR + FPM_STR + "FPM statistic counters\n") +{ + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_RESET_COUNTERS, &gfnc->t_event); + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters, fpm_show_counters_cmd, + "show fpm counters", + SHOW_STR + FPM_STR + "FPM statistic counters\n") +{ + vty_out(vty, "%30s\n%30s\n", "FPM counters", "============"); + +#define SHOW_COUNTER(label, counter) \ + vty_out(vty, "%28s: %u\n", (label), (counter)) + + SHOW_COUNTER("Input bytes", gfnc->counters.bytes_read); + SHOW_COUNTER("Output bytes", gfnc->counters.bytes_sent); + SHOW_COUNTER("Output buffer current size", gfnc->counters.obuf_bytes); + SHOW_COUNTER("Output buffer peak size", gfnc->counters.obuf_peak); + SHOW_COUNTER("Connection closes", gfnc->counters.connection_closes); + SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors); + SHOW_COUNTER("Data plane items processed", + gfnc->counters.dplane_contexts); + SHOW_COUNTER("Data plane items enqueued", + gfnc->counters.ctxqueue_len); + SHOW_COUNTER("Data plane items queue peak", + gfnc->counters.ctxqueue_len_peak); + SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full); + SHOW_COUNTER("User FPM configurations", gfnc->counters.user_configures); + SHOW_COUNTER("User FPM disable requests", gfnc->counters.user_disables); + +#undef SHOW_COUNTER + + return CMD_SUCCESS; +} + +DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd, + "show fpm counters json", + SHOW_STR + FPM_STR + "FPM statistic counters\n" + JSON_STR) +{ + struct json_object *jo; + + jo = json_object_new_object(); + json_object_int_add(jo, "bytes-read", gfnc->counters.bytes_read); + json_object_int_add(jo, "bytes-sent", gfnc->counters.bytes_sent); + json_object_int_add(jo, "obuf-bytes", gfnc->counters.obuf_bytes); + json_object_int_add(jo, "obuf-bytes-peak", gfnc->counters.obuf_peak); + json_object_int_add(jo, "connection-closes", + gfnc->counters.connection_closes); + json_object_int_add(jo, "connection-errors", + gfnc->counters.connection_errors); + json_object_int_add(jo, "data-plane-contexts", + gfnc->counters.dplane_contexts); + json_object_int_add(jo, "data-plane-contexts-queue", + gfnc->counters.ctxqueue_len); + json_object_int_add(jo, "data-plane-contexts-queue-peak", + gfnc->counters.ctxqueue_len_peak); + json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full); + json_object_int_add(jo, "user-configures", + gfnc->counters.user_configures); + json_object_int_add(jo, "user-disables", gfnc->counters.user_disables); + vty_out(vty, "%s\n", json_object_to_json_string_ext(jo, 0)); + json_object_free(jo); + + return CMD_SUCCESS; +} + +static int fpm_write_config(struct vty 
*vty) +{ + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + int written = 0; + char addrstr[INET6_ADDRSTRLEN]; + + if (gfnc->disabled) + return written; + + switch (gfnc->addr.ss_family) { + case AF_INET: + written = 1; + sin = (struct sockaddr_in *)&gfnc->addr; + inet_ntop(AF_INET, &sin->sin_addr, addrstr, sizeof(addrstr)); + vty_out(vty, "fpm address %s", addrstr); + if (sin->sin_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin->sin_port)); + + vty_out(vty, "\n"); + break; + case AF_INET6: + written = 1; + sin6 = (struct sockaddr_in6 *)&gfnc->addr; + inet_ntop(AF_INET, &sin6->sin6_addr, addrstr, sizeof(addrstr)); + vty_out(vty, "fpm address %s", addrstr); + if (sin6->sin6_port != htons(SOUTHBOUND_DEFAULT_PORT)) + vty_out(vty, " port %d", ntohs(sin6->sin6_port)); + + vty_out(vty, "\n"); + break; + + default: + break; + } + + return written; +} + +struct cmd_node fpm_node = { + .node = VTY_NODE, + .prompt = "", + .vtysh = 1, +}; + +/* + * FPM functions. + */ +static int fpm_connect(struct thread *t); + +static void fpm_reconnect(struct fpm_nl_ctx *fnc) +{ + /* Grab the lock to empty the stream and stop the zebra thread. */ + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + /* Avoid calling close on `-1`. */ + if (fnc->socket != -1) { + close(fnc->socket); + fnc->socket = -1; + } + + stream_reset(fnc->ibuf); + stream_reset(fnc->obuf); + THREAD_OFF(fnc->t_read); + THREAD_OFF(fnc->t_write); + + if (fnc->t_ribreset) + thread_cancel_async(zrouter.master, &fnc->t_ribreset, NULL); + if (fnc->t_ribwalk) + thread_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL); + if (fnc->t_rmacreset) + thread_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL); + if (fnc->t_rmacwalk) + thread_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL); + + /* FPM is disabled, don't attempt to connect. */ + if (fnc->disabled) + return; + + thread_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); +} + +static int fpm_read(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + ssize_t rv; + + /* Let's ignore the input at the moment. */ + rv = stream_read_try(fnc->ibuf, fnc->socket, + STREAM_WRITEABLE(fnc->ibuf)); + if (rv == 0) { + atomic_fetch_add_explicit(&fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + + fpm_reconnect(fnc); + return 0; + } + if (rv == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK + || errno == EINTR) + return 0; + + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + fpm_reconnect(fnc); + return 0; + } + stream_reset(fnc->ibuf); + + /* Account all bytes read. 
*/ + atomic_fetch_add_explicit(&fnc->counters.bytes_read, rv, + memory_order_relaxed); + + thread_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, + &fnc->t_read); + + return 0; +} + +static int fpm_write(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + socklen_t statuslen; + ssize_t bwritten; + int rv, status; + size_t btotal; + + if (fnc->connecting == true) { + status = 0; + statuslen = sizeof(status); + + rv = getsockopt(fnc->socket, SOL_SOCKET, SO_ERROR, &status, + &statuslen); + if (rv == -1 || status != 0) { + if (rv != -1) + zlog_warn("%s: connection failed: %s", __func__, + strerror(status)); + else + zlog_warn("%s: SO_ERROR failed: %s", __func__, + strerror(status)); + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + + fpm_reconnect(fnc); + return 0; + } + + fnc->connecting = false; + + /* Ask zebra main thread to start walking the RIB table. */ + thread_add_timer(zrouter.master, fpm_rib_send, fnc, 0, + &fnc->t_ribwalk); + thread_add_timer(zrouter.master, fpm_rmac_send, fnc, 0, + &fnc->t_rmacwalk); + } + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + while (true) { + /* Stream is empty: reset pointers and return. */ + if (STREAM_READABLE(fnc->obuf) == 0) { + stream_reset(fnc->obuf); + break; + } + + /* Try to write all at once. */ + btotal = stream_get_endp(fnc->obuf) - + stream_get_getp(fnc->obuf); + bwritten = write(fnc->socket, stream_pnt(fnc->obuf), btotal); + if (bwritten == 0) { + atomic_fetch_add_explicit( + &fnc->counters.connection_closes, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: connection closed", __func__); + break; + } + if (bwritten == -1) { + /* Attempt to continue if blocked by a signal. */ + if (errno == EINTR) + continue; + /* Receiver is probably slow, lets give it some time. */ + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + + atomic_fetch_add_explicit( + &fnc->counters.connection_errors, 1, + memory_order_relaxed); + zlog_warn("%s: connection failure: %s", __func__, + strerror(errno)); + fpm_reconnect(fnc); + break; + } + + /* Account all bytes sent. */ + atomic_fetch_add_explicit(&fnc->counters.bytes_sent, bwritten, + memory_order_relaxed); + + /* Account number of bytes free. */ + atomic_fetch_sub_explicit(&fnc->counters.obuf_bytes, bwritten, + memory_order_relaxed); + + stream_forward_getp(fnc->obuf, (size_t)bwritten); + } + + /* Stream is not empty yet, we must schedule more writes. 
*/ + if (STREAM_READABLE(fnc->obuf)) { + stream_pulldown(fnc->obuf); + thread_add_write(fnc->fthread->master, fpm_write, fnc, + fnc->socket, &fnc->t_write); + return 0; + } + + return 0; +} + +static int fpm_connect(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct sockaddr_in *sin = (struct sockaddr_in *)&fnc->addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&fnc->addr; + socklen_t slen; + int rv, sock; + char addrstr[INET6_ADDRSTRLEN]; + + sock = socket(fnc->addr.ss_family, SOCK_STREAM, 0); + if (sock == -1) { + zlog_err("%s: fpm socket failed: %s", __func__, + strerror(errno)); + thread_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return 0; + } + + set_nonblocking(sock); + + if (fnc->addr.ss_family == AF_INET) { + inet_ntop(AF_INET, &sin->sin_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin); + } else { + inet_ntop(AF_INET6, &sin6->sin6_addr, addrstr, sizeof(addrstr)); + slen = sizeof(*sin6); + } + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: attempting to connect to %s:%d", __func__, + addrstr, ntohs(sin->sin_port)); + + rv = connect(sock, (struct sockaddr *)&fnc->addr, slen); + if (rv == -1 && errno != EINPROGRESS) { + atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1, + memory_order_relaxed); + close(sock); + zlog_warn("%s: fpm connection failed: %s", __func__, + strerror(errno)); + thread_add_timer(fnc->fthread->master, fpm_connect, fnc, 3, + &fnc->t_connect); + return 0; + } + + fnc->connecting = (errno == EINPROGRESS); + fnc->socket = sock; + thread_add_read(fnc->fthread->master, fpm_read, fnc, sock, + &fnc->t_read); + thread_add_write(fnc->fthread->master, fpm_write, fnc, sock, + &fnc->t_write); + + /* Mark all routes as unsent. */ + thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0, + &fnc->t_ribreset); + thread_add_timer(zrouter.master, fpm_rmac_reset, fnc, 0, + &fnc->t_rmacreset); + + return 0; +} + +/** + * Encode data plane operation context into netlink and enqueue it in the FPM + * output buffer. + * + * @param fnc the netlink FPM context. + * @param ctx the data plane operation context data. + * @return 0 on success or -1 on not enough space. + */ +static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) +{ + uint8_t nl_buf[NL_PKT_BUF_SIZE]; + size_t nl_buf_len; + ssize_t rv; + uint64_t obytes, obytes_peak; + + nl_buf_len = 0; + + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + rv = netlink_route_multipath(RTM_DELROUTE, ctx, nl_buf, + sizeof(nl_buf), true); + if (rv <= 0) { + zlog_err("%s: netlink_route_multipath failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + + /* UPDATE operations need a INSTALL, otherwise just quit. 
*/ + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) + break; + + /* FALL THROUGH */ + case DPLANE_OP_ROUTE_INSTALL: + rv = netlink_route_multipath(RTM_NEWROUTE, ctx, + &nl_buf[nl_buf_len], + sizeof(nl_buf) - nl_buf_len, true); + if (rv <= 0) { + zlog_err("%s: netlink_route_multipath failed", + __func__); + return 0; + } + + nl_buf_len += (size_t)rv; + break; + + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + rv = netlink_macfdb_update_ctx(ctx, nl_buf, sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_macfdb_update_ctx failed", + __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + case DPLANE_OP_NH_DELETE: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_NONE: + break; + + default: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: unhandled data plane message (%d) %s", + __func__, dplane_ctx_get_op(ctx), + dplane_op2str(dplane_ctx_get_op(ctx))); + break; + } + + /* Skip empty enqueues. */ + if (nl_buf_len == 0) + return 0; + + /* We must know if someday a message goes beyond 65KiB. */ + assert((nl_buf_len + FPM_HEADER_SIZE) <= UINT16_MAX); + + /* Check if we have enough buffer space. */ + if (STREAM_WRITEABLE(fnc->obuf) < (nl_buf_len + FPM_HEADER_SIZE)) { + atomic_fetch_add_explicit(&fnc->counters.buffer_full, 1, + memory_order_relaxed); + + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug( + "%s: buffer full: wants to write %zu but has %zu", + __func__, nl_buf_len + FPM_HEADER_SIZE, + STREAM_WRITEABLE(fnc->obuf)); + + return -1; + } + + /* + * Fill in the FPM header information. + * + * See FPM_HEADER_SIZE definition for more information. + */ + stream_putc(fnc->obuf, 1); + stream_putc(fnc->obuf, 1); + stream_putw(fnc->obuf, nl_buf_len + FPM_HEADER_SIZE); + + /* Write current data. */ + stream_write(fnc->obuf, nl_buf, (size_t)nl_buf_len); + + /* Account number of bytes waiting to be written. */ + atomic_fetch_add_explicit(&fnc->counters.obuf_bytes, + nl_buf_len + FPM_HEADER_SIZE, + memory_order_relaxed); + obytes = atomic_load_explicit(&fnc->counters.obuf_bytes, + memory_order_relaxed); + obytes_peak = atomic_load_explicit(&fnc->counters.obuf_peak, + memory_order_relaxed); + if (obytes_peak < obytes) + atomic_store_explicit(&fnc->counters.obuf_peak, obytes, + memory_order_relaxed); + + /* Tell the thread to start writing. */ + thread_add_write(fnc->fthread->master, fpm_write, fnc, fnc->socket, + &fnc->t_write); + + return 0; +} + +/** + * Send all RIB installed routes to the connected data plane. + */ +static int fpm_rib_send(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + struct zebra_dplane_ctx *ctx; + rib_tables_iter_t rt_iter; + + /* Allocate temporary context for all transactions. */ + ctx = dplane_ctx_alloc(); + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. 
*/ + if (dest == NULL || dest->selected_fib == NULL) + continue; + + /* Check for already sent routes. */ + if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) + continue; + + /* Enqueue route install. */ + dplane_ctx_reset(ctx); + dplane_ctx_route_init(ctx, DPLANE_OP_ROUTE_INSTALL, rn, + dest->selected_fib); + if (fpm_nl_enqueue(fnc, ctx) == -1) { + /* Free the temporary allocated context. */ + dplane_ctx_fini(&ctx); + + thread_add_timer(zrouter.master, fpm_rib_send, + fnc, 1, &fnc->t_ribwalk); + return 0; + } + + /* Mark as sent. */ + SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + /* Free the temporary allocated context. */ + dplane_ctx_fini(&ctx); + + /* All RIB routes sent! */ + fnc->rib_complete = true; + + return 0; +} + +/* + * The next three functions will handle RMAC enqueue. + */ +struct fpm_rmac_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + zebra_l3vni_t *zl3vni; +}; + +static void fpm_enqueue_rmac_table(struct hash_bucket *backet, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + zebra_mac_t *zrmac = backet->data; + struct zebra_if *zif = fra->zl3vni->vxlan_if->info; + const struct zebra_l2info_vxlan *vxl = &zif->l2info.vxl; + struct zebra_if *br_zif; + vlanid_t vid; + bool sticky; + + /* Entry already sent. */ + if (CHECK_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT)) + return; + + sticky = !!CHECK_FLAG(zrmac->flags, + (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)); + br_zif = (struct zebra_if *)(zif->brslave_info.br_if->info); + vid = IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif) ? vxl->access_vlan : 0; + + dplane_ctx_reset(fra->ctx); + dplane_ctx_set_op(fra->ctx, DPLANE_OP_MAC_INSTALL); + dplane_mac_init(fra->ctx, fra->zl3vni->vxlan_if, + zif->brslave_info.br_if, vid, + &zrmac->macaddr, zrmac->fwd_info.r_vtep_ip, sticky); + if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) { + thread_add_timer(zrouter.master, fpm_rmac_send, + fra->fnc, 1, &fra->fnc->t_rmacwalk); + } +} + +static void fpm_enqueue_l3vni_table(struct hash_bucket *backet, void *arg) +{ + struct fpm_rmac_arg *fra = arg; + zebra_l3vni_t *zl3vni = backet->data; + + fra->zl3vni = zl3vni; + hash_iterate(zl3vni->rmac_table, fpm_enqueue_rmac_table, zl3vni); +} + +static int fpm_rmac_send(struct thread *t) +{ + struct fpm_rmac_arg fra; + + fra.fnc = THREAD_ARG(t); + fra.ctx = dplane_ctx_alloc(); + hash_iterate(zrouter.l3vni_table, fpm_enqueue_l3vni_table, &fra); + dplane_ctx_fini(&fra.ctx); + + return 0; +} + +/** + * Resets the RIB FPM flags so we send all routes again. + */ +static int fpm_rib_reset(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + rib_dest_t *dest; + struct route_node *rn; + struct route_table *rt; + rib_tables_iter_t rt_iter; + + fnc->rib_complete = false; + + rt_iter.state = RIB_TABLES_ITER_S_INIT; + while ((rt = rib_tables_iter_next(&rt_iter))) { + for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { + dest = rib_dest_from_rnode(rn); + /* Skip bad route entries. */ + if (dest == NULL) + continue; + + UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM); + } + } + + return 0; +} + +/* + * The next three function will handle RMAC table reset. 
+ */ +static void fpm_unset_rmac_table(struct hash_bucket *backet, void *arg) +{ + zebra_mac_t *zrmac = backet->data; + + UNSET_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT); +} + +static void fpm_unset_l3vni_table(struct hash_bucket *backet, void *arg) +{ + zebra_l3vni_t *zl3vni = backet->data; + + hash_iterate(zl3vni->rmac_table, fpm_unset_rmac_table, zl3vni); +} + +static int fpm_rmac_reset(struct thread *t) +{ + hash_iterate(zrouter.l3vni_table, fpm_unset_l3vni_table, NULL); + + return 0; +} + +static int fpm_process_queue(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct zebra_dplane_ctx *ctx; + + frr_mutex_lock_autounlock(&fnc->ctxqueue_mutex); + + while (true) { + /* No space available yet. */ + if (STREAM_WRITEABLE(fnc->obuf) < NL_PKT_BUF_SIZE) + break; + + /* Dequeue next item or quit processing. */ + ctx = dplane_ctx_dequeue(&fnc->ctxqueue); + if (ctx == NULL) + break; + + fpm_nl_enqueue(fnc, ctx); + + /* Account the processed entries. */ + atomic_fetch_add_explicit(&fnc->counters.dplane_contexts, 1, + memory_order_relaxed); + atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1, + memory_order_relaxed); + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(fnc->prov, ctx); + } + + /* Check for more items in the queue. */ + if (atomic_load_explicit(&fnc->counters.ctxqueue_len, + memory_order_relaxed) + > 0) + thread_add_timer(fnc->fthread->master, fpm_process_queue, + fnc, 0, &fnc->t_dequeue); + + return 0; +} + +/** + * Handles external (e.g. CLI, data plane or others) events. + */ +static int fpm_process_event(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + int event = THREAD_VAL(t); + + switch (event) { + case FNE_DISABLE: + zlog_info("%s: manual FPM disable event", __func__); + fnc->disabled = true; + atomic_fetch_add_explicit(&fnc->counters.user_disables, 1, + memory_order_relaxed); + + /* Call reconnect to disable timers and clean up context. */ + fpm_reconnect(fnc); + break; + + case FNE_RECONNECT: + zlog_info("%s: manual FPM reconnect event", __func__); + fnc->disabled = false; + atomic_fetch_add_explicit(&fnc->counters.user_configures, 1, + memory_order_relaxed); + fpm_reconnect(fnc); + break; + + case FNE_RESET_COUNTERS: + zlog_info("%s: manual FPM counters reset event", __func__); + memset(&fnc->counters, 0, sizeof(fnc->counters)); + break; + + default: + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: unhandled event %d", __func__, event); + break; + } + + return 0; +} + +/* + * Data plane functions. 
+ */ +static int fpm_nl_start(struct zebra_dplane_provider *prov) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + fnc->fthread = frr_pthread_new(NULL, prov_name, prov_name); + assert(frr_pthread_run(fnc->fthread, NULL) == 0); + fnc->ibuf = stream_new(NL_PKT_BUF_SIZE); + fnc->obuf = stream_new(NL_PKT_BUF_SIZE * 128); + pthread_mutex_init(&fnc->obuf_mutex, NULL); + fnc->socket = -1; + fnc->disabled = true; + fnc->prov = prov; + TAILQ_INIT(&fnc->ctxqueue); + pthread_mutex_init(&fnc->ctxqueue_mutex, NULL); + + return 0; +} + +static int fpm_nl_finish(struct zebra_dplane_provider *prov, bool early) +{ + struct fpm_nl_ctx *fnc; + + fnc = dplane_provider_get_data(prov); + stream_free(fnc->ibuf); + stream_free(fnc->obuf); + close(fnc->socket); + + return 0; +} + +static int fpm_nl_process(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + int counter, limit; + uint64_t cur_queue, peak_queue; + + fnc = dplane_provider_get_data(prov); + limit = dplane_provider_get_work_limit(prov); + for (counter = 0; counter < limit; counter++) { + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; + + /* + * Skip all notifications if not connected, we'll walk the RIB + * anyway. + */ + if (fnc->socket != -1 && fnc->connecting == false) { + frr_mutex_lock_autounlock(&fnc->ctxqueue_mutex); + dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx); + + /* Account the number of contexts. */ + atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len, + 1, memory_order_relaxed); + cur_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len, + memory_order_relaxed); + peak_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len_peak, + memory_order_relaxed); + if (peak_queue < cur_queue) + atomic_store_explicit( + &fnc->counters.ctxqueue_len_peak, + peak_queue, memory_order_relaxed); + continue; + } + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + if (atomic_load_explicit(&fnc->counters.ctxqueue_len, + memory_order_relaxed) + > 0) + thread_add_timer(fnc->fthread->master, fpm_process_queue, + fnc, 0, &fnc->t_dequeue); + + return 0; +} + +static int fpm_nl_new(struct thread_master *tm) +{ + struct zebra_dplane_provider *prov = NULL; + int rv; + + gfnc = calloc(1, sizeof(*gfnc)); + rv = dplane_provider_register(prov_name, DPLANE_PRIO_POSTPROCESS, + DPLANE_PROV_FLAG_THREADED, fpm_nl_start, + fpm_nl_process, fpm_nl_finish, gfnc, + &prov); + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("%s register status: %d", prov_name, rv); + + install_node(&fpm_node, fpm_write_config); + install_element(ENABLE_NODE, &fpm_show_counters_cmd); + install_element(ENABLE_NODE, &fpm_show_counters_json_cmd); + install_element(ENABLE_NODE, &fpm_reset_counters_cmd); + install_element(CONFIG_NODE, &fpm_set_address_cmd); + install_element(CONFIG_NODE, &no_fpm_set_address_cmd); + + return 0; +} + +static int fpm_nl_init(void) +{ + hook_register(frr_late_init, fpm_nl_new); + return 0; +} + +FRR_MODULE_SETUP( + .name = "dplane_fpm_nl", + .version = "0.0.1", + .description = "Data plane plugin for FPM using netlink.", + .init = fpm_nl_init, + ) diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index b6224b3da9..c4af082e72 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -1123,6 +1123,7 @@ static void _netlink_route_build_singlepath(const struct prefix *p, char label_buf[256]; int num_labels = 0; struct vrf *vrf; + char addrstr[INET6_ADDRSTRLEN]; assert(nexthop); @@ -1179,11 +1180,10 @@ static 
void _netlink_route_build_singlepath(const struct prefix *p, &nexthop->src.ipv4, bytelen); if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - " 5549: _netlink_route_build_singlepath() (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", - routedesc, p, ipv4_ll_buf, label_buf, - nexthop->ifindex, VRF_LOGNAME(vrf), - nexthop->vrf_id); + zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, ipv4_ll_buf, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); return; } @@ -1204,12 +1204,14 @@ static void _netlink_route_build_singlepath(const struct prefix *p, &nexthop->src.ipv4, bytelen); } - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", - routedesc, p, inet_ntoa(nexthop->gate.ipv4), - label_buf, nexthop->ifindex, VRF_LOGNAME(vrf), - nexthop->vrf_id); + if (IS_ZEBRA_DEBUG_KERNEL) { + inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr, + sizeof(addrstr)); + zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, addrstr, label_buf, + nexthop->ifindex, VRF_LOGNAME(vrf), + nexthop->vrf_id); + } } if (nexthop->type == NEXTHOP_TYPE_IPV6 @@ -1227,12 +1229,14 @@ static void _netlink_route_build_singlepath(const struct prefix *p, &nexthop->src.ipv6, bytelen); } - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", - routedesc, p, inet6_ntoa(nexthop->gate.ipv6), - label_buf, nexthop->ifindex, VRF_LOGNAME(vrf), - nexthop->vrf_id); + if (IS_ZEBRA_DEBUG_KERNEL) { + inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr, + sizeof(addrstr)); + zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, addrstr, label_buf, + nexthop->ifindex, VRF_LOGNAME(vrf), + nexthop->vrf_id); + } } /* @@ -1254,10 +1258,9 @@ static void _netlink_route_build_singlepath(const struct prefix *p, } if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath() (%s): %pFX nexthop via if %u vrf %s(%u)", - routedesc, p, nexthop->ifindex, - VRF_LOGNAME(vrf), nexthop->vrf_id); + zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)", + __func__, routedesc, p, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); } } @@ -1356,8 +1359,8 @@ _netlink_route_build_multipath(const struct prefix *p, const char *routedesc, if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug( - " 5549: netlink_route_build_multipath() (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", - routedesc, p, ipv4_ll_buf, label_buf, + "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", + __func__, routedesc, p, ipv4_ll_buf, label_buf, nexthop->ifindex, VRF_LOGNAME(vrf), nexthop->vrf_id); return; @@ -1374,11 +1377,10 @@ _netlink_route_build_multipath(const struct prefix *p, const char *routedesc, *src = &nexthop->src; if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath() (%s): %pFX nexthop via %s %s if %u vrf %s(%u)", - routedesc, p, inet_ntoa(nexthop->gate.ipv4), - label_buf, nexthop->ifindex, VRF_LOGNAME(vrf), - nexthop->vrf_id); + zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)", + __func__, routedesc, p, &nexthop->gate.ipv4, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); } if (nexthop->type == NEXTHOP_TYPE_IPV6 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) { @@ -1392,11 +1394,10 @@ _netlink_route_build_multipath(const struct prefix *p, const char *routedesc, *src = &nexthop->src; if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath() (%s): %pFX nexthop via %s %s if 
%u vrf %s(%u)", - routedesc, p, inet6_ntoa(nexthop->gate.ipv6), - label_buf, nexthop->ifindex, VRF_LOGNAME(vrf), - nexthop->vrf_id); + zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)", + __func__, routedesc, p, &nexthop->gate.ipv6, + label_buf, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); } /* @@ -1415,10 +1416,9 @@ _netlink_route_build_multipath(const struct prefix *p, const char *routedesc, *src = &nexthop->src; if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath() (%s): %pFX nexthop via if %u vrf %s(%u)", - routedesc, p, nexthop->ifindex, - VRF_LOGNAME(vrf), nexthop->vrf_id); + zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)", + __func__, routedesc, p, nexthop->ifindex, + VRF_LOGNAME(vrf), nexthop->vrf_id); } if (nexthop->weight) @@ -1457,37 +1457,6 @@ _netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc, rta, rtnh, rtmsg, src); } - -/* Log debug information for netlink_route_multipath - * if debug logging is enabled. - * - * @param cmd: Netlink command which is to be processed - * @param p: Prefix for which the change is due - * @param family: Address family which the change concerns - * @param zvrf: The vrf we are in - * @param tableid: The table we are working on - */ -static void _netlink_route_debug(int cmd, const struct prefix *p, - int family, vrf_id_t vrfid, - uint32_t tableid) -{ - if (IS_ZEBRA_DEBUG_KERNEL) { - char buf[PREFIX_STRLEN]; - zlog_debug( - "netlink_route_multipath(): %s %s vrf %s(%u) table_id: %u", - nl_msg_type_to_str(cmd), - prefix2str(p, buf, sizeof(buf)), vrf_id_to_name(vrfid), - vrfid, tableid); - } -} - -static void _netlink_nexthop_debug(int cmd, uint32_t id) -{ - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug("netlink_nexthop(): %s, id=%u", - nl_msg_type_to_str(cmd), id); -} - static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc) { if (IS_ZEBRA_DEBUG_KERNEL) @@ -1552,15 +1521,32 @@ static bool nexthop_set_src(const struct nexthop *nexthop, int family, return false; } +static void netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen, + struct nexthop *nh) +{ + struct rtattr *nest; + + switch (nh->nh_encap_type) { + case NET_VXLAN: + addattr_l(n, nlen, RTA_ENCAP_TYPE, &nh->nh_encap_type, + sizeof(uint16_t)); + + nest = addattr_nest(n, nlen, RTA_ENCAP); + addattr32(n, nlen, 0 /* VXLAN_VNI */, nh->nh_encap.vni); + addattr_nest_end(n, nest); + break; + } +} + /* * Routing table change via netlink interface, using a dataplane context object */ -static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) +ssize_t netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen, bool fpm) { int bytelen; struct nexthop *nexthop = NULL; unsigned int nexthop_num; - int family; const char *routedesc; bool setsrc = false; union g_addr src; @@ -1570,38 +1556,36 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) struct { struct nlmsghdr n; struct rtmsg r; - char buf[NL_PKT_BUF_SIZE]; - } req; + char buf[]; + } *req = (void *)data; p = dplane_ctx_get_dest(ctx); src_p = dplane_ctx_get_src(ctx); - family = PREFIX_FAMILY(p); + memset(req, 0, sizeof(*req)); - memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE); - - bytelen = (family == AF_INET ? 4 : 16); + bytelen = (p->family == AF_INET ? 
4 : 16); - req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; if ((cmd == RTM_NEWROUTE) && ((p->family == AF_INET) || v6_rr_semantics)) - req.n.nlmsg_flags |= NLM_F_REPLACE; + req->n.nlmsg_flags |= NLM_F_REPLACE; - req.n.nlmsg_type = cmd; + req->n.nlmsg_type = cmd; - req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; + req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; - req.r.rtm_family = family; - req.r.rtm_dst_len = p->prefixlen; - req.r.rtm_src_len = src_p ? src_p->prefixlen : 0; - req.r.rtm_scope = RT_SCOPE_UNIVERSE; + req->r.rtm_family = p->family; + req->r.rtm_dst_len = p->prefixlen; + req->r.rtm_src_len = src_p ? src_p->prefixlen : 0; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; if (cmd == RTM_DELROUTE) - req.r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx)); + req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx)); else - req.r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx)); + req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx)); /* * blackhole routes are not RTN_UNICAST, they are @@ -1612,12 +1596,11 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) * the RTM_DELROUTE case */ if (cmd != RTM_DELROUTE) - req.r.rtm_type = RTN_UNICAST; + req->r.rtm_type = RTN_UNICAST; - addattr_l(&req.n, sizeof(req), RTA_DST, &p->u.prefix, bytelen); + addattr_l(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen); if (src_p) - addattr_l(&req.n, sizeof(req), RTA_SRC, &src_p->u.prefix, - bytelen); + addattr_l(&req->n, datalen, RTA_SRC, &src_p->u.prefix, bytelen); /* Metric. */ /* Hardcode the metric for all routes coming from zebra. Metric isn't @@ -1626,7 +1609,7 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) * path(s) * by the routing protocol and for communicating with protocol peers. */ - addattr32(&req.n, sizeof(req), RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC); + addattr32(&req->n, datalen, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC); #if defined(SUPPORT_REALMS) { @@ -1638,19 +1621,23 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) tag = dplane_ctx_get_tag(ctx); if (tag > 0 && tag <= 255) - addattr32(&req.n, sizeof(req), RTA_FLOW, tag); + addattr32(&req->n, datalen, RTA_FLOW, tag); } #endif /* Table corresponding to this route. */ table_id = dplane_ctx_get_table(ctx); if (table_id < 256) - req.r.rtm_table = table_id; + req->r.rtm_table = table_id; else { - req.r.rtm_table = RT_TABLE_UNSPEC; - addattr32(&req.n, sizeof(req), RTA_TABLE, table_id); + req->r.rtm_table = RT_TABLE_UNSPEC; + addattr32(&req->n, datalen, RTA_TABLE, table_id); } - _netlink_route_debug(cmd, p, family, dplane_ctx_get_vrf(ctx), table_id); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug( + "%s: %s %pFX vrf %u(%u)", __func__, + nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx), + table_id); /* * If we are not updating the route and we have received @@ -1659,7 +1646,7 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) * it. 
*/ if (cmd == RTM_DELROUTE) - goto skip; + return req->n.nlmsg_len; if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) { char buf[NL_PKT_BUF_SIZE]; @@ -1673,7 +1660,7 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) rta->rta_len = RTA_LENGTH(0); rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof(mtu)); - addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta), + addattr_l(&req->n, datalen, RTA_METRICS, RTA_DATA(rta), RTA_PAYLOAD(rta)); } @@ -1683,7 +1670,8 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) zlog_debug( "netlink_route_multipath(): %pFX nhg_id is %u", p, dplane_ctx_get_nhe_id(ctx)); - addattr32(&req.n, sizeof(req), RTA_NH_ID, + + addattr32(&req->n, datalen, RTA_NH_ID, dplane_ctx_get_nhe_id(ctx)); /* Have to determine src still */ @@ -1691,18 +1679,19 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) if (setsrc) break; - setsrc = nexthop_set_src(nexthop, family, &src); + setsrc = nexthop_set_src(nexthop, p->family, &src); } if (setsrc) { - if (family == AF_INET) - addattr_l(&req.n, sizeof(req), RTA_PREFSRC, + if (p->family == AF_INET) + addattr_l(&req->n, datalen, RTA_PREFSRC, &src.ipv4, bytelen); - else if (family == AF_INET6) - addattr_l(&req.n, sizeof(req), RTA_PREFSRC, + else if (p->family == AF_INET6) + addattr_l(&req->n, datalen, RTA_PREFSRC, &src.ipv6, bytelen); } - goto skip; + + return req->n.nlmsg_len; } /* Count overall nexthops so we can decide whether to use singlepath @@ -1712,7 +1701,7 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; - if (cmd == RTM_NEWROUTE && !NEXTHOP_IS_ACTIVE(nexthop->flags)) + if (!NEXTHOP_IS_ACTIVE(nexthop->flags)) continue; nexthop_num++; @@ -1732,16 +1721,16 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { switch (nexthop->bh_type) { case BLACKHOLE_ADMINPROHIB: - req.r.rtm_type = RTN_PROHIBIT; + req->r.rtm_type = RTN_PROHIBIT; break; case BLACKHOLE_REJECT: - req.r.rtm_type = RTN_UNREACHABLE; + req->r.rtm_type = RTN_UNREACHABLE; break; default: - req.r.rtm_type = RTN_BLACKHOLE; + req->r.rtm_type = RTN_BLACKHOLE; break; } - goto skip; + return req->n.nlmsg_len; } if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) { @@ -1749,30 +1738,38 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) if (setsrc) continue; - setsrc = nexthop_set_src(nexthop, family, &src); - + setsrc = nexthop_set_src(nexthop, p->family, + &src); continue; } - if ((cmd == RTM_NEWROUTE - && NEXTHOP_IS_ACTIVE(nexthop->flags))) { + if (NEXTHOP_IS_ACTIVE(nexthop->flags)) { routedesc = nexthop->rparent ? "recursive, single-path" : "single-path"; _netlink_route_build_singlepath( - p, routedesc, bytelen, nexthop, &req.n, - &req.r, sizeof(req), cmd); + p, routedesc, bytelen, nexthop, &req->n, + &req->r, datalen, cmd); nexthop_num++; break; } + + /* + * Add encapsulation information when installing via + * FPM. 
+ */ + if (fpm) + netlink_route_nexthop_encap(&req->n, datalen, + nexthop); } + if (setsrc) { - if (family == AF_INET) - addattr_l(&req.n, sizeof(req), RTA_PREFSRC, + if (p->family == AF_INET) + addattr_l(&req->n, datalen, RTA_PREFSRC, &src.ipv4, bytelen); - else if (family == AF_INET6) - addattr_l(&req.n, sizeof(req), RTA_PREFSRC, + else if (p->family == AF_INET6) + addattr_l(&req->n, datalen, RTA_PREFSRC, &src.ipv6, bytelen); } } else { /* Multipath case */ @@ -1793,13 +1790,12 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) if (setsrc) continue; - setsrc = nexthop_set_src(nexthop, family, &src); - + setsrc = nexthop_set_src(nexthop, p->family, + &src); continue; } - if ((cmd == RTM_NEWROUTE - && NEXTHOP_IS_ACTIVE(nexthop->flags))) { + if (NEXTHOP_IS_ACTIVE(nexthop->flags)) { routedesc = nexthop->rparent ? "recursive, multipath" : "multipath"; @@ -1807,47 +1803,51 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) _netlink_route_build_multipath( p, routedesc, bytelen, nexthop, rta, - rtnh, &req.r, &src1); + rtnh, &req->r, &src1); rtnh = RTNH_NEXT(rtnh); if (!setsrc && src1) { - if (family == AF_INET) + if (p->family == AF_INET) src.ipv4 = src1->ipv4; - else if (family == AF_INET6) + else if (p->family == AF_INET6) src.ipv6 = src1->ipv6; setsrc = 1; } } + + /* + * Add encapsulation information when installing via + * FPM. + */ + if (fpm) + netlink_route_nexthop_encap(&req->n, datalen, + nexthop); } + if (setsrc) { - if (family == AF_INET) - addattr_l(&req.n, sizeof(req), RTA_PREFSRC, + if (p->family == AF_INET) + addattr_l(&req->n, datalen, RTA_PREFSRC, &src.ipv4, bytelen); - else if (family == AF_INET6) - addattr_l(&req.n, sizeof(req), RTA_PREFSRC, + else if (p->family == AF_INET6) + addattr_l(&req->n, datalen, RTA_PREFSRC, &src.ipv6, bytelen); if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug("Setting source"); } if (rta->rta_len > RTA_LENGTH(0)) - addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH, + addattr_l(&req->n, datalen, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta)); } /* If there is no useful nexthop then return. */ if (nexthop_num == 0) { if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug( - "netlink_route_multipath(): No useful nexthop."); - return 0; + zlog_debug("%s: No useful nexthop.", __func__); } -skip: - /* Talk to netlink socket. 
*/ - return netlink_talk_info(netlink_talk_filter, &req.n, - dplane_ctx_get_ns(ctx), 0); + return req->n.nlmsg_len; } int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) @@ -2106,7 +2106,9 @@ nexthop_done: return -1; } - _netlink_nexthop_debug(cmd, id); + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd), + id); return netlink_talk_info(netlink_talk_filter, &req.n, dplane_ctx_get_ns(ctx), 0); @@ -2152,6 +2154,7 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) int cmd, ret; const struct prefix *p = dplane_ctx_get_dest(ctx); struct nexthop *nexthop; + uint8_t nl_pkt[NL_PKT_BUF_SIZE]; if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) { cmd = RTM_DELROUTE; @@ -2172,9 +2175,14 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) * the kernel the old non-system route */ if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)) && - !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) - (void)netlink_route_multipath(RTM_DELROUTE, - ctx); + !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) { + netlink_route_multipath(RTM_DELROUTE, ctx, + nl_pkt, sizeof(nl_pkt), + false); + netlink_talk_info(netlink_talk_filter, + (struct nlmsghdr *)nl_pkt, + dplane_ctx_get_ns(ctx), 0); + } } else { /* * So v6 route replace semantics are not in @@ -2188,9 +2196,14 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) * of the route delete. If that happens yeah we're * screwed. */ - if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) - (void)netlink_route_multipath(RTM_DELROUTE, - ctx); + if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) { + netlink_route_multipath(RTM_DELROUTE, ctx, + nl_pkt, sizeof(nl_pkt), + false); + netlink_talk_info(netlink_talk_filter, + (struct nlmsghdr *)nl_pkt, + dplane_ctx_get_ns(ctx), 0); + } cmd = RTM_NEWROUTE; } @@ -2198,9 +2211,13 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) return ZEBRA_DPLANE_REQUEST_FAILURE; } - if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))) - ret = netlink_route_multipath(cmd, ctx); - else + if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))) { + netlink_route_multipath(cmd, ctx, nl_pkt, sizeof(nl_pkt), + false); + ret = netlink_talk_info(netlink_talk_filter, + (struct nlmsghdr *)nl_pkt, + dplane_ctx_get_ns(ctx), 0); + } else ret = 0; if ((cmd == RTM_NEWROUTE) && (ret == 0)) { /* Update installed nexthops to signal which have been @@ -2535,62 +2552,63 @@ int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, * @type: RTN_* route type * @flags: NTF_* flags * @state: NUD_* states + * @data: data buffer pointer + * @datalen: total amount of data buffer space * * Return: Result status */ -static int netlink_update_neigh_ctx_internal(const struct zebra_dplane_ctx *ctx, - int cmd, const struct ethaddr *mac, - const struct ipaddr *ip, - bool replace_obj, uint8_t family, - uint8_t type, uint8_t flags, - uint16_t state) +static ssize_t +netlink_update_neigh_ctx_internal(const struct zebra_dplane_ctx *ctx, + int cmd, const struct ethaddr *mac, + const struct ipaddr *ip, bool replace_obj, + uint8_t family, uint8_t type, uint8_t flags, + uint16_t state, void *data, size_t datalen) { uint8_t protocol = RTPROT_ZEBRA; struct { struct nlmsghdr n; struct ndmsg ndm; - char buf[256]; - } req; + char buf[]; + } *req = data; int ipa_len; enum dplane_op_e op; - memset(&req, 0, sizeof(req)); + memset(req, 0, datalen); op = dplane_ctx_get_op(ctx); - req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); - req.n.nlmsg_flags = NLM_F_REQUEST; + 
req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req->n.nlmsg_flags = NLM_F_REQUEST; if (cmd == RTM_NEWNEIGH) - req.n.nlmsg_flags |= + req->n.nlmsg_flags |= NLM_F_CREATE | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND); - req.n.nlmsg_type = cmd; - req.ndm.ndm_family = family; - req.ndm.ndm_type = type; - req.ndm.ndm_state = state; - req.ndm.ndm_flags = flags; - req.ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx); - - addattr_l(&req.n, sizeof(req), + req->n.nlmsg_type = cmd; + req->ndm.ndm_family = family; + req->ndm.ndm_type = type; + req->ndm.ndm_state = state; + req->ndm.ndm_flags = flags; + req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx); + + addattr_l(&req->n, sizeof(req), NDA_PROTOCOL, &protocol, sizeof(protocol)); if (mac) - addattr_l(&req.n, sizeof(req), NDA_LLADDR, mac, 6); + addattr_l(&req->n, datalen, NDA_LLADDR, mac, 6); ipa_len = IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN; - addattr_l(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len); + addattr_l(&req->n, datalen, NDA_DST, &ip->ip.addr, ipa_len); if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) { vlanid_t vid = dplane_ctx_mac_get_vlan(ctx); if (vid > 0) - addattr16(&req.n, sizeof(req), NDA_VLAN, vid); + addattr16(&req->n, datalen, NDA_VLAN, vid); - addattr32(&req.n, sizeof(req), NDA_MASTER, + addattr32(&req->n, datalen, NDA_MASTER, dplane_ctx_mac_get_br_ifindex(ctx)); } - return netlink_talk_info(netlink_talk_filter, &req.n, - dplane_ctx_get_ns(ctx), 0); + return NLMSG_ALIGN(req->n.nlmsg_len); } /* @@ -2601,10 +2619,16 @@ static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd) { struct ethaddr dst_mac = {.octet = {0}}; + uint8_t nl_pkt[NL_PKT_BUF_SIZE]; - return netlink_update_neigh_ctx_internal( + netlink_update_neigh_ctx_internal( ctx, cmd, &dst_mac, dplane_ctx_neigh_get_ipaddr(ctx), false, - PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT)); + PF_BRIDGE, 0, NTF_SELF, (NUD_NOARP | NUD_PERMANENT), nl_pkt, + sizeof(nl_pkt)); + + return netlink_talk_info(netlink_talk_filter, + (struct nlmsghdr *)nl_pkt, + dplane_ctx_get_ns(ctx), 0); } #ifndef NDA_RTA @@ -2930,12 +2954,20 @@ int netlink_macfdb_read_specific_mac(struct zebra_ns *zns, /* * Netlink-specific handler for MAC updates using dataplane context object. */ -static int netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, int cmd) +ssize_t +netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data, + size_t datalen) { struct ipaddr vtep_ip; vlanid_t vid; + ssize_t total; + int cmd; uint8_t flags; uint16_t state; + uint8_t nl_pkt[NL_PKT_BUF_SIZE]; + + cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL + ? 
RTM_NEWNEIGH : RTM_DELNEIGH; flags = (NTF_SELF | NTF_MASTER); state = NUD_REACHABLE; @@ -2970,10 +3002,12 @@ static int netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, int cmd) ipaddr2str(&vtep_ip, ipbuf, sizeof(ipbuf))); } - return netlink_update_neigh_ctx_internal( - ctx, cmd, dplane_ctx_mac_get_addr(ctx), - dplane_ctx_neigh_get_ipaddr(ctx), true, AF_BRIDGE, 0, flags, - state); + total = netlink_update_neigh_ctx_internal( + ctx, cmd, dplane_ctx_mac_get_addr(ctx), + dplane_ctx_neigh_get_ipaddr(ctx), true, AF_BRIDGE, 0, + flags, state, nl_pkt, sizeof(nl_pkt)); + + return total; } /* @@ -3362,6 +3396,7 @@ static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, uint8_t flags; uint16_t state; uint8_t family; + uint8_t nl_pkt[NL_PKT_BUF_SIZE]; ip = dplane_ctx_neigh_get_ipaddr(ctx); mac = dplane_ctx_neigh_get_mac(ctx); @@ -3386,8 +3421,12 @@ static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, flags, state); } - return netlink_update_neigh_ctx_internal( - ctx, cmd, mac, ip, true, family, RTN_UNICAST, flags, state); + netlink_update_neigh_ctx_internal( + ctx, cmd, mac, ip, true, family, RTN_UNICAST, flags, + state, nl_pkt, sizeof(nl_pkt)); + + return netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt, + dplane_ctx_get_ns(ctx), 0); } /* @@ -3395,13 +3434,18 @@ static int netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, */ enum zebra_dplane_result kernel_mac_update_ctx(struct zebra_dplane_ctx *ctx) { - int cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL - ? RTM_NEWNEIGH - : RTM_DELNEIGH; - int ret = netlink_macfdb_update_ctx(ctx, cmd); + uint8_t nl_pkt[NL_PKT_BUF_SIZE]; + ssize_t rv; - return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS - : ZEBRA_DPLANE_REQUEST_FAILURE); + rv = netlink_macfdb_update_ctx(ctx, nl_pkt, sizeof(nl_pkt)); + if (rv <= 0) + return ZEBRA_DPLANE_REQUEST_FAILURE; + + rv = netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt, + dplane_ctx_get_ns(ctx), 0); + + return rv == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; } enum zebra_dplane_result kernel_neigh_update_ctx(struct zebra_dplane_ctx *ctx) diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h index 2b4b145149..d6a993e78a 100644 --- a/zebra/rt_netlink.h +++ b/zebra/rt_netlink.h @@ -66,6 +66,12 @@ void rt_netlink_init(void); /* MPLS label forwarding table change, using dataplane context information. 
*/ extern int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx); +extern ssize_t netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen, + bool fpm); +extern ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, + uint8_t *data, size_t datalen); + extern int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); extern int netlink_route_read(struct zebra_ns *zns); diff --git a/zebra/subdir.am b/zebra/subdir.am index f281afce94..71094cb52c 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -199,3 +199,13 @@ nodist_zebra_zebra_SOURCES = \ zebra_zebra_cumulus_mlag_la_SOURCES = zebra/zebra_mlag_private.c zebra_zebra_cumulus_mlag_la_LDFLAGS = -avoid-version -module -shared -export-dynamic + +if LINUX +module_LTLIBRARIES += zebra/dplane_fpm_nl.la + +zebra_dplane_fpm_nl_la_SOURCES = zebra/dplane_fpm_nl.c +zebra_dplane_fpm_nl_la_LDFLAGS = -avoid-version -module -shared -export-dynamic +zebra_dplane_fpm_nl_la_LIBADD = + +vtysh_scan += $(top_srcdir)/zebra/dplane_fpm_nl.c +endif diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index a2365ee76b..abbd136948 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -32,6 +32,7 @@ #include "zebra/zebra_memory.h" #include "zebra/zebra_router.h" #include "zebra/zebra_dplane.h" +#include "zebra/zebra_vxlan_private.h" #include "zebra/rt.h" #include "zebra/debug.h" @@ -178,7 +179,6 @@ struct dplane_mac_info { struct ethaddr mac; struct in_addr vtep_ip; bool is_sticky; - }; /* @@ -401,7 +401,7 @@ static enum zebra_dplane_result pw_update_internal(struct zebra_pw *pw, static enum zebra_dplane_result intf_addr_update_internal( const struct interface *ifp, const struct connected *ifc, enum dplane_op_e op); -static enum zebra_dplane_result mac_update_internal( +static enum zebra_dplane_result mac_update_common( enum dplane_op_e op, const struct interface *ifp, const struct interface *br_ifp, vlanid_t vid, const struct ethaddr *mac, @@ -445,23 +445,15 @@ void dplane_enable_sys_route_notifs(void) } /* - * Free a dataplane results context. + * Clean up dependent/internal allocations inside a context object */ -static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) +static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx) { - if (pctx == NULL) - return; - - DPLANE_CTX_VALID(*pctx); - - /* TODO -- just freeing memory, but would like to maintain - * a pool - */ - - /* Some internal allocations may need to be freed, depending on + /* + * Some internal allocations may need to be freed, depending on * the type of info captured in the ctx. 
*/ - switch ((*pctx)->zd_op) { + switch (ctx->zd_op) { case DPLANE_OP_ROUTE_INSTALL: case DPLANE_OP_ROUTE_UPDATE: case DPLANE_OP_ROUTE_DELETE: @@ -470,33 +462,33 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) case DPLANE_OP_ROUTE_NOTIFY: /* Free allocated nexthops */ - if ((*pctx)->u.rinfo.zd_ng.nexthop) { + if (ctx->u.rinfo.zd_ng.nexthop) { /* This deals with recursive nexthops too */ - nexthops_free((*pctx)->u.rinfo.zd_ng.nexthop); + nexthops_free(ctx->u.rinfo.zd_ng.nexthop); - (*pctx)->u.rinfo.zd_ng.nexthop = NULL; + ctx->u.rinfo.zd_ng.nexthop = NULL; } /* Free backup info also (if present) */ - if ((*pctx)->u.rinfo.backup_ng.nexthop) { + if (ctx->u.rinfo.backup_ng.nexthop) { /* This deals with recursive nexthops too */ - nexthops_free((*pctx)->u.rinfo.backup_ng.nexthop); + nexthops_free(ctx->u.rinfo.backup_ng.nexthop); - (*pctx)->u.rinfo.backup_ng.nexthop = NULL; + ctx->u.rinfo.backup_ng.nexthop = NULL; } - if ((*pctx)->u.rinfo.zd_old_ng.nexthop) { + if (ctx->u.rinfo.zd_old_ng.nexthop) { /* This deals with recursive nexthops too */ - nexthops_free((*pctx)->u.rinfo.zd_old_ng.nexthop); + nexthops_free(ctx->u.rinfo.zd_old_ng.nexthop); - (*pctx)->u.rinfo.zd_old_ng.nexthop = NULL; + ctx->u.rinfo.zd_old_ng.nexthop = NULL; } - if ((*pctx)->u.rinfo.old_backup_ng.nexthop) { + if (ctx->u.rinfo.old_backup_ng.nexthop) { /* This deals with recursive nexthops too */ - nexthops_free((*pctx)->u.rinfo.old_backup_ng.nexthop); + nexthops_free(ctx->u.rinfo.old_backup_ng.nexthop); - (*pctx)->u.rinfo.old_backup_ng.nexthop = NULL; + ctx->u.rinfo.old_backup_ng.nexthop = NULL; } break; @@ -504,11 +496,11 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) case DPLANE_OP_NH_INSTALL: case DPLANE_OP_NH_UPDATE: case DPLANE_OP_NH_DELETE: { - if ((*pctx)->u.rinfo.nhe.ng.nexthop) { + if (ctx->u.rinfo.nhe.ng.nexthop) { /* This deals with recursive nexthops too */ - nexthops_free((*pctx)->u.rinfo.nhe.ng.nexthop); + nexthops_free(ctx->u.rinfo.nhe.ng.nexthop); - (*pctx)->u.rinfo.nhe.ng.nexthop = NULL; + ctx->u.rinfo.nhe.ng.nexthop = NULL; } break; } @@ -521,7 +513,7 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) zebra_nhlfe_t *nhlfe, *next; /* Free allocated NHLFEs */ - for (nhlfe = (*pctx)->u.lsp.nhlfe_list; nhlfe; nhlfe = next) { + for (nhlfe = ctx->u.lsp.nhlfe_list; nhlfe; nhlfe = next) { next = nhlfe->next; zebra_mpls_nhlfe_del(nhlfe); @@ -530,8 +522,8 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) /* Clear pointers in lsp struct, in case we're cacheing * free context structs. 
*/ - (*pctx)->u.lsp.nhlfe_list = NULL; - (*pctx)->u.lsp.best_nhlfe = NULL; + ctx->u.lsp.nhlfe_list = NULL; + ctx->u.lsp.best_nhlfe = NULL; break; } @@ -539,21 +531,21 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) case DPLANE_OP_PW_INSTALL: case DPLANE_OP_PW_UNINSTALL: /* Free allocated nexthops */ - if ((*pctx)->u.pw.nhg.nexthop) { + if (ctx->u.pw.nhg.nexthop) { /* This deals with recursive nexthops too */ - nexthops_free((*pctx)->u.pw.nhg.nexthop); + nexthops_free(ctx->u.pw.nhg.nexthop); - (*pctx)->u.pw.nhg.nexthop = NULL; + ctx->u.pw.nhg.nexthop = NULL; } break; case DPLANE_OP_ADDR_INSTALL: case DPLANE_OP_ADDR_UNINSTALL: /* Maybe free label string, if allocated */ - if ((*pctx)->u.intf.label != NULL && - (*pctx)->u.intf.label != (*pctx)->u.intf.label_buf) { - free((*pctx)->u.intf.label); - (*pctx)->u.intf.label = NULL; + if (ctx->u.intf.label != NULL && + ctx->u.intf.label != ctx->u.intf.label_buf) { + free(ctx->u.intf.label); + ctx->u.intf.label = NULL; } break; @@ -567,11 +559,41 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) case DPLANE_OP_NONE: break; } +} + +/* + * Free a dataplane results context. + */ +static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) +{ + if (pctx == NULL) + return; + + DPLANE_CTX_VALID(*pctx); + + /* TODO -- just freeing memory, but would like to maintain + * a pool + */ + + /* Some internal allocations may need to be freed, depending on + * the type of info captured in the ctx. + */ + dplane_ctx_free_internal(*pctx); XFREE(MTYPE_DP_CTX, *pctx); } /* + * Reset an allocated context object for re-use. All internal allocations are + * freed and the context is memset. + */ +void dplane_ctx_reset(struct zebra_dplane_ctx *ctx) +{ + dplane_ctx_free_internal(ctx); + memset(ctx, 0, sizeof(*ctx)); +} + +/* * Return a context block to the dplane module after processing */ void dplane_ctx_fini(struct zebra_dplane_ctx **pctx) @@ -1503,10 +1525,8 @@ static int dplane_ctx_ns_init(struct zebra_dplane_ctx *ctx, /* * Initialize a context block for a route update from zebra data structs. */ -static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, - enum dplane_op_e op, - struct route_node *rn, - struct route_entry *re) +int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct route_node *rn, struct route_entry *re) { int ret = EINVAL; const struct route_table *table = NULL; @@ -1515,6 +1535,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, struct zebra_ns *zns; struct zebra_vrf *zvrf; struct nexthop *nexthop; + zebra_l3vni_t *zl3vni; if (!ctx || !rn || !re) goto done; @@ -1564,10 +1585,24 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, re->nhe->backup_info->nhe->nhg.nexthop, NULL); } - /* Ensure that the dplane nexthops' flags are clear. */ - for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) + /* + * Ensure that the dplane nexthops' flags are clear and copy + * encapsulation information. + */ + for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) { UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + /* Check for available encapsulations. 
*/ + if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) + continue; + + zl3vni = zl3vni_from_vrf(nexthop->vrf_id); + if (zl3vni && is_l3vni_oper_up(zl3vni)) { + nexthop->nh_encap_type = NET_VXLAN; + nexthop->nh_encap.vni = zl3vni->vni; + } + } + /* Don't need some info when capturing a system notification */ if (op == DPLANE_OP_SYS_ROUTE_ADD || op == DPLANE_OP_SYS_ROUTE_DELETE) { @@ -2470,8 +2505,8 @@ enum zebra_dplane_result dplane_mac_add(const struct interface *ifp, enum zebra_dplane_result result; /* Use common helper api */ - result = mac_update_internal(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp, - vid, mac, vtep_ip, sticky); + result = mac_update_common(DPLANE_OP_MAC_INSTALL, ifp, bridge_ifp, + vid, mac, vtep_ip, sticky); return result; } @@ -2487,41 +2522,25 @@ enum zebra_dplane_result dplane_mac_del(const struct interface *ifp, enum zebra_dplane_result result; /* Use common helper api */ - result = mac_update_internal(DPLANE_OP_MAC_DELETE, ifp, bridge_ifp, - vid, mac, vtep_ip, false); + result = mac_update_common(DPLANE_OP_MAC_DELETE, ifp, bridge_ifp, + vid, mac, vtep_ip, false); return result; } /* - * Common helper api for MAC address/vxlan updates + * Public api to init an empty context - either newly-allocated or + * reset/cleared - for a MAC update. */ -static enum zebra_dplane_result -mac_update_internal(enum dplane_op_e op, - const struct interface *ifp, - const struct interface *br_ifp, - vlanid_t vid, - const struct ethaddr *mac, - struct in_addr vtep_ip, - bool sticky) +void dplane_mac_init(struct zebra_dplane_ctx *ctx, + const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky) { - enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; - int ret; - struct zebra_dplane_ctx *ctx = NULL; struct zebra_ns *zns; - if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { - char buf1[ETHER_ADDR_STRLEN], buf2[PREFIX_STRLEN]; - - zlog_debug("init mac ctx %s: mac %s, ifp %s, vtep %s", - dplane_op2str(op), - prefix_mac2str(mac, buf1, sizeof(buf1)), - ifp->name, - inet_ntop(AF_INET, &vtep_ip, buf2, sizeof(buf2))); - } - - ctx = dplane_ctx_alloc(); - - ctx->zd_op = op; ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; ctx->zd_vrf_id = ifp->vrf_id; @@ -2539,6 +2558,39 @@ mac_update_internal(enum dplane_op_e op, ctx->u.macinfo.mac = *mac; ctx->u.macinfo.vid = vid; ctx->u.macinfo.is_sticky = sticky; +} + +/* + * Common helper api for MAC address/vxlan updates + */ +static enum zebra_dplane_result +mac_update_common(enum dplane_op_e op, + const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret; + struct zebra_dplane_ctx *ctx = NULL; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + char buf1[ETHER_ADDR_STRLEN], buf2[PREFIX_STRLEN]; + + zlog_debug("init mac ctx %s: mac %s, ifp %s, vtep %s", + dplane_op2str(op), + prefix_mac2str(mac, buf1, sizeof(buf1)), + ifp->name, + inet_ntop(AF_INET, &vtep_ip, buf2, sizeof(buf2))); + } + + ctx = dplane_ctx_alloc(); + ctx->zd_op = op; + + /* Common init for the ctx */ + dplane_mac_init(ctx, ifp, br_ifp, vid, mac, vtep_ip, sticky); /* Enqueue for processing on the dplane pthread */ ret = dplane_update_enqueue(ctx); diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 9ce4df197c..f01ca2e84c 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -180,6 +180,12 @@ TAILQ_HEAD(dplane_ctx_q, zebra_dplane_ctx); 
/* Allocate a context object */ struct zebra_dplane_ctx *dplane_ctx_alloc(void); +/* + * Reset an allocated context object for re-use. All internal allocations are + * freed. + */ +void dplane_ctx_reset(struct zebra_dplane_ctx *ctx); + /* Return a dataplane results context block after use; the caller's pointer will * be cleared. */ @@ -438,6 +444,12 @@ enum zebra_dplane_result dplane_intf_addr_unset(const struct interface *ifp, /* * Enqueue evpn mac operations for the dataplane. */ +extern struct zebra_dplane_ctx *mac_update_internal( + enum dplane_op_e op, const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, const struct ethaddr *mac, + struct in_addr vtep_ip, bool sticky); + enum zebra_dplane_result dplane_mac_add(const struct interface *ifp, const struct interface *bridge_ifp, vlanid_t vid, @@ -451,6 +463,15 @@ enum zebra_dplane_result dplane_mac_del(const struct interface *ifp, const struct ethaddr *mac, struct in_addr vtep_ip); +/* Helper api to init an empty or new context for a MAC update */ +void dplane_mac_init(struct zebra_dplane_ctx *ctx, + const struct interface *ifp, + const struct interface *br_ifp, + vlanid_t vid, + const struct ethaddr *mac, + struct in_addr vtep_ip, + bool sticky); + /* * Enqueue evpn neighbor updates for the dataplane. */ @@ -474,6 +495,9 @@ enum zebra_dplane_result dplane_vtep_delete(const struct interface *ifp, const struct in_addr *ip, vni_t vni); +/* Encode route information into data plane context. */ +int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct route_node *rn, struct route_entry *re); /* Retrieve the limit on the number of pending, unprocessed updates. */ uint32_t dplane_get_in_queue_limit(void); diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index 100bb0e093..0a46fb2075 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -301,6 +301,7 @@ struct zebra_mac_t_ { /* remote VTEP advertised MAC as default GW */ #define ZEBRA_MAC_REMOTE_DEF_GW 0x40 #define ZEBRA_MAC_DUPLICATE 0x80 +#define ZEBRA_MAC_FPM_SENT 0x100 /* whether or not this entry was sent. */ /* back pointer to zvni */ zebra_vni_t *zvni; |
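Note on reusing the exported encoders: netlink_route_multipath() and netlink_macfdb_update_ctx() now write a complete netlink message into a caller-supplied buffer instead of sending it to the kernel, and dplane_ctx_reset(), dplane_ctx_route_init() and dplane_mac_init() let a provider rebuild a context object in place. A minimal sketch of how an out-of-tree FPM-style provider might combine these calls is shown below; the helper name fpm_encode_route(), the include list, and the error handling are illustrative assumptions, not code from this change.

#include <sys/types.h>
#include <linux/rtnetlink.h>	/* RTM_NEWROUTE */

#include "zebra/rib.h"
#include "zebra/rt_netlink.h"
#include "zebra/zebra_dplane.h"

/* Encode one route update into 'buf' for later transmission to an
 * external FPM server.  Returns the encoded length, or <= 0 on error.
 */
static ssize_t fpm_encode_route(struct zebra_dplane_ctx *ctx,
				struct route_node *rn,
				struct route_entry *re,
				uint8_t *buf, size_t buflen)
{
	/* Re-use one context object across a RIB walk: free any previous
	 * internal allocations, then refill it from the zebra structs.
	 */
	dplane_ctx_reset(ctx);
	if (dplane_ctx_route_init(ctx, DPLANE_OP_ROUTE_INSTALL, rn, re) != 0)
		return -1;

	/* The final 'true' selects the FPM variant of the encoding rather
	 * than the plain kernel netlink one.
	 */
	return netlink_route_multipath(RTM_NEWROUTE, ctx, buf, buflen, true);
}

MAC/RMAC updates would follow the same pattern: reset the context, populate it with dplane_mac_init(), and let netlink_macfdb_update_ctx() encode the RTM_NEWNEIGH/RTM_DELNEIGH message into the provided buffer; the new ZEBRA_MAC_FPM_SENT flag can then mark which entries have already been sent during a walk.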