diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/command.c | 3 | ||||
| -rw-r--r-- | lib/command.h | 1 | ||||
| -rw-r--r-- | lib/monotime.h | 14 | ||||
| -rw-r--r-- | lib/pullwr.c | 275 | ||||
| -rw-r--r-- | lib/pullwr.h | 110 | ||||
| -rw-r--r-- | lib/subdir.am | 2 |
6 files changed, 405 insertions, 0 deletions
diff --git a/lib/command.c b/lib/command.c index 9dabc2af7e..d4c35a80d5 100644 --- a/lib/command.c +++ b/lib/command.c @@ -151,6 +151,7 @@ const char *node_names[] = { "bfd peer", /* BFD_PEER_NODE */ "openfabric", // OPENFABRIC_NODE "vrrp", /* VRRP_NODE */ + "bmp", /* BMP_NODE */ }; /* clang-format on */ @@ -975,6 +976,7 @@ enum node_type node_parent(enum node_type node) case BGP_IPV6M_NODE: case BGP_EVPN_NODE: case BGP_IPV6L_NODE: + case BMP_NODE: ret = BGP_NODE; break; case BGP_EVPN_VNI_NODE: @@ -1491,6 +1493,7 @@ void cmd_exit(struct vty *vty) case BGP_IPV6M_NODE: case BGP_EVPN_NODE: case BGP_IPV6L_NODE: + case BMP_NODE: vty->node = BGP_NODE; break; case BGP_EVPN_VNI_NODE: diff --git a/lib/command.h b/lib/command.h index 8dc35a0fdc..137d3748ae 100644 --- a/lib/command.h +++ b/lib/command.h @@ -159,6 +159,7 @@ enum node_type { BFD_PEER_NODE, /* BFD peer configuration mode. */ OPENFABRIC_NODE, /* OpenFabric router configuration node */ VRRP_NODE, /* VRRP node */ + BMP_NODE, /* BMP config under router bgp */ NODE_TYPE_MAX, /* maximum */ }; diff --git a/lib/monotime.h b/lib/monotime.h index ca27c45dc6..e246f177de 100644 --- a/lib/monotime.h +++ b/lib/monotime.h @@ -84,6 +84,20 @@ static inline int64_t monotime_until(const struct timeval *ref, return (int64_t)tv.tv_sec * 1000000LL + tv.tv_usec; } +static inline time_t monotime_to_realtime(const struct timeval *mono, + struct timeval *realout) +{ + struct timeval delta, real; + + monotime_since(mono, &delta); + gettimeofday(&real, NULL); + + timersub(&real, &delta, &real); + if (realout) + *realout = real; + return real.tv_sec; +} + /* Char buffer size for time-to-string api */ #define MONOTIME_STRLEN 32 diff --git a/lib/pullwr.c b/lib/pullwr.c new file mode 100644 index 0000000000..0c326f29d4 --- /dev/null +++ b/lib/pullwr.c @@ -0,0 +1,275 @@ +/* + * Pull-driven write event handler + * Copyright (C) 2019 David Lamparter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "zebra.h" + +#include "pullwr.h" +#include "memory.h" +#include "monotime.h" + +/* defaults */ +#define PULLWR_THRESH 16384 /* size at which we start to call write() */ +#define PULLWR_MAXSPIN 2500 /* max µs to spend grabbing more data */ + +struct pullwr { + int fd; + struct thread_master *tm; + /* writer == NULL <=> we're idle */ + struct thread *writer; + + void *arg; + void (*fill)(void *, struct pullwr *); + void (*err)(void *, struct pullwr *, bool); + + /* ring buffer (although it's "un-ringed" on resizing, it WILL wrap + * around if data is trickling in while keeping it at a constant size) + */ + size_t bufsz, valid, pos; + uint64_t total_written; + char *buffer; + + size_t thresh; /* PULLWR_THRESH */ + int64_t maxspin; /* PULLWR_MAXSPIN */ +}; + +DEFINE_MTYPE_STATIC(LIB, PULLWR_HEAD, "pull-driven write controller") +DEFINE_MTYPE_STATIC(LIB, PULLWR_BUF, "pull-driven write buffer") + +static int pullwr_run(struct thread *t); + +struct pullwr *_pullwr_new(struct thread_master *tm, int fd, + void *arg, + void (*fill)(void *, struct pullwr *), + void (*err)(void *, struct pullwr *, bool)) +{ + struct pullwr *pullwr; + + pullwr = XCALLOC(MTYPE_PULLWR_HEAD, sizeof(*pullwr)); + pullwr->fd = fd; + pullwr->tm = tm; + pullwr->arg = arg; + pullwr->fill = fill; + pullwr->err = err; + + pullwr->thresh = PULLWR_THRESH; + pullwr->maxspin = PULLWR_MAXSPIN; + + return pullwr; +} + +void pullwr_del(struct pullwr *pullwr) +{ + THREAD_OFF(pullwr->writer); + + XFREE(MTYPE_PULLWR_BUF, pullwr->buffer); + XFREE(MTYPE_PULLWR_HEAD, pullwr); +} + +void pullwr_cfg(struct pullwr *pullwr, int64_t max_spin_usec, + size_t write_threshold) +{ + pullwr->maxspin = max_spin_usec ?: PULLWR_MAXSPIN; + pullwr->thresh = write_threshold ?: PULLWR_THRESH; +} + +void pullwr_bump(struct pullwr *pullwr) +{ + if (pullwr->writer) + return; + + thread_add_timer(pullwr->tm, pullwr_run, pullwr, 0, &pullwr->writer); +} + +static size_t pullwr_iov(struct pullwr *pullwr, struct iovec *iov) +{ + size_t len1; + + if (pullwr->valid == 0) + return 0; + + if (pullwr->pos + pullwr->valid <= pullwr->bufsz) { + iov[0].iov_base = pullwr->buffer + pullwr->pos; + iov[0].iov_len = pullwr->valid; + return 1; + } + + len1 = pullwr->bufsz - pullwr->pos; + + iov[0].iov_base = pullwr->buffer + pullwr->pos; + iov[0].iov_len = len1; + iov[1].iov_base = pullwr->buffer; + iov[1].iov_len = pullwr->valid - len1; + return 2; +} + +static void pullwr_resize(struct pullwr *pullwr, size_t need) +{ + struct iovec iov[2]; + size_t niov, newsize; + char *newbuf; + + /* the buffer is maintained at pullwr->thresh * 2 since we'll be + * trying to fill it as long as it's anywhere below pullwr->thresh. + * That means we frequently end up a little short of it and then write + * something that goes over the threshold. So, just use double. + */ + if (need) { + /* resize up */ + if (pullwr->bufsz - pullwr->valid >= need) + return; + + newsize = MAX((pullwr->valid + need) * 2, pullwr->thresh * 2); + newbuf = XMALLOC(MTYPE_PULLWR_BUF, newsize); + } else if (!pullwr->valid) { + /* resize down, buffer empty */ + newsize = 0; + newbuf = NULL; + } else { + /* resize down */ + if (pullwr->bufsz - pullwr->valid < pullwr->thresh) + return; + newsize = MAX(pullwr->valid, pullwr->thresh * 2); + newbuf = XMALLOC(MTYPE_PULLWR_BUF, newsize); + } + + niov = pullwr_iov(pullwr, iov); + if (niov >= 1) { + memcpy(newbuf, iov[0].iov_base, iov[0].iov_len); + if (niov >= 2) + memcpy(newbuf + iov[0].iov_len, + iov[1].iov_base, iov[1].iov_len); + } + + XFREE(MTYPE_PULLWR_BUF, pullwr->buffer); + pullwr->buffer = newbuf; + pullwr->bufsz = newsize; + pullwr->pos = 0; +} + +void pullwr_write(struct pullwr *pullwr, const void *data, size_t len) +{ + pullwr_resize(pullwr, len); + + if (pullwr->pos + pullwr->valid > pullwr->bufsz) { + size_t pos; + + pos = (pullwr->pos + pullwr->valid) % pullwr->bufsz; + memcpy(pullwr->buffer + pos, data, len); + } else { + size_t max1, len1; + max1 = pullwr->bufsz - (pullwr->pos + pullwr->valid); + max1 = MIN(max1, len); + + memcpy(pullwr->buffer + pullwr->pos + pullwr->valid, + data, max1); + len1 = len - max1; + + if (len1) + memcpy(pullwr->buffer, (char *)data + max1, len1); + + } + pullwr->valid += len; + + pullwr_bump(pullwr); +} + +static int pullwr_run(struct thread *t) +{ + struct pullwr *pullwr = THREAD_ARG(t); + struct iovec iov[2]; + size_t niov, lastvalid; + ssize_t nwr; + struct timeval t0; + bool maxspun = false; + + monotime(&t0); + + do { + lastvalid = pullwr->valid - 1; + while (pullwr->valid < pullwr->thresh + && pullwr->valid != lastvalid + && !maxspun) { + lastvalid = pullwr->valid; + pullwr->fill(pullwr->arg, pullwr); + + /* check after doing at least one fill() call so we + * don't spin without making progress on slow boxes + */ + if (!maxspun && monotime_since(&t0, NULL) + >= pullwr->maxspin) + maxspun = true; + } + + if (pullwr->valid == 0) { + /* we made a fill() call above that didn't feed any + * data in, and we have nothing more queued, so we go + * into idle, i.e. no calling thread_add_write() + */ + pullwr_resize(pullwr, 0); + return 0; + } + + niov = pullwr_iov(pullwr, iov); + assert(niov); + + nwr = writev(pullwr->fd, iov, niov); + if (nwr < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + pullwr->err(pullwr->arg, pullwr, false); + return 0; + } + + if (nwr == 0) { + pullwr->err(pullwr->arg, pullwr, true); + return 0; + } + + pullwr->total_written += nwr; + pullwr->valid -= nwr; + pullwr->pos += nwr; + pullwr->pos %= pullwr->bufsz; + } while (pullwr->valid == 0 && !maxspun); + /* pullwr->valid != 0 implies we did an incomplete write, i.e. socket + * is full and we go wait until it's available for writing again. + */ + + thread_add_write(pullwr->tm, pullwr_run, pullwr, pullwr->fd, + &pullwr->writer); + + /* if we hit the time limit, just keep the buffer, we'll probably need + * it anyway & another run is already coming up. + */ + if (!maxspun) + pullwr_resize(pullwr, 0); + return 0; +} + +void pullwr_stats(struct pullwr *pullwr, uint64_t *total_written, + size_t *pending, size_t *kernel_pending) +{ + int tmp; + + *total_written = pullwr->total_written; + *pending = pullwr->valid; + + if (ioctl(pullwr->fd, TIOCOUTQ, &tmp) != 0) + tmp = 0; + *kernel_pending = tmp; +} diff --git a/lib/pullwr.h b/lib/pullwr.h new file mode 100644 index 0000000000..601eac1b79 --- /dev/null +++ b/lib/pullwr.h @@ -0,0 +1,110 @@ +/* + * Pull-driven write event handler + * Copyright (C) 2019 David Lamparter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _WRITEPOLL_H +#define _WRITEPOLL_H + +#include <stdbool.h> +#include <stdint.h> + +#include "thread.h" +#include "stream.h" + +struct pullwr; + +/* This is a "pull-driven" write event handler. Instead of having some buffer + * or being driven by the availability of data, it triggers on the space being + * available on the socket for data to be written on and then calls fill() to + * get data to be sent. + * + * pullwr_* maintains an "idle" vs. "active" state, going into idle when a + * fill() call completes without feeing more data into it. The overall + * semantics are: + * - to put data out, call pullwr_write(). This is possible from both inside + * fill() callbacks or anywhere else. Doing so puts the pullwr into + * active state. + * - in active state, the fill() callback will be called and should feed more + * data in. It should NOT loop to push out more than one "unit" of data; + * the pullwr code handles this by calling fill() until it has enough data. + * - if there's nothing more to be sent, fill() returns without doing anything + * and pullwr goes into idle state after flushing all buffered data out. + * - when new data becomes available, pullwr_bump() should be called to put + * the pullwr back into active mode so it will collect data from fill(), + * or you can directly call pullwr_write(). + * - only calling pullwr_write() from within fill() is the cleanest way of + * doing things. + * + * When the err() callback is called, the pullwr should be considered unusable + * and released with pullwr_del(). This can be done from inside the callback, + * the pullwr code holds no more references on it when calling err(). + */ +extern struct pullwr *_pullwr_new(struct thread_master *tm, int fd, + void *arg, + void (*fill)(void *, struct pullwr *), + void (*err)(void *, struct pullwr *, bool eof)); +extern void pullwr_del(struct pullwr *pullwr); + +/* type-checking wrapper. makes sure fill() and err() take a first argument + * whose type is identical to the type of arg. + * => use "void fill(struct mystruct *arg, ...)" - no "void *arg" + */ +#define pullwr_new(tm, fd, arg, fill, err) ({ \ + void (*fill_typechk)(typeof(arg), struct pullwr *) = fill; \ + void (*err_typechk)(typeof(arg), struct pullwr *, bool) = err; \ + _pullwr_new(tm, fd, arg, (void *)fill_typechk, (void *)err_typechk); \ +}) + +/* max_spin_usec is the time after which the pullwr event handler will stop + * trying to get more data from fill() and yield control back to the + * thread_master. It does reschedule itself to continue later; this is + * only to make sure we don't freeze the entire process if we're piping a + * lot of data to a local endpoint that reads quickly (i.e. no backpressure) + * + * default: 2500 (2.5 ms) + * + * write_threshold is the amount of data buffered from fill() calls at which + * the pullwr code starts calling write(). But this is not a "limit". + * pullwr will keep poking fill() for more data until + * (a) max_spin_usec is reached; fill() will be called again later after + * returning to the thread_master to give other events a chance to run + * (b) fill() returns without pushing any data onto the pullwr with + * pullwr_write(), so fill() will NOT be called again until a call to + * pullwr_bump() or pullwr_write() comes in. + * + * default: 16384 (16 kB) + * + * passing 0 for either value (or not calling it at all) uses the default. + */ +extern void pullwr_cfg(struct pullwr *pullwr, int64_t max_spin_usec, + size_t write_threshold); + +extern void pullwr_bump(struct pullwr *pullwr); +extern void pullwr_write(struct pullwr *pullwr, + const void *data, size_t len); + +static inline void pullwr_write_stream(struct pullwr *pullwr, + struct stream *s) +{ + pullwr_write(pullwr, s->data, stream_get_endp(s)); +} + +extern void pullwr_stats(struct pullwr *pullwr, uint64_t *total_written, + size_t *pending, size_t *kernel_pending); + +#endif /* _WRITEPOLL_H */ diff --git a/lib/subdir.am b/lib/subdir.am index 2be7537bcc..e0f1352380 100644 --- a/lib/subdir.am +++ b/lib/subdir.am @@ -65,6 +65,7 @@ lib_libfrr_la_SOURCES = \ lib/prefix.c \ lib/privs.c \ lib/ptm_lib.c \ + lib/pullwr.c \ lib/qobj.c \ lib/ringbuf.c \ lib/routemap.c \ @@ -203,6 +204,7 @@ pkginclude_HEADERS += \ lib/printfrr.h \ lib/privs.h \ lib/ptm_lib.h \ + lib/pullwr.h \ lib/pw.h \ lib/qobj.h \ lib/queue.h \ |
