From 9e06f93df1867cce7c98ea63718b971ef33c0251 Mon Sep 17 00:00:00 2001 From: Edwin Brossette Date: Fri, 8 Dec 2023 16:02:11 +0100 Subject: [PATCH] lib: use snmp's large fd sets for agentx The maximum number of file descriptors in an fd set is limited by FD_SETSIZE. This limitation is important because the libc macros FD_SET(), FD_CLR() and FD_ISSET() will invoke a sigabort if the size of the fd set given to them is above FD_SETSIZE. We ran into such a sigabort with bgpd because snmp can return an fd set of size higher than FD_SETSIZE when calling snmp_select_info(). An unfortunate FD_ISSET() call later causes the following abort: Received signal 6 at 1701115534 (si_addr 0xb94, PC 0x7ff289a16a7c); aborting... /lib/x86_64-linux-gnu/libfrr.so.0(zlog_backtrace_sigsafe+0xb3) [0x7ff289d62bba] /lib/x86_64-linux-gnu/libfrr.so.0(zlog_signal+0x1b4) [0x7ff289d62a1f] /lib/x86_64-linux-gnu/libfrr.so.0(+0x102860) [0x7ff289da4860] /lib/x86_64-linux-gnu/libc.so.6(+0x42520) [0x7ff2899c2520] /lib/x86_64-linux-gnu/libc.so.6(pthread_kill+0x12c) [0x7ff289a16a7c] /lib/x86_64-linux-gnu/libc.so.6(raise+0x16) [0x7ff2899c2476] /lib/x86_64-linux-gnu/libc.so.6(abort+0xd3) [0x7ff2899a87f3] /lib/x86_64-linux-gnu/libc.so.6(+0x896f6) [0x7ff289a096f6] /lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x2a) [0x7ff289ab676a] /lib/x86_64-linux-gnu/libc.so.6(+0x1350c6) [0x7ff289ab50c6] /lib/x86_64-linux-gnu/libc.so.6(+0x1366ab) [0x7ff289ab66ab] /lib/x86_64-linux-gnu/libfrrsnmp.so.0(+0x36f5) [0x7ff2897736f5] /lib/x86_64-linux-gnu/libfrrsnmp.so.0(+0x3c27) [0x7ff289773c27] /lib/x86_64-linux-gnu/libfrr.so.0(thread_call+0x1c2) [0x7ff289dbe105] /lib/x86_64-linux-gnu/libfrr.so.0(frr_run+0x257) [0x7ff289d56e69] /usr/bin/bgpd(main+0x4f4) [0x560965c40488] /lib/x86_64-linux-gnu/libc.so.6(+0x29d90) [0x7ff2899a9d90] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x80) [0x7ff2899a9e40] /usr/bin/bgpd(_start+0x25) [0x560965c3e965] in thread agentx_timeout scheduled from /build/make-pkg/output/_packages/cp-routing/src/lib/agentx.c:122 agentx_events_update() Also, the following error is logged by snmp just before the abort: snmp[err]: Use snmp_sess_select_info2() for processing large file descriptors snmp uses a custom struct netsnmp_large_fd_set to work above the limit imposed by FD_SETSIZE. It is noteworthy that, when calling snmp_select_info() instead of snmp_select_info2(), snmp uses the same code working with its custom, large structs, and copy/paste the result to a regular, libc compatible fd_set. So there should be no downside working with snmp_select_info2() instead of snmp_select_info(). Replace every use of the libc file descriptors sets by snmp's extended file descriptors sets in agentx to acommodate for the high number of file descriptors that can come out of snmp. This should prevent the abort seen above. Signed-off-by: Edwin Brossette --- lib/agentx.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/agentx.c b/lib/agentx.c index 45f14c2703..c2f2112d7d 100644 --- a/lib/agentx.c +++ b/lib/agentx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "command.h" #include "smux.h" @@ -43,7 +44,7 @@ static void agentx_timeout(struct event *t) static void agentx_read(struct event *t) { - fd_set fds; + netsnmp_large_fd_set lfds; int flags, new_flags = 0; int nonblock = false; struct listnode *ln = EVENT_ARG(t); @@ -68,9 +69,9 @@ static void agentx_read(struct event *t) flog_err(EC_LIB_SYSTEM_CALL, "Failed to set snmp fd non blocking: %s(%d)", strerror(errno), errno); - FD_ZERO(&fds); - FD_SET(EVENT_FD(t), &fds); - snmp_read(&fds); + netsnmp_large_fd_set_init(&lfds, FD_SETSIZE); + netsnmp_large_fd_setfd(t->u.fd, &lfds); + snmp_read2(&lfds); /* Reset the flag */ if (!nonblock) { @@ -85,6 +86,7 @@ static void agentx_read(struct event *t) netsnmp_check_outstanding_agent_requests(); agentx_events_update(); + netsnmp_large_fd_set_cleanup(&lfds); } static void agentx_events_update(void) @@ -92,15 +94,15 @@ static void agentx_events_update(void) int maxfd = 0; int block = 1; struct timeval timeout = {.tv_sec = 0, .tv_usec = 0}; - fd_set fds; + netsnmp_large_fd_set lfds; struct listnode *ln; struct event **thr; int fd, thr_fd; event_cancel(&timeout_thr); - FD_ZERO(&fds); - snmp_select_info(&maxfd, &fds, &timeout, &block); + netsnmp_large_fd_set_init(&lfds, FD_SETSIZE); + snmp_select_info2(&maxfd, &lfds, &timeout, &block); if (!block) { event_add_timer_tv(agentx_tm, agentx_timeout, NULL, &timeout, @@ -118,7 +120,7 @@ static void agentx_events_update(void) /* caught up */ if (thr_fd == fd) { struct listnode *nextln = listnextnode(ln); - if (!FD_ISSET(fd, &fds)) { + if (!netsnmp_large_fd_is_set(fd, &lfds)) { event_cancel(thr); XFREE(MTYPE_TMP, thr); list_delete_node(events, ln); @@ -128,7 +130,7 @@ static void agentx_events_update(void) thr_fd = thr ? EVENT_FD(*thr) : -1; } /* need listener, but haven't hit one where it would be */ - else if (FD_ISSET(fd, &fds)) { + else if (netsnmp_large_fd_is_set(fd, &lfds)) { struct listnode *newln; thr = XCALLOC(MTYPE_TMP, sizeof(struct event *)); @@ -147,6 +149,7 @@ static void agentx_events_update(void) list_delete_node(events, ln); ln = nextln; } + netsnmp_large_fd_set_cleanup(&lfds); } /* AgentX node. */ -- 2.39.5