]> git.puffer.fish Git - mirror/frr.git/commitdiff
lib: Prevent infinite loop in fd handling
authorDonald Sharp <sharpd@cumulusnetworks.com>
Thu, 13 Jun 2019 01:13:18 +0000 (21:13 -0400)
committerDonald Sharp <sharpd@cumulusnetworks.com>
Thu, 13 Jun 2019 19:14:04 +0000 (15:14 -0400)
If we have a case where have created a fd for i/o and we have
removed the handling thread but still have the fd in the poll
data structure, there existed a case where we would get
the handle this fd return from poll but we would immediately
do nothing with it because we didn't have a thread to hand
the event to.

This leads to an infinite loop.  Prevent the infinite loop
from happening and log the problem.

We still need to find the cause of this happening.  But
let's prevent the system from melting down in the mean time.

Fixes: #2796
Signed-off-by: David Lamparter <equinox@diac24.net>
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
lib/lib_errors.c
lib/lib_errors.h
lib/thread.c

index b6c764d8739b80203b10a250e7eadf94660b22a6..e0559f332d286184e617699996ae9ab11f1374b0 100644 (file)
@@ -50,6 +50,12 @@ static struct log_ref ferr_lib_warn[] = {
                .description = "The Event subsystem has detected a slow process, this typically indicates that FRR is having trouble completing work in a timely manner.  This can be either a misconfiguration, bug, or some combination therof.",
                .suggestion = "Gather log data and open an Issue",
        },
+       {
+               .code = EC_LIB_NO_THREAD,
+               .title = "The Event subsystem has detected an internal FD problem",
+               .description = "The Event subsystem has detected a file descriptor read/write event without an associated handling function.  This is a bug, please collect log data and open an issue.",
+               .suggestion = "Gather log data and open an Issue",
+       },
        {
                .code = EC_LIB_RMAP_RECURSION_LIMIT,
                .title = "Reached the Route-Map Recursion Limit",
index 39b39fb065b9b618f9cfc97a06d6704e400544d8..996a16ba95b6bedbd26b82237a0869f27ef1a262 100644 (file)
@@ -45,6 +45,7 @@ enum lib_log_refs {
        EC_LIB_STREAM,
        EC_LIB_LINUX_NS,
        EC_LIB_SLOW_THREAD,
+       EC_LIB_NO_THREAD,
        EC_LIB_RMAP_RECURSION_LIMIT,
        EC_LIB_BACKUP_CONFIG,
        EC_LIB_VRF_LENGTH,
index 7489be5c2dad0aebb0b8de4833ac614de64654cd..2e7b0ca0ef7e613da6dbadaba436d59d0b165718 100644 (file)
@@ -1238,12 +1238,31 @@ static struct thread *thread_run(struct thread_master *m, struct thread *thread,
 }
 
 static int thread_process_io_helper(struct thread_master *m,
-                                   struct thread *thread, short state, int pos)
+                                   struct thread *thread, short state,
+                                   short actual_state, int pos)
 {
        struct thread **thread_array;
 
-       if (!thread)
+       /*
+        * poll() clears the .events field, but the pollfd array we
+        * pass to poll() is a copy of the one used to schedule threads.
+        * We need to synchronize state between the two here by applying
+        * the same changes poll() made on the copy of the "real" pollfd
+        * array.
+        *
+        * This cleans up a possible infinite loop where we refuse
+        * to respond to a poll event but poll is insistent that
+        * we should.
+        */
+       m->handler.pfds[pos].events &= ~(state);
+
+       if (!thread) {
+               if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
+                       flog_err(EC_LIB_NO_THREAD,
+                                "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!\n",
+                                m->handler.pfds[pos].fd, actual_state);
                return 0;
+       }
 
        if (thread->type == THREAD_READ)
                thread_array = m->read;
@@ -1253,9 +1272,7 @@ static int thread_process_io_helper(struct thread_master *m,
        thread_array[thread->u.fd] = NULL;
        thread_list_add_tail(&m->ready, thread);
        thread->type = THREAD_READY;
-       /* if another pthread scheduled this file descriptor for the event we're
-        * responding to, no problem; we're getting to it now */
-       thread->master->handler.pfds[pos].events &= ~(state);
+
        return 1;
 }
 
@@ -1291,12 +1308,13 @@ static void thread_process_io(struct thread_master *m, unsigned int num)
                 * there's no need to update it. Similarily, barring deletion,
                 * the fd
                 * should still be a valid index into the master's pfds. */
-               if (pfds[i].revents & (POLLIN | POLLHUP))
+               if (pfds[i].revents & (POLLIN | POLLHUP)) {
                        thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
-                                                i);
+                                                pfds[i].revents, i);
+               }
                if (pfds[i].revents & POLLOUT)
                        thread_process_io_helper(m, m->write[pfds[i].fd],
-                                                POLLOUT, i);
+                                                POLLOUT, pfds[i].revents, i);
 
                /* if one of our file descriptors is garbage, remove the same
                 * from