git.puffer.fish Git - matthieu/frr.git/commitdiff
bgpd: Modify bgp to handle packet events in a FIFO
author    Donald Sharp <sharpd@nvidia.com>
          Fri, 21 Mar 2025 11:48:50 +0000 (07:48 -0400)
committer Donald Sharp <sharpd@nvidia.com>
          Tue, 25 Mar 2025 13:10:46 +0000 (09:48 -0400)
Current behavior of BGP is to have an event per connection.  Given
that, on startup of BGP with a high number of neighbors you end up
with 2 * (number of peers) events being processed.  Additionally,
once BGP has selected the connection, this still comes down to 512
events (one per peer in my setup).  This number of events swamps
the event system and delays any other work from being done in BGP
at all, because the 512 events always take precedence over
everything else.  The other main events are the handling of the
metaQ (1 event), update group events (1 per update group) and the
zebra batching event.  These are being swamped.

Modify the BGP code to have a FIFO of connections.  As new data
comes in to be read, place the connection at the end of the FIFO.
Have bgp_process_packet handle up to 100 packets spread across the
individual peers, where each peer/connection is limited to the
original quanta.  During testing I noticed that withdrawal events
at very, very large scale were taking up to 40 seconds to process,
so I added a yield check to further limit the number of packets
being processed per run.
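
In outline, the new scheduling behaves like the following toy model
(standalone C; the hand-rolled queue, names and numbers are
illustrative only, not the FRR code in the diffs below):

#include <stdio.h>

#define PROCESS_LIMIT 100 /* mirrors BGP_PACKET_PROCESS_LIMIT */
#define QUANTA        10  /* stands in for the per-peer rpkt_quanta */

struct conn {
	int id;
	int pending;       /* packets waiting in this connection's ibuf */
	struct conn *next; /* FIFO linkage */
};

static struct conn *head, *tail;

static void fifo_push(struct conn *c)
{
	c->next = NULL;
	if (tail)
		tail->next = c;
	else
		head = c;
	tail = c;
}

static struct conn *fifo_pop(void)
{
	struct conn *c = head;

	if (c) {
		head = c->next;
		if (!head)
			tail = NULL;
	}
	return c;
}

/* One run of the shared event: drain up to PROCESS_LIMIT packets,
 * round-robin across connections, QUANTA per connection per visit. */
static void process_packets(void)
{
	int processed = 0;
	struct conn *c;

	while (processed < PROCESS_LIMIT && (c = fifo_pop()) != NULL) {
		int n = 0;

		while (c->pending && n < QUANTA && processed < PROCESS_LIMIT) {
			c->pending--; /* "handle one packet" */
			n++;
			processed++;
		}
		if (c->pending)
			fifo_push(c); /* leftover work: back of the queue */
	}
	/* in bgpd, event_should_yield() can also cut the run short,
	 * and the event re-adds itself while the FIFO is non-empty */
}

int main(void)
{
	struct conn peers[4] = { { 1, 25 }, { 2, 5 }, { 3, 40 }, { 4, 60 } };

	for (int i = 0; i < 4; i++)
		fifo_push(&peers[i]);

	process_packets();

	for (int i = 0; i < 4; i++)
		printf("peer %d: %d packets left\n", peers[i].id,
		       peers[i].pending);
	return 0;
}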

This change also allows BGP to be interactive again on scaled
setups during initial convergence.  Prior to this change, any vtysh
command entered would be delayed by tens of seconds in my setup
while BGP was doing other work.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
bgpd/bgp_fsm.c
bgpd/bgp_io.c
bgpd/bgp_io.h
bgpd/bgp_main.c
bgpd/bgp_packet.c
bgpd/bgpd.c
bgpd/bgpd.h

diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c
index 478f8c9136da9e1a8ff094b6be0cddc835e5bdbe..2c1cbee6d09b3d7cbb7d5b79a2f915ef843f547c 100644
@@ -184,7 +184,11 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
        EVENT_OFF(keeper->t_delayopen);
        EVENT_OFF(keeper->t_connect_check_r);
        EVENT_OFF(keeper->t_connect_check_w);
-       EVENT_OFF(keeper->t_process_packet);
+
+       frr_with_mutex (&bm->peer_connection_mtx) {
+               if (peer_connection_fifo_member(&bm->connection_fifo, keeper))
+                       peer_connection_fifo_del(&bm->connection_fifo, keeper);
+       }
 
        /*
         * At this point in time, it is possible that there are packets pending
@@ -305,8 +309,13 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
 
        bgp_reads_on(keeper);
        bgp_writes_on(keeper);
-       event_add_event(bm->master, bgp_process_packet, keeper, 0,
-                       &keeper->t_process_packet);
+
+       frr_with_mutex (&bm->peer_connection_mtx) {
+               if (!peer_connection_fifo_member(&bm->connection_fifo, keeper)) {
+                       peer_connection_fifo_add_tail(&bm->connection_fifo, keeper);
+               }
+       }
+       event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
 
        return (peer);
 }
diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c
index 729a8fe2992faf478ac8dc4e17fe0e0fa403c95b..dac915a73d487602c3263f8abe7b3995673e8118 100644
@@ -99,7 +99,11 @@ void bgp_reads_off(struct peer_connection *connection)
        assert(fpt->running);
 
        event_cancel_async(fpt->master, &connection->t_read, NULL);
-       EVENT_OFF(connection->t_process_packet);
+
+       frr_with_mutex (&bm->peer_connection_mtx) {
+               if (peer_connection_fifo_member(&bm->connection_fifo, connection))
+                       peer_connection_fifo_del(&bm->connection_fifo, connection);
+       }
 
        UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
 }
@@ -292,9 +296,13 @@ done:
 
        event_add_read(fpt->master, bgp_process_reads, connection,
                       connection->fd, &connection->t_read);
-       if (added_pkt)
-               event_add_event(bm->master, bgp_process_packet, connection, 0,
-                               &connection->t_process_packet);
+       if (added_pkt) {
+               frr_with_mutex (&bm->peer_connection_mtx) {
+                       if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
+                               peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
+               }
+               event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
+       }
 }
 
 /*
diff --git a/bgpd/bgp_io.h b/bgpd/bgp_io.h
index 8d481129e592584b4f87837bbfde55c67412ba27..278980fde6060b3dd66109415237670b9d3cf382 100644
@@ -10,6 +10,7 @@
 
 #define BGP_WRITE_PACKET_MAX 64U
 #define BGP_READ_PACKET_MAX  10U
+#define BGP_PACKET_PROCESS_LIMIT 100
 
 #include "bgpd/bgpd.h"
 #include "frr_pthread.h"
diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c
index 9dbef791b0ed928cc36dee5c0b582ef225211014..1dbac2b864a2dbb1209f53869e2c55cd0c2f852a 100644
@@ -161,6 +161,14 @@ __attribute__((__noreturn__)) void sigint(void)
 
        bgp_exit(0);
 
+       /*
+        * This is being done after bgp_exit because items may be removed
+        * from the connection_fifo
+        */
+       peer_connection_fifo_fini(&bm->connection_fifo);
+       EVENT_OFF(bm->e_process_packet);
+       pthread_mutex_destroy(&bm->peer_connection_mtx);
+
        exit(0);
 }
 
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index 3d33a02db08fa7b0f906acb37e74ca8fbdfc1aed..afd8f1e83ae1cda0dc326da3100ed319ea01fba0 100644
@@ -3967,6 +3967,18 @@ int bgp_capability_receive(struct peer_connection *connection,
  * would not, making event flow difficult to understand. Please think twice
  * before hacking this.
  *
+ * Packet processing is now driven by a FIFO of connections that need
+ * to be handled.  This loop processes at most 100 packets per run
+ * (BGP_PACKET_PROCESS_LIMIT), but each individual connection is
+ * limited to the quanta value configured in bgp_vty.c.  If a
+ * connection still has work to do, it is placed at the back of the
+ * queue for another pass.  Note that event_should_yield() is also
+ * consulted, to decide whether processing should stop and let other
+ * work run.  That check was added *after* withdrawal processing at
+ * scale was observed to keep this function on the CPU for 40+ seconds.
+ * On my machine (a very, very fast computer) an update yields after
+ * 2-3 packets; a withdrawal yields after a single packet.
+ *
  * Thread type: EVENT_EVENT
  * @param thread
  * @return 0
@@ -3979,30 +3991,54 @@ void bgp_process_packet(struct event *thread)
        uint32_t rpkt_quanta_old; // how many packets to read
        int fsm_update_result;    // return code of bgp_event_update()
        int mprc;                 // message processing return code
+       uint32_t processed = 0, curr_connection_processed = 0;
+       bool more_work = false;
+       size_t count;
+       uint32_t total_packets_to_process, total_processed = 0;
+
+       frr_with_mutex (&bm->peer_connection_mtx)
+               connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+       if (!connection)
+               goto done;
 
-       connection = EVENT_ARG(thread);
+       total_packets_to_process = BGP_PACKET_PROCESS_LIMIT;
        peer = connection->peer;
        rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
                                               memory_order_relaxed);
+
        fsm_update_result = 0;
 
-       /* Guard against scheduled events that occur after peer deletion. */
-       if (connection->status == Deleted || connection->status == Clearing)
-               return;
+       while ((processed < total_packets_to_process) && connection) {
+               total_processed++;
+               /* Guard against scheduled events that occur after peer deletion. */
+               if (connection->status == Deleted || connection->status == Clearing) {
+                       frr_with_mutex (&bm->peer_connection_mtx)
+                               connection = peer_connection_fifo_pop(&bm->connection_fifo);
 
-       unsigned int processed = 0;
+                       if (connection)
+                               peer = connection->peer;
+
+                       continue;
+               }
 
-       while (processed < rpkt_quanta_old) {
                uint8_t type = 0;
                bgp_size_t size;
                char notify_data_length[2];
 
-               frr_with_mutex (&connection->io_mtx) {
+               frr_with_mutex (&connection->io_mtx)
                        peer->curr = stream_fifo_pop(connection->ibuf);
-               }
 
-               if (peer->curr == NULL) // no packets to process, hmm...
-                       return;
+               if (peer->curr == NULL) {
+                       frr_with_mutex (&bm->peer_connection_mtx)
+                               connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+
+                       if (connection)
+                               peer = connection->peer;
+
+                       continue;
+               }
 
                /* skip the marker and copy the packet length */
                stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
@@ -4106,32 +4142,81 @@ void bgp_process_packet(struct event *thread)
                stream_free(peer->curr);
                peer->curr = NULL;
                processed++;
+               curr_connection_processed++;
 
                /* Update FSM */
                if (mprc != BGP_PACKET_NOOP)
                        fsm_update_result = bgp_event_update(connection, mprc);
-               else
-                       continue;
 
                /*
                 * If peer was deleted, do not process any more packets. This
                 * is usually due to executing BGP_Stop or a stub deletion.
                 */
-               if (fsm_update_result == FSM_PEER_TRANSFERRED
-                   || fsm_update_result == FSM_PEER_STOPPED)
-                       break;
+               if (fsm_update_result == FSM_PEER_TRANSFERRED ||
+                   fsm_update_result == FSM_PEER_STOPPED) {
+                       frr_with_mutex (&bm->peer_connection_mtx)
+                               connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+                       if (connection)
+                               peer = connection->peer;
+
+                       continue;
+               }
+
+               bool yield = event_should_yield(thread);
+               if (curr_connection_processed >= rpkt_quanta_old || yield) {
+                       curr_connection_processed = 0;
+                       frr_with_mutex (&bm->peer_connection_mtx) {
+                               if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
+                                       peer_connection_fifo_add_tail(&bm->connection_fifo,
+                                                                     connection);
+                               if (!yield)
+                                       connection = peer_connection_fifo_pop(&bm->connection_fifo);
+                               else
+                                       connection = NULL;
+                       }
+                       if (connection)
+                               peer = connection->peer;
+
+                       continue;
+               }
+
+               frr_with_mutex (&connection->io_mtx) {
+                       if (connection->ibuf->count > 0)
+                               more_work = true;
+                       else
+                               more_work = false;
+               }
+
+               if (!more_work) {
+                       frr_with_mutex (&bm->peer_connection_mtx)
+                               connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+                       if (connection)
+                               peer = connection->peer;
+               }
        }
 
-       if (fsm_update_result != FSM_PEER_TRANSFERRED
-           && fsm_update_result != FSM_PEER_STOPPED) {
+       if (connection) {
                frr_with_mutex (&connection->io_mtx) {
-                       // more work to do, come back later
                        if (connection->ibuf->count > 0)
-                               event_add_event(bm->master, bgp_process_packet,
-                                               connection, 0,
-                                               &connection->t_process_packet);
+                               more_work = true;
+                       else
+                               more_work = false;
+               }
+               frr_with_mutex (&bm->peer_connection_mtx) {
+                       if (more_work &&
+                           !peer_connection_fifo_member(&bm->connection_fifo, connection))
+                               peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
                }
        }
+
+done:
+       frr_with_mutex (&bm->peer_connection_mtx)
+               count = peer_connection_fifo_count(&bm->connection_fifo);
+
+       if (count)
+               event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
 }
 
 /* Send EOR when routes are processed by selection deferral timer */
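
A detail worth noting in the hunk above: every producer schedules the
same bm->e_process_packet event, and the handler re-arms it at done:
only while the FIFO is non-empty.  Assuming, as the diff suggests,
that event_add_event() is a no-op when the referenced event is
already pending, any number of ready connections coalesce into at
most one queued event.  A standalone toy model of that pattern:

#include <stdbool.h>
#include <stdio.h>

static bool event_pending; /* stands in for bm->e_process_packet */
static int fifo_count;     /* stands in for the connection FIFO */

/* stands in for event_add_event(): no-op if already scheduled */
static void schedule(void)
{
	event_pending = true;
}

/* producer, e.g. bgp_process_reads(): enqueue, then poke the event */
static void producer(int pkts)
{
	fifo_count += pkts;
	schedule();
}

/* consumer, e.g. bgp_process_packet(): bounded drain, then re-arm */
static void consumer(void)
{
	int batch = fifo_count < 100 ? fifo_count : 100;

	event_pending = false;
	fifo_count -= batch;
	printf("drained %d, %d left\n", batch, fifo_count);

	if (fifo_count)
		schedule(); /* come back later; other work runs between */
}

int main(void)
{
	producer(250);
	producer(30); /* coalesces into the already-pending event */

	while (event_pending)
		consumer();
	return 0;
}
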
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 77150786ced6d4162c22f0d1317dfaf3e3d39053..5ebcebd1d6f53e86d57c67c1a2abdf76e78ff34f 100644
@@ -8683,6 +8683,10 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
 
        bm = &bgp_master;
 
+       /* Initialize the peer connection FIFO list */
+       peer_connection_fifo_init(&bm->connection_fifo);
+       pthread_mutex_init(&bm->peer_connection_mtx, NULL);
+
        zebra_announce_init(&bm->zebra_announce_head);
        zebra_l2_vni_init(&bm->zebra_l2_vni_head);
        zebra_l3_vni_init(&bm->zebra_l3_vni_head);
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 4b5833151feff808983a52c74da0d3e4f371287a..bbc45994b40c99848c482a65915495dd79189466 100644
@@ -107,6 +107,9 @@ enum bgp_af_index {
 extern struct frr_pthread *bgp_pth_io;
 extern struct frr_pthread *bgp_pth_ka;
 
+/* FIFO list for peer connections */
+PREDECL_LIST(peer_connection_fifo);
+
 /* BGP master for system wide configurations and variables.  */
 struct bgp_master {
        /* BGP instance list.  */
@@ -121,6 +124,11 @@ struct bgp_master {
        /* BGP port number.  */
        uint16_t port;
 
+       /* FIFO list head for peer connections */
+       struct peer_connection_fifo_head connection_fifo;
+       struct event *e_process_packet;
+       pthread_mutex_t peer_connection_mtx;
+
        /* Listener addresses */
        struct list *addresses;
 
@@ -1378,7 +1386,6 @@ struct peer_connection {
        struct event *t_pmax_restart;
 
        struct event *t_routeadv;
-       struct event *t_process_packet;
 
        struct event *t_stop_with_notify;
 
@@ -1394,7 +1401,14 @@ struct peer_connection {
 
        union sockunion *su_local;  /* Sockunion of local address. */
        union sockunion *su_remote; /* Sockunion of remote address. */
+
+       /* For FIFO list */
+       struct peer_connection_fifo_item fifo_item;
 };
+
+/* Declare the FIFO list implementation */
+DECLARE_LIST(peer_connection_fifo, struct peer_connection, fifo_item);
+
 const char *bgp_peer_get_connection_direction(struct peer_connection *connection);
 extern struct peer_connection *bgp_peer_connection_new(struct peer *peer);
 extern void bgp_peer_connection_free(struct peer_connection **connection);
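
For readers unfamiliar with FRR's typesafe containers: the
PREDECL_LIST/DECLARE_LIST pair above generates a typed list API whose
names derive from the first macro argument.  A sketch of how the
generated functions line up with the call sites in this commit
(assumes FRR's lib/typesafe.h; illustrative only):

#include "typesafe.h"

PREDECL_LIST(peer_connection_fifo);

struct peer_connection {
	/* ... other members elided ... */
	struct peer_connection_fifo_item fifo_item; /* list linkage */
};

DECLARE_LIST(peer_connection_fifo, struct peer_connection, fifo_item);

/* Generated functions used by this commit:
 *   peer_connection_fifo_init(&head)        - bgp_master_init()
 *   peer_connection_fifo_fini(&head)        - sigint()
 *   peer_connection_fifo_add_tail(&head, c) - enqueue on new data
 *   peer_connection_fifo_pop(&head)         - dequeue in bgp_process_packet()
 *   peer_connection_fifo_del(&head, c)      - remove on reads-off/transfer
 *   peer_connection_fifo_member(&head, c)   - avoid double-enqueue
 *   peer_connection_fifo_count(&head)       - decide whether to re-arm
 */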