summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuentin Young <qlyoung@cumulusnetworks.com>2017-11-30 14:11:12 -0500
committerQuentin Young <qlyoung@cumulusnetworks.com>2017-11-30 16:47:51 -0500
commit4961a5a2eb478865f963283cd61bcc9ff19b1cdc (patch)
treed11c5cb235870f27e34fa94bca72ad4dab83445d
parent5561f5234361a1ea928133cfb5193ded71aa1cd6 (diff)
bgpd: intelligently adjust coalesce timer
The subgroup coalesce timer controls how long updates to a particular subgroup are delayed in order to allow additional peers to join the subgroup. Presently the timer value is 200 ms. Increase it to 1 second and adjust up as peers are configured, with an upper cap at 10s. This cuts convergence time by a factor of 3 at large scale (300+ peers, 1000+ prefixes per peer). Signed-off-by: Quentin Young <qlyoung@cumulusnetworks.com>
-rw-r--r--bgpd/bgp_packet.c14
-rw-r--r--bgpd/bgp_updgrp.h22
-rw-r--r--bgpd/bgpd.c5
3 files changed, 26 insertions, 15 deletions
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index 09df0e35b6..867c88111c 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -2156,22 +2156,8 @@ int bgp_process_packet(struct thread *thread)
int mprc; // message processing return code
peer = THREAD_ARG(thread);
-/*
- * This functionality is presently disabled. Unfortunately due to the
- * way bgpd is structured, reading more than one packet per input cycle
- * severely impacts convergence time. This is because advancing the
- * state of the routing table based on prefixes learned from one peer
- * prior to all (or at least most) peers being established and placed
- * into an update-group will make UPDATE generation starve
- * bgp_accept(), delaying convergence. This is a deficiency that needs
- * to be fixed elsewhere in the codebase, but for now our hand is
- * forced.
- */
-#if 0
rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
memory_order_relaxed);
-#endif
- rpkt_quanta_old = 1;
fsm_update_result = 0;
/* Guard against scheduled events that occur after peer deletion. */
diff --git a/bgpd/bgp_updgrp.h b/bgpd/bgp_updgrp.h
index 97afe7a11a..e941fecb61 100644
--- a/bgpd/bgp_updgrp.h
+++ b/bgpd/bgp_updgrp.h
@@ -29,7 +29,27 @@
#include "bgp_advertise.h"
-#define BGP_DEFAULT_SUBGROUP_COALESCE_TIME 200
+/*
+ * The following three heuristic constants determine how long advertisement to
+ * a subgroup will be delayed after it is created. The intent is to allow
+ * transient changes in peer state (primarily session establishment) to settle,
+ * so that more peers can be grouped together and benefit from sharing
+ * advertisement computations with the subgroup.
+ *
+ * These values have a very large impact on initial convergence time; any
+ * changes should be accompanied by careful performance testing at all scales.
+ *
+ * The coalesce time 'C' for a new subgroup within a particular BGP instance
+ * 'B' with total number of known peers 'P', established or not, is computed as
+ * follows:
+ *
+ * C = MIN(BGP_MAX_SUBGROUP_COALESCE_TIME,
+ * BGP_DEFAULT_SUBGROUP_COALESCE_TIME +
+ * (P*BGP_PEER_ADJUST_SUBGROUP_COALESCE_TIME))
+ */
+#define BGP_DEFAULT_SUBGROUP_COALESCE_TIME 1000
+#define BGP_MAX_SUBGROUP_COALESCE_TIME 10000
+#define BGP_PEER_ADJUST_SUBGROUP_COALESCE_TIME 50
#define PEER_UPDGRP_FLAGS \
(PEER_FLAG_LOCAL_AS_NO_PREPEND | PEER_FLAG_LOCAL_AS_REPLACE_AS)
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 65c215a466..8aaf19c7e1 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -1474,6 +1474,11 @@ struct peer *peer_create(union sockunion *su, const char *conf_if,
listnode_add_sort(bgp->peer, peer);
hash_get(bgp->peerhash, peer, hash_alloc_intern);
+ /* Adjust update-group coalesce timer heuristics for # peers. */
+ long ct = BGP_DEFAULT_SUBGROUP_COALESCE_TIME
+ + (bgp->peer->count * BGP_PEER_ADJUST_SUBGROUP_COALESCE_TIME);
+ bgp->coalesce_time = MIN(BGP_MAX_SUBGROUP_COALESCE_TIME, ct);
+
active = peer_active(peer);
/* Last read and reset time set */