From 4961a5a2eb478865f963283cd61bcc9ff19b1cdc Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Thu, 30 Nov 2017 14:11:12 -0500 Subject: [PATCH] bgpd: intelligently adjust coalesce timer The subgroup coalesce timer controls how long updates to a particular subgroup are delayed in order to allow additional peers to join the subgroup. Presently the timer value is 200 ms. Increase it to 1 second and adjust up as peers are configured, with an upper cap at 10s. This cuts convergence time by a factor of 3 at large scale (300+ peers, 1000+ prefixes per peer). Signed-off-by: Quentin Young --- bgpd/bgp_packet.c | 14 -------------- bgpd/bgp_updgrp.h | 22 +++++++++++++++++++++- bgpd/bgpd.c | 5 +++++ 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 09df0e35b6..867c88111c 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -2156,22 +2156,8 @@ int bgp_process_packet(struct thread *thread) int mprc; // message processing return code peer = THREAD_ARG(thread); -/* - * This functionality is presently disabled. Unfortunately due to the - * way bgpd is structured, reading more than one packet per input cycle - * severely impacts convergence time. This is because advancing the - * state of the routing table based on prefixes learned from one peer - * prior to all (or at least most) peers being established and placed - * into an update-group will make UPDATE generation starve - * bgp_accept(), delaying convergence. This is a deficiency that needs - * to be fixed elsewhere in the codebase, but for now our hand is - * forced. - */ -#if 0 rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta, memory_order_relaxed); -#endif - rpkt_quanta_old = 1; fsm_update_result = 0; /* Guard against scheduled events that occur after peer deletion. */ diff --git a/bgpd/bgp_updgrp.h b/bgpd/bgp_updgrp.h index 97afe7a11a..e941fecb61 100644 --- a/bgpd/bgp_updgrp.h +++ b/bgpd/bgp_updgrp.h @@ -29,7 +29,27 @@ #include "bgp_advertise.h" -#define BGP_DEFAULT_SUBGROUP_COALESCE_TIME 200 +/* + * The following three heuristic constants determine how long advertisement to + * a subgroup will be delayed after it is created. The intent is to allow + * transient changes in peer state (primarily session establishment) to settle, + * so that more peers can be grouped together and benefit from sharing + * advertisement computations with the subgroup. + * + * These values have a very large impact on initial convergence time; any + * changes should be accompanied by careful performance testing at all scales. + * + * The coalesce time 'C' for a new subgroup within a particular BGP instance + * 'B' with total number of known peers 'P', established or not, is computed as + * follows: + * + * C = MIN(BGP_MAX_SUBGROUP_COALESCE_TIME, + * BGP_DEFAULT_SUBGROUP_COALESCE_TIME + + * (P*BGP_PEER_ADJUST_SUBGROUP_COALESCE_TIME)) + */ +#define BGP_DEFAULT_SUBGROUP_COALESCE_TIME 1000 +#define BGP_MAX_SUBGROUP_COALESCE_TIME 10000 +#define BGP_PEER_ADJUST_SUBGROUP_COALESCE_TIME 50 #define PEER_UPDGRP_FLAGS \ (PEER_FLAG_LOCAL_AS_NO_PREPEND | PEER_FLAG_LOCAL_AS_REPLACE_AS) diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 65c215a466..8aaf19c7e1 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1474,6 +1474,11 @@ struct peer *peer_create(union sockunion *su, const char *conf_if, listnode_add_sort(bgp->peer, peer); hash_get(bgp->peerhash, peer, hash_alloc_intern); + /* Adjust update-group coalesce timer heuristics for # peers. */ + long ct = BGP_DEFAULT_SUBGROUP_COALESCE_TIME + + (bgp->peer->count * BGP_PEER_ADJUST_SUBGROUP_COALESCE_TIME); + bgp->coalesce_time = MIN(BGP_MAX_SUBGROUP_COALESCE_TIME, ct); + active = peer_active(peer); /* Last read and reset time set */ -- 2.39.5