summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Donatas Abraitis <donatas@opensourcerouting.org> 2024-09-06 09:28:24 +0300
committer GitHub <noreply@github.com> 2024-09-06 09:28:24 +0300
commit ebfcbbcc898359ec8e79bdbae9d75a99d785a076 (patch)
tree 44e7dd06cec3556a3ee6cdfbaa8551cd533bc0a9
parent 340d51fc3a9c88b4db38c685cf87ab9970db060a (diff)
parent 98b11de9f60c16e61a581b03a97294563eb9f673 (diff)
Merge pull request #16220 from donaldsharp/zebra_fpm_backpressure
Zebra fpm backpressure
-rw-r--r-- zebra/dplane_fpm_nl.c 19
-rw-r--r-- zebra/rib.h 1
-rw-r--r-- zebra/zebra_dplane.c 165
-rw-r--r-- zebra/zebra_rib.c 11
4 files changed, 146 insertions, 50 deletions
diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c
index eb968bcd37..1d2f9e695f 100644
--- a/zebra/dplane_fpm_nl.c
+++ b/zebra/dplane_fpm_nl.c
@@ -1678,6 +1678,25 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
fnc = dplane_provider_get_data(prov);
limit = dplane_provider_get_work_limit(prov);
+
+ frr_with_mutex (&fnc->ctxqueue_mutex) {
+ cur_queue = dplane_ctx_queue_count(&fnc->ctxqueue);
+ }
+
+ if (cur_queue >= (uint64_t)limit) {
+ if (IS_ZEBRA_DEBUG_FPM)
+ zlog_debug("%s: Already at a limit(%" PRIu64
+ ") of internal work, hold off",
+ __func__, cur_queue);
+ limit = 0;
+ } else if (cur_queue != 0) {
+ if (IS_ZEBRA_DEBUG_FPM)
+ zlog_debug("%s: current queue is %" PRIu64
+ ", limiting to lesser amount of %" PRIu64,
+ __func__, cur_queue, limit - cur_queue);
+ limit -= cur_queue;
+ }
+
for (counter = 0; counter < limit; counter++) {
ctx = dplane_provider_dequeue_in_ctx(prov);
if (ctx == NULL)
diff --git a/zebra/rib.h b/zebra/rib.h
index cd6efbfb36..3095a9d67d 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -631,6 +631,7 @@ extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
uint8_t instance, time_t restart_time);
extern void zebra_vty_init(void);
+extern uint32_t zebra_rib_dplane_results_count(void);
extern pid_t pid;
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index 0851666510..75147e7136 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -483,10 +483,8 @@ struct zebra_dplane_provider {
int (*dp_fini)(struct zebra_dplane_provider *prov, bool early_p);
_Atomic uint32_t dp_in_counter;
- _Atomic uint32_t dp_in_queued;
_Atomic uint32_t dp_in_max;
_Atomic uint32_t dp_out_counter;
- _Atomic uint32_t dp_out_queued;
_Atomic uint32_t dp_out_max;
_Atomic uint32_t dp_error_counter;
@@ -6129,35 +6127,45 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed)
struct zebra_dplane_provider *prov;
uint64_t in, in_q, in_max, out, out_q, out_max;
- vty_out(vty, "Zebra dataplane providers:\n");
-
DPLANE_LOCK();
prov = dplane_prov_list_first(&zdplane_info.dg_providers);
+ in = dplane_ctx_queue_count(&zdplane_info.dg_update_list);
DPLANE_UNLOCK();
+ vty_out(vty, "dataplane Incoming Queue from Zebra: %" PRIu64 "\n", in);
+ vty_out(vty, "Zebra dataplane providers:\n");
+
/* Show counters, useful info from each registered provider */
while (prov) {
+ dplane_provider_lock(prov);
+ in_q = dplane_ctx_queue_count(&prov->dp_ctx_in_list);
+ out_q = dplane_ctx_queue_count(&prov->dp_ctx_out_list);
+ dplane_provider_unlock(prov);
in = atomic_load_explicit(&prov->dp_in_counter,
memory_order_relaxed);
- in_q = atomic_load_explicit(&prov->dp_in_queued,
- memory_order_relaxed);
+
in_max = atomic_load_explicit(&prov->dp_in_max,
memory_order_relaxed);
out = atomic_load_explicit(&prov->dp_out_counter,
memory_order_relaxed);
- out_q = atomic_load_explicit(&prov->dp_out_queued,
- memory_order_relaxed);
+
out_max = atomic_load_explicit(&prov->dp_out_max,
memory_order_relaxed);
- vty_out(vty, "%s (%u): in: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64", out: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64"\n",
- prov->dp_name, prov->dp_id, in, in_q, in_max,
- out, out_q, out_max);
+ vty_out(vty,
+ " %s (%u): in: %" PRIu64 ", q: %" PRIu64
+ ", q_max: %" PRIu64 ", out: %" PRIu64 ", q: %" PRIu64
+ ", q_max: %" PRIu64 "\n",
+ prov->dp_name, prov->dp_id, in, in_q, in_max, out,
+ out_q, out_max);
prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov);
}
+ out = zebra_rib_dplane_results_count();
+ vty_out(vty, "dataplane Outgoing Queue to Zebra: %" PRIu64 "\n", out);
+
return CMD_SUCCESS;
}
@@ -6299,10 +6307,6 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx(
dplane_provider_lock(prov);
ctx = dplane_ctx_list_pop(&(prov->dp_ctx_in_list));
- if (ctx) {
- atomic_fetch_sub_explicit(&prov->dp_in_queued, 1,
- memory_order_relaxed);
- }
dplane_provider_unlock(prov);
@@ -6330,10 +6334,6 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov,
break;
}
- if (ret > 0)
- atomic_fetch_sub_explicit(&prov->dp_in_queued, ret,
- memory_order_relaxed);
-
dplane_provider_unlock(prov);
return ret;
@@ -6358,10 +6358,7 @@ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov,
dplane_ctx_list_add_tail(&(prov->dp_ctx_out_list), ctx);
/* Maintain out-queue counters */
- atomic_fetch_add_explicit(&(prov->dp_out_queued), 1,
- memory_order_relaxed);
- curr = atomic_load_explicit(&prov->dp_out_queued,
- memory_order_relaxed);
+ curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list);
high = atomic_load_explicit(&prov->dp_out_max,
memory_order_relaxed);
if (curr > high)
@@ -6383,9 +6380,6 @@ dplane_provider_dequeue_out_ctx(struct zebra_dplane_provider *prov)
if (!ctx)
return NULL;
- atomic_fetch_sub_explicit(&(prov->dp_out_queued), 1,
- memory_order_relaxed);
-
return ctx;
}
@@ -7331,10 +7325,10 @@ static void dplane_thread_loop(struct event *event)
{
struct dplane_ctx_list_head work_list;
struct dplane_ctx_list_head error_list;
- struct zebra_dplane_provider *prov;
+ struct zebra_dplane_provider *prov, *next_prov;
struct zebra_dplane_ctx *ctx;
int limit, counter, error_counter;
- uint64_t curr, high;
+ uint64_t curr, out_curr, high;
bool reschedule = false;
/* Capture work limit per cycle */
@@ -7358,18 +7352,48 @@ static void dplane_thread_loop(struct event *event)
/* Locate initial registered provider */
prov = dplane_prov_list_first(&zdplane_info.dg_providers);
- /* Move new work from incoming list to temp list */
- for (counter = 0; counter < limit; counter++) {
- ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list);
- if (ctx) {
- ctx->zd_provider = prov->dp_id;
+ curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list);
+ out_curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list);
- dplane_ctx_list_add_tail(&work_list, ctx);
- } else {
- break;
+ if (curr >= (uint64_t)limit) {
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("%s: Current first provider(%s) Input queue is %" PRIu64
+ ", holding off work",
+ __func__, prov->dp_name, curr);
+ counter = 0;
+ } else if (out_curr >= (uint64_t)limit) {
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("%s: Current first provider(%s) Output queue is %" PRIu64
+ ", holding off work",
+ __func__, prov->dp_name, out_curr);
+ counter = 0;
+ } else {
+ int tlimit;
+ /*
+ * Let's limit the work to what can be put on the
+ * in or out queue without going over
+ */
+ tlimit = limit - MAX(curr, out_curr);
+ /* Move new work from incoming list to temp list */
+ for (counter = 0; counter < tlimit; counter++) {
+ ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list);
+ if (ctx) {
+ ctx->zd_provider = prov->dp_id;
+
+ dplane_ctx_list_add_tail(&work_list, ctx);
+ } else {
+ break;
+ }
}
}
+ /*
+ * If there is anything still on the two input queues reschedule
+ */
+ if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 ||
+ dplane_ctx_queue_count(&zdplane_info.dg_update_list) > 0)
+ reschedule = true;
+
DPLANE_UNLOCK();
atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter,
@@ -7388,8 +7412,9 @@ static void dplane_thread_loop(struct event *event)
* items.
*/
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
- zlog_debug("dplane enqueues %d new work to provider '%s'",
- counter, dplane_provider_get_name(prov));
+ zlog_debug("dplane enqueues %d new work to provider '%s' curr is %" PRIu64,
+ counter, dplane_provider_get_name(prov),
+ curr);
/* Capture current provider id in each context; check for
* error status.
@@ -7422,10 +7447,7 @@ static void dplane_thread_loop(struct event *event)
atomic_fetch_add_explicit(&prov->dp_in_counter, counter,
memory_order_relaxed);
- atomic_fetch_add_explicit(&prov->dp_in_queued, counter,
- memory_order_relaxed);
- curr = atomic_load_explicit(&prov->dp_in_queued,
- memory_order_relaxed);
+ curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list);
high = atomic_load_explicit(&prov->dp_in_max,
memory_order_relaxed);
if (curr > high)
@@ -7450,18 +7472,61 @@ static void dplane_thread_loop(struct event *event)
if (!zdplane_info.dg_run)
break;
+ /* Locate next provider */
+ next_prov = dplane_prov_list_next(&zdplane_info.dg_providers,
+ prov);
+ if (next_prov) {
+ curr = dplane_ctx_queue_count(
+ &next_prov->dp_ctx_in_list);
+ out_curr = dplane_ctx_queue_count(
+ &next_prov->dp_ctx_out_list);
+ } else
+ out_curr = curr = 0;
+
/* Dequeue completed work from the provider */
dplane_provider_lock(prov);
- while (counter < limit) {
- ctx = dplane_provider_dequeue_out_ctx(prov);
- if (ctx) {
- dplane_ctx_list_add_tail(&work_list, ctx);
- counter++;
- } else
- break;
+ if (curr >= (uint64_t)limit) {
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("%s: Next Provider(%s) Input queue is %" PRIu64
+ ", holding off work",
+ __func__, next_prov->dp_name, curr);
+ counter = 0;
+ } else if (out_curr >= (uint64_t)limit) {
+ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
+ zlog_debug("%s: Next Provider(%s) Output queue is %" PRIu64
+ ", holding off work",
+ __func__, next_prov->dp_name,
+ out_curr);
+ counter = 0;
+ } else {
+ int tlimit;
+
+ /*
+ * Let's limit the work to what can be put on the
+ * in or out queue without going over
+ */
+ tlimit = limit - MAX(curr, out_curr);
+ while (counter < tlimit) {
+ ctx = dplane_provider_dequeue_out_ctx(prov);
+ if (ctx) {
+ dplane_ctx_list_add_tail(&work_list,
+ ctx);
+ counter++;
+ } else
+ break;
+ }
}
+ /*
+ * Let's check if there are still any items on the
+ * input or output queues of the current provider;
+ * if so, then we know we need to reschedule.
+ */
+ if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 ||
+ dplane_ctx_queue_count(&prov->dp_ctx_out_list) > 0)
+ reschedule = true;
+
dplane_provider_unlock(prov);
if (counter >= limit)
@@ -7477,7 +7542,7 @@ static void dplane_thread_loop(struct event *event)
}
/* Locate next provider */
- prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov);
+ prov = next_prov;
}
/*
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index d53b27a387..075cc2ffb4 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -5108,6 +5108,17 @@ static int rib_dplane_results(struct dplane_ctx_list_head *ctxlist)
return 0;
}
+uint32_t zebra_rib_dplane_results_count(void)
+{
+ uint32_t count;
+
+ frr_with_mutex (&dplane_mutex) {
+ count = dplane_ctx_queue_count(&rib_dplane_q);
+ }
+
+ return count;
+}
+
/*
* Ensure there are no empty slots in the route_info array.
* Every route type in zebra should be present there.