diff options
| author | Donatas Abraitis <donatas@opensourcerouting.org> | 2024-09-06 09:28:24 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-06 09:28:24 +0300 |
| commit | ebfcbbcc898359ec8e79bdbae9d75a99d785a076 (patch) | |
| tree | 44e7dd06cec3556a3ee6cdfbaa8551cd533bc0a9 | |
| parent | 340d51fc3a9c88b4db38c685cf87ab9970db060a (diff) | |
| parent | 98b11de9f60c16e61a581b03a97294563eb9f673 (diff) | |
Merge pull request #16220 from donaldsharp/zebra_fpm_backpressure
Zebra fpm backpressure
| -rw-r--r-- | zebra/dplane_fpm_nl.c | 19 | ||||
| -rw-r--r-- | zebra/rib.h | 1 | ||||
| -rw-r--r-- | zebra/zebra_dplane.c | 165 | ||||
| -rw-r--r-- | zebra/zebra_rib.c | 11 |
4 files changed, 146 insertions, 50 deletions
diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index eb968bcd37..1d2f9e695f 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -1678,6 +1678,25 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) fnc = dplane_provider_get_data(prov); limit = dplane_provider_get_work_limit(prov); + + frr_with_mutex (&fnc->ctxqueue_mutex) { + cur_queue = dplane_ctx_queue_count(&fnc->ctxqueue); + } + + if (cur_queue >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: Already at a limit(%" PRIu64 + ") of internal work, hold off", + __func__, cur_queue); + limit = 0; + } else if (cur_queue != 0) { + if (IS_ZEBRA_DEBUG_FPM) + zlog_debug("%s: current queue is %" PRIu64 + ", limiting to lesser amount of %" PRIu64, + __func__, cur_queue, limit - cur_queue); + limit -= cur_queue; + } + for (counter = 0; counter < limit; counter++) { ctx = dplane_provider_dequeue_in_ctx(prov); if (ctx == NULL) diff --git a/zebra/rib.h b/zebra/rib.h index cd6efbfb36..3095a9d67d 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -631,6 +631,7 @@ extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto, uint8_t instance, time_t restart_time); extern void zebra_vty_init(void); +extern uint32_t zebra_rib_dplane_results_count(void); extern pid_t pid; diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 0851666510..75147e7136 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -483,10 +483,8 @@ struct zebra_dplane_provider { int (*dp_fini)(struct zebra_dplane_provider *prov, bool early_p); _Atomic uint32_t dp_in_counter; - _Atomic uint32_t dp_in_queued; _Atomic uint32_t dp_in_max; _Atomic uint32_t dp_out_counter; - _Atomic uint32_t dp_out_queued; _Atomic uint32_t dp_out_max; _Atomic uint32_t dp_error_counter; @@ -6129,35 +6127,45 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed) struct zebra_dplane_provider *prov; uint64_t in, in_q, in_max, out, out_q, out_max; - vty_out(vty, "Zebra dataplane providers:\n"); - DPLANE_LOCK(); prov = dplane_prov_list_first(&zdplane_info.dg_providers); + in = dplane_ctx_queue_count(&zdplane_info.dg_update_list); DPLANE_UNLOCK(); + vty_out(vty, "dataplane Incoming Queue from Zebra: %" PRIu64 "\n", in); + vty_out(vty, "Zebra dataplane providers:\n"); + /* Show counters, useful info from each registered provider */ while (prov) { + dplane_provider_lock(prov); + in_q = dplane_ctx_queue_count(&prov->dp_ctx_in_list); + out_q = dplane_ctx_queue_count(&prov->dp_ctx_out_list); + dplane_provider_unlock(prov); in = atomic_load_explicit(&prov->dp_in_counter, memory_order_relaxed); - in_q = atomic_load_explicit(&prov->dp_in_queued, - memory_order_relaxed); + in_max = atomic_load_explicit(&prov->dp_in_max, memory_order_relaxed); out = atomic_load_explicit(&prov->dp_out_counter, memory_order_relaxed); - out_q = atomic_load_explicit(&prov->dp_out_queued, - memory_order_relaxed); + out_max = atomic_load_explicit(&prov->dp_out_max, memory_order_relaxed); - vty_out(vty, "%s (%u): in: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64", out: %"PRIu64", q: %"PRIu64", q_max: %"PRIu64"\n", - prov->dp_name, prov->dp_id, in, in_q, in_max, - out, out_q, out_max); + vty_out(vty, + " %s (%u): in: %" PRIu64 ", q: %" PRIu64 + ", q_max: %" PRIu64 ", out: %" PRIu64 ", q: %" PRIu64 + ", q_max: %" PRIu64 "\n", + prov->dp_name, prov->dp_id, in, in_q, in_max, out, + out_q, out_max); prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov); } + out = zebra_rib_dplane_results_count(); + vty_out(vty, "dataplane Outgoing Queue to Zebra: %" PRIu64 "\n", out); + return CMD_SUCCESS; } @@ -6299,10 +6307,6 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( dplane_provider_lock(prov); ctx = dplane_ctx_list_pop(&(prov->dp_ctx_in_list)); - if (ctx) { - atomic_fetch_sub_explicit(&prov->dp_in_queued, 1, - memory_order_relaxed); - } dplane_provider_unlock(prov); @@ -6330,10 +6334,6 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, break; } - if (ret > 0) - atomic_fetch_sub_explicit(&prov->dp_in_queued, ret, - memory_order_relaxed); - dplane_provider_unlock(prov); return ret; @@ -6358,10 +6358,7 @@ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, dplane_ctx_list_add_tail(&(prov->dp_ctx_out_list), ctx); /* Maintain out-queue counters */ - atomic_fetch_add_explicit(&(prov->dp_out_queued), 1, - memory_order_relaxed); - curr = atomic_load_explicit(&prov->dp_out_queued, - memory_order_relaxed); + curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list); high = atomic_load_explicit(&prov->dp_out_max, memory_order_relaxed); if (curr > high) @@ -6383,9 +6380,6 @@ dplane_provider_dequeue_out_ctx(struct zebra_dplane_provider *prov) if (!ctx) return NULL; - atomic_fetch_sub_explicit(&(prov->dp_out_queued), 1, - memory_order_relaxed); - return ctx; } @@ -7331,10 +7325,10 @@ static void dplane_thread_loop(struct event *event) { struct dplane_ctx_list_head work_list; struct dplane_ctx_list_head error_list; - struct zebra_dplane_provider *prov; + struct zebra_dplane_provider *prov, *next_prov; struct zebra_dplane_ctx *ctx; int limit, counter, error_counter; - uint64_t curr, high; + uint64_t curr, out_curr, high; bool reschedule = false; /* Capture work limit per cycle */ @@ -7358,18 +7352,48 @@ static void dplane_thread_loop(struct event *event) /* Locate initial registered provider */ prov = dplane_prov_list_first(&zdplane_info.dg_providers); - /* Move new work from incoming list to temp list */ - for (counter = 0; counter < limit; counter++) { - ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list); - if (ctx) { - ctx->zd_provider = prov->dp_id; + curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list); + out_curr = dplane_ctx_queue_count(&prov->dp_ctx_out_list); - dplane_ctx_list_add_tail(&work_list, ctx); - } else { - break; + if (curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Current first provider(%s) Input queue is %" PRIu64 + ", holding off work", + __func__, prov->dp_name, curr); + counter = 0; + } else if (out_curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Current first provider(%s) Output queue is %" PRIu64 + ", holding off work", + __func__, prov->dp_name, out_curr); + counter = 0; + } else { + int tlimit; + /* + * Let's limit the work to how what can be put on the + * in or out queue without going over + */ + tlimit = limit - MAX(curr, out_curr); + /* Move new work from incoming list to temp list */ + for (counter = 0; counter < tlimit; counter++) { + ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list); + if (ctx) { + ctx->zd_provider = prov->dp_id; + + dplane_ctx_list_add_tail(&work_list, ctx); + } else { + break; + } } } + /* + * If there is anything still on the two input queues reschedule + */ + if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 || + dplane_ctx_queue_count(&zdplane_info.dg_update_list) > 0) + reschedule = true; + DPLANE_UNLOCK(); atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter, @@ -7388,8 +7412,9 @@ static void dplane_thread_loop(struct event *event) * items. */ if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) - zlog_debug("dplane enqueues %d new work to provider '%s'", - counter, dplane_provider_get_name(prov)); + zlog_debug("dplane enqueues %d new work to provider '%s' curr is %" PRIu64, + counter, dplane_provider_get_name(prov), + curr); /* Capture current provider id in each context; check for * error status. @@ -7422,10 +7447,7 @@ static void dplane_thread_loop(struct event *event) atomic_fetch_add_explicit(&prov->dp_in_counter, counter, memory_order_relaxed); - atomic_fetch_add_explicit(&prov->dp_in_queued, counter, - memory_order_relaxed); - curr = atomic_load_explicit(&prov->dp_in_queued, - memory_order_relaxed); + curr = dplane_ctx_queue_count(&prov->dp_ctx_in_list); high = atomic_load_explicit(&prov->dp_in_max, memory_order_relaxed); if (curr > high) @@ -7450,18 +7472,61 @@ static void dplane_thread_loop(struct event *event) if (!zdplane_info.dg_run) break; + /* Locate next provider */ + next_prov = dplane_prov_list_next(&zdplane_info.dg_providers, + prov); + if (next_prov) { + curr = dplane_ctx_queue_count( + &next_prov->dp_ctx_in_list); + out_curr = dplane_ctx_queue_count( + &next_prov->dp_ctx_out_list); + } else + out_curr = curr = 0; + /* Dequeue completed work from the provider */ dplane_provider_lock(prov); - while (counter < limit) { - ctx = dplane_provider_dequeue_out_ctx(prov); - if (ctx) { - dplane_ctx_list_add_tail(&work_list, ctx); - counter++; - } else - break; + if (curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Next Provider(%s) Input queue is %" PRIu64 + ", holding off work", + __func__, next_prov->dp_name, curr); + counter = 0; + } else if (out_curr >= (uint64_t)limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("%s: Next Provider(%s) Output queue is %" PRIu64 + ", holding off work", + __func__, next_prov->dp_name, + out_curr); + counter = 0; + } else { + int tlimit; + + /* + * Let's limit the work to how what can be put on the + * in or out queue without going over + */ + tlimit = limit - MAX(curr, out_curr); + while (counter < tlimit) { + ctx = dplane_provider_dequeue_out_ctx(prov); + if (ctx) { + dplane_ctx_list_add_tail(&work_list, + ctx); + counter++; + } else + break; + } } + /* + * Let's check if there are still any items on the + * input or output queus of the current provider + * if so then we know we need to reschedule. + */ + if (dplane_ctx_queue_count(&prov->dp_ctx_in_list) > 0 || + dplane_ctx_queue_count(&prov->dp_ctx_out_list) > 0) + reschedule = true; + dplane_provider_unlock(prov); if (counter >= limit) @@ -7477,7 +7542,7 @@ static void dplane_thread_loop(struct event *event) } /* Locate next provider */ - prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov); + prov = next_prov; } /* diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index d53b27a387..075cc2ffb4 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -5108,6 +5108,17 @@ static int rib_dplane_results(struct dplane_ctx_list_head *ctxlist) return 0; } +uint32_t zebra_rib_dplane_results_count(void) +{ + uint32_t count; + + frr_with_mutex (&dplane_mutex) { + count = dplane_ctx_queue_count(&rib_dplane_q); + } + + return count; +} + /* * Ensure there are no empty slots in the route_info array. * Every route type in zebra should be present there. |
