From 039d547f6f1b7fe4b96cc22ac5a6ef8d18d5cf97 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Tue, 2 Feb 2021 12:56:06 -0500 Subject: lib: Differentiate between real and cpu bound processes When generating SLOW_THREAD warnings, let's differentiate between a CPU-bound process and a wall-bound process. Effectively, a slow thread can now be a process in FRR doing lots of work (CPU bound) or wall bound (the CPU is under heavy load and an FRR process may be pre-empted and never scheduled). Signed-off-by: Donald Sharp --- lib/lib_errors.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib/lib_errors.c') diff --git a/lib/lib_errors.c b/lib/lib_errors.c index 6e5088142a..17695e6607 100644 --- a/lib/lib_errors.c +++ b/lib/lib_errors.c @@ -45,9 +45,15 @@ static struct log_ref ferr_lib_warn[] = { .suggestion = "Gather log data and open an Issue. restart FRR", }, { - .code = EC_LIB_SLOW_THREAD, - .title = "The Event subsystem has detected a slow process", - .description = "The Event subsystem has detected a slow process, this typically indicates that FRR is having trouble completing work in a timely manner. This can be either a misconfiguration, bug, or some combination therof.", + .code = EC_LIB_SLOW_THREAD_CPU, + .title = "The Event subsystem has detected a slow cpu time process", + .description = "The Event subsystem has detected a slow process, this typically indicates that FRR is having trouble completing work in a timely manner. This can be either a misconfiguration, bug, or some combination therof. In this case total CPU time was over 5 seconds. Which indicates that FRR is very busy doing some work and should be addressed", + .suggestion = "Gather log data and open an Issue", + }, + { + .code = EC_LIB_SLOW_THREAD_WALL, + .title = "The Event subsystem has detected a slow wall time process", + .description = "The Event subsystem has detected a slow process, this typically indicates that FRR is having trouble completing work in a timely manner. 
This can be either a misconfiguration, bug or some combination therof. In this case total WALL time was over 5 seconds. Which indicates that FRR might be having trouble being scheduled or some system call is delaying", .suggestion = "Gather log data and open an Issue", }, { -- cgit v1.2.3