]> git.puffer.fish Git - matthieu/frr.git/commitdiff
lib: make cputime checks runtime options (v2)
authorDavid Lamparter <equinox@diac24.net>
Tue, 13 Apr 2021 18:38:09 +0000 (20:38 +0200)
committerDavid Lamparter <equinox@opensourcerouting.org>
Thu, 24 Jun 2021 14:42:59 +0000 (16:42 +0200)
...really no reason to force this into a compile time decision.  The
only point is avoiding the getrusage() syscall, which can easily be a
runtime decision.

[v2: also split cputime & walltime limits]

Signed-off-by: David Lamparter <equinox@diac24.net>
configure.ac
lib/command.c
lib/subdir.am
lib/thread.c
lib/thread.h
lib/vty.c
vtysh/vtysh.c

index 5d4b26eba08262e612e478a5fffb244d42e255ee..9ba95fe98bdc47e73aad6ea7cceb1b94070114bc 100644 (file)
@@ -783,7 +783,6 @@ fi
 
 if test "$enable_datacenter" = "yes" ; then
   AC_DEFINE([HAVE_DATACENTER], [1], [Compile extensions for a DataCenter])
-  AC_MSG_WARN([The --enable-datacenter compile time option is deprecated.  Please modify the init script to pass -F datacenter to the daemons instead.])
   DFLT_NAME="datacenter"
 else
   DFLT_NAME="traditional"
@@ -2695,6 +2694,16 @@ vici socket path        : ${vici_socket}
 The above user and group must have read/write access to the state file
 directory and to the config files in the config file directory."
 
+if test -n "$enable_datacenter"; then
+  AC_MSG_WARN([The --enable-datacenter compile time option is deprecated.  Please modify the init script to pass -F datacenter to the daemons instead.])
+fi
+if test -n "$enable_time_check"; then
+  AC_MSG_WARN([The --enable-time-check compile time option is deprecated.  Please use the service cputime-stats configuration option instead.])
+fi
+if test -n "$enable_cpu_time"; then
+  AC_MSG_WARN([The --enable-cpu-time compile time option is deprecated.  Please use the service cputime-warning NNN configuration option instead.])
+fi
+
 if test "$enable_doc" != "no" -a "$frr_py_mod_sphinx" = "false"; then
   AC_MSG_WARN([sphinx is missing but required to build documentation])
 fi
index 7be54907ed02e66d03d7f9f72fab364ce0515680..e00d84a051dd524d4ebfa8696971d10fa3f4113a 100644 (file)
@@ -434,6 +434,36 @@ static int config_write_host(struct vty *vty)
                }
                log_config_write(vty);
 
+               /* print disable always, but enable only if default is flipped
+                * => prep for future removal of compile-time knob
+                */
+               if (!cputime_enabled)
+                       vty_out(vty, "no service cputime-stats\n");
+#ifdef EXCLUDE_CPU_TIME
+               else
+                       vty_out(vty, "service cputime-stats\n");
+#endif
+
+               if (!cputime_threshold)
+                       vty_out(vty, "no service cputime-warning\n");
+#if defined(CONSUMED_TIME_CHECK) && CONSUMED_TIME_CHECK != 5000000
+               else /* again, always print non-default */
+#else
+               else if (cputime_threshold != 5000000)
+#endif
+                       vty_out(vty, "service cputime-warning %lu\n",
+                               cputime_threshold);
+
+               if (!walltime_threshold)
+                       vty_out(vty, "no service walltime-warning\n");
+#if defined(CONSUMED_TIME_CHECK) && CONSUMED_TIME_CHECK != 5000000
+               else /* again, always print non-default */
+#else
+               else if (walltime_threshold != 5000000)
+#endif
+                       vty_out(vty, "service walltime-warning %lu\n",
+                               walltime_threshold);
+
                if (host.advanced)
                        vty_out(vty, "service advanced-vty\n");
 
index 4015c67ea8d6ac8d8812a475b6397ac8a9a63caa..b4ba078e4610a678060b99e12567c2db19ce7308 100644 (file)
@@ -168,6 +168,7 @@ clippy_scan += \
        lib/northbound_cli.c \
        lib/plist.c \
        lib/routemap_cli.c \
+       lib/thread.c \
        lib/vty.c \
        # end
 
index 3af89fad5aea740e24a6428937a6162c8c76e000..dd5c1fed4071dd945e6b418e9d486963b784e748 100644 (file)
@@ -90,7 +90,22 @@ static struct list *masters;
 
 static void thread_free(struct thread_master *master, struct thread *thread);
 
+#ifndef EXCLUDE_CPU_TIME
+#define EXCLUDE_CPU_TIME 0
+#endif
+#ifndef CONSUMED_TIME_CHECK
+#define CONSUMED_TIME_CHECK 0
+#endif
+
+bool cputime_enabled = !EXCLUDE_CPU_TIME;
+unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
+unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
+
 /* CLI start ---------------------------------------------------------------- */
+#ifndef VTYSH_EXTRACT_PL
+#include "lib/thread_clippy.c"
+#endif
+
 static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
 {
        int size = sizeof(a->func);
@@ -120,7 +135,6 @@ static void cpu_record_hash_free(void *a)
        XFREE(MTYPE_THREAD_STATS, hist);
 }
 
-#ifndef EXCLUDE_CPU_TIME
 static void vty_out_cpu_thread_history(struct vty *vty,
                                       struct cpu_thread_history *a)
 {
@@ -187,6 +201,14 @@ static void cpu_record_print(struct vty *vty, uint8_t filter)
        struct thread_master *m;
        struct listnode *ln;
 
+       if (!cputime_enabled)
+               vty_out(vty,
+                       "\n"
+                       "Collecting CPU time statistics is currently disabled.  Following statistics\n"
+                       "will be zero or may display data from when collection was enabled.  Use the\n"
+                       "  \"service cputime-stats\"  command to start collecting data.\n"
+                       "\nCounters and wallclock times are always maintained and should be accurate.\n");
+
        memset(&tmp, 0, sizeof(tmp));
        tmp.funcname = "TOTAL";
        tmp.types = filter;
@@ -236,7 +258,6 @@ static void cpu_record_print(struct vty *vty, uint8_t filter)
        if (tmp.total_calls > 0)
                vty_out_cpu_thread_history(vty, &tmp);
 }
-#endif
 
 static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
 {
@@ -306,7 +327,6 @@ static uint8_t parse_filter(const char *filterstr)
        return filter;
 }
 
-#ifndef EXCLUDE_CPU_TIME
 DEFUN_NOSH (show_thread_cpu,
            show_thread_cpu_cmd,
            "show thread cpu [FILTER]",
@@ -331,7 +351,61 @@ DEFUN_NOSH (show_thread_cpu,
        cpu_record_print(vty, filter);
        return CMD_SUCCESS;
 }
-#endif
+
+DEFPY (service_cputime_stats,
+       service_cputime_stats_cmd,
+       "[no] service cputime-stats",
+       NO_STR
+       "Set up miscellaneous service\n"
+       "Collect CPU usage statistics\n")
+{
+       cputime_enabled = !no;
+       return CMD_SUCCESS;
+}
+
+DEFPY (service_cputime_warning,
+       service_cputime_warning_cmd,
+       "[no] service cputime-warning (1-4294967295)",
+       NO_STR
+       "Set up miscellaneous service\n"
+       "Warn for tasks exceeding CPU usage threshold\n"
+       "Warning threshold in milliseconds\n")
+{
+       if (no)
+               cputime_threshold = 0;
+       else
+               cputime_threshold = cputime_warning * 1000;
+       return CMD_SUCCESS;
+}
+
+ALIAS (service_cputime_warning,
+       no_service_cputime_warning_cmd,
+       "no service cputime-warning",
+       NO_STR
+       "Set up miscellaneous service\n"
+       "Warn for tasks exceeding CPU usage threshold\n")
+
+DEFPY (service_walltime_warning,
+       service_walltime_warning_cmd,
+       "[no] service walltime-warning (1-4294967295)",
+       NO_STR
+       "Set up miscellaneous service\n"
+       "Warn for tasks exceeding total wallclock threshold\n"
+       "Warning threshold in milliseconds\n")
+{
+       if (no)
+               walltime_threshold = 0;
+       else
+               walltime_threshold = walltime_warning * 1000;
+       return CMD_SUCCESS;
+}
+
+ALIAS (service_walltime_warning,
+       no_service_walltime_warning_cmd,
+       "no service walltime-warning",
+       NO_STR
+       "Set up miscellaneous service\n"
+       "Warn for tasks exceeding total wallclock threshold\n")
 
 static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
 {
@@ -421,11 +495,15 @@ DEFUN (clear_thread_cpu,
 
 void thread_cmd_init(void)
 {
-#ifndef EXCLUDE_CPU_TIME
        install_element(VIEW_NODE, &show_thread_cpu_cmd);
-#endif
        install_element(VIEW_NODE, &show_thread_poll_cmd);
        install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
+
+       install_element(CONFIG_NODE, &service_cputime_stats_cmd);
+       install_element(CONFIG_NODE, &service_cputime_warning_cmd);
+       install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
+       install_element(CONFIG_NODE, &service_walltime_warning_cmd);
+       install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
 }
 /* CLI end ------------------------------------------------------------------ */
 
@@ -1784,9 +1862,8 @@ void thread_getrusage(RUSAGE_T *r)
 #define FRR_RUSAGE RUSAGE_SELF
 #endif
        monotime(&r->real);
-#ifndef EXCLUDE_CPU_TIME
-       getrusage(FRR_RUSAGE, &(r->cpu));
-#endif
+       if (cputime_enabled)
+               getrusage(FRR_RUSAGE, &(r->cpu));
 }
 
 /*
@@ -1802,13 +1879,14 @@ void thread_getrusage(RUSAGE_T *r)
  */
 void thread_call(struct thread *thread)
 {
-#ifndef EXCLUDE_CPU_TIME
-       _Atomic unsigned long realtime, cputime;
-       unsigned long exp;
-       unsigned long helper;
-#endif
        RUSAGE_T before, after;
 
+       /* if the thread being called is the CLI, it may change cputime_enabled
+        * ("service cputime-stats" command), which can result in nonsensical
+        * and very confusing warnings
+        */
+       bool cputime_enabled_here = cputime_enabled;
+
        if (thread->master->ready_run_loop)
                before = thread->master->last_getrusage;
        else
@@ -1828,43 +1906,45 @@ void thread_call(struct thread *thread)
        GETRUSAGE(&after);
        thread->master->last_getrusage = after;
 
-#ifndef EXCLUDE_CPU_TIME
-       realtime = thread_consumed_time(&after, &before, &helper);
-       cputime = helper;
+       unsigned long walltime, cputime;
+       unsigned long exp;
 
-       /* update realtime */
-       atomic_fetch_add_explicit(&thread->hist->real.total, realtime,
+       walltime = thread_consumed_time(&after, &before, &cputime);
+
+       /* update walltime */
+       atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
                                  memory_order_seq_cst);
        exp = atomic_load_explicit(&thread->hist->real.max,
                                   memory_order_seq_cst);
-       while (exp < realtime
+       while (exp < walltime
               && !atomic_compare_exchange_weak_explicit(
-                         &thread->hist->real.max, &exp, realtime,
-                         memory_order_seq_cst, memory_order_seq_cst))
+                      &thread->hist->real.max, &exp, walltime,
+                      memory_order_seq_cst, memory_order_seq_cst))
                ;
 
-       /* update cputime */
-       atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
-                                 memory_order_seq_cst);
-       exp = atomic_load_explicit(&thread->hist->cpu.max,
-                                  memory_order_seq_cst);
-       while (exp < cputime
-              && !atomic_compare_exchange_weak_explicit(
-                         &thread->hist->cpu.max, &exp, cputime,
-                         memory_order_seq_cst, memory_order_seq_cst))
-               ;
+       if (cputime_enabled_here && cputime_enabled) {
+               /* update cputime */
+               atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
+                                         memory_order_seq_cst);
+               exp = atomic_load_explicit(&thread->hist->cpu.max,
+                                          memory_order_seq_cst);
+               while (exp < cputime
+                      && !atomic_compare_exchange_weak_explicit(
+                              &thread->hist->cpu.max, &exp, cputime,
+                              memory_order_seq_cst, memory_order_seq_cst))
+                       ;
+       }
 
        atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
                                  memory_order_seq_cst);
        atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
                                 memory_order_seq_cst);
 
-#ifdef CONSUMED_TIME_CHECK
-       if (cputime > CONSUMED_TIME_CHECK) {
+       if (cputime_enabled_here && cputime_enabled && cputime_threshold
+           && cputime > cputime_threshold) {
                /*
-                * We have a CPU Hog on our hands.  The time FRR
-                * has spent doing actual work ( not sleeping )
-                * is greater than 5 seconds.
+                * We have a CPU Hog on our hands.  The time FRR has spent
+                * doing actual work (not sleeping) is greater than 5 seconds.
                 * Whinge about it now, so we're aware this is yet another task
                 * to fix.
                 */
@@ -1874,13 +1954,13 @@ void thread_call(struct thread *thread)
                        EC_LIB_SLOW_THREAD_CPU,
                        "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->xref->funcname, (unsigned long)thread->func,
-                       realtime / 1000, cputime / 1000);
-       } else if (realtime > CONSUMED_TIME_CHECK) {
+                       walltime / 1000, cputime / 1000);
+
+       } else if (walltime_threshold && walltime > walltime_threshold) {
                /*
-                * The runtime for a task is greater than 5 seconds, but
-                * the cpu time is under 5 seconds.  Let's whine
-                * about this because this could imply some sort of
-                * scheduling issue.
+                * The runtime for a task is greater than 5 seconds, but the
+                * cpu time is under 5 seconds.  Let's whine about this because
+                * this could imply some sort of scheduling issue.
                 */
                atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
                                          1, memory_order_seq_cst);
@@ -1888,10 +1968,8 @@ void thread_call(struct thread *thread)
                        EC_LIB_SLOW_THREAD_WALL,
                        "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
                        thread->xref->funcname, (unsigned long)thread->func,
-                       realtime / 1000, cputime / 1000);
+                       walltime / 1000, cputime / 1000);
        }
-#endif /* CONSUMED_TIME_CHECK */
-#endif /* Exclude CPU Time */
 }
 
 /* Execute thread */
index fee728dbf9416478cdf857620d2edc480b38fb8f..737ed005c5a95f791bc2f43db5350c5be05b4d4d 100644 (file)
 extern "C" {
 #endif
 
+extern bool cputime_enabled;
+extern unsigned long cputime_threshold;
+/* capturing wallclock time is always enabled since it is fast (reading
+ * hardware TSC w/o syscalls)
+ */
+extern unsigned long walltime_threshold;
+
 struct rusage_t {
        struct rusage cpu;
        struct timeval real;
index 50d116c564b2efe382322940707b6fe249117560..f64ab83847af3695ce9121b86492fc4d2198000c 100644 (file)
--- a/lib/vty.c
+++ b/lib/vty.c
@@ -502,37 +502,37 @@ static int vty_command(struct vty *vty, char *buf)
                zlog_notice("%s%s", prompt_str, buf);
        }
 
-#ifdef CONSUMED_TIME_CHECK
-       {
-               RUSAGE_T before;
-               RUSAGE_T after;
-               unsigned long realtime, cputime;
+       RUSAGE_T before;
+       RUSAGE_T after;
+       unsigned long walltime, cputime;
 
-               GETRUSAGE(&before);
-#endif /* CONSUMED_TIME_CHECK */
+       /* cmd_execute() may change cputime_enabled if we're executing the
+        * "service cputime-stats" command, which can result in nonsensical
+        * and very confusing warnings
+        */
+       bool cputime_enabled_here = cputime_enabled;
 
-               ret = cmd_execute(vty, buf, NULL, 0);
+       GETRUSAGE(&before);
 
-               /* Get the name of the protocol if any */
-               protocolname = frr_protoname;
+       ret = cmd_execute(vty, buf, NULL, 0);
 
-#ifdef CONSUMED_TIME_CHECK
-               GETRUSAGE(&after);
-               realtime = thread_consumed_time(&after, &before, &cputime);
-               if (cputime > CONSUMED_TIME_CHECK) {
-                       /* Warn about CPU hog that must be fixed. */
-                       flog_warn(
-                               EC_LIB_SLOW_THREAD_CPU,
-                               "CPU HOG: command took %lums (cpu time %lums): %s",
-                               realtime / 1000, cputime / 1000, buf);
-               } else if (realtime > CONSUMED_TIME_CHECK) {
-                       flog_warn(
-                               EC_LIB_SLOW_THREAD_WALL,
-                               "STARVATION: command took %lums (cpu time %lums): %s",
-                               realtime / 1000, cputime / 1000, buf);
-               }
-       }
-#endif /* CONSUMED_TIME_CHECK */
+       GETRUSAGE(&after);
+
+       walltime = thread_consumed_time(&after, &before, &cputime);
+
+       if (cputime_enabled_here && cputime_enabled && cputime_threshold
+           && cputime > cputime_threshold)
+               /* Warn about CPU hog that must be fixed. */
+               flog_warn(EC_LIB_SLOW_THREAD_CPU,
+                         "CPU HOG: command took %lums (cpu time %lums): %s",
+                         walltime / 1000, cputime / 1000, buf);
+       else if (walltime_threshold && walltime > walltime_threshold)
+               flog_warn(EC_LIB_SLOW_THREAD_WALL,
+                         "STARVATION: command took %lums (cpu time %lums): %s",
+                         walltime / 1000, cputime / 1000, buf);
+
+       /* Get the name of the protocol if any */
+       protocolname = frr_protoname;
 
        if (ret != CMD_SUCCESS)
                switch (ret) {
index 41b7bc8a10d5674f3f354ca31176267babd76d82..507c6ce8821e09c4283b280897fa191c3a048de4 100644 (file)
@@ -2851,7 +2851,6 @@ DEFUN (vtysh_show_poll,
        return show_per_daemon(vty, argv, argc, "Thread statistics for %s:\n");
 }
 
-#ifndef EXCLUDE_CPU_TIME
 DEFUN (vtysh_show_thread,
        vtysh_show_thread_cmd,
        "show thread cpu [FILTER]",
@@ -2862,7 +2861,6 @@ DEFUN (vtysh_show_thread,
 {
        return show_per_daemon(vty, argv, argc, "Thread statistics for %s:\n");
 }
-#endif
 
 DEFUN (vtysh_show_work_queues,
        vtysh_show_work_queues_cmd,
@@ -4567,9 +4565,7 @@ void vtysh_init_vty(void)
        install_element(VIEW_NODE, &vtysh_show_modules_cmd);
        install_element(VIEW_NODE, &vtysh_show_work_queues_cmd);
        install_element(VIEW_NODE, &vtysh_show_work_queues_daemon_cmd);
-#ifndef EXCLUDE_CPU_TIME
        install_element(VIEW_NODE, &vtysh_show_thread_cmd);
-#endif
        install_element(VIEW_NODE, &vtysh_show_poll_cmd);
 
        /* Logging */