git.puffer.fish Git - matthieu/frr.git/commitdiff
tests: Allow mgmtd and zebra to fully come up before other daemons
author: Donald Sharp <sharpd@nvidia.com>
Fri, 7 Mar 2025 23:35:53 +0000 (18:35 -0500)
committer: Donald Sharp <sharpd@nvidia.com>
Fri, 7 Mar 2025 23:43:18 +0000 (18:43 -0500)
Currently the topotest infrastructure starts daemons in this order:
mgmtd, zebra, staticd, then everything else.

The problem that is happening, under heavy load, is that
zebra may not be fully started and when a daemon attempts
to connect to it, it will not be able to connect.
Some of the daemons do not have great retry mechanisms at all.
In addition our normal systemctl startup scripts actually
wait a small amount of time for zebra to be ready before
moving onto the other daemons.

Let's make topotests startup a tiny bit more nuanced: wait for mgmtd
to be up before starting zebra, and wait for zebra to be up before
starting the remaining daemons.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
tests/topotests/lib/topotest.py

index e2c70cdccd9bf0fe9c9df90405083768202ce572..301f245b17294e550b667f051ef6f0962e1ec905 100644 (file)
@@ -2244,17 +2244,39 @@ class Router(Node):
                 else:
                     logger.debug("%s: %s %s started", self, self.routertype, daemon)
 
+        # Check if the daemons are running
+        def _check_daemons_running(check_daemon_files):
+            wait_time = 30 if (gdb_routers or gdb_daemons) else 10
+            timeout = Timeout(wait_time)
+            for remaining in timeout:
+                if not check_daemon_files:
+                    break
+                check = check_daemon_files[0]
+                if self.path_exists(check):
+                    check_daemon_files.pop(0)
+                    continue
+                self.logger.debug(
+                    "Waiting {}s for {} to appear".format(remaining, check)
+                )
+                time.sleep(0.5)
+
         # Start mgmtd first
         if "mgmtd" in daemons_list:
             start_daemon("mgmtd")
             while "mgmtd" in daemons_list:
                 daemons_list.remove("mgmtd")
+            # Wait till mgmtd is up and running to some
+            # very small extent before moving on
+            _check_daemons_running(check_daemon_files)
 
         # Start Zebra after mgmtd
         if "zebra" in daemons_list:
             start_daemon("zebra")
             while "zebra" in daemons_list:
                 daemons_list.remove("zebra")
+            # Wait till zebra is up and running to some
+            # very small extent before moving on
+            _check_daemons_running(check_daemon_files)
 
         # Start staticd next if required
         if "staticd" in daemons_list:
@@ -2290,17 +2312,7 @@ class Router(Node):
                 start_daemon(daemon)
 
         # Check if daemons are running.
-        wait_time = 30 if (gdb_routers or gdb_daemons) else 10
-        timeout = Timeout(wait_time)
-        for remaining in timeout:
-            if not check_daemon_files:
-                break
-            check = check_daemon_files[0]
-            if self.path_exists(check):
-                check_daemon_files.pop(0)
-                continue
-            self.logger.debug("Waiting {}s for {} to appear".format(remaining, check))
-            time.sleep(0.5)
+        _check_daemons_running(check_daemon_files)
 
         if check_daemon_files:
             assert False, "Timeout({}) waiting for {} to appear on {}".format(