From 6122c87b377d1da14ee31b2136fc718fbb9ae434 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 7 Mar 2025 18:35:53 -0500 Subject: [PATCH] tests: Allow mgmtd and zebra to fully come up before other daemons Currently the topotest infrastructure starts daemons in the order mgmtd, zebra, staticd, then everything else. The problem, under heavy load, is that zebra may not be fully started when another daemon attempts to connect to it, so the connection fails. Some of the daemons do not have good retry mechanisms. In addition, our normal systemctl startup scripts actually wait a small amount of time for zebra to be ready before moving on to the other daemons. Let's make topotest startup a tiny bit more nuanced: have mgmtd fully up before starting zebra, and zebra fully up before starting the other daemons. Signed-off-by: Donald Sharp (cherry picked from commit dd609bc069857a38ff3577b76d0b9ef708b8d2aa) --- tests/topotests/lib/topotest.py | 34 ++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py index e2c70cdccd..301f245b17 100644 --- a/tests/topotests/lib/topotest.py +++ b/tests/topotests/lib/topotest.py @@ -2244,17 +2244,39 @@ class Router(Node): else: logger.debug("%s: %s %s started", self, self.routertype, daemon) + # Check if the daemons are running + def _check_daemons_running(check_daemon_files): + wait_time = 30 if (gdb_routers or gdb_daemons) else 10 + timeout = Timeout(wait_time) + for remaining in timeout: + if not check_daemon_files: + break + check = check_daemon_files[0] + if self.path_exists(check): + check_daemon_files.pop(0) + continue + self.logger.debug( + "Waiting {}s for {} to appear".format(remaining, check) + ) + time.sleep(0.5) + # Start mgmtd first if "mgmtd" in daemons_list: start_daemon("mgmtd") while "mgmtd" in daemons_list: daemons_list.remove("mgmtd") + # Wait till mgmtd is up and running to some + # very small extent before moving on + 
_check_daemons_running(check_daemon_files) # Start Zebra after mgmtd if "zebra" in daemons_list: start_daemon("zebra") while "zebra" in daemons_list: daemons_list.remove("zebra") + # Wait till zebra is up and running to some + # very small extent before moving on + _check_daemons_running(check_daemon_files) # Start staticd next if required if "staticd" in daemons_list: @@ -2290,17 +2312,7 @@ class Router(Node): start_daemon(daemon) # Check if daemons are running. - wait_time = 30 if (gdb_routers or gdb_daemons) else 10 - timeout = Timeout(wait_time) - for remaining in timeout: - if not check_daemon_files: - break - check = check_daemon_files[0] - if self.path_exists(check): - check_daemon_files.pop(0) - continue - self.logger.debug("Waiting {}s for {} to appear".format(remaining, check)) - time.sleep(0.5) + _check_daemons_running(check_daemon_files) if check_daemon_files: assert False, "Timeout({}) waiting for {} to appear on {}".format( -- 2.39.5