summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Donald Sharp <sharpd@nvidia.com> 2025-03-07 18:35:53 -0500
committer Mergify <37929162+mergify[bot]@users.noreply.github.com> 2025-03-09 03:53:04 +0000
commit 6122c87b377d1da14ee31b2136fc718fbb9ae434 (patch)
tree d4190a20345df82f1851194aa403cd84d4a903eb
parent a40c800c2e1ade525c7a3fa5f67dedb4c853234f (diff)
tests: Allow mgmtd and zebra to fully come up before other daemons
Currently the topotest infrastructure starts daemons in the order mgmtd, zebra, staticd, then everything else. The problem, under heavy load, is that zebra may not be fully started when another daemon attempts to connect to it, so the connection fails. Some of the daemons do not have good retry mechanisms at all. In addition, our normal systemctl startup scripts actually wait a small amount of time for zebra to be ready before moving on to the other daemons. Let's make topotest startup a tiny bit more nuanced: wait for mgmtd to be up before starting zebra, and wait for zebra to be up before starting the remaining daemons. Signed-off-by: Donald Sharp <sharpd@nvidia.com> (cherry picked from commit dd609bc069857a38ff3577b76d0b9ef708b8d2aa)
-rw-r--r-- tests/topotests/lib/topotest.py 34
1 files changed, 23 insertions, 11 deletions
diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py
index e2c70cdccd..301f245b17 100644
--- a/tests/topotests/lib/topotest.py
+++ b/tests/topotests/lib/topotest.py
@@ -2244,17 +2244,39 @@ class Router(Node):
else:
logger.debug("%s: %s %s started", self, self.routertype, daemon)
+ # Check if the daemons are running
+ def _check_daemons_running(check_daemon_files):
+ wait_time = 30 if (gdb_routers or gdb_daemons) else 10
+ timeout = Timeout(wait_time)
+ for remaining in timeout:
+ if not check_daemon_files:
+ break
+ check = check_daemon_files[0]
+ if self.path_exists(check):
+ check_daemon_files.pop(0)
+ continue
+ self.logger.debug(
+ "Waiting {}s for {} to appear".format(remaining, check)
+ )
+ time.sleep(0.5)
+
# Start mgmtd first
if "mgmtd" in daemons_list:
start_daemon("mgmtd")
while "mgmtd" in daemons_list:
daemons_list.remove("mgmtd")
+ # Wait till mgmtd is up and running to some
+ # very small extent before moving on
+ _check_daemons_running(check_daemon_files)
# Start Zebra after mgmtd
if "zebra" in daemons_list:
start_daemon("zebra")
while "zebra" in daemons_list:
daemons_list.remove("zebra")
+ # Wait till zebra is up and running to some
+ # very small extent before moving on
+ _check_daemons_running(check_daemon_files)
# Start staticd next if required
if "staticd" in daemons_list:
@@ -2290,17 +2312,7 @@ class Router(Node):
start_daemon(daemon)
# Check if daemons are running.
- wait_time = 30 if (gdb_routers or gdb_daemons) else 10
- timeout = Timeout(wait_time)
- for remaining in timeout:
- if not check_daemon_files:
- break
- check = check_daemon_files[0]
- if self.path_exists(check):
- check_daemon_files.pop(0)
- continue
- self.logger.debug("Waiting {}s for {} to appear".format(remaining, check))
- time.sleep(0.5)
+ _check_daemons_running(check_daemon_files)
if check_daemon_files:
assert False, "Timeout({}) waiting for {} to appear on {}".format(