From c9f92703bc66f2a764e9475751a50c2697a3f255 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Mon, 29 Nov 2021 19:33:48 -0500 Subject: [PATCH] tests: Fix Daemon Killing to actually notice when a daemon dies Lots of the GR topotests kill daemons in order to test code that deals with crashing daemons. Under heavy system load it was noticed that a kill command was sent and, if told to wait, we would sleep 2 seconds, send another kill command, and call it good. This was causing issues when subsequent JSON commands would get errors like `lost connection to daemon` as the daemon finally shut down after some time due to load. Modify the daemon-killing function to notice that the daemon was not actually killed and, if we need to wait, wait some more time for it to happen. Signed-off-by: Donald Sharp --- tests/topotests/lib/topotest.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py index 4e613ce8ac..6be644ac00 100644 --- a/tests/topotests/lib/topotest.py +++ b/tests/topotests/lib/topotest.py @@ -1859,7 +1859,7 @@ class Router(Node): self.cmd("kill -9 %s" % daemonpid) if pid_exists(int(daemonpid)): numRunning += 1 - if wait and numRunning > 0: + while wait and numRunning > 0: sleep( 2, "{}: waiting for {} daemon to be stopped".format( @@ -1883,7 +1883,11 @@ class Router(Node): ) ) self.cmd("kill -9 %s" % daemonpid) - self.cmd("rm -- {}".format(d.rstrip())) + if daemonpid.isdigit() and not pid_exists( + int(daemonpid) + ): + numRunning -= 1 + self.cmd("rm -- {}".format(d.rstrip())) if wait: errors = self.checkRouterCores(reportOnce=True) if self.checkRouterVersion("<", minErrorVersion): -- 2.39.5