]> git.puffer.fish Git - matthieu/frr.git/commitdiff
topotests: stop wasting time at exit
authorDavid Lamparter <equinox@diac24.net>
Wed, 15 Jul 2020 16:48:18 +0000 (18:48 +0200)
committerDavid Lamparter <equinox@diac24.net>
Wed, 15 Jul 2020 16:50:30 +0000 (18:50 +0200)
... and clean up zombie child processes

Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
tests/topotests/lib/topotest.py

index cb40feeeb8c81db65be321a044047c896a063080..b5fa2ea59bf59a81540ab1e81e09136fa78a54e4 100644 (file)
@@ -35,6 +35,7 @@ import tempfile
 import platform
 import difflib
 import time
+import signal
 
 from lib.topolog import logger
 from copy import deepcopy
@@ -450,6 +451,10 @@ def pid_exists(pid):
 
     if pid <= 0:
         return False
+    try:
+        os.waitpid(pid, os.WNOHANG)
+    except:
+        pass
     try:
         os.kill(pid, 0)
     except OSError as err:
@@ -1021,8 +1026,8 @@ class Router(Node):
         os.system("chmod -R go+rw /tmp/topotests")
 
     # Return count of running daemons
-    def countDaemons(self):
-        numRunning = 0
+    def listDaemons(self):
+        ret = []
         rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
         errors = ""
         if re.search(r"No such file or directory", rundaemons):
@@ -1031,12 +1036,11 @@ class Router(Node):
             for d in StringIO.StringIO(rundaemons):
                 daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
                 if daemonpid.isdigit() and pid_exists(int(daemonpid)):
-                    numRunning += 1
-        return numRunning
+                    ret.append(os.path.basename(d.rstrip().rsplit(".", 1)[0]))
+        return ret
 
     def stopRouter(self, wait=True, assertOnError=True, minErrorVersion="5.1"):
         # Stop Running FRR Daemons
-        numRunning = 0
         rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype)
         errors = ""
         if re.search(r"No such file or directory", rundaemons):
@@ -1045,24 +1049,36 @@ class Router(Node):
             for d in StringIO.StringIO(rundaemons):
                 daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
                 if daemonpid.isdigit() and pid_exists(int(daemonpid)):
+                    daemonname = os.path.basename(d.rstrip().rsplit(".", 1)[0])
                     logger.info(
                         "{}: stopping {}".format(
-                            self.name, os.path.basename(d.rstrip().rsplit(".", 1)[0])
+                            self.name, daemonname
                         )
                     )
-                    self.cmd("kill -TERM %s" % daemonpid)
-                    self.waitOutput()
-                    if pid_exists(int(daemonpid)):
-                        numRunning += 1
-
-            if wait and numRunning > 0:
-                counter = 5
-                while counter > 0 and numRunning > 0:
-                    sleep(2, "{}: waiting for daemons stopping".format(self.name))
-                    numRunning = self.countDaemons()
+                    try:
+                        os.kill(int(daemonpid), signal.SIGTERM)
+                    except OSError as err:
+                        if err.errno == errno.ESRCH:
+                            logger.error("{}: {} left a dead pidfile (pid={})".format(self.name, daemonname, daemonpid))
+                        else:
+                            logger.info("{}: {} could not kill pid {}: {}".format(self.name, daemonname, daemonpid, str(err)))
+
+            if not wait:
+                return errors
+
+            running = self.listDaemons()
+
+            if running:
+                sleep(0.1, "{}: waiting for daemons stopping: {}".format(self.name, ', '.join(running)))
+                running = self.listDaemons()
+
+                counter = 20
+                while counter > 0 and running:
+                    sleep(0.5, "{}: waiting for daemons stopping: {}".format(self.name, ', '.join(running)))
+                    running = self.listDaemons()
                     counter -= 1
 
-            if wait and numRunning > 0:
+            if running:
                 # 2nd round of kill if daemons didn't exit
                 for d in StringIO.StringIO(rundaemons):
                     daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip()
@@ -1077,13 +1093,15 @@ class Router(Node):
                         self.waitOutput()
                     self.cmd("rm -- {}".format(d.rstrip()))
 
-        if wait:
-            errors = self.checkRouterCores(reportOnce=True)
-            if self.checkRouterVersion("<", minErrorVersion):
-                # ignore errors in old versions
-                errors = ""
-            if assertOnError and len(errors) > 0:
-                assert "Errors found - details follow:" == 0, errors
+        if not wait:
+            return errors
+
+        errors = self.checkRouterCores(reportOnce=True)
+        if self.checkRouterVersion("<", minErrorVersion):
+            # ignore errors in old versions
+            errors = ""
+        if assertOnError and len(errors) > 0:
+            assert "Errors found - details follow:" == 0, errors
         return errors
 
     def removeIPs(self):