diff options
Diffstat (limited to 'tests/topotests/lib/topotest.py')
| -rw-r--r-- | tests/topotests/lib/topotest.py | 268 |
1 files changed, 158 insertions, 110 deletions
diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py index d35b908e12..967f09ecbd 100644 --- a/tests/topotests/lib/topotest.py +++ b/tests/topotests/lib/topotest.py @@ -9,13 +9,13 @@ # Network Device Education Foundation, Inc. ("NetDEF") # +import configparser import difflib import errno import functools import glob import json import os -import pdb import platform import re import resource @@ -24,22 +24,17 @@ import subprocess import sys import tempfile import time +from collections.abc import Mapping from copy import deepcopy import lib.topolog as topolog +from lib.micronet_compat import Node from lib.topolog import logger - -if sys.version_info[0] > 2: - import configparser - from collections.abc import Mapping -else: - import ConfigParser as configparser - from collections import Mapping +from munet.base import Timeout from lib import micronet -from lib.micronet_compat import Node -g_extra_config = {} +g_pytest_config = None def get_logs_path(rundir): @@ -352,7 +347,7 @@ def run_and_expect(func, what, count=20, wait=3): count, wait ) - logger.info( + logger.debug( "'{}' polling started (interval {} secs, maximum {} tries)".format( func_name, wait, count ) @@ -366,7 +361,7 @@ def run_and_expect(func, what, count=20, wait=3): continue end_time = time.time() - logger.info( + logger.debug( "'{}' succeeded after {:.2f} seconds".format( func_name, end_time - start_time ) @@ -409,7 +404,7 @@ def run_and_expect_type(func, etype, count=20, wait=3, avalue=None): count, wait ) - logger.info( + logger.debug( "'{}' polling started (interval {} secs, maximum wait {} secs)".format( func_name, wait, int(wait * count) ) @@ -432,7 +427,7 @@ def run_and_expect_type(func, etype, count=20, wait=3, avalue=None): continue end_time = time.time() - logger.info( + logger.debug( "'{}' succeeded after {:.2f} seconds".format( func_name, end_time - start_time ) @@ -474,32 +469,6 @@ def int2dpid(dpid): ) -def pid_exists(pid): - "Check whether pid exists in the current process table." - - if pid <= 0: - return False - try: - os.waitpid(pid, os.WNOHANG) - except: - pass - try: - os.kill(pid, 0) - except OSError as err: - if err.errno == errno.ESRCH: - # ESRCH == No such process - return False - elif err.errno == errno.EPERM: - # EPERM clearly means there's a process to deny access to - return True - else: - # According to "man 2 kill" possible error values are - # (EINVAL, EPERM, ESRCH) - raise - else: - return True - - def get_textdiff(text1, text2, title1="", title2="", **opts): "Returns empty string if same or formatted diff" @@ -1086,7 +1055,7 @@ def checkAddressSanitizerError(output, router, component, logdir=""): # No Address Sanitizer Error in Output. Now check for AddressSanitizer daemon file if logdir: - filepattern = logdir + "/" + router + "/" + component + ".asan.*" + filepattern = logdir + "/" + router + ".asan." + component + ".*" logger.debug( "Log check for %s on %s, pattern %s\n" % (component, router, filepattern) ) @@ -1130,7 +1099,7 @@ def _sysctl_atleast(commander, variable, min_value): valstr = " ".join([str(x) for x in min_value]) else: valstr = str(min_value) - logger.info("Increasing sysctl %s from %s to %s", variable, cur_val, valstr) + logger.debug("Increasing sysctl %s from %s to %s", variable, cur_val, valstr) commander.cmd_raises('sysctl -w {}="{}"\n'.format(variable, valstr)) @@ -1161,7 +1130,7 @@ def _sysctl_assure(commander, variable, value): valstr = " ".join([str(x) for x in value]) else: valstr = str(value) - logger.info("Changing sysctl %s from %s to %s", variable, cur_val, valstr) + logger.debug("Changing sysctl %s from %s to %s", variable, cur_val, valstr) commander.cmd_raises('sysctl -w {}="{}"\n'.format(variable, valstr)) @@ -1204,7 +1173,7 @@ def rlimit_atleast(rname, min_value, raises=False): soft, hard = cval if soft < min_value: nval = (min_value, hard if min_value < hard else min_value) - logger.info("Increasing rlimit %s from %s to %s", rname, cval, nval) + logger.debug("Increasing rlimit %s from %s to %s", rname, cval, nval) resource.setrlimit(rname, nval) except subprocess.CalledProcessError as error: logger.warning( @@ -1215,10 +1184,9 @@ def rlimit_atleast(rname, min_value, raises=False): def fix_netns_limits(ns): - # Maximum read and write socket buffer sizes - sysctl_atleast(ns, "net.ipv4.tcp_rmem", [10 * 1024, 87380, 16 * 2 ** 20]) - sysctl_atleast(ns, "net.ipv4.tcp_wmem", [10 * 1024, 87380, 16 * 2 ** 20]) + sysctl_atleast(ns, "net.ipv4.tcp_rmem", [10 * 1024, 87380, 16 * 2**20]) + sysctl_atleast(ns, "net.ipv4.tcp_wmem", [10 * 1024, 87380, 16 * 2**20]) sysctl_assure(ns, "net.ipv4.conf.all.rp_filter", 0) sysctl_assure(ns, "net.ipv4.conf.default.rp_filter", 0) @@ -1277,8 +1245,8 @@ def fix_host_limits(): sysctl_atleast(None, "net.core.netdev_max_backlog", 4 * 1024) # Maximum read and write socket buffer sizes - sysctl_atleast(None, "net.core.rmem_max", 16 * 2 ** 20) - sysctl_atleast(None, "net.core.wmem_max", 16 * 2 ** 20) + sysctl_atleast(None, "net.core.rmem_max", 16 * 2**20) + sysctl_atleast(None, "net.core.wmem_max", 16 * 2**20) # Garbage Collection Settings for ARP and Neighbors sysctl_atleast(None, "net.ipv4.neigh.default.gc_thresh2", 4 * 1024) @@ -1303,7 +1271,8 @@ def fix_host_limits(): def setup_node_tmpdir(logdir, name): # Cleanup old log, valgrind, and core files. subprocess.check_call( - "rm -rf {0}/{1}.valgrind.* {1}.*.asan {0}/{1}/".format(logdir, name), shell=True + "rm -rf {0}/{1}.valgrind.* {0}/{1}.asan.* {0}/{1}/".format(logdir, name), + shell=True, ) # Setup the per node directory. @@ -1318,8 +1287,7 @@ def setup_node_tmpdir(logdir, name): class Router(Node): "A Node with IPv4/IPv6 forwarding enabled" - def __init__(self, name, **params): - + def __init__(self, name, *posargs, **params): # Backward compatibility: # Load configuration defaults like topogen. self.config_defaults = configparser.ConfigParser( @@ -1335,11 +1303,13 @@ class Router(Node): os.path.join(os.path.dirname(os.path.realpath(__file__)), "../pytest.ini") ) + self.perf_daemons = {} + # If this topology is using old API and doesn't have logdir # specified, then attempt to generate an unique logdir. self.logdir = params.get("logdir") if self.logdir is None: - self.logdir = get_logs_path(g_extra_config["rundir"]) + self.logdir = get_logs_path(g_pytest_config.getoption("--rundir")) if not params.get("logger"): # If logger is present topogen has already set this up @@ -1347,7 +1317,7 @@ class Router(Node): l = topolog.get_logger(name, log_level="debug", target=logfile) params["logger"] = l - super(Router, self).__init__(name, **params) + super(Router, self).__init__(name, *posargs, **params) self.daemondir = None self.hasmpls = False @@ -1407,8 +1377,8 @@ class Router(Node): # pylint: disable=W0221 # Some params are only meaningful for the parent class. - def config(self, **params): - super(Router, self).config(**params) + def config_host(self, **params): + super(Router, self).config_host(**params) # User did not specify the daemons directory, try to autodetect it. self.daemondir = params.get("daemondir") @@ -1478,11 +1448,11 @@ class Router(Node): logger.info("%s: stopping %s", self.name, ", ".join([x[0] for x in running])) for name, pid in running: - logger.info("{}: sending SIGTERM to {}".format(self.name, name)) + logger.debug("{}: sending SIGTERM to {}".format(self.name, name)) try: os.kill(pid, signal.SIGTERM) except OSError as err: - logger.info( + logger.debug( "%s: could not kill %s (%s): %s", self.name, name, pid, str(err) ) @@ -1526,10 +1496,13 @@ class Router(Node): def removeIPs(self): for interface in self.intfNames(): try: - self.intf_ip_cmd(interface, "ip address flush " + interface) + self.intf_ip_cmd(interface, "ip -4 address flush " + interface) + self.intf_ip_cmd( + interface, "ip -6 address flush " + interface + " scope global" + ) except Exception as ex: logger.error("%s can't remove IPs %s", self, str(ex)) - # pdb.set_trace() + # breakpoint() # assert False, "can't remove IPs %s" % str(ex) def checkCapability(self, daemon, param): @@ -1560,7 +1533,7 @@ class Router(Node): router_relative = os.path.join(script_dir, self.name, tail) if self.path_exists(router_relative): source = router_relative - self.logger.info( + self.logger.debug( "using router relative configuration: {}".format(source) ) @@ -1595,10 +1568,7 @@ class Router(Node): if (daemon == "zebra") and (self.daemons["mgmtd"] == 0): # Add mgmtd with zebra - if it exists - try: - mgmtd_path = os.path.join(self.daemondir, "mgmtd") - except: - pdb.set_trace() + mgmtd_path = os.path.join(self.daemondir, "mgmtd") if os.path.isfile(mgmtd_path): self.daemons["mgmtd"] = 1 self.daemons_options["mgmtd"] = "" @@ -1606,18 +1576,14 @@ class Router(Node): if (daemon == "zebra") and (self.daemons["staticd"] == 0): # Add staticd with zebra - if it exists - try: - staticd_path = os.path.join(self.daemondir, "staticd") - except: - pdb.set_trace() - + staticd_path = os.path.join(self.daemondir, "staticd") if os.path.isfile(staticd_path): self.daemons["staticd"] = 1 self.daemons_options["staticd"] = "" # Auto-Started staticd has no config, so it will read from zebra config else: - logger.info("No daemon {} known".format(daemon)) + logger.warning("No daemon {} known".format(daemon)) # print "Daemons after:", self.daemons def runInWindow(self, cmd, title=None): @@ -1685,8 +1651,7 @@ class Router(Node): # used self.cmd("echo 100000 > /proc/sys/net/mpls/platform_labels") - shell_routers = g_extra_config["shell"] - if "all" in shell_routers or self.name in shell_routers: + if g_pytest_config.name_in_option_list(self.name, "--shell"): self.run_in_window(os.getenv("SHELL", "bash"), title="sh-%s" % self.name) if self.daemons["eigrpd"] == 1: @@ -1703,8 +1668,7 @@ class Router(Node): status = self.startRouterDaemons(tgen=tgen) - vtysh_routers = g_extra_config["vtysh"] - if "all" in vtysh_routers or self.name in vtysh_routers: + if g_pytest_config.name_in_option_list(self.name, "--vtysh"): self.run_in_window("vtysh", title="vt-%s" % self.name) if self.unified_config: @@ -1724,13 +1688,13 @@ class Router(Node): def startRouterDaemons(self, daemons=None, tgen=None): "Starts FRR daemons for this router." - asan_abort = g_extra_config["asan_abort"] - gdb_breakpoints = g_extra_config["gdb_breakpoints"] - gdb_daemons = g_extra_config["gdb_daemons"] - gdb_routers = g_extra_config["gdb_routers"] - valgrind_extra = g_extra_config["valgrind_extra"] - valgrind_memleaks = g_extra_config["valgrind_memleaks"] - strace_daemons = g_extra_config["strace_daemons"] + asan_abort = bool(g_pytest_config.option.asan_abort) + gdb_breakpoints = g_pytest_config.get_option_list("--gdb-breakpoints") + gdb_daemons = g_pytest_config.get_option_list("--gdb-daemons") + gdb_routers = g_pytest_config.get_option_list("--gdb-routers") + valgrind_extra = bool(g_pytest_config.option.valgrind_extra) + valgrind_memleaks = bool(g_pytest_config.option.valgrind_memleaks) + strace_daemons = g_pytest_config.get_option_list("--strace-daemons") # Get global bundle data if not self.path_exists("/etc/frr/support_bundle_commands.conf"): @@ -1754,11 +1718,31 @@ class Router(Node): self.reportCores = True # XXX: glue code forward ported from removed function. - if self.version == None: + if self.version is None: self.version = self.cmd( os.path.join(self.daemondir, "bgpd") + " -v" ).split()[2] logger.info("{}: running version: {}".format(self.name, self.version)) + + perfds = {} + perf_options = g_pytest_config.get_option("--perf-options", "-g") + for perf in g_pytest_config.get_option("--perf", []): + if "," in perf: + daemon, routers = perf.split(",", 1) + perfds[daemon] = routers.split(",") + else: + daemon = perf + perfds[daemon] = ["all"] + + logd_options = {} + for logd in g_pytest_config.get_option("--logd", []): + if "," in logd: + daemon, routers = logd.split(",", 1) + logd_options[daemon] = routers.split(",") + else: + daemon = logd + logd_options[daemon] = ["all"] + # If `daemons` was specified then some upper API called us with # specific daemons, otherwise just use our own configuration. daemons_list = [] @@ -1770,22 +1754,36 @@ class Router(Node): if self.daemons[daemon] == 1: daemons_list.append(daemon) + tail_log_files = [] + check_daemon_files = [] + def start_daemon(daemon, extra_opts=None): daemon_opts = self.daemons_options.get(daemon, "") + + # get pid and vty filenames and remove the files + m = re.match(r"(.* |^)-n (\d+)( ?.*|$)", daemon_opts) + dfname = daemon if not m else "{}-{}".format(daemon, m.group(2)) + runbase = "/var/run/{}/{}".format(self.routertype, dfname) + # If this is a new system bring-up remove the pid/vty files, otherwise + # do not since apparently presence of the pidfile impacts BGP GR + self.cmd_status("rm -f {0}.pid {0}.vty".format(runbase)) + rediropt = " > {0}.out 2> {0}.err".format(daemon) if daemon == "snmpd": binary = "/usr/sbin/snmpd" cmdenv = "" cmdopt = "{} -C -c /etc/frr/snmpd.conf -p ".format( daemon_opts - ) + "/var/run/{}/snmpd.pid -x /etc/frr/agentx".format(self.routertype) + ) + "{}.pid -x /etc/frr/agentx".format(runbase) + # check_daemon_files.append(runbase + ".pid") else: binary = os.path.join(self.daemondir, daemon) + check_daemon_files.extend([runbase + ".pid", runbase + ".vty"]) cmdenv = "ASAN_OPTIONS=" if asan_abort: - cmdenv = "abort_on_error=1:" - cmdenv += "log_path={0}/{1}.{2}.asan ".format( + cmdenv += "abort_on_error=1:" + cmdenv += "log_path={0}/{1}.asan.{2} ".format( self.logdir, self.name, daemon ) @@ -1808,9 +1806,15 @@ class Router(Node): daemon, self.logdir, self.name ) - cmdopt = "{} --command-log-always --log file:{}.log --log-level debug".format( - daemon_opts, daemon - ) + cmdopt = "{} --command-log-always ".format(daemon_opts) + cmdopt += "--log file:{}.log --log-level debug".format(daemon) + + if daemon in logd_options: + logdopt = logd_options[daemon] + if "all" in logdopt or self.name in logdopt: + tail_log_files.append( + "{}/{}/{}.log".format(self.logdir, self.name, daemon) + ) if extra_opts: cmdopt += " " + extra_opts @@ -1837,6 +1841,23 @@ class Router(Node): logger.info( "%s: %s %s launched in gdb window", self, self.routertype, daemon ) + elif daemon in perfds and (self.name in perfds[daemon] or "all" in perfds[daemon]): + cmdopt += rediropt + cmd = " ".join(["perf record {} --".format(perf_options), binary, cmdopt]) + p = self.popen(cmd) + self.perf_daemons[daemon] = p + if p.poll() and p.returncode: + self.logger.error( + '%s: Failed to launch "%s" (%s) with perf using: %s', + self, + daemon, + p.returncode, + cmd, + ) + else: + logger.debug( + "%s: %s %s started with perf", self, self.routertype, daemon + ) else: if daemon != "snmpd": cmdopt += " -d " @@ -1859,7 +1880,7 @@ class Router(Node): else "", ) else: - logger.info("%s: %s %s started", self, self.routertype, daemon) + logger.debug("%s: %s %s started", self, self.routertype, daemon) # Start mgmtd first if "mgmtd" in daemons_list: @@ -1888,15 +1909,6 @@ class Router(Node): while "snmpd" in daemons_list: daemons_list.remove("snmpd") - if daemons is None: - # Fix Link-Local Addresses on initial startup - # Somehow (on Mininet only), Zebra removes the IPv6 Link-Local addresses on start. Fix this - _, output, _ = self.cmd_status( - "for i in `ls /sys/class/net/` ; do mac=`cat /sys/class/net/$i/address`; echo $i: $mac; [ -z \"$mac\" ] && continue; IFS=':'; set $mac; unset IFS; ip address add dev $i scope link fe80::$(printf %02x $((0x$1 ^ 2)))$2:${3}ff:fe$4:$5$6/64; done", - stderr=subprocess.STDOUT, - ) - logger.debug("Set MACs:\n%s", output) - # Now start all the other daemons for daemon in daemons_list: if self.daemons[daemon] == 0: @@ -1904,16 +1916,50 @@ class Router(Node): start_daemon(daemon) # Check if daemons are running. - rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype) - if re.search(r"No such file or directory", rundaemons): - return "Daemons are not running" + wait_time = 30 if (gdb_routers or gdb_daemons) else 10 + timeout = Timeout(wait_time) + for remaining in timeout: + if not check_daemon_files: + break + check = check_daemon_files[0] + if self.path_exists(check): + check_daemon_files.pop(0) + continue + self.logger.debug("Waiting {}s for {} to appear".format(remaining, check)) + time.sleep(0.5) + + if check_daemon_files: + assert False, "Timeout({}) waiting for {} to appear on {}".format( + wait_time, check_daemon_files[0], self.name + ) # Update the permissions on the log files self.cmd("chown frr:frr -R {}/{}".format(self.logdir, self.name)) self.cmd("chmod ug+rwX,o+r -R {}/{}".format(self.logdir, self.name)) + if "frr" in logd_options: + logdopt = logd_options["frr"] + if "all" in logdopt or self.name in logdopt: + tail_log_files.append("{}/{}/frr.log".format(self.logdir, self.name)) + + for tailf in tail_log_files: + self.run_in_window("tail -f " + tailf, title=tailf, background=True) + return "" + def pid_exists(self, pid): + if pid <= 0: + return False + try: + # If we are not using PID namespaces then we will be a parent of the pid, + # otherwise the init process of the PID namespace will have reaped the proc. + os.waitpid(pid, os.WNOHANG) + except Exception: + pass + + rc, o, e = self.cmd_status("kill -0 " + str(pid), warn=False) + return rc == 0 or "No such process" not in e + def killRouterDaemons( self, daemons, wait=True, assertOnError=True, minErrorVersion="5.1" ): @@ -1933,15 +1979,15 @@ class Router(Node): if re.search(r"%s" % daemon, d): daemonpidfile = d.rstrip() daemonpid = self.cmd("cat %s" % daemonpidfile).rstrip() - if daemonpid.isdigit() and pid_exists(int(daemonpid)): - logger.info( + if daemonpid.isdigit() and self.pid_exists(int(daemonpid)): + logger.debug( "{}: killing {}".format( self.name, os.path.basename(daemonpidfile.rsplit(".", 1)[0]), ) ) - os.kill(int(daemonpid), signal.SIGKILL) - if pid_exists(int(daemonpid)): + self.cmd_status("kill -KILL {}".format(daemonpid)) + if self.pid_exists(int(daemonpid)): numRunning += 1 while wait and numRunning > 0: sleep( @@ -1955,7 +2001,7 @@ class Router(Node): for d in dmns[:-1]: if re.search(r"%s" % daemon, d): daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip() - if daemonpid.isdigit() and pid_exists( + if daemonpid.isdigit() and self.pid_exists( int(daemonpid) ): logger.info( @@ -1966,8 +2012,10 @@ class Router(Node): ), ) ) - os.kill(int(daemonpid), signal.SIGKILL) - if daemonpid.isdigit() and not pid_exists( + self.cmd_status( + "kill -KILL {}".format(daemonpid) + ) + if daemonpid.isdigit() and not self.pid_exists( int(daemonpid) ): numRunning -= 1 @@ -2198,7 +2246,7 @@ class Router(Node): log = self.getStdErr(daemon) if "memstats" in log: # Found memory leak - logger.info( + logger.warning( "\nRouter {} {} StdErr Log:\n{}".format(self.name, daemon, log) ) if not leakfound: |
