From 6255aad0bc78c1b110d72988f62427a057c1a80f Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 8 Oct 2021 07:37:15 -0400 Subject: [PATCH] tests: Fix ospf[6]_gr_topo1 tests to work better under load 2 things: a) Each test was setting up for graceful restart with calls to `graceful-restart prepare ip[v6] ospf`, then sleeping for 3 or 5 seconds, and then killing the ospf process. Under heavy load there is no guarantee that zebra has received/processed this signal. Write some code to ensure that this happens. b) Tests are issuing commands in this order: 1) issue gr prepare command 2) kill router 3) check routers (exiting) 4) start router 5) check routers (restarting) Imagine that the system is under some load and there is a small amount of time before step 5 happens. In this case ospf could have come up and started neighbor relations and also started installing routes. If zebra receives a new route before step 5 is issued then the route could be in a state where it is not installed, because it is being sent to the kernel for installation. This would fail the test because it would only look 1 time. This is fixed by giving time on restart for the routes to be in the installed state. Signed-off-by: Donald Sharp --- .../ospf6_gr_topo1/test_ospf6_gr_topo1.py | 48 +++++++++++++++---- .../ospf_gr_topo1/test_ospf_gr_topo1.py | 47 ++++++++++++++---- 2 files changed, 78 insertions(+), 17 deletions(-) diff --git a/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py b/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py index ccbcadb8b1..d50223191d 100755 --- a/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py +++ b/tests/topotests/ospf6_gr_topo1/test_ospf6_gr_topo1.py @@ -175,10 +175,21 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None): for rname in ["rt1", "rt2", "rt3", "rt4", "rt5", "rt6", "rt7"]: # Check the RIB first, which should be preserved across restarts in # all routers of the routing domain. + # If we are not on initial convergence *but* we are checking + # after a restart. 
Looking in the zebra rib for installed + # is a recipe for test failure. Why? because if we are restarting + # then ospf is in the process of establishing neighbors and passing + # new routes to zebra. Zebra will not mark the route as installed + # when it receives a replacement from ospf until it has finished + # processing it. Let's give it a few seconds to allow this to happen + # under load. if initial_convergence == True: tries = 240 else: - tries = 1 + if restarting != None: + tries = 40 + else: + tries = 1 router_compare_json_output( rname, "show ipv6 route ospf json", "show_ipv6_route.json", tries ) @@ -212,6 +223,26 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None): ) +def ensure_gr_is_in_zebra(rname): + retry = True + retry_times = 10 + tgen = get_topogen() + + while retry and retry_times > 0: + out = tgen.net[rname].cmd( + 'vtysh -c "show zebra client" | grep "Client: ospf6$" -A 40 | grep "Capabilities "' + ) + + if "Graceful Restart" not in out: + sleep(2) + retry_times -= 1 + else: + retry = False + + assertmsg = "%s does not appear to have Graceful Restart setup" % rname + assert not retry and retry_times > 0, assertmsg + + # # Test initial network convergence # @@ -238,10 +269,9 @@ def test_gr_rt1(): pytest.skip(tgen.errors) tgen.net["rt1"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + ensure_gr_is_in_zebra("rt1") kill_router_daemons(tgen, "rt1", ["ospf6d"], save_config=False) check_routers(exiting="rt1") - start_router_daemons(tgen, "rt1", ["ospf6d"]) check_routers(restarting="rt1") @@ -258,7 +288,7 @@ def test_gr_rt2(): pytest.skip(tgen.errors) tgen.net["rt2"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + ensure_gr_is_in_zebra("rt2") kill_router_daemons(tgen, "rt2", ["ospf6d"], save_config=False) check_routers(exiting="rt2") @@ -278,7 +308,7 @@ def test_gr_rt3(): pytest.skip(tgen.errors) tgen.net["rt3"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + 
ensure_gr_is_in_zebra("rt3") kill_router_daemons(tgen, "rt3", ["ospf6d"], save_config=False) check_routers(exiting="rt3") @@ -298,7 +328,7 @@ def test_gr_rt4(): pytest.skip(tgen.errors) tgen.net["rt4"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + ensure_gr_is_in_zebra("rt4") kill_router_daemons(tgen, "rt4", ["ospf6d"], save_config=False) check_routers(exiting="rt4") @@ -318,7 +348,7 @@ def test_gr_rt5(): pytest.skip(tgen.errors) tgen.net["rt5"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + ensure_gr_is_in_zebra("rt5") kill_router_daemons(tgen, "rt5", ["ospf6d"], save_config=False) check_routers(exiting="rt5") @@ -338,7 +368,7 @@ def test_gr_rt6(): pytest.skip(tgen.errors) tgen.net["rt6"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + ensure_gr_is_in_zebra("rt6") kill_router_daemons(tgen, "rt6", ["ospf6d"], save_config=False) check_routers(exiting="rt6") @@ -358,7 +388,7 @@ def test_gr_rt7(): pytest.skip(tgen.errors) tgen.net["rt7"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"') - sleep(5) + ensure_gr_is_in_zebra("rt7") kill_router_daemons(tgen, "rt7", ["ospf6d"], save_config=False) check_routers(exiting="rt7") diff --git a/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py b/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py index 7d9cc68412..1432d53ffc 100755 --- a/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py +++ b/tests/topotests/ospf_gr_topo1/test_ospf_gr_topo1.py @@ -184,10 +184,21 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None): for rname in ["rt1", "rt2", "rt3", "rt4", "rt5", "rt6", "rt7"]: # Check the RIB first, which should be preserved across restarts in # all routers of the routing domain. + # If we are not on initial convergence *but* we are checking + # after a restart. Looking in the zebra rib for installed + # is a recipe for test failure. Why? 
because if we are restarting + # then ospf is in the process of establishing neighbors and passing + # new routes to zebra. Zebra will not mark the route as installed + # when it receives a replacement from ospf until it has finished + # processing it. Let's give it a few seconds to allow this to happen + # under load. if initial_convergence == True: tries = 240 else: - tries = 1 + if restarting != None: + tries = 40 + else: + tries = 1 router_compare_json_output( rname, "show ip route ospf json", "show_ip_route.json", tries ) @@ -215,6 +226,26 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None): ) +def ensure_gr_is_in_zebra(rname): + retry = True + retry_times = 10 + tgen = get_topogen() + + while retry and retry_times > 0: + out = tgen.net[rname].cmd( + 'vtysh -c "show zebra client" | grep "Client: ospf$" -A 40 | grep "Capabilities "' + ) + + if "Graceful Restart" not in out: + sleep(2) + retry_times -= 1 + else: + retry = False + + assertmsg = "%s does not appear to have Graceful Restart setup" % rname + assert not retry and retry_times > 0, assertmsg + + # # Test initial network convergence # @@ -241,7 +272,7 @@ def test_gr_rt1(): pytest.skip(tgen.errors) tgen.net["rt1"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt1") kill_router_daemons(tgen, "rt1", ["ospfd"], save_config=False) check_routers(exiting="rt1") @@ -261,7 +292,7 @@ def test_gr_rt2(): pytest.skip(tgen.errors) tgen.net["rt2"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt2") kill_router_daemons(tgen, "rt2", ["ospfd"], save_config=False) check_routers(exiting="rt2") @@ -281,7 +312,7 @@ def test_gr_rt3(): pytest.skip(tgen.errors) tgen.net["rt3"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt3") kill_router_daemons(tgen, "rt3", ["ospfd"], save_config=False) check_routers(exiting="rt3") @@ -301,7 +332,7 @@ def test_gr_rt4(): pytest.skip(tgen.errors) 
tgen.net["rt4"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt4") kill_router_daemons(tgen, "rt4", ["ospfd"], save_config=False) check_routers(exiting="rt4") @@ -321,7 +352,7 @@ def test_gr_rt5(): pytest.skip(tgen.errors) tgen.net["rt5"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt5") kill_router_daemons(tgen, "rt5", ["ospfd"], save_config=False) check_routers(exiting="rt5") @@ -341,7 +372,7 @@ def test_gr_rt6(): pytest.skip(tgen.errors) tgen.net["rt6"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt6") kill_router_daemons(tgen, "rt6", ["ospfd"], save_config=False) check_routers(exiting="rt6") @@ -361,7 +392,7 @@ def test_gr_rt7(): pytest.skip(tgen.errors) tgen.net["rt7"].cmd('vtysh -c "graceful-restart prepare ip ospf"') - sleep(3) + ensure_gr_is_in_zebra("rt7") kill_router_daemons(tgen, "rt7", ["ospfd"], save_config=False) check_routers(exiting="rt7") -- 2.39.5