]> git.puffer.fish Git - mirror/frr.git/commitdiff
zebra: Fix reinstalling nexthops in NHGs upon interface flaps 18374/head
authorRajasekar Raja <rajasekarr@nvidia.com>
Tue, 11 Mar 2025 19:15:32 +0000 (12:15 -0700)
committerRajasekar Raja <rajasekarr@nvidia.com>
Tue, 18 Mar 2025 19:21:42 +0000 (12:21 -0700)
Trigger:
Imagine a route utilizing an NHG with six nexthops (Intf swp1-swp6).
If interfaces swp1-swp4 flaps, the NHG remains the same but now only
references two nexthops (swp5-6) instead of all six. This behavior
occurs due to how NHGs with recursive nexthops are managed within Zebra.

In the scenario below, NHG 370 has all six nexthops installed in the
kernel. However, Zebra maintains a list of recursive NHGs that NHG 370
references i.e., Depends: (371), (372), (373) which are not directly
installed in the kernel.
- When an interface comes up, its nexthop and corresponding dependents
  are installed.
- These dependents (counterparts to 371-373) are non-recursive and
  are installed as well.
- However, when attempting to install the recursive ones in
  zebra_nhg_install_kernel(), they resolve to the already installed
  counterparts, resulting in a NO-OP.

Fixing this by iterating all dependents of the recursively resolved
NHGs and reinstalling them.

Trigger: Flap swp1 to swp4

Before Fix:
root@leaf-11:mgmt:/var/home/cumulus# ip route show | grep 6.0.0.5
6.0.0.5 nhid 370 proto bgp metric 20
ip -d next show
id 337 via 2000:1:0:1:0:f:0:9 dev swp6 scope link proto zebra
id 339 via 2000:1:0:1:0:e:0:9 dev swp5 scope link proto zebra
id 341 via 2000:1:0:1:0:8:0:8 dev swp4 scope link proto zebra
id 343 via 2000:1:0:1:0:7:0:8 dev swp3 scope link proto zebra
id 346 via 2000:1:0:1:0:1:0:7 dev swp2 scope link proto zebra
id 348 via 2000:1:0:1::7 dev swp1 scope link proto zebra
id 370 group 346/348/341/343/337/339 scope global proto zebra

After Trigger:
root@leaf-11:mgmt:/var/home/cumulus# ip route show | grep 6.0.0.5
6.0.0.5 nhid 370 proto bgp metric 20
root@leaf-11:mgmt:/var/home/cumulus# ip -d next show
id 337 via 2000:1:0:1:0:f:0:9 dev swp6 scope link proto zebra
id 339 via 2000:1:0:1:0:e:0:9 dev swp5 scope link proto zebra
id 370 group 337/339 scope global proto zebra

After Fix:
root@leaf-11:mgmt:/var/home/cumulus# ip route show | grep 6.0.0.5
6.0.0.5 nhid 432 proto bgp metric 20
ip -d next show
id 432 group 395/397/400/402/405/407 scope global proto zebra

After Trigger
root@leaf-11:mgmt:/var/home/cumulus# ip route show | grep 6.0.0.5
6.0.0.5 nhid 432 proto bgp metric 20
root@leaf-11:mgmt:/var/home/cumulus# ip -d next show
id 432 group 395/397/400/402/405/407 scope global proto zebra

Ticket :#

Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
tests/topotests/all_protocol_startup/test_all_protocol_startup.py
zebra/zebra_nhg.c

index 06a350c8e96c55e315415c5e27509831c0f8823f..80903b134e1008c30647c911ded27a1265157c0e 100644 (file)
@@ -652,6 +652,34 @@ def test_nexthop_groups():
         nhg_id
     )
 
+    ## Validate NHG's installed in kernel has same nexthops with Interface flaps
+    pre_out = net["r1"].cmd('ip route show | grep "5.5.5.1"')
+    pre_nhg = re.search(r"nhid\s+(\d+)", pre_out)
+    pre_nh_show = net["r1"].cmd("ip next show id {}".format(pre_nhg.group(1)))
+    pre_total_nhs = len((re.search(r"group ([\d/]+)", pre_nh_show)).group(1).split("/"))
+
+    net["r1"].cmd(
+        "ip link set r1-eth1 down;ip link set r1-eth2 down;ip link set r1-eth3 down;ip link set r1-eth4 down"
+    )
+    sleep(1)
+    net["r1"].cmd(
+        "ip link set r1-eth1 up;ip link set r1-eth2 up;ip link set r1-eth3 up;ip link set r1-eth4 up"
+    )
+    sleep(5)
+
+    post_out = net["r1"].cmd('ip route show | grep "5.5.5.1"')
+    post_nhg = re.search(r"nhid\s+(\d+)", post_out)
+    post_nh_show = net["r1"].cmd("ip next show id {}".format(post_nhg.group(1)))
+    post_total_nhs = len(
+        (re.search(r"group ([\d/]+)", post_nh_show)).group(1).split("/")
+    )
+
+    assert (
+        post_total_nhs == pre_total_nhs
+    ), "Expected same nexthops(pre-{}: post-{}) in NHG (pre-{}:post-{}) after few Interface flaps".format(
+        pre_total_nhs, post_total_nhs, pre_nhg.group(1), post_nhg.group(1)
+    )
+
     ## Remove all NHG routes
 
     net["r1"].cmd('vtysh -c "sharp remove routes 2.2.2.1 1"')
index f5141c8f235b9bf159acd3d92f5078ad84d0c6bb..b20aa3f2a3d54da545037eb84197b49a5858e0d4 100644 (file)
@@ -1165,11 +1165,29 @@ static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe, bool install)
 {
        /* Update validity of groups depending on it */
        struct nhg_connected *rb_node_dep;
+       struct nhg_connected *rb_node_indirect_dep = NULL;
 
        frr_each_safe (nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
                zebra_nhg_set_valid(rb_node_dep->nhe, true);
                /* install dependent NHG into kernel */
                if (install) {
+                       if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) &&
+                           CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_RECURSIVE)) {
+                               frr_each_safe (nhg_connected_tree, &rb_node_dep->nhe->nhg_dependents,
+                                              rb_node_indirect_dep) {
+                                       SET_FLAG(rb_node_indirect_dep->nhe->flags,
+                                                NEXTHOP_GROUP_REINSTALL);
+                                       if (IS_ZEBRA_DEBUG_NHG_DETAIL)
+                                               zlog_debug("%s nh id %u (flags 0x%x) associated dependents NHG %pNG (flags 0x%x) Re-install",
+                                                          __func__, rb_node_dep->nhe->id,
+                                                          rb_node_dep->nhe->flags,
+                                                          rb_node_indirect_dep->nhe,
+                                                          rb_node_indirect_dep->nhe->flags);
+                                       zebra_nhg_install_kernel(rb_node_indirect_dep->nhe,
+                                                                ZEBRA_ROUTE_MAX);
+                               }
+                       }
+
                        if (IS_ZEBRA_DEBUG_NHG_DETAIL)
                                zlog_debug(
                                        "%s nh id %u (flags 0x%x) associated dependent NHG %pNG install",