summaryrefslogtreecommitdiff
path: root/zebra/kernel_netlink.c
diff options
context:
space:
mode:
authorRajasekar Raja <rajasekarr@nvidia.com>2024-08-12 11:51:06 -0700
committerRajasekar Raja <rajasekarr@nvidia.com>2024-09-26 20:17:35 -0700
commitaa4786642c9a65c282d0fd5247a35b0f14fa1c3c (patch)
tree3bc2f08e2fe25d5dee7714117e66e51ad0f4146e /zebra/kernel_netlink.c
parent1632988acf944b7f5e7d24c4aacbcfd4c5f626b0 (diff)
zebra: vlan to dplane Offload from main
Trigger: Zebra core seen when we convert l2vni to l3vni and back BackTrace: /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(_zlog_assert_failed+0xe9) [0x7f4af96989d9] /usr/lib/frr/zebra(zebra_vxlan_if_vni_up+0x250) [0x5561022ae030] /usr/lib/frr/zebra(netlink_vlan_change+0x2f4) [0x5561021fd354] /usr/lib/frr/zebra(netlink_parse_info+0xff) [0x55610220d37f] /usr/lib/frr/zebra(+0xc264a) [0x55610220d64a] /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(thread_call+0x7d) [0x7f4af967e96d] /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(frr_run+0xe8) [0x7f4af9637588] /usr/lib/frr/zebra(main+0x402) [0x5561021f4d32] /lib/x86_64-linux-gnu/libc.so.6(+0x2724a) [0x7f4af932624a] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85) [0x7f4af9326305] /usr/lib/frr/zebra(_start+0x21) [0x5561021f72f1] Root Cause: In working case, - We get a RTM_NEWLINK whose ctx is enqueued by zebra dplane and dequeued by zebra main and processed i.e. (102000 is deleted from vxlan99) before we handle RTM_NEWVLAN. - So in handling of NEWVLAN (vxlan99) we bail out since find with vlan id 703 does not exist. root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/working/nocras.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread" 2024/07/18 23:09:33.741105 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0 2024/07/18 23:09:33.744061 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (65), result QUEUED 2024/07/18 23:09:33.767240 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0 2024/07/18 23:09:33.767380 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (73), result QUEUED 2024/07/18 23:09:33.767389 ZEBRA: [NVFT0-HS1EX] INTF_INSTALL for vxlan99(73) 2024/07/18 23:09:33.767404 ZEBRA: [TQR2A-H2RFY] Vlan-Vni(1186:1186-6000002:6000002) update for VxLAN IF vxlan99(73) 2024/07/18 23:09:33.767422 ZEBRA: [TP4VP-XZ627] Del L2-VNI 102000 intf vxlan99(73) 2024/07/18 23:09:33.767858 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0 2024/07/18 23:09:33.767866 ZEBRA: [KKZGZ-8PCDW] Cannot find VNI for VID (703) IF vxlan99 for vlan state update >>>>BAIL OUT In failure case, - The NEWVLAN is received first even before processing RTM_NEWLINK. - Since the vxlan id 102000 is not removed from the vxlan99, the find with vlan id 703 returns the 102000 one and we invoke zebra_vxlan_if_vni_up where the interfaces don't match and assert. root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/noworking/crash.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread" 2024/07/18 22:26:43.829370 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0 2024/07/18 22:26:43.829646 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7fe07c026d30, op INTF_INSTALL, ifindex (65), result QUEUED 2024/07/18 22:26:43.853930 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0 2024/07/18 22:26:43.853949 ZEBRA: [K61WJ-XQQ3X] Intf vxlan99(73) L2-VNI 102000 is UP >>> VLAN PROCESSED BEFORE INTF EVENT 2024/07/18 22:26:43.853951 ZEBRA: [SPV50-BX2RP] RAJA zevpn_vxlanif vxlan48 and ifp vxlan99 2024/07/18 22:26:43.854005 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0 2024/07/18 22:26:43.854241 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=516, seq=0, pid=0 2024/07/18 22:26:43.854251 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=544, seq=0, pid=0 ZEBRA: in thread kernel_read scheduled from zebra/kernel_netlink.c:505 kernel_read() Fix: Similar to #13396, where link change handling was offloaded to dplane, same is being done for vlan events. Note: Prior to this change, zebra main thread was interested in the RTNLGRP_BRVLAN. So all the kernel events pertaining to vlan was handled by zebra main. With this change change as well the handling of vlan events is still with Zebra main. However we offload it via Dplane thread. Ticket :#3878175 Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
Diffstat (limited to 'zebra/kernel_netlink.c')
-rw-r--r--zebra/kernel_netlink.c15
1 files changed, 9 insertions, 6 deletions
diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
index 84aabc4254..3547314f84 100644
--- a/zebra/kernel_netlink.c
+++ b/zebra/kernel_netlink.c
@@ -430,10 +430,6 @@ static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
case RTM_NEWTFILTER:
case RTM_DELTFILTER:
return netlink_tfilter_change(h, ns_id, startup);
- case RTM_NEWVLAN:
- return netlink_vlan_change(h, ns_id, startup);
- case RTM_DELVLAN:
- return netlink_vlan_change(h, ns_id, startup);
/* Messages we may receive, but ignore */
case RTM_NEWCHAIN:
@@ -449,6 +445,8 @@ static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
case RTM_NEWTUNNEL:
case RTM_DELTUNNEL:
case RTM_GETTUNNEL:
+ case RTM_NEWVLAN:
+ case RTM_DELVLAN:
return 0;
default:
/*
@@ -492,6 +490,10 @@ static int dplane_netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
case RTM_DELLINK:
return netlink_link_change(h, ns_id, startup);
+ case RTM_NEWVLAN:
+ case RTM_DELVLAN:
+ return netlink_vlan_change(h, ns_id, startup);
+
default:
break;
}
@@ -1621,6 +1623,7 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
case DPLANE_OP_IPSET_ENTRY_ADD:
case DPLANE_OP_IPSET_ENTRY_DELETE:
case DPLANE_OP_STARTUP_STAGE:
+ case DPLANE_OP_VLAN_INSTALL:
return FRR_NETLINK_ERROR;
case DPLANE_OP_GRE_SET:
@@ -1862,8 +1865,8 @@ void kernel_init(struct zebra_ns *zns)
* setsockopt multicast group subscriptions that don't fit in nl_groups
*/
grp = RTNLGRP_BRVLAN;
- ret = setsockopt(zns->netlink.sock, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
- &grp, sizeof(grp));
+ ret = setsockopt(zns->netlink_dplane_in.sock, SOL_NETLINK,
+ NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
if (ret < 0)
zlog_notice(