summaryrefslogtreecommitdiff
path: root/zebra/zebra_dplane.c
diff options
context:
space:
mode:
authorRajasekar Raja <rajasekarr@nvidia.com>2024-08-12 11:51:06 -0700
committerRajasekar Raja <rajasekarr@nvidia.com>2024-09-26 20:17:35 -0700
commitaa4786642c9a65c282d0fd5247a35b0f14fa1c3c (patch)
tree3bc2f08e2fe25d5dee7714117e66e51ad0f4146e /zebra/zebra_dplane.c
parent1632988acf944b7f5e7d24c4aacbcfd4c5f626b0 (diff)
zebra: vlan to dplane Offload from main
Trigger: Zebra core seen when we convert l2vni to l3vni and back BackTrace: /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(_zlog_assert_failed+0xe9) [0x7f4af96989d9] /usr/lib/frr/zebra(zebra_vxlan_if_vni_up+0x250) [0x5561022ae030] /usr/lib/frr/zebra(netlink_vlan_change+0x2f4) [0x5561021fd354] /usr/lib/frr/zebra(netlink_parse_info+0xff) [0x55610220d37f] /usr/lib/frr/zebra(+0xc264a) [0x55610220d64a] /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(thread_call+0x7d) [0x7f4af967e96d] /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(frr_run+0xe8) [0x7f4af9637588] /usr/lib/frr/zebra(main+0x402) [0x5561021f4d32] /lib/x86_64-linux-gnu/libc.so.6(+0x2724a) [0x7f4af932624a] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85) [0x7f4af9326305] /usr/lib/frr/zebra(_start+0x21) [0x5561021f72f1] Root Cause: In working case, - We get a RTM_NEWLINK whose ctx is enqueued by zebra dplane and dequeued by zebra main and processed i.e. (102000 is deleted from vxlan99) before we handle RTM_NEWVLAN. - So in handling of NEWVLAN (vxlan99) we bail out since find with vlan id 703 does not exist. root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/working/nocras.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread" 2024/07/18 23:09:33.741105 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0 2024/07/18 23:09:33.744061 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (65), result QUEUED 2024/07/18 23:09:33.767240 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0 2024/07/18 23:09:33.767380 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (73), result QUEUED 2024/07/18 23:09:33.767389 ZEBRA: [NVFT0-HS1EX] INTF_INSTALL for vxlan99(73) 2024/07/18 23:09:33.767404 ZEBRA: [TQR2A-H2RFY] Vlan-Vni(1186:1186-6000002:6000002) update for VxLAN IF vxlan99(73) 2024/07/18 23:09:33.767422 ZEBRA: [TP4VP-XZ627] Del L2-VNI 102000 intf vxlan99(73) 2024/07/18 23:09:33.767858 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0 2024/07/18 23:09:33.767866 ZEBRA: [KKZGZ-8PCDW] Cannot find VNI for VID (703) IF vxlan99 for vlan state update >>>>BAIL OUT In failure case, - The NEWVLAN is received first even before processing RTM_NEWLINK. - Since the vxlan id 102000 is not removed from the vxlan99, the find with vlan id 703 returns the 102000 one and we invoke zebra_vxlan_if_vni_up where the interfaces don't match and assert. root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/noworking/crash.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread" 2024/07/18 22:26:43.829370 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0 2024/07/18 22:26:43.829646 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7fe07c026d30, op INTF_INSTALL, ifindex (65), result QUEUED 2024/07/18 22:26:43.853930 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0 2024/07/18 22:26:43.853949 ZEBRA: [K61WJ-XQQ3X] Intf vxlan99(73) L2-VNI 102000 is UP >>> VLAN PROCESSED BEFORE INTF EVENT 2024/07/18 22:26:43.853951 ZEBRA: [SPV50-BX2RP] RAJA zevpn_vxlanif vxlan48 and ifp vxlan99 2024/07/18 22:26:43.854005 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0 2024/07/18 22:26:43.854241 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=516, seq=0, pid=0 2024/07/18 22:26:43.854251 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=544, seq=0, pid=0 ZEBRA: in thread kernel_read scheduled from zebra/kernel_netlink.c:505 kernel_read() Fix: Similar to #13396, where link change handling was offloaded to dplane, same is being done for vlan events. Note: Prior to this change, zebra main thread was interested in the RTNLGRP_BRVLAN. So all the kernel events pertaining to vlan was handled by zebra main. With this change change as well the handling of vlan events is still with Zebra main. However we offload it via Dplane thread. Ticket :#3878175 Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
Diffstat (limited to 'zebra/zebra_dplane.c')
-rw-r--r--zebra/zebra_dplane.c56
1 files changed, 56 insertions, 0 deletions
diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
index 75147e7136..f88464d745 100644
--- a/zebra/zebra_dplane.c
+++ b/zebra/zebra_dplane.c
@@ -35,6 +35,8 @@ DEFINE_MTYPE_STATIC(ZEBRA, DP_PROV, "Zebra DPlane Provider");
DEFINE_MTYPE_STATIC(ZEBRA, DP_NETFILTER, "Zebra Netfilter Internal Object");
DEFINE_MTYPE_STATIC(ZEBRA, DP_NS, "DPlane NSes");
+DEFINE_MTYPE(ZEBRA, VLAN_CHANGE_ARR, "Vlan Change Array");
+
#ifndef AOK
# define AOK 0
#endif
@@ -371,6 +373,14 @@ struct dplane_srv6_encap_ctx {
};
/*
+ * VLAN info for the dataplane
+ */
+struct dplane_vlan_info {
+ ifindex_t ifindex;
+ struct zebra_vxlan_vlan_array *vlan_array;
+};
+
+/*
* The context block used to exchange info about route updates across
* the boundary between the zebra main context (and pthread) and the
* dataplane layer (and pthread).
@@ -416,6 +426,7 @@ struct zebra_dplane_ctx {
struct dplane_pw_info pw;
struct dplane_br_port_info br_port;
struct dplane_intf_info intf;
+ struct dplane_vlan_info vlan_info;
struct dplane_mac_info macinfo;
struct dplane_neigh_info neigh;
struct dplane_rule_info rule;
@@ -885,6 +896,11 @@ static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_STARTUP_STAGE:
case DPLANE_OP_SRV6_ENCAP_SRCADDR_SET:
break;
+ case DPLANE_OP_VLAN_INSTALL:
+ if (ctx->u.vlan_info.vlan_array)
+ XFREE(MTYPE_VLAN_CHANGE_ARR,
+ ctx->u.vlan_info.vlan_array);
+ break;
}
}
@@ -1219,6 +1235,10 @@ const char *dplane_op2str(enum dplane_op_e op)
case DPLANE_OP_SRV6_ENCAP_SRCADDR_SET:
ret = "SRV6_ENCAP_SRCADDR_SET";
break;
+
+ case DPLANE_OP_VLAN_INSTALL:
+ ret = "NEW_VLAN";
+ break;
}
return ret;
@@ -3321,6 +3341,35 @@ uint32_t dplane_get_in_queue_len(void)
memory_order_seq_cst);
}
+void dplane_ctx_set_vlan_ifindex(struct zebra_dplane_ctx *ctx, ifindex_t ifindex)
+{
+ DPLANE_CTX_VALID(ctx);
+ ctx->u.vlan_info.ifindex = ifindex;
+}
+
+ifindex_t dplane_ctx_get_vlan_ifindex(struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return ctx->u.vlan_info.ifindex;
+}
+
+void dplane_ctx_set_vxlan_vlan_array(struct zebra_dplane_ctx *ctx,
+ struct zebra_vxlan_vlan_array *vlan_array)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ ctx->u.vlan_info.vlan_array = vlan_array;
+}
+
+const struct zebra_vxlan_vlan_array *
+dplane_ctx_get_vxlan_vlan_array(struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx);
+
+ return ctx->u.vlan_info.vlan_array;
+}
+
/*
* Internal helper that copies information from a zebra ns object; this is
* called in the zebra main pthread context as part of dplane ctx init.
@@ -6720,6 +6769,12 @@ static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx)
dplane_op2str(dplane_ctx_get_op(ctx)),
&ctx->u.srv6_encap.srcaddr);
break;
+
+ case DPLANE_OP_VLAN_INSTALL:
+ zlog_debug("Dplane %s on idx %u",
+ dplane_op2str(dplane_ctx_get_op(ctx)),
+ dplane_ctx_get_vlan_ifindex(ctx));
+ break;
}
}
@@ -6888,6 +6943,7 @@ static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_ADDR_ADD:
case DPLANE_OP_INTF_ADDR_DEL:
case DPLANE_OP_INTF_NETCONFIG:
+ case DPLANE_OP_VLAN_INSTALL:
break;
case DPLANE_OP_SRV6_ENCAP_SRCADDR_SET: