diff options
| author | Rajasekar Raja <rajasekarr@nvidia.com> | 2024-08-12 11:51:06 -0700 |
|---|---|---|
| committer | Rajasekar Raja <rajasekarr@nvidia.com> | 2024-09-26 20:17:35 -0700 |
| commit | aa4786642c9a65c282d0fd5247a35b0f14fa1c3c (patch) | |
| tree | 3bc2f08e2fe25d5dee7714117e66e51ad0f4146e /zebra/if_netlink.c | |
| parent | 1632988acf944b7f5e7d24c4aacbcfd4c5f626b0 (diff) | |
zebra: vlan to dplane Offload from main
Trigger: Zebra core seen when we convert l2vni to l3vni and back
BackTrace:
/usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(_zlog_assert_failed+0xe9) [0x7f4af96989d9]
/usr/lib/frr/zebra(zebra_vxlan_if_vni_up+0x250) [0x5561022ae030]
/usr/lib/frr/zebra(netlink_vlan_change+0x2f4) [0x5561021fd354]
/usr/lib/frr/zebra(netlink_parse_info+0xff) [0x55610220d37f]
/usr/lib/frr/zebra(+0xc264a) [0x55610220d64a]
/usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(thread_call+0x7d) [0x7f4af967e96d]
/usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(frr_run+0xe8) [0x7f4af9637588]
/usr/lib/frr/zebra(main+0x402) [0x5561021f4d32]
/lib/x86_64-linux-gnu/libc.so.6(+0x2724a) [0x7f4af932624a]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85) [0x7f4af9326305]
/usr/lib/frr/zebra(_start+0x21) [0x5561021f72f1]
Root Cause:
In working case,
- We get a RTM_NEWLINK whose ctx is enqueued by zebra dplane and
dequeued by zebra main and processed i.e.
(102000 is deleted from vxlan99) before we handle RTM_NEWVLAN.
- So in handling of NEWVLAN (vxlan99) we bail out since find with
vlan id 703 does not exist.
root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/working/nocras.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread"
2024/07/18 23:09:33.741105 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0
2024/07/18 23:09:33.744061 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (65), result QUEUED
2024/07/18 23:09:33.767240 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0
2024/07/18 23:09:33.767380 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (73), result QUEUED
2024/07/18 23:09:33.767389 ZEBRA: [NVFT0-HS1EX] INTF_INSTALL for vxlan99(73)
2024/07/18 23:09:33.767404 ZEBRA: [TQR2A-H2RFY] Vlan-Vni(1186:1186-6000002:6000002) update for VxLAN IF vxlan99(73)
2024/07/18 23:09:33.767422 ZEBRA: [TP4VP-XZ627] Del L2-VNI 102000 intf vxlan99(73)
2024/07/18 23:09:33.767858 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0
2024/07/18 23:09:33.767866 ZEBRA: [KKZGZ-8PCDW] Cannot find VNI for VID (703) IF vxlan99 for vlan state update >>>>BAIL OUT
In failure case,
- The NEWVLAN is received first even before processing RTM_NEWLINK.
- Since the vxlan id 102000 is not removed from the vxlan99,
the find with vlan id 703 returns the 102000 one and we invoke
zebra_vxlan_if_vni_up where the interfaces don't match and assert.
root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/noworking/crash.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread"
2024/07/18 22:26:43.829370 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0
2024/07/18 22:26:43.829646 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7fe07c026d30, op INTF_INSTALL, ifindex (65), result QUEUED
2024/07/18 22:26:43.853930 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0
2024/07/18 22:26:43.853949 ZEBRA: [K61WJ-XQQ3X] Intf vxlan99(73) L2-VNI 102000 is UP >>> VLAN PROCESSED BEFORE INTF EVENT
2024/07/18 22:26:43.853951 ZEBRA: [SPV50-BX2RP] RAJA zevpn_vxlanif vxlan48 and ifp vxlan99
2024/07/18 22:26:43.854005 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0
2024/07/18 22:26:43.854241 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=516, seq=0, pid=0
2024/07/18 22:26:43.854251 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=544, seq=0, pid=0
ZEBRA: in thread kernel_read scheduled from zebra/kernel_netlink.c:505 kernel_read()
Fix:
Similar to #13396, where link change
handling was offloaded to dplane, same is being done for vlan events.
Note: Prior to this change, zebra main thread was interested in the
RTNLGRP_BRVLAN. So all the kernel events pertaining to vlan was
handled by zebra main.
With this change change as well the handling of vlan events is still
with Zebra main. However we offload it via Dplane thread.
Ticket :#3878175
Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
Diffstat (limited to 'zebra/if_netlink.c')
| -rw-r--r-- | zebra/if_netlink.c | 79 |
1 files changed, 46 insertions, 33 deletions
diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index 3de6661f5d..c9861b0b5e 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -63,7 +63,6 @@ #include "zebra/zebra_l2.h" #include "zebra/netconf_netlink.h" #include "zebra/zebra_trace.h" -#include "zebra/zebra_vxlan.h" extern struct zebra_privs_t zserv_privs; @@ -1817,6 +1816,20 @@ int netlink_tunneldump_read(struct zebra_ns *zns) return 0; } +static uint8_t netlink_get_dplane_vlan_state(uint8_t state) +{ + if (state == BR_STATE_LISTENING) + return ZEBRA_DPLANE_BR_STATE_LISTENING; + else if (state == BR_STATE_LEARNING) + return ZEBRA_DPLANE_BR_STATE_LEARNING; + else if (state == BR_STATE_FORWARDING) + return ZEBRA_DPLANE_BR_STATE_FORWARDING; + else if (state == BR_STATE_BLOCKING) + return ZEBRA_DPLANE_BR_STATE_BLOCKING; + + return ZEBRA_DPLANE_BR_STATE_DISABLED; +} + /** * netlink_vlan_change() - Read in change about vlans from the kernel * @@ -1829,7 +1842,6 @@ int netlink_tunneldump_read(struct zebra_ns *zns) int netlink_vlan_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) { int len, rem; - struct interface *ifp; struct br_vlan_msg *bvm; struct bridge_vlan_info *vinfo; struct rtattr *vtb[BRIDGE_VLANDB_ENTRY_MAX + 1] = {}; @@ -1837,6 +1849,9 @@ int netlink_vlan_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) uint8_t state; uint32_t vrange; int type; + uint32_t count = 0; + struct zebra_dplane_ctx *ctx = NULL; + struct zebra_vxlan_vlan_array *vlan_array = NULL; /* We only care about state changes for now */ if (!(h->nlmsg_type == RTM_NEWVLAN)) @@ -1856,25 +1871,10 @@ int netlink_vlan_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) if (bvm->family != AF_BRIDGE) return 0; - ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), bvm->ifindex); - if (!ifp) { - zlog_debug("Cannot find bridge-vlan IF (%u) for vlan update", - bvm->ifindex); - return 0; - } - - if (!IS_ZEBRA_IF_VXLAN(ifp)) { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug("Ignoring non-vxlan IF (%s) for vlan update", - ifp->name); - - return 0; - } - - if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN) - zlog_debug("%s %s IF %s NS %u", - nl_msg_type_to_str(h->nlmsg_type), - nl_family_to_str(bvm->family), ifp->name, ns_id); + ctx = dplane_ctx_alloc(); + dplane_ctx_set_ns_id(ctx, ns_id); + dplane_ctx_set_op(ctx, DPLANE_OP_VLAN_INSTALL); + dplane_ctx_set_vlan_ifindex(ctx, bvm->ifindex); /* Loop over "ALL" BRIDGE_VLANDB_ENTRY */ rem = len; @@ -1905,26 +1905,39 @@ int netlink_vlan_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) if (!vtb[BRIDGE_VLANDB_ENTRY_STATE]) continue; + count++; + vlan_array = + XREALLOC(MTYPE_VLAN_CHANGE_ARR, vlan_array, + sizeof(struct zebra_vxlan_vlan_array) + + count * sizeof(struct zebra_vxlan_vlan)); + + memset(&vlan_array->vlans[count - 1], 0, + sizeof(struct zebra_vxlan_vlan)); + state = *(uint8_t *)RTA_DATA(vtb[BRIDGE_VLANDB_ENTRY_STATE]); if (vtb[BRIDGE_VLANDB_ENTRY_RANGE]) vrange = *(uint32_t *)RTA_DATA( vtb[BRIDGE_VLANDB_ENTRY_RANGE]); - if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN) { - if (vrange) - zlog_debug("VLANDB_ENTRY: VID (%u-%u) state=%s", - vinfo->vid, vrange, - port_state2str(state)); - else - zlog_debug("VLANDB_ENTRY: VID (%u) state=%s", - vinfo->vid, port_state2str(state)); - } - - vlan_id_range_state_change( - ifp, vinfo->vid, (vrange ? vrange : vinfo->vid), state); + vlan_array->vlans[count - 1].state = + netlink_get_dplane_vlan_state(state); + vlan_array->vlans[count - 1].vid = vinfo->vid; + vlan_array->vlans[count - 1].vrange = vrange; } + if (count) { + vlan_array->count = count; + dplane_ctx_set_vxlan_vlan_array(ctx, vlan_array); + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("RTM_NEWVLAN for ifindex %u NS %u, enqueuing for zebra main", + bvm->ifindex, ns_id); + + dplane_provider_enqueue_to_zebra(ctx); + } else + dplane_ctx_fini(&ctx); + + return 0; } |
