diff options
| author | Rajasekar Raja <rajasekarr@nvidia.com> | 2024-08-12 11:51:06 -0700 |
|---|---|---|
| committer | Rajasekar Raja <rajasekarr@nvidia.com> | 2024-09-26 20:17:35 -0700 |
| commit | aa4786642c9a65c282d0fd5247a35b0f14fa1c3c (patch) | |
| tree | 3bc2f08e2fe25d5dee7714117e66e51ad0f4146e /zebra/zebra_vxlan.c | |
| parent | 1632988acf944b7f5e7d24c4aacbcfd4c5f626b0 (diff) | |
zebra: vlan to dplane Offload from main
Trigger: Zebra core seen when we convert l2vni to l3vni and back
BackTrace:
/usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(_zlog_assert_failed+0xe9) [0x7f4af96989d9]
/usr/lib/frr/zebra(zebra_vxlan_if_vni_up+0x250) [0x5561022ae030]
/usr/lib/frr/zebra(netlink_vlan_change+0x2f4) [0x5561021fd354]
/usr/lib/frr/zebra(netlink_parse_info+0xff) [0x55610220d37f]
/usr/lib/frr/zebra(+0xc264a) [0x55610220d64a]
/usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(thread_call+0x7d) [0x7f4af967e96d]
/usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(frr_run+0xe8) [0x7f4af9637588]
/usr/lib/frr/zebra(main+0x402) [0x5561021f4d32]
/lib/x86_64-linux-gnu/libc.so.6(+0x2724a) [0x7f4af932624a]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85) [0x7f4af9326305]
/usr/lib/frr/zebra(_start+0x21) [0x5561021f72f1]
Root Cause:
In working case,
- We get a RTM_NEWLINK whose ctx is enqueued by zebra dplane and
dequeued by zebra main and processed i.e.
(102000 is deleted from vxlan99) before we handle RTM_NEWVLAN.
- So in handling of NEWVLAN (vxlan99) we bail out since find with
vlan id 703 does not exist.
root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/working/nocras.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread"
2024/07/18 23:09:33.741105 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0
2024/07/18 23:09:33.744061 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (65), result QUEUED
2024/07/18 23:09:33.767240 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0
2024/07/18 23:09:33.767380 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (73), result QUEUED
2024/07/18 23:09:33.767389 ZEBRA: [NVFT0-HS1EX] INTF_INSTALL for vxlan99(73)
2024/07/18 23:09:33.767404 ZEBRA: [TQR2A-H2RFY] Vlan-Vni(1186:1186-6000002:6000002) update for VxLAN IF vxlan99(73)
2024/07/18 23:09:33.767422 ZEBRA: [TP4VP-XZ627] Del L2-VNI 102000 intf vxlan99(73)
2024/07/18 23:09:33.767858 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0
2024/07/18 23:09:33.767866 ZEBRA: [KKZGZ-8PCDW] Cannot find VNI for VID (703) IF vxlan99 for vlan state update >>>>BAIL OUT
In failure case,
- The NEWVLAN is received first even before processing RTM_NEWLINK.
- Since the vxlan id 102000 is not removed from the vxlan99,
the find with vlan id 703 returns the 102000 one and we invoke
zebra_vxlan_if_vni_up where the interfaces don't match and assert.
root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/noworking/crash.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread"
2024/07/18 22:26:43.829370 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0
2024/07/18 22:26:43.829646 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7fe07c026d30, op INTF_INSTALL, ifindex (65), result QUEUED
2024/07/18 22:26:43.853930 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0
2024/07/18 22:26:43.853949 ZEBRA: [K61WJ-XQQ3X] Intf vxlan99(73) L2-VNI 102000 is UP >>> VLAN PROCESSED BEFORE INTF EVENT
2024/07/18 22:26:43.853951 ZEBRA: [SPV50-BX2RP] RAJA zevpn_vxlanif vxlan48 and ifp vxlan99
2024/07/18 22:26:43.854005 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0
2024/07/18 22:26:43.854241 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=516, seq=0, pid=0
2024/07/18 22:26:43.854251 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=544, seq=0, pid=0
ZEBRA: in thread kernel_read scheduled from zebra/kernel_netlink.c:505 kernel_read()
Fix:
Similar to #13396, where link change
handling was offloaded to dplane, same is being done for vlan events.
Note: Prior to this change, zebra main thread was interested in the
RTNLGRP_BRVLAN. So all the kernel events pertaining to vlan was
handled by zebra main.
With this change change as well the handling of vlan events is still
with Zebra main. However we offload it via Dplane thread.
Ticket :#3878175
Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
Diffstat (limited to 'zebra/zebra_vxlan.c')
| -rw-r--r-- | zebra/zebra_vxlan.c | 79 |
1 files changed, 63 insertions, 16 deletions
diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index 0f0464e486..0f72259951 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -41,8 +41,6 @@ #include "zebra/zebra_evpn_mh.h" #include "zebra/zebra_evpn_vxlan.h" #include "zebra/zebra_router.h" -#include "zebra/zebra_trace.h" -#include <linux/if_bridge.h> DEFINE_MTYPE_STATIC(ZEBRA, HOST_PREFIX, "host prefix"); DEFINE_MTYPE_STATIC(ZEBRA, ZL3VNI, "L3 VNI hash"); @@ -6249,25 +6247,26 @@ extern void zebra_evpn_init(void) hook_register(zserv_client_close, zebra_evpn_cfg_clean_up); } -const char *port_state2str(uint8_t state) +static const char *port_state2str(uint8_t state) { switch (state) { - case BR_STATE_DISABLED: + case ZEBRA_DPLANE_BR_STATE_DISABLED: return "DISABLED"; - case BR_STATE_LISTENING: + case ZEBRA_DPLANE_BR_STATE_LISTENING: return "LISTENING"; - case BR_STATE_LEARNING: + case ZEBRA_DPLANE_BR_STATE_LEARNING: return "LEARNING"; - case BR_STATE_FORWARDING: + case ZEBRA_DPLANE_BR_STATE_FORWARDING: return "FORWARDING"; - case BR_STATE_BLOCKING: + case ZEBRA_DPLANE_BR_STATE_BLOCKING: return "BLOCKING"; } return "UNKNOWN"; } -void vxlan_vni_state_change(struct zebra_if *zif, uint16_t id, uint8_t state) +static void vxlan_vni_state_change(struct zebra_if *zif, uint16_t id, + uint8_t state) { struct zebra_vxlan_vni *vnip; @@ -6282,23 +6281,23 @@ void vxlan_vni_state_change(struct zebra_if *zif, uint16_t id, uint8_t state) } switch (state) { - case BR_STATE_FORWARDING: + case ZEBRA_DPLANE_BR_STATE_FORWARDING: zebra_vxlan_if_vni_up(zif->ifp, vnip); break; - case BR_STATE_BLOCKING: + case ZEBRA_DPLANE_BR_STATE_BLOCKING: zebra_vxlan_if_vni_down(zif->ifp, vnip); break; - case BR_STATE_DISABLED: - case BR_STATE_LISTENING: - case BR_STATE_LEARNING: + case ZEBRA_DPLANE_BR_STATE_DISABLED: + case ZEBRA_DPLANE_BR_STATE_LISTENING: + case ZEBRA_DPLANE_BR_STATE_LEARNING: default: /* Not used for anything at the moment */ break; } } -void vlan_id_range_state_change(struct interface *ifp, uint16_t id_start, - uint16_t id_end, uint8_t state) +static void vlan_id_range_state_change(struct interface *ifp, uint16_t id_start, + uint16_t id_end, uint8_t state) { struct zebra_if *zif; @@ -6310,3 +6309,51 @@ void vlan_id_range_state_change(struct interface *ifp, uint16_t id_start, for (uint16_t i = id_start; i <= id_end; i++) vxlan_vni_state_change(zif, i, state); } + +void zebra_vlan_dplane_result(struct zebra_dplane_ctx *ctx) +{ + int i; + struct interface *ifp = NULL; + ns_id_t ns_id = dplane_ctx_get_ns_id(ctx); + enum dplane_op_e op = dplane_ctx_get_op(ctx); + const struct zebra_vxlan_vlan_array *vlan_array = + dplane_ctx_get_vxlan_vlan_array(ctx); + ifindex_t ifindex = dplane_ctx_get_vlan_ifindex(ctx); + + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), ifindex); + if (!ifp) { + zlog_debug("Cannot find bridge-vlan IF (%u) for vlan update", + ifindex); + return; + } + + if (!IS_ZEBRA_IF_VXLAN(ifp)) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Ignoring non-vxlan IF (%s) for vlan update", + ifp->name); + + return; + } + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Dequeuing in zebra main..%s IF %s ifindex %u NS %u", + dplane_op2str(op), ifp->name, ifindex, ns_id); + + for (i = 0; i < vlan_array->count; i++) { + vlanid_t vid = vlan_array->vlans[i].vid; + uint8_t state = vlan_array->vlans[i].state; + uint32_t vrange = vlan_array->vlans[i].vrange; + + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_VXLAN) { + if (vrange) + zlog_debug("VLANDB_ENTRY: VID (%u-%u) state=%s", + vid, vrange, port_state2str(state)); + else + zlog_debug("VLANDB_ENTRY: VID (%u) state=%s", + vid, port_state2str(state)); + } + + vlan_id_range_state_change(ifp, vid, (vrange ? vrange : vid), + state); + } +} |
