From f883c7119e76e264fb8045bf1884e436eb66527a Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Tue, 2 Jul 2024 19:02:53 +0200 Subject: [PATCH] zebra: evpn: not coerce VTEP IP to IPv4 in nh_list In L3 BGP-EVPN, if there are both IPv4 and IPv6 routes in the VPN, zebra maintains two instances of `struct zebra_neigh` object: one with IPv4 address of the nexthop, and another with IPv6 address that is an IPv4 mapped to IPv6, but only one instance of `struct zebra_mac` object, that contains a list of nexthop addresses that use this mac. The code in `zebra_vxlan` module uses the fact that the list is empty as the indication that the `zebra_mac` object is unused, and needs to be dropped. However, preexisting code used nexthop address converted to IPv4 notation for the element of this list. As a result, when two `zebra_neigh` objects, one IPv4 and one IPv6-mapped-IPv4 were linked to the `zebra_mac` object, only one element was added to the list. Consequently, when one of the two `zebra_neigh` objects was dropped, the only element in the list was removed, making it empty, and `zebra_mac` object was dropped, and neighbour cache elements uninstalled from the kernel. As a result, after the last route in _one_ family was removed from a remote vtep, all remaining routes in the _other_ family became unreachable, because RMAC of the vtep was removed. This commit makes `zebra_mac` use uncoerced IP address of the `zebra_neigh` object for the entries in the `nh_list`. This way, `zebra_mac` object no longer loses track of `zebra_neigh` objects that need it. 
Bug-URL: https://github.com/FRRouting/frr/issues/16340 Signed-off-by: Eugene Crosser --- zebra/zebra_vxlan.c | 78 ++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index b8c11e186a..f1ae42e320 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -1356,6 +1356,18 @@ static int zl3vni_remote_rmac_add(struct zebra_l3vni *zl3vni, { struct zebra_mac *zrmac = NULL; struct ipaddr *vtep = NULL; + struct ipaddr ipv4_vtep; + + /* vtep_ip may be v4 or v6-mapped-v4. But zrmac->fwd_info + * can only contain v4 version. So convert if needed + */ + memset(&ipv4_vtep, 0, sizeof(ipv4_vtep)); + ipv4_vtep.ipa_type = IPADDR_V4; + if (vtep_ip->ipa_type == IPADDR_V6) + ipv4_mapped_ipv6_to_ipv4(&vtep_ip->ipaddr_v6, + &(ipv4_vtep.ipaddr_v4)); + else + IPV4_ADDR_COPY(&(ipv4_vtep.ipaddr_v4), &vtep_ip->ipaddr_v4); zrmac = zl3vni_rmac_lookup(zl3vni, rmac); if (!zrmac) { @@ -1369,7 +1381,7 @@ static int zl3vni_remote_rmac_add(struct zebra_l3vni *zl3vni, return -1; } memset(&zrmac->fwd_info, 0, sizeof(zrmac->fwd_info)); - zrmac->fwd_info.r_vtep_ip = vtep_ip->ipaddr_v4; + zrmac->fwd_info.r_vtep_ip = ipv4_vtep.ipaddr_v4; vtep = XCALLOC(MTYPE_EVPN_VTEP, sizeof(struct ipaddr)); memcpy(vtep, vtep_ip, sizeof(struct ipaddr)); @@ -1383,14 +1395,14 @@ static int zl3vni_remote_rmac_add(struct zebra_l3vni *zl3vni, /* install rmac in kernel */ zl3vni_rmac_install(zl3vni, zrmac); } else if (!IPV4_ADDR_SAME(&zrmac->fwd_info.r_vtep_ip, - &vtep_ip->ipaddr_v4)) { + &(ipv4_vtep.ipaddr_v4))) { if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( "L3VNI %u Remote VTEP change(%pI4 -> %pIA) for RMAC %pEA", zl3vni->vni, &zrmac->fwd_info.r_vtep_ip, vtep_ip, rmac); - zrmac->fwd_info.r_vtep_ip = vtep_ip->ipaddr_v4; + zrmac->fwd_info.r_vtep_ip = ipv4_vtep.ipaddr_v4; vtep = XCALLOC(MTYPE_EVPN_VTEP, sizeof(struct ipaddr)); memcpy(vtep, vtep_ip, sizeof(struct ipaddr)); @@ -1410,36 +1422,29 @@ static void 
zl3vni_remote_rmac_del(struct zebra_l3vni *zl3vni, struct zebra_mac *zrmac, struct ipaddr *vtep_ip) { - struct ipaddr ipv4_vtep; - if (!zl3vni_nh_lookup(zl3vni, vtep_ip)) { - memset(&ipv4_vtep, 0, sizeof(ipv4_vtep)); - ipv4_vtep.ipa_type = IPADDR_V4; - if (vtep_ip->ipa_type == IPADDR_V6) - ipv4_mapped_ipv6_to_ipv4(&vtep_ip->ipaddr_v6, - &ipv4_vtep.ipaddr_v4); - else - memcpy(&(ipv4_vtep.ipaddr_v4), &vtep_ip->ipaddr_v4, - sizeof(struct in_addr)); - /* remove nh from rmac's list */ - l3vni_rmac_nh_list_nh_delete(zl3vni, zrmac, &ipv4_vtep); - /* delete nh is same as current selected, fall back to - * one present in the list - */ - if (IPV4_ADDR_SAME(&zrmac->fwd_info.r_vtep_ip, - &ipv4_vtep.ipaddr_v4) && - listcount(zrmac->nh_list)) { + l3vni_rmac_nh_list_nh_delete(zl3vni, zrmac, vtep_ip); + /* If there are remaining entries, use IPv4 from one */ + if (listcount(zrmac->nh_list)) { struct ipaddr *vtep; + struct ipaddr ipv4_vtep; vtep = listgetdata(listhead(zrmac->nh_list)); - zrmac->fwd_info.r_vtep_ip = vtep->ipaddr_v4; + memset(&ipv4_vtep, 0, sizeof(ipv4_vtep)); + ipv4_vtep.ipa_type = IPADDR_V4; + if (vtep->ipa_type == IPADDR_V6) + ipv4_mapped_ipv6_to_ipv4(&vtep->ipaddr_v6, + &(ipv4_vtep.ipaddr_v4)); + else + IPV4_ADDR_COPY(&(ipv4_vtep.ipaddr_v4), + &vtep->ipaddr_v4); + zrmac->fwd_info.r_vtep_ip = ipv4_vtep.ipaddr_v4; if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug( - "L3VNI %u Remote VTEP nh change(%pIA -> %pI4) for RMAC %pEA", - zl3vni->vni, &ipv4_vtep, - &zrmac->fwd_info.r_vtep_ip, - &zrmac->macaddr); + zlog_debug("L3VNI %u Remote VTEP nh change(%pIA -> %pI4) for RMAC %pEA", + zl3vni->vni, vtep_ip, + &zrmac->fwd_info.r_vtep_ip, + &zrmac->macaddr); /* install rmac in kernel */ zl3vni_rmac_install(zl3vni, zrmac); @@ -2531,7 +2536,6 @@ void zebra_vxlan_evpn_vrf_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, const struct prefix *host_prefix) { struct zebra_l3vni *zl3vni = NULL; - struct ipaddr ipv4_vtep; zl3vni = zl3vni_from_vrf(vrf_id); if (!zl3vni || 
!is_l3vni_oper_up(zl3vni)) @@ -2547,24 +2551,10 @@ void zebra_vxlan_evpn_vrf_route_add(vrf_id_t vrf_id, const struct ethaddr *rmac, svd_remote_nh_add(zl3vni, vtep_ip, rmac, host_prefix); /* - * if the remote vtep is a ipv4 mapped ipv6 address convert it to ipv4 - * address. Rmac is programmed against the ipv4 vtep because we only - * support ipv4 tunnels in the h/w right now - */ - memset(&ipv4_vtep, 0, sizeof(ipv4_vtep)); - ipv4_vtep.ipa_type = IPADDR_V4; - if (vtep_ip->ipa_type == IPADDR_V6) - ipv4_mapped_ipv6_to_ipv4(&vtep_ip->ipaddr_v6, - &(ipv4_vtep.ipaddr_v4)); - else - memcpy(&(ipv4_vtep.ipaddr_v4), &vtep_ip->ipaddr_v4, - sizeof(struct in_addr)); - - /* - * add the rmac - remote rmac to be installed is against the ipv4 + * add the rmac - remote rmac to be installed is against the * nexthop address */ - zl3vni_remote_rmac_add(zl3vni, rmac, &ipv4_vtep); + zl3vni_remote_rmac_add(zl3vni, rmac, vtep_ip); } /* handle evpn vrf route delete */ -- 2.39.5