diff options
Diffstat (limited to 'zebra')
| -rw-r--r-- | zebra/if_netlink.c | 91 | ||||
| -rw-r--r-- | zebra/interface.c | 51 | ||||
| -rw-r--r-- | zebra/ioctl.c | 180 | ||||
| -rw-r--r-- | zebra/ioctl.h | 6 | ||||
| -rw-r--r-- | zebra/ioctl_solaris.c | 71 | ||||
| -rw-r--r-- | zebra/rt.h | 7 | ||||
| -rw-r--r-- | zebra/zebra_dplane.c | 306 | ||||
| -rw-r--r-- | zebra/zebra_dplane.h | 29 | ||||
| -rw-r--r-- | zebra/zebra_l2.c | 18 | ||||
| -rw-r--r-- | zebra/zebra_l2.h | 1 | ||||
| -rw-r--r-- | zebra/zebra_rib.c | 175 | ||||
| -rw-r--r-- | zebra/zebra_rnh.c | 31 | ||||
| -rw-r--r-- | zebra/zebra_vrf.h | 3 | ||||
| -rw-r--r-- | zebra/zebra_vxlan.c | 388 | ||||
| -rw-r--r-- | zebra/zebra_vxlan.h | 8 | ||||
| -rw-r--r-- | zebra/zebra_vxlan_private.h | 29 | ||||
| -rw-r--r-- | zebra/zserv.c | 2 | ||||
| -rw-r--r-- | zebra/zserv.h | 2 |
18 files changed, 1069 insertions, 329 deletions
diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index b2f470bc8d..ba518ea576 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -481,6 +481,11 @@ static int netlink_extract_vxlan_info(struct rtattr *link_data, vxl_info->vtep_ip = vtep_ip_in_msg; } + if (attr[IFLA_VXLAN_GROUP]) { + vxl_info->mcast_grp = + *(struct in_addr *)RTA_DATA(attr[IFLA_VXLAN_GROUP]); + } + return 0; } @@ -834,11 +839,12 @@ int kernel_interface_set_master(struct interface *master, } /* Interface address modification. */ -static int netlink_address(int cmd, int family, struct interface *ifp, - struct connected *ifc) +static int netlink_address_ctx(const struct zebra_dplane_ctx *ctx) { int bytelen; - struct prefix *p; + const struct prefix *p; + int cmd; + const char *label; struct { struct nlmsghdr n; @@ -846,72 +852,59 @@ static int netlink_address(int cmd, int family, struct interface *ifp, char buf[NL_PKT_BUF_SIZE]; } req; - struct zebra_ns *zns; - - if (vrf_is_backend_netns()) - zns = zebra_ns_lookup((ns_id_t)ifp->vrf_id); - else - zns = zebra_ns_lookup(NS_DEFAULT); - p = ifc->address; - memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE); + p = dplane_ctx_get_intf_addr(ctx); + memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE); - bytelen = (family == AF_INET ? 4 : 16); + bytelen = (p->family == AF_INET ? 4 : 16); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)); req.n.nlmsg_flags = NLM_F_REQUEST; - req.n.nlmsg_type = cmd; - req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; - req.ifa.ifa_family = family; + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL) + cmd = RTM_NEWADDR; + else + cmd = RTM_DELADDR; + + req.n.nlmsg_type = cmd; + req.ifa.ifa_family = p->family; - req.ifa.ifa_index = ifp->ifindex; + req.ifa.ifa_index = dplane_ctx_get_ifindex(ctx); - addattr_l(&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen); + addattr_l(&req.n, sizeof(req), IFA_LOCAL, &p->u.prefix, bytelen); - if (family == AF_INET) { - if (CONNECTED_PEER(ifc)) { - p = ifc->destination; - addattr_l(&req.n, sizeof req, IFA_ADDRESS, &p->u.prefix, - bytelen); - } else if (cmd == RTM_NEWADDR && ifc->destination) { - p = ifc->destination; - addattr_l(&req.n, sizeof req, IFA_BROADCAST, + if (p->family == AF_INET) { + if (dplane_ctx_intf_is_connected(ctx)) { + p = dplane_ctx_get_intf_dest(ctx); + addattr_l(&req.n, sizeof(req), IFA_ADDRESS, + &p->u.prefix, bytelen); + } else if (cmd == RTM_NEWADDR && + dplane_ctx_intf_has_dest(ctx)) { + p = dplane_ctx_get_intf_dest(ctx); + addattr_l(&req.n, sizeof(req), IFA_BROADCAST, &p->u.prefix, bytelen); } } - /* p is now either ifc->address or ifc->destination */ + /* p is now either address or destination/bcast addr */ req.ifa.ifa_prefixlen = p->prefixlen; - if (CHECK_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY)) + if (dplane_ctx_intf_is_secondary(ctx)) SET_FLAG(req.ifa.ifa_flags, IFA_F_SECONDARY); - if (ifc->label) - addattr_l(&req.n, sizeof req, IFA_LABEL, ifc->label, - strlen(ifc->label) + 1); - - return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, - 0); -} - -int kernel_address_add_ipv4(struct interface *ifp, struct connected *ifc) -{ - return netlink_address(RTM_NEWADDR, AF_INET, ifp, ifc); -} - -int kernel_address_delete_ipv4(struct interface *ifp, struct connected *ifc) -{ - return netlink_address(RTM_DELADDR, AF_INET, ifp, ifc); -} + if (dplane_ctx_intf_has_label(ctx)) { + label = dplane_ctx_get_intf_label(ctx); + addattr_l(&req.n, sizeof(req), IFA_LABEL, label, + strlen(label) + 1); + } -int kernel_address_add_ipv6(struct interface *ifp, struct connected *ifc) -{ - return netlink_address(RTM_NEWADDR, AF_INET6, ifp, ifc); + return netlink_talk_info(netlink_talk_filter, &req.n, + dplane_ctx_get_ns(ctx), 0); } -int kernel_address_delete_ipv6(struct interface *ifp, struct connected *ifc) +enum zebra_dplane_result kernel_address_update_ctx(struct zebra_dplane_ctx *ctx) { - return netlink_address(RTM_DELADDR, AF_INET6, ifp, ifc); + return (netlink_address_ctx(ctx) == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); } int netlink_interface_addr(struct nlmsghdr *h, ns_id_t ns_id, int startup) diff --git a/zebra/interface.c b/zebra/interface.c index 229f9c1da4..10f1f92100 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -441,7 +441,7 @@ static void if_addr_wakeup(struct interface *ifp) struct listnode *node, *nnode; struct connected *ifc; struct prefix *p; - int ret; + enum zebra_dplane_result dplane_res; for (ALL_LIST_ELEMENTS(ifp->connected, node, nnode, ifc)) { p = ifc->address; @@ -479,12 +479,13 @@ static void if_addr_wakeup(struct interface *ifp) if_refresh(ifp); } - ret = if_set_prefix(ifp, ifc); - if (ret < 0) { + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == + ZEBRA_DPLANE_REQUEST_FAILURE) { flog_err_sys( EC_ZEBRA_IFACE_ADDR_ADD_FAILED, "Can't set interface's address: %s", - safe_strerror(errno)); + dplane_res2str(dplane_res)); continue; } @@ -502,12 +503,14 @@ static void if_addr_wakeup(struct interface *ifp) if_refresh(ifp); } - ret = if_prefix_add_ipv6(ifp, ifc); - if (ret < 0) { + + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == + ZEBRA_DPLANE_REQUEST_FAILURE) { flog_err_sys( EC_ZEBRA_IFACE_ADDR_ADD_FAILED, "Can't set interface's address: %s", - safe_strerror(errno)); + dplane_res2str(dplane_res)); continue; } @@ -1369,8 +1372,11 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp) vty_out(vty, " VTEP IP: %s", inet_ntoa(vxlan_info->vtep_ip)); if (vxlan_info->access_vlan) - vty_out(vty, " Access VLAN Id %u", + vty_out(vty, " Access VLAN Id %u\n", vxlan_info->access_vlan); + if (vxlan_info->mcast_grp.s_addr != INADDR_ANY) + vty_out(vty, " Mcast Group %s", + inet_ntoa(vxlan_info->mcast_grp)); vty_out(vty, "\n"); } @@ -2626,6 +2632,7 @@ static int ip_address_install(struct vty *vty, struct interface *ifp, struct connected *ifc; struct prefix_ipv4 *p; int ret; + enum zebra_dplane_result dplane_res; if_data = ifp->info; @@ -2699,10 +2706,10 @@ static int ip_address_install(struct vty *vty, struct interface *ifp, if_refresh(ifp); } - ret = if_set_prefix(ifp, ifc); - if (ret < 0) { + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { vty_out(vty, "%% Can't set interface IP address: %s.\n", - safe_strerror(errno)); + dplane_res2str(dplane_res)); return CMD_WARNING_CONFIG_FAILED; } @@ -2723,6 +2730,7 @@ static int ip_address_uninstall(struct vty *vty, struct interface *ifp, struct prefix_ipv4 lp, pp; struct connected *ifc; int ret; + enum zebra_dplane_result dplane_res; /* Convert to prefix structure. */ ret = str2prefix_ipv4(addr_str, &lp); @@ -2767,10 +2775,10 @@ static int ip_address_uninstall(struct vty *vty, struct interface *ifp, } /* This is real route. */ - ret = if_unset_prefix(ifp, ifc); - if (ret < 0) { + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { vty_out(vty, "%% Can't unset interface IP address: %s.\n", - safe_strerror(errno)); + dplane_res2str(dplane_res)); return CMD_WARNING_CONFIG_FAILED; } UNSET_FLAG(ifc->conf, ZEBRA_IFC_QUEUED); @@ -2877,6 +2885,7 @@ static int ipv6_address_install(struct vty *vty, struct interface *ifp, struct connected *ifc; struct prefix_ipv6 *p; int ret; + enum zebra_dplane_result dplane_res; if_data = ifp->info; @@ -2923,11 +2932,10 @@ static int ipv6_address_install(struct vty *vty, struct interface *ifp, if_refresh(ifp); } - ret = if_prefix_add_ipv6(ifp, ifc); - - if (ret < 0) { + dplane_res = dplane_intf_addr_set(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { vty_out(vty, "%% Can't set interface IP address: %s.\n", - safe_strerror(errno)); + dplane_res2str(dplane_res)); return CMD_WARNING_CONFIG_FAILED; } @@ -2961,6 +2969,7 @@ static int ipv6_address_uninstall(struct vty *vty, struct interface *ifp, struct prefix_ipv6 cp; struct connected *ifc; int ret; + enum zebra_dplane_result dplane_res; /* Convert to prefix structure. */ ret = str2prefix_ipv6(addr_str, &cp); @@ -2991,10 +3000,10 @@ static int ipv6_address_uninstall(struct vty *vty, struct interface *ifp, } /* This is real route. */ - ret = if_prefix_delete_ipv6(ifp, ifc); - if (ret < 0) { + dplane_res = dplane_intf_addr_unset(ifp, ifc); + if (dplane_res == ZEBRA_DPLANE_REQUEST_FAILURE) { vty_out(vty, "%% Can't unset interface IP address: %s.\n", - safe_strerror(errno)); + dplane_res2str(dplane_res)); return CMD_WARNING_CONFIG_FAILED; } diff --git a/zebra/ioctl.c b/zebra/ioctl.c index 9499c731ef..322527015b 100644 --- a/zebra/ioctl.c +++ b/zebra/ioctl.c @@ -34,6 +34,7 @@ #include "zebra/rt.h" #include "zebra/interface.h" #include "zebra/zebra_errors.h" +#include "zebra/debug.h" #ifndef SUNOS_5 @@ -180,40 +181,72 @@ void if_get_mtu(struct interface *ifp) #endif } -#ifdef HAVE_NETLINK -/* Interface address setting via netlink interface. */ -int if_set_prefix(struct interface *ifp, struct connected *ifc) -{ - return kernel_address_add_ipv4(ifp, ifc); -} +/* + * Handler for interface address programming via the zebra dplane, + * for non-netlink platforms. This handler dispatches to per-platform + * helpers, based on the operation requested. + */ +#ifndef HAVE_NETLINK -/* Interface address is removed using netlink interface. */ -int if_unset_prefix(struct interface *ifp, struct connected *ifc) +/* Prototypes: these are placed in this block so that they're only seen + * on non-netlink platforms. + */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx); + +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx) { - return kernel_address_delete_ipv4(ifp, ifc); + int ret = -1; + const struct prefix *p; + + p = dplane_ctx_get_intf_addr(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL) { + if (p->family == AF_INET) + ret = if_set_prefix_ctx(ctx); + else + ret = if_set_prefix6_ctx(ctx); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_UNINSTALL) { + if (p->family == AF_INET) + ret = if_unset_prefix_ctx(ctx); + else + ret = if_unset_prefix6_ctx(ctx); + } else { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Invalid op in interface-addr install"); + } + + return (ret == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); } + +#endif /* !HAVE_NETLINK */ + +#ifdef HAVE_NETLINK + +/* TODO -- remove; no use of these apis with netlink any longer */ + #else /* ! HAVE_NETLINK */ #ifdef HAVE_STRUCT_IFALIASREQ -/* Set up interface's IP address, netmask (and broadcas? ). *BSD may - has ifaliasreq structure. */ -int if_set_prefix(struct interface *ifp, struct connected *ifc) + +/* + * Helper for interface-addr install, non-netlink + */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct ifaliasreq addreq; struct sockaddr_in addr, mask, peer; struct prefix_ipv4 *p; - /* don't configure PtP addresses on broadcast ifs or reverse */ - if (!(ifp->flags & IFF_POINTOPOINT) != !CONNECTED_PEER(ifc)) { - errno = EINVAL; - return -1; - } + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); - p = (struct prefix_ipv4 *)ifc->address; - rib_lookup_and_pushup(p, ifp->vrf_id); - - memset(&addreq, 0, sizeof addreq); - strlcpy(addreq.ifra_name, ifp->name, sizeof(addreq.ifra_name)); + memset(&addreq, 0, sizeof(addreq)); + strncpy((char *)&addreq.ifra_name, dplane_ctx_get_ifname(ctx), + sizeof(addreq.ifra_name)); memset(&addr, 0, sizeof(struct sockaddr_in)); addr.sin_addr = p->prefix; @@ -223,8 +256,8 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) #endif memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in)); - if (CONNECTED_PEER(ifc)) { - p = (struct prefix_ipv4 *)ifc->destination; + if (dplane_ctx_intf_is_connected(ctx)) { + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_dest(ctx); memset(&mask, 0, sizeof(struct sockaddr_in)); peer.sin_addr = p->prefix; peer.sin_family = p->family; @@ -247,27 +280,24 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) if (ret < 0) return ret; return 0; + } -/* Set up interface's IP address, netmask (and broadcas? ). *BSD may - has ifaliasreq structure. */ -int if_unset_prefix(struct interface *ifp, struct connected *ifc) +/* + * Helper for interface-addr un-install, non-netlink + */ +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct ifaliasreq addreq; struct sockaddr_in addr, mask, peer; struct prefix_ipv4 *p; - /* this would probably wreak havoc */ - if (!(ifp->flags & IFF_POINTOPOINT) != !CONNECTED_PEER(ifc)) { - errno = EINVAL; - return -1; - } + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); - p = (struct prefix_ipv4 *)ifc->address; - - memset(&addreq, 0, sizeof addreq); - strlcpy(addreq.ifra_name, ifp->name, sizeof(addreq.ifra_name)); + memset(&addreq, 0, sizeof(addreq)); + strncpy((char *)&addreq.ifra_name, dplane_ctx_get_ifname(ctx), + sizeof(addreq.ifra_name)); memset(&addr, 0, sizeof(struct sockaddr_in)); addr.sin_addr = p->prefix; @@ -277,8 +307,8 @@ int if_unset_prefix(struct interface *ifp, struct connected *ifc) #endif memcpy(&addreq.ifra_addr, &addr, sizeof(struct sockaddr_in)); - if (CONNECTED_PEER(ifc)) { - p = (struct prefix_ipv4 *)ifc->destination; + if (dplane_ctx_intf_is_connected(ctx)) { + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_dest(ctx); memset(&mask, 0, sizeof(struct sockaddr_in)); peer.sin_addr = p->prefix; peer.sin_family = p->family; @@ -305,7 +335,7 @@ int if_unset_prefix(struct interface *ifp, struct connected *ifc) #else /* Set up interface's address, netmask (and broadcas? ). Linux or Solaris uses ifname:number semantics to set IP address aliases. */ -int if_set_prefix(struct interface *ifp, struct connected *ifc) +int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct ifreq ifreq; @@ -315,11 +345,12 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) struct prefix_ipv4 ifaddr; struct prefix_ipv4 *p; - p = (struct prefix_ipv4 *)ifc->address; + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); ifaddr = *p; - ifreq_set_name(&ifreq, ifp); + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); addr.sin_addr = p->prefix; addr.sin_family = p->family; @@ -331,7 +362,7 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) /* We need mask for make broadcast addr. */ masklen2ip(p->prefixlen, &mask.sin_addr); - if (if_is_broadcast(ifp)) { + if (dplane_ctx_intf_is_broadcast(ctx)) { apply_mask_ipv4(&ifaddr); addr.sin_addr = ifaddr.prefix; @@ -350,7 +381,7 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) #ifdef SUNOS_5 memcpy(&mask, &ifreq.ifr_addr, sizeof(mask)); #else - memcpy(&ifreq.ifr_netmask, &mask, sizeof(struct sockaddr_in)); + memcpy(&ifreq.ifr_addr, &mask, sizeof(struct sockaddr_in)); #endif /* SUNOS5 */ ret = if_ioctl(SIOCSIFNETMASK, (caddr_t)&ifreq); if (ret < 0) @@ -361,16 +392,17 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) /* Set up interface's address, netmask (and broadcas? ). Linux or Solaris uses ifname:number semantics to set IP address aliases. */ -int if_unset_prefix(struct interface *ifp, struct connected *ifc) +int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct ifreq ifreq; struct sockaddr_in addr; struct prefix_ipv4 *p; - p = (struct prefix_ipv4 *)ifc->address; + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); - ifreq_set_name(&ifreq, ifp); + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); memset(&addr, 0, sizeof(struct sockaddr_in)); addr.sin_family = p->family; @@ -475,35 +507,17 @@ int if_unset_flags(struct interface *ifp, uint64_t flags) return 0; } -#ifdef LINUX_IPV6 -#ifndef _LINUX_IN6_H -/* linux/include/net/ipv6.h */ -struct in6_ifreq { - struct in6_addr ifr6_addr; - uint32_t ifr6_prefixlen; - int ifr6_ifindex; -}; -#endif /* _LINUX_IN6_H */ -/* Interface's address add/delete functions. */ -int if_prefix_add_ipv6(struct interface *ifp, struct connected *ifc) -{ -#ifdef HAVE_NETLINK - return kernel_address_add_ipv6(ifp, ifc); -#endif /* HAVE_NETLINK */ -} +#ifndef LINUX_IPV6 /* Netlink has its own code */ -int if_prefix_delete_ipv6(struct interface *ifp, struct connected *ifc) -{ -#ifdef HAVE_NETLINK - return kernel_address_delete_ipv6(ifp, ifc); -#endif /* HAVE_NETLINK */ -} -#else /* LINUX_IPV6 */ #ifdef HAVE_STRUCT_IN6_ALIASREQ #ifndef ND6_INFINITE_LIFETIME #define ND6_INFINITE_LIFETIME 0xffffffffL #endif /* ND6_INFINITE_LIFETIME */ -int if_prefix_add_ipv6(struct interface *ifp, struct connected *ifc) + +/* + * Helper for interface-addr install, non-netlink + */ +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct in6_aliasreq addreq; @@ -511,10 +525,11 @@ int if_prefix_add_ipv6(struct interface *ifp, struct connected *ifc) struct sockaddr_in6 mask; struct prefix_ipv6 *p; - p = (struct prefix_ipv6 *)ifc->address; + p = (struct prefix_ipv6 *)dplane_ctx_get_intf_addr(ctx); - memset(&addreq, 0, sizeof addreq); - strlcpy(addreq.ifra_name, ifp->name, sizeof(addreq.ifra_name)); + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, + dplane_ctx_get_ifname(ctx), sizeof(addreq.ifra_name)); memset(&addr, 0, sizeof(struct sockaddr_in6)); addr.sin6_addr = p->prefix; @@ -546,7 +561,10 @@ int if_prefix_add_ipv6(struct interface *ifp, struct connected *ifc) return 0; } -int if_prefix_delete_ipv6(struct interface *ifp, struct connected *ifc) +/* + * Helper for interface-addr un-install, non-netlink + */ +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct in6_aliasreq addreq; @@ -554,10 +572,11 @@ int if_prefix_delete_ipv6(struct interface *ifp, struct connected *ifc) struct sockaddr_in6 mask; struct prefix_ipv6 *p; - p = (struct prefix_ipv6 *)ifc->address; + p = (struct prefix_ipv6 *)dplane_ctx_get_intf_addr(ctx); - memset(&addreq, 0, sizeof addreq); - strlcpy(addreq.ifra_name, ifp->name, sizeof(addreq.ifra_name)); + memset(&addreq, 0, sizeof(addreq)); + strlcpy((char *)&addreq.ifra_name, + dplane_ctx_get_ifname(ctx), sizeof(addreq.ifra_name)); memset(&addr, 0, sizeof(struct sockaddr_in6)); addr.sin6_addr = p->prefix; @@ -586,12 +605,15 @@ int if_prefix_delete_ipv6(struct interface *ifp, struct connected *ifc) return 0; } #else -int if_prefix_add_ipv6(struct interface *ifp, struct connected *ifc) +/* The old, pre-dataplane code here just returned, so we're retaining that + * choice. + */ +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) { return 0; } -int if_prefix_delete_ipv6(struct interface *ifp, struct connected *ifc) +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) { return 0; } diff --git a/zebra/ioctl.h b/zebra/ioctl.h index 67ffd45a08..03f3911d52 100644 --- a/zebra/ioctl.h +++ b/zebra/ioctl.h @@ -35,15 +35,9 @@ extern int if_set_flags(struct interface *, uint64_t); extern int if_unset_flags(struct interface *, uint64_t); extern void if_get_flags(struct interface *); -extern int if_set_prefix(struct interface *, struct connected *); -extern int if_unset_prefix(struct interface *, struct connected *); - extern void if_get_metric(struct interface *); extern void if_get_mtu(struct interface *); -extern int if_prefix_add_ipv6(struct interface *, struct connected *); -extern int if_prefix_delete_ipv6(struct interface *, struct connected *); - #ifdef SOLARIS_IPV6 extern int if_ioctl_ipv6(unsigned long, caddr_t); extern struct connected *if_lookup_linklocal(struct interface *); diff --git a/zebra/ioctl_solaris.c b/zebra/ioctl_solaris.c index c523ee983d..ccfa7a4a4c 100644 --- a/zebra/ioctl_solaris.c +++ b/zebra/ioctl_solaris.c @@ -38,9 +38,16 @@ #include "zebra/interface.h" #include "zebra/ioctl_solaris.h" #include "zebra/zebra_errors.h" +#include "zebra/debug.h" extern struct zebra_privs_t zserv_privs; +/* Prototypes */ +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx); +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx); +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx); + /* clear and set interface name string */ void lifreq_set_name(struct lifreq *lifreq, const char *ifname) { @@ -183,23 +190,52 @@ void if_get_mtu(struct interface *ifp) zebra_interface_up_update(ifp); } +/* + * + */ +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx) +{ + int ret = -1; + const struct prefix *p; + + p = dplane_ctx_get_intf_addr(ctx); + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_INSTALL) { + if (p->family == AF_INET) + ret = if_set_prefix_ctx(ctx); + else + ret = if_set_prefix6_ctx(ctx); + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ADDR_UNINSTALL) { + if (p->family == AF_INET) + ret = if_unset_prefix_ctx(ctx); + else + ret = if_unset_prefix6_ctx(ctx); + } else { + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Invalid op in interface-addr install"); + } + + return (ret == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); +} + /* Set up interface's address, netmask (and broadcast? ). Solaris uses ifname:number semantics to set IP address aliases. */ -int if_set_prefix(struct interface *ifp, struct connected *ifc) +static int if_set_prefix_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct ifreq ifreq; - struct sockaddr_in addr; - struct sockaddr_in broad; - struct sockaddr_in mask; + struct sockaddr_in addr, broad, mask; struct prefix_ipv4 ifaddr; struct prefix_ipv4 *p; - p = (struct prefix_ipv4 *)ifc->address; + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); ifaddr = *p; - strlcpy(ifreq.ifr_name, ifp->name, sizeof(ifreq.ifr_name)); + strlcpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); addr.sin_addr = p->prefix; addr.sin_family = p->family; @@ -213,7 +249,7 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) /* We need mask for make broadcast addr. */ masklen2ip(p->prefixlen, &mask.sin_addr); - if (if_is_broadcast(ifp)) { + if (dplane_ctx_intf_is_broadcast(ctx)) { apply_mask_ipv4(&ifaddr); addr.sin_addr = ifaddr.prefix; @@ -241,16 +277,17 @@ int if_set_prefix(struct interface *ifp, struct connected *ifc) /* Set up interface's address, netmask (and broadcast). Solaris uses ifname:number semantics to set IP address aliases. */ -int if_unset_prefix(struct interface *ifp, struct connected *ifc) +static int if_unset_prefix_ctx(const struct zebra_dplane_ctx *ctx) { int ret; struct ifreq ifreq; struct sockaddr_in addr; struct prefix_ipv4 *p; - p = (struct prefix_ipv4 *)ifc->address; + p = (struct prefix_ipv4 *)dplane_ctx_get_intf_addr(ctx); - strlcpy(ifreq.ifr_name, ifp->name, sizeof(ifreq.ifr_name)); + strncpy(ifreq.ifr_name, dplane_ctx_get_ifname(ctx), + sizeof(ifreq.ifr_name)); memset(&addr, 0, sizeof(struct sockaddr_in)); addr.sin_family = p->family; @@ -377,24 +414,26 @@ int if_unset_flags(struct interface *ifp, uint64_t flags) } /* Interface's address add/delete functions. */ -int if_prefix_add_ipv6(struct interface *ifp, struct connected *ifc) +static int if_set_prefix6_ctx(const struct zebra_dplane_ctx *ctx) { char addrbuf[PREFIX_STRLEN]; + prefix2str(dplane_ctx_get_intf_addr(ctx), addrbuf, sizeof(addrbuf)); + flog_warn(EC_LIB_DEVELOPMENT, "Can't set %s on interface %s", - prefix2str(ifc->address, addrbuf, sizeof(addrbuf)), - ifp->name); + addrbuf, dplane_ctx_get_ifname(ctx)); return 0; } -int if_prefix_delete_ipv6(struct interface *ifp, struct connected *ifc) +static int if_unset_prefix6_ctx(const struct zebra_dplane_ctx *ctx) { char addrbuf[PREFIX_STRLEN]; + prefix2str(dplane_ctx_get_intf_addr(ctx), addrbuf, sizeof(addrbuf)); + flog_warn(EC_LIB_DEVELOPMENT, "Can't delete %s on interface %s", - prefix2str(ifc->address, addrbuf, sizeof(addrbuf)), - ifp->name); + addrbuf, dplane_ctx_get_ifname(ctx)); return 0; } diff --git a/zebra/rt.h b/zebra/rt.h index 2c77af2aad..08b51fcc0b 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -50,10 +50,9 @@ extern enum zebra_dplane_result kernel_lsp_update( enum zebra_dplane_result kernel_pw_update(struct zebra_dplane_ctx *ctx); -extern int kernel_address_add_ipv4(struct interface *, struct connected *); -extern int kernel_address_delete_ipv4(struct interface *, struct connected *); -extern int kernel_address_add_ipv6(struct interface *, struct connected *); -extern int kernel_address_delete_ipv6(struct interface *, struct connected *); +enum zebra_dplane_result kernel_address_update_ctx( + struct zebra_dplane_ctx *ctx); + extern int kernel_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla, int llalen, ns_id_t ns_id); extern int kernel_interface_set_master(struct interface *master, diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index af54c3b5c7..d1b28227c3 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -122,6 +122,33 @@ struct dplane_pw_info { }; /* + * Interface/prefix info for the dataplane + */ +struct dplane_intf_info { + + char ifname[INTERFACE_NAMSIZ]; + ifindex_t ifindex; + + uint32_t metric; + uint32_t flags; + +#define DPLANE_INTF_CONNECTED (1 << 0) /* Connected peer, p2p */ +#define DPLANE_INTF_SECONDARY (1 << 1) +#define DPLANE_INTF_BROADCAST (1 << 2) +#define DPLANE_INTF_HAS_DEST (1 << 3) +#define DPLANE_INTF_HAS_LABEL (1 << 4) + + /* Interface address/prefix */ + struct prefix prefix; + + /* Dest address, for p2p, or broadcast prefix */ + struct prefix dest_prefix; + + char *label; + char label_buf[32]; +}; + +/* * The context block used to exchange info about route updates across * the boundary between the zebra main context (and pthread) and the * dataplane layer (and pthread). @@ -152,11 +179,12 @@ struct zebra_dplane_ctx { vrf_id_t zd_vrf_id; uint32_t zd_table_id; - /* Support info for either route or LSP update */ + /* Support info for different kinds of updates */ union { struct dplane_route_info rinfo; zebra_lsp_t lsp; struct dplane_pw_info pw; + struct dplane_intf_info intf; } u; /* Namespace info, used especially for netlink kernel communication */ @@ -266,6 +294,9 @@ static struct zebra_dplane_globals { _Atomic uint32_t dg_pws_in; _Atomic uint32_t dg_pw_errors; + _Atomic uint32_t dg_intf_addrs_in; + _Atomic uint32_t dg_intf_addr_errors; + _Atomic uint32_t dg_update_yields; /* Dataplane pthread */ @@ -303,6 +334,9 @@ static enum zebra_dplane_result lsp_update_internal(zebra_lsp_t *lsp, enum dplane_op_e op); static enum zebra_dplane_result pw_update_internal(struct zebra_pw *pw, enum dplane_op_e op); +static enum zebra_dplane_result intf_addr_update_internal( + const struct interface *ifp, const struct connected *ifc, + enum dplane_op_e op); /* * Public APIs @@ -409,6 +443,16 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) } break; + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + /* Maybe free label string, if allocated */ + if ((*pctx)->u.intf.label != NULL && + (*pctx)->u.intf.label != (*pctx)->u.intf.label_buf) { + free((*pctx)->u.intf.label); + (*pctx)->u.intf.label = NULL; + } + break; + case DPLANE_OP_NONE: break; } @@ -549,6 +593,14 @@ const char *dplane_op2str(enum dplane_op_e op) case DPLANE_OP_SYS_ROUTE_DELETE: ret = "SYS_ROUTE_DEL"; break; + + case DPLANE_OP_ADDR_INSTALL: + ret = "ADDR_INSTALL"; + break; + case DPLANE_OP_ADDR_UNINSTALL: + ret = "ADDR_UNINSTALL"; + break; + } return ret; @@ -868,6 +920,90 @@ dplane_ctx_get_pw_nhg(const struct zebra_dplane_ctx *ctx) return &(ctx->u.pw.nhg); } +/* Accessors for interface information */ +const char *dplane_ctx_get_ifname(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.ifname; +} + +ifindex_t dplane_ctx_get_ifindex(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.ifindex; +} + +uint32_t dplane_ctx_get_intf_metric(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.metric; +} + +/* Is interface addr p2p? */ +bool dplane_ctx_intf_is_connected(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_CONNECTED); +} + +bool dplane_ctx_intf_is_secondary(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_SECONDARY); +} + +bool dplane_ctx_intf_is_broadcast(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_BROADCAST); +} + +const struct prefix *dplane_ctx_get_intf_addr( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.intf.prefix); +} + +bool dplane_ctx_intf_has_dest(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_HAS_DEST); +} + +const struct prefix *dplane_ctx_get_intf_dest( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + if (ctx->u.intf.flags & DPLANE_INTF_HAS_DEST) + return &(ctx->u.intf.dest_prefix); + else + return NULL; +} + +bool dplane_ctx_intf_has_label(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return (ctx->u.intf.flags & DPLANE_INTF_HAS_LABEL); +} + +const char *dplane_ctx_get_intf_label(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.intf.label; +} + /* * End of dplane context accessors */ @@ -1494,6 +1630,140 @@ done: } /* + * Enqueue interface address add for the dataplane. + */ +enum zebra_dplane_result dplane_intf_addr_set(const struct interface *ifp, + const struct connected *ifc) +{ +#if !defined(HAVE_NETLINK) && defined(HAVE_STRUCT_IFALIASREQ) + /* Extra checks for this OS path. */ + + /* Don't configure PtP addresses on broadcast ifs or reverse */ + if (!(ifp->flags & IFF_POINTOPOINT) != !CONNECTED_PEER(ifc)) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Failed to set intf addr: mismatch p2p and connected"); + + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + /* Ensure that no existing installed v4 route conflicts with + * the new interface prefix. This check must be done in the + * zebra pthread context, and any route delete (if needed) + * is enqueued before the interface address programming attempt. + */ + if (ifc->address->family == AF_INET) { + struct prefix_ipv4 *p; + + p = (struct prefix_ipv4 *)ifc->address; + rib_lookup_and_pushup(p, ifp->vrf_id); + } +#endif + + return intf_addr_update_internal(ifp, ifc, DPLANE_OP_ADDR_INSTALL); +} + +/* + * Enqueue interface address remove/uninstall for the dataplane. + */ +enum zebra_dplane_result dplane_intf_addr_unset(const struct interface *ifp, + const struct connected *ifc) +{ + return intf_addr_update_internal(ifp, ifc, DPLANE_OP_ADDR_UNINSTALL); +} + +static enum zebra_dplane_result intf_addr_update_internal( + const struct interface *ifp, const struct connected *ifc, + enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *ctx = NULL; + struct zebra_ns *zns; + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + char addr_str[PREFIX_STRLEN]; + + prefix2str(ifc->address, addr_str, sizeof(addr_str)); + + zlog_debug("init intf ctx %s: idx %d, addr %u:%s", + dplane_op2str(op), ifp->ifindex, ifp->vrf_id, + addr_str); + } + + ctx = dplane_ctx_alloc(); + if (ctx == NULL) { + ret = ENOMEM; + goto done; + } + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + ctx->zd_vrf_id = ifp->vrf_id; + + zns = zebra_ns_lookup(ifp->vrf_id); + dplane_ctx_ns_init(ctx, zns, false); + + /* Init the interface-addr-specific area */ + memset(&ctx->u.intf, 0, sizeof(ctx->u.intf)); + + strncpy(ctx->u.intf.ifname, ifp->name, sizeof(ctx->u.intf.ifname)); + ctx->u.intf.ifindex = ifp->ifindex; + ctx->u.intf.prefix = *(ifc->address); + + if (if_is_broadcast(ifp)) + ctx->u.intf.flags |= DPLANE_INTF_BROADCAST; + + if (CONNECTED_PEER(ifc)) { + ctx->u.intf.dest_prefix = *(ifc->destination); + ctx->u.intf.flags |= + (DPLANE_INTF_CONNECTED | DPLANE_INTF_HAS_DEST); + } else if (ifc->destination) { + ctx->u.intf.dest_prefix = *(ifc->destination); + ctx->u.intf.flags |= DPLANE_INTF_HAS_DEST; + } + + if (CHECK_FLAG(ifc->flags, ZEBRA_IFA_SECONDARY)) + ctx->u.intf.flags |= DPLANE_INTF_SECONDARY; + + if (ifc->label) { + size_t len; + + ctx->u.intf.flags |= DPLANE_INTF_HAS_LABEL; + + /* Use embedded buffer if it's adequate; else allocate. */ + len = strlen(ifc->label); + + if (len < sizeof(ctx->u.intf.label_buf)) { + strncpy(ctx->u.intf.label_buf, ifc->label, + sizeof(ctx->u.intf.label_buf)); + ctx->u.intf.label = ctx->u.intf.label_buf; + } else { + ctx->u.intf.label = strdup(ifc->label); + } + } + + ret = dplane_route_enqueue(ctx); + +done: + + /* Increment counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_intf_addrs_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else { + /* Error counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_intf_addr_errors, + 1, memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +/* * Handler for 'show dplane' */ int dplane_show_helper(struct vty *vty, bool detailed) @@ -1877,6 +2147,35 @@ kernel_dplane_route_update(struct zebra_dplane_ctx *ctx) } /* + * Handler for kernel-facing interface address updates + */ +static enum zebra_dplane_result +kernel_dplane_address_update(struct zebra_dplane_ctx *ctx) +{ + enum zebra_dplane_result res; + + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + char dest_str[PREFIX_STRLEN]; + + prefix2str(dplane_ctx_get_intf_addr(ctx), dest_str, + sizeof(dest_str)); + + zlog_debug("Dplane intf %s, idx %u, addr %s", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_ifindex(ctx), dest_str); + } + + res = kernel_address_update_ctx(ctx); + + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_intf_addr_errors, + 1, memory_order_relaxed); + + return res; +} + +/* * Kernel provider callback */ static int kernel_dplane_process_func(struct zebra_dplane_provider *prov) @@ -1925,6 +2224,11 @@ static int kernel_dplane_process_func(struct zebra_dplane_provider *prov) res = kernel_dplane_pw_update(ctx); break; + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + res = kernel_dplane_address_update(ctx); + break; + /* Ignore system 'notifications' - the kernel already knows */ case DPLANE_OP_SYS_ROUTE_ADD: case DPLANE_OP_SYS_ROUTE_DELETE: diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 4e089bc66b..d45628fdd0 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -118,6 +118,10 @@ enum dplane_op_e { /* System route notification */ DPLANE_OP_SYS_ROUTE_ADD, DPLANE_OP_SYS_ROUTE_DELETE, + + /* Interface address update */ + DPLANE_OP_ADDR_INSTALL, + DPLANE_OP_ADDR_UNINSTALL, }; /* Enable system route notifications */ @@ -234,6 +238,22 @@ const union pw_protocol_fields *dplane_ctx_get_pw_proto( const struct nexthop_group *dplane_ctx_get_pw_nhg( const struct zebra_dplane_ctx *ctx); +/* Accessors for interface information */ +const char *dplane_ctx_get_ifname(const struct zebra_dplane_ctx *ctx); +ifindex_t dplane_ctx_get_ifindex(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_intf_metric(const struct zebra_dplane_ctx *ctx); +/* Is interface addr p2p? */ +bool dplane_ctx_intf_is_connected(const struct zebra_dplane_ctx *ctx); +bool dplane_ctx_intf_is_secondary(const struct zebra_dplane_ctx *ctx); +bool dplane_ctx_intf_is_broadcast(const struct zebra_dplane_ctx *ctx); +const struct prefix *dplane_ctx_get_intf_addr( + const struct zebra_dplane_ctx *ctx); +bool dplane_ctx_intf_has_dest(const struct zebra_dplane_ctx *ctx); +const struct prefix *dplane_ctx_get_intf_dest( + const struct zebra_dplane_ctx *ctx); +bool dplane_ctx_intf_has_label(const struct zebra_dplane_ctx *ctx); +const char *dplane_ctx_get_intf_label(const struct zebra_dplane_ctx *ctx); + /* Namespace info - esp. for netlink communication */ const struct zebra_dplane_info *dplane_ctx_get_ns( const struct zebra_dplane_ctx *ctx); @@ -275,6 +295,15 @@ enum zebra_dplane_result dplane_lsp_delete(zebra_lsp_t *lsp); enum zebra_dplane_result dplane_pw_install(struct zebra_pw *pw); enum zebra_dplane_result dplane_pw_uninstall(struct zebra_pw *pw); +/* + * Enqueue interface address changes for the dataplane. + */ +enum zebra_dplane_result dplane_intf_addr_set(const struct interface *ifp, + const struct connected *ifc); +enum zebra_dplane_result dplane_intf_addr_unset(const struct interface *ifp, + const struct connected *ifc); + + /* Retrieve the limit on the number of pending, unprocessed updates. */ uint32_t dplane_get_in_queue_limit(void); diff --git a/zebra/zebra_l2.c b/zebra/zebra_l2.c index f4b2fe4794..ca37dd748e 100644 --- a/zebra/zebra_l2.c +++ b/zebra/zebra_l2.c @@ -172,6 +172,7 @@ void zebra_l2_vxlanif_add_update(struct interface *ifp, { struct zebra_if *zif; struct in_addr old_vtep_ip; + uint16_t chgflags = 0; zif = ifp->info; assert(zif); @@ -183,11 +184,20 @@ void zebra_l2_vxlanif_add_update(struct interface *ifp, } old_vtep_ip = zif->l2info.vxl.vtep_ip; - if (IPV4_ADDR_SAME(&old_vtep_ip, &vxlan_info->vtep_ip)) - return; - zif->l2info.vxl.vtep_ip = vxlan_info->vtep_ip; - zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_LOCAL_IP_CHANGE); + if (!IPV4_ADDR_SAME(&old_vtep_ip, &vxlan_info->vtep_ip)) { + chgflags |= ZEBRA_VXLIF_LOCAL_IP_CHANGE; + zif->l2info.vxl.vtep_ip = vxlan_info->vtep_ip; + } + + if (!IPV4_ADDR_SAME(&zif->l2info.vxl.mcast_grp, + &vxlan_info->mcast_grp)) { + chgflags |= ZEBRA_VXLIF_MCAST_GRP_CHANGE; + zif->l2info.vxl.mcast_grp = vxlan_info->mcast_grp; + } + + if (chgflags) + zebra_vxlan_if_update(ifp, chgflags); } /* diff --git a/zebra/zebra_l2.h b/zebra/zebra_l2.h index 2e3e5b4a85..33aa2e3746 100644 --- a/zebra/zebra_l2.h +++ b/zebra/zebra_l2.h @@ -54,6 +54,7 @@ struct zebra_l2info_vxlan { vni_t vni; /* VNI */ struct in_addr vtep_ip; /* Local tunnel IP */ vlanid_t access_vlan; /* Access VLAN - for VLAN-aware bridge. */ + struct in_addr mcast_grp; }; struct zebra_l2info_bondslave { diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index e47499b065..0e1df1cc35 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -24,7 +24,6 @@ #include "if.h" #include "linklist.h" #include "log.h" -#include "log_int.h" #include "memory.h" #include "mpls.h" #include "nexthop.h" @@ -403,10 +402,13 @@ static void nexthop_set_resolved(afi_t afi, const struct nexthop *newhop, nexthop_add(&nexthop->resolved, resolved_hop); } -/* If force flag is not set, do not modify falgs at all for uninstall - the route from FIB. */ +/* + * Given a nexthop we need to properly recursively resolve + * the route. As such, do a table lookup to find and match + * if at all possible. Set the nexthop->ifindex as appropriate + */ static int nexthop_active(afi_t afi, struct route_entry *re, - struct nexthop *nexthop, bool set, + struct nexthop *nexthop, struct route_node *top) { struct prefix p; @@ -422,12 +424,10 @@ static int nexthop_active(afi_t afi, struct route_entry *re, || nexthop->type == NEXTHOP_TYPE_IPV6) nexthop->ifindex = 0; - if (set) { - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE); - nexthops_free(nexthop->resolved); - nexthop->resolved = NULL; - re->nexthop_mtu = 0; - } + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE); + nexthops_free(nexthop->resolved); + nexthop->resolved = NULL; + re->nexthop_mtu = 0; /* * If the kernel has sent us a route, then @@ -437,16 +437,6 @@ static int nexthop_active(afi_t afi, struct route_entry *re, re->type == ZEBRA_ROUTE_SYSTEM) return 1; - /* Skip nexthops that have been filtered out due to route-map */ - /* The nexthops are specific to this route and so the same */ - /* nexthop for a different route may not have this flag set */ - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FILTERED)) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug("\t%s: Nexthop Filtered", - __PRETTY_FUNCTION__); - return 0; - } - /* * Check to see if we should trust the passed in information * for UNNUMBERED interfaces as that we won't find the GW @@ -581,17 +571,14 @@ static int nexthop_active(afi_t afi, struct route_entry *re, NEXTHOP_FLAG_RECURSIVE)) continue; - if (set) { - SET_FLAG(nexthop->flags, - NEXTHOP_FLAG_RECURSIVE); - SET_FLAG(re->status, - ROUTE_ENTRY_NEXTHOPS_CHANGED); - nexthop_set_resolved(afi, newhop, - nexthop); - } + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE); + SET_FLAG(re->status, + ROUTE_ENTRY_NEXTHOPS_CHANGED); + nexthop_set_resolved(afi, newhop, nexthop); resolved = 1; } - if (resolved && set) + if (resolved) re->nexthop_mtu = match->mtu; if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug("\t%s: Recursion failed to find", @@ -607,15 +594,12 @@ static int nexthop_active(afi_t afi, struct route_entry *re, NEXTHOP_FLAG_RECURSIVE)) continue; - if (set) { - SET_FLAG(nexthop->flags, - NEXTHOP_FLAG_RECURSIVE); - nexthop_set_resolved(afi, newhop, - nexthop); - } + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE); + nexthop_set_resolved(afi, newhop, nexthop); resolved = 1; } - if (resolved && set) + if (resolved) re->nexthop_mtu = match->mtu; if (!resolved && IS_ZEBRA_DEBUG_RIB_DETAILED) @@ -819,17 +803,15 @@ struct route_entry *rib_lookup_ipv4(struct prefix_ipv4 *p, vrf_id_t vrf_id) /* This function verifies reachability of one given nexthop, which can be * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored - * in nexthop->flags field. If the 4th parameter, 'set', is non-zero, - * nexthop->ifindex will be updated appropriately as well. - * An existing route map can turn (otherwise active) nexthop into inactive, but - * not vice versa. + * in nexthop->flags field. The nexthop->ifindex will be updated + * appropriately as well. An existing route map can turn + * (otherwise active) nexthop into inactive, but not vice versa. * * The return value is the final value of 'ACTIVE' flag. */ - static unsigned nexthop_active_check(struct route_node *rn, struct route_entry *re, - struct nexthop *nexthop, bool set) + struct nexthop *nexthop) { struct interface *ifp; route_map_result_t ret = RMAP_MATCH; @@ -857,14 +839,14 @@ static unsigned nexthop_active_check(struct route_node *rn, case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: family = AFI_IP; - if (nexthop_active(AFI_IP, re, nexthop, set, rn)) + if (nexthop_active(AFI_IP, re, nexthop, rn)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); break; case NEXTHOP_TYPE_IPV6: family = AFI_IP6; - if (nexthop_active(AFI_IP6, re, nexthop, set, rn)) + if (nexthop_active(AFI_IP6, re, nexthop, rn)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); @@ -881,7 +863,7 @@ static unsigned nexthop_active_check(struct route_node *rn, else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); } else { - if (nexthop_active(AFI_IP6, re, nexthop, set, rn)) + if (nexthop_active(AFI_IP6, re, nexthop, rn)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); @@ -946,25 +928,21 @@ static unsigned nexthop_active_check(struct route_node *rn, return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); } -/* Iterate over all nexthops of the given RIB entry and refresh their +/* + * Iterate over all nexthops of the given RIB entry and refresh their * ACTIVE flag. re->nexthop_active_num is updated accordingly. If any * nexthop is found to toggle the ACTIVE flag, the whole re structure - * is flagged with ROUTE_ENTRY_CHANGED. The 4th 'set' argument is - * transparently passed to nexthop_active_check(). + * is flagged with ROUTE_ENTRY_CHANGED. * * Return value is the new number of active nexthops. */ - -static int nexthop_active_update(struct route_node *rn, struct route_entry *re, - bool set) +static int nexthop_active_update(struct route_node *rn, struct route_entry *re) { struct nexthop *nexthop; union g_addr prev_src; - unsigned int prev_active, new_active, old_num_nh; + unsigned int prev_active, new_active; ifindex_t prev_index; - old_num_nh = re->nexthop_active_num; - re->nexthop_active_num = 0; UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); @@ -980,7 +958,7 @@ static int nexthop_active_update(struct route_node *rn, struct route_entry *re, * a multipath perpsective should not be a data plane * decision point. */ - new_active = nexthop_active_check(rn, re, nexthop, set); + new_active = nexthop_active_check(rn, re, nexthop); if (new_active && re->nexthop_active_num >= multipath_num) { UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); new_active = 0; @@ -996,19 +974,13 @@ static int nexthop_active_update(struct route_node *rn, struct route_entry *re, || ((nexthop->type >= NEXTHOP_TYPE_IPV6 && nexthop->type < NEXTHOP_TYPE_BLACKHOLE) && !(IPV6_ADDR_SAME(&prev_src.ipv6, - &nexthop->rmap_src.ipv6)))) { + &nexthop->rmap_src.ipv6))) + || CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED)) { SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); SET_FLAG(re->status, ROUTE_ENTRY_NEXTHOPS_CHANGED); } } - if (old_num_nh != re->nexthop_active_num) - SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); - - if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) { - SET_FLAG(re->status, ROUTE_ENTRY_NEXTHOPS_CHANGED); - } - return re->nexthop_active_num; } @@ -1354,7 +1326,7 @@ static void rib_process_add_fib(struct zebra_vrf *zvrf, struct route_node *rn, /* Update real nexthop. This may actually determine if nexthop is active * or not. */ - if (!nexthop_active_update(rn, new, true)) { + if (!nexthop_group_active_nexthop_num(&new->ng)) { UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED); return; } @@ -1401,8 +1373,7 @@ static void rib_process_del_fib(struct zebra_vrf *zvrf, struct route_node *rn, * down, causing the kernel to delete routes without sending DELROUTE * notifications */ - if (!nexthop_active_update(rn, old, true) && - (RIB_KERNEL_ROUTE(old))) + if (RIB_KERNEL_ROUTE(old)) SET_FLAG(old->status, ROUTE_ENTRY_REMOVED); else UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED); @@ -1424,7 +1395,7 @@ static void rib_process_update_fib(struct zebra_vrf *zvrf, /* Update the nexthop; we could determine here that nexthop is * inactive. */ - if (nexthop_active_update(rn, new, true)) + if (nexthop_group_active_nexthop_num(&new->ng)) nh_active = 1; /* If nexthop is active, install the selected route, if @@ -1509,11 +1480,8 @@ static void rib_process_update_fib(struct zebra_vrf *zvrf, } /* Update prior route. */ - if (new != old) { - /* Set real nexthop. */ - nexthop_active_update(rn, old, true); + if (new != old) UNSET_FLAG(old->status, ROUTE_ENTRY_CHANGED); - } /* Clear changed flag. */ UNSET_FLAG(new->status, ROUTE_ENTRY_CHANGED); @@ -1643,38 +1611,30 @@ static void rib_process(struct route_node *rn) /* Skip unreachable nexthop. */ /* This first call to nexthop_active_update is merely to - * determine if - * there's any change to nexthops associated with this RIB - * entry. Now, - * rib_process() can be invoked due to an external event such as - * link - * down or due to next-hop-tracking evaluation. In the latter - * case, + * determine if there's any change to nexthops associated + * with this RIB entry. Now, rib_process() can be invoked due + * to an external event such as link down or due to + * next-hop-tracking evaluation. In the latter case, * a decision has already been made that the NHs have changed. - * So, no - * need to invoke a potentially expensive call again. Further, - * since - * the change might be in a recursive NH which is not caught in - * the nexthop_active_update() code. Thus, we might miss changes - * to - * recursive NHs. + * So, no need to invoke a potentially expensive call again. + * Further, since the change might be in a recursive NH which + * is not caught in the nexthop_active_update() code. Thus, we + * might miss changes to recursive NHs. */ - if (!CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED) - && !nexthop_active_update(rn, re, false)) { + if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED) + && !nexthop_active_update(rn, re)) { if (re->type == ZEBRA_ROUTE_TABLE) { /* XXX: HERE BE DRAGONS!!!!! * In all honesty, I have not yet figured out - * what this part - * does or why the ROUTE_ENTRY_CHANGED test - * above is correct + * what this part does or why the + * ROUTE_ENTRY_CHANGED test above is correct * or why we need to delete a route here, and - * also not whether - * this concerns both selected and fib route, or - * only selected - * or only fib */ - /* This entry was denied by the 'ip protocol - * table' route-map, we - * need to delete it */ + * also not whether this concerns both selected + * and fib route, or only selected + * or only fib + * + * This entry was denied by the 'ip protocol + * table' route-map, we need to delete it */ if (re != old_selected) { if (IS_ZEBRA_DEBUG_RIB) zlog_debug( @@ -1751,10 +1711,8 @@ static void rib_process(struct route_node *rn) /* Update SELECTED entry */ if (old_selected != new_selected || selected_changed) { - if (new_selected && new_selected != new_fib) { - nexthop_active_update(rn, new_selected, true); + if (new_selected && new_selected != new_fib) UNSET_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED); - } if (new_selected) SET_FLAG(new_selected->flags, ZEBRA_FLAG_SELECTED); @@ -2613,8 +2571,8 @@ void _route_entry_dump(const char *func, union prefixconstptr pp, INET6_ADDRSTRLEN); break; } - zlog_debug("%s: %s %s[%u] vrf %s(%u) with flags %s%s%s", func, - (nexthop->rparent ? " NH" : "NH"), straddr, + zlog_debug("%s: %s %s[%u] vrf %s(%u) with flags %s%s%s%s%s%s", + func, (nexthop->rparent ? " NH" : "NH"), straddr, nexthop->ifindex, vrf ? vrf->name : "Unknown", nexthop->vrf_id, (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE) @@ -2624,7 +2582,16 @@ void _route_entry_dump(const char *func, union prefixconstptr pp, ? "FIB " : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE) - ? "RECURSIVE" + ? "RECURSIVE " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK) + ? "ONLINK " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_MATCHED) + ? "MATCHED " + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE) + ? "DUPLICATE " : "")); } zlog_debug("%s: dump complete", func); @@ -2814,6 +2781,8 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, if (IS_ZEBRA_DEBUG_RIB_DETAILED) route_entry_dump(p, src_p, re); } + + SET_FLAG(re->status, ROUTE_ENTRY_CHANGED); rib_addnode(rn, re, 1); ret = 1; diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c index 040043146a..220a8006d0 100644 --- a/zebra/zebra_rnh.c +++ b/zebra/zebra_rnh.c @@ -377,6 +377,20 @@ void zebra_deregister_rnh_pseudowire(vrf_id_t vrf_id, struct zebra_pw *pw) zebra_delete_rnh(rnh, RNH_NEXTHOP_TYPE); } +/* Clear the NEXTHOP_FLAG_RNH_FILTERED flags on all nexthops + */ +static void zebra_rnh_clear_nexthop_rnh_filters(struct route_entry *re) +{ + struct nexthop *nexthop; + + if (re) { + for (nexthop = re->ng.nexthop; nexthop; + nexthop = nexthop->next) { + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED); + } + } +} + /* Apply the NHT route-map for a client to the route (and nexthops) * resolving a NH. */ @@ -393,11 +407,11 @@ static int zebra_rnh_apply_nht_rmap(afi_t afi, struct zebra_vrf *zvrf, nexthop = nexthop->next) { ret = zebra_nht_route_map_check( afi, proto, &prn->p, zvrf, re, nexthop); - if (ret != RMAP_DENYMATCH) { - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + if (ret != RMAP_DENYMATCH) at_least_one++; /* at least one valid NH */ - } else { - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); + else { + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_RNH_FILTERED); } } } @@ -546,6 +560,7 @@ static void zebra_rnh_notify_protocol_clients(struct zebra_vrf *zvrf, afi_t afi, * this * nexthop to see if it is filtered or not. */ + zebra_rnh_clear_nexthop_rnh_filters(re); num_resolving_nh = zebra_rnh_apply_nht_rmap( afi, zvrf, prn, re, client->proto); if (num_resolving_nh) @@ -572,6 +587,9 @@ static void zebra_rnh_notify_protocol_clients(struct zebra_vrf *zvrf, afi_t afi, send_client(rnh, client, RNH_NEXTHOP_TYPE, zvrf->vrf->vrf_id); } + + if (re) + zebra_rnh_clear_nexthop_rnh_filters(re); } static void zebra_rnh_process_pbr_tables(afi_t afi, struct route_node *nrn, @@ -631,7 +649,10 @@ static bool rnh_nexthop_valid(const struct route_entry *re, const struct nexthop *nh) { return (CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) - && CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)); + && CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE) + && !CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE) + && !CHECK_FLAG(nh->flags, NEXTHOP_FLAG_DUPLICATE) + && !CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RNH_FILTERED)); } /* diff --git a/zebra/zebra_vrf.h b/zebra/zebra_vrf.h index 7113c160ac..c7a64d300a 100644 --- a/zebra/zebra_vrf.h +++ b/zebra/zebra_vrf.h @@ -130,6 +130,9 @@ struct zebra_vrf { /* l3-vni info */ vni_t l3vni; + /* pim mroutes installed for vxlan flooding */ + struct hash *vxlan_sg_table; + bool dup_addr_detect; int dad_time; diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index 3a8426e772..22c489e607 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -58,8 +58,19 @@ DEFINE_MTYPE_STATIC(ZEBRA, ZL3VNI, "L3 VNI hash"); DEFINE_MTYPE_STATIC(ZEBRA, ZVNI_VTEP, "VNI remote VTEP"); DEFINE_MTYPE_STATIC(ZEBRA, MAC, "VNI MAC"); DEFINE_MTYPE_STATIC(ZEBRA, NEIGH, "VNI Neighbor"); +DEFINE_MTYPE_STATIC(ZEBRA, ZVXLAN_SG, "zebra VxLAN multicast group"); /* definitions */ +/* PMSI strings. */ +#define VXLAN_FLOOD_STR_NO_INFO "-" +#define VXLAN_FLOOD_STR_DEFAULT VXLAN_FLOOD_STR_NO_INFO +static const struct message zvtep_flood_str[] = { + {VXLAN_FLOOD_DISABLED, VXLAN_FLOOD_STR_NO_INFO}, + {VXLAN_FLOOD_PIM_SM, "PIM-SM"}, + {VXLAN_FLOOD_HEAD_END_REPL, "HER"}, + {0} +}; + /* static function declarations */ static int ip_prefix_send_to_client(vrf_id_t vrf_id, struct prefix *p, @@ -167,10 +178,11 @@ static int zvni_send_del_to_client(vni_t vni); static void zvni_build_hash_table(void); static int zvni_vtep_match(struct in_addr *vtep_ip, zebra_vtep_t *zvtep); static zebra_vtep_t *zvni_vtep_find(zebra_vni_t *zvni, struct in_addr *vtep_ip); -static zebra_vtep_t *zvni_vtep_add(zebra_vni_t *zvni, struct in_addr *vtep_ip); +static zebra_vtep_t *zvni_vtep_add(zebra_vni_t *zvni, struct in_addr *vtep_ip, + int flood_control); static int zvni_vtep_del(zebra_vni_t *zvni, zebra_vtep_t *zvtep); static int zvni_vtep_del_all(zebra_vni_t *zvni, int uninstall); -static int zvni_vtep_install(zebra_vni_t *zvni, struct in_addr *vtep_ip); +static int zvni_vtep_install(zebra_vni_t *zvni, zebra_vtep_t *zvtep); static int zvni_vtep_uninstall(zebra_vni_t *zvni, struct in_addr *vtep_ip); static int zvni_del_macip_for_intf(struct interface *ifp, zebra_vni_t *zvni); static int zvni_add_macip_for_intf(struct interface *ifp, zebra_vni_t *zvni); @@ -201,6 +213,17 @@ static void zebra_vxlan_dup_addr_detect_for_mac(struct zebra_vrf *zvrf, bool do_dad, bool *is_dup_detect, bool is_local); +static unsigned int zebra_vxlan_sg_hash_key_make(void *p); +static bool zebra_vxlan_sg_hash_eq(const void *p1, const void *p2); +static void zebra_vxlan_sg_do_deref(struct zebra_vrf *zvrf, + struct in_addr sip, struct in_addr mcast_grp); +static zebra_vxlan_sg_t *zebra_vxlan_sg_do_ref(struct zebra_vrf *vrf, + struct in_addr sip, struct in_addr mcast_grp); +static void zebra_vxlan_sg_deref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp); +static void zebra_vxlan_sg_ref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp); +static void zebra_vxlan_sg_cleanup(struct hash_backet *backet, void *arg); /* Private functions */ static int host_rb_entry_compare(const struct host_rb_entry *hle1, @@ -1858,12 +1881,16 @@ static void zvni_print(zebra_vni_t *zvni, void **ctxt) vty_out(vty, " VxLAN ifIndex: %u\n", zvni->vxlan_if->ifindex); vty_out(vty, " Local VTEP IP: %s\n", inet_ntoa(zvni->local_vtep_ip)); + vty_out(vty, " Mcast group: %s\n", + inet_ntoa(zvni->mcast_grp)); } else { json_object_string_add(json, "vxlanInterface", zvni->vxlan_if->name); json_object_int_add(json, "ifindex", zvni->vxlan_if->ifindex); json_object_string_add(json, "vtepIp", inet_ntoa(zvni->local_vtep_ip)); + json_object_string_add(json, "mcastGroup", + inet_ntoa(zvni->mcast_grp)); json_object_string_add(json, "advertiseGatewayMacip", zvni->advertise_gw_macip ? "Yes" : "No"); json_object_int_add(json, "numMacs", num_macs); @@ -1878,14 +1905,19 @@ static void zvni_print(zebra_vni_t *zvni, void **ctxt) else json_vtep_list = json_object_new_array(); for (zvtep = zvni->vteps; zvtep; zvtep = zvtep->next) { - if (json == NULL) - vty_out(vty, " %s\n", - inet_ntoa(zvtep->vtep_ip)); - else { + const char *flood_str = lookup_msg(zvtep_flood_str, + zvtep->flood_control, + VXLAN_FLOOD_STR_DEFAULT); + + if (json == NULL) { + vty_out(vty, " %s flood: %s\n", + inet_ntoa(zvtep->vtep_ip), + flood_str); + } else { json_ip_str = json_object_new_string( - inet_ntoa(zvtep->vtep_ip)); + inet_ntoa(zvtep->vtep_ip)); json_object_array_add(json_vtep_list, - json_ip_str); + json_ip_str); } } if (json) @@ -3882,6 +3914,9 @@ static int zvni_del(zebra_vni_t *zvni) zvni->vxlan_if = NULL; + /* Remove references to the BUM mcast grp */ + zebra_vxlan_sg_deref(zvni->local_vtep_ip, zvni->mcast_grp); + /* Free the neighbor hash table. */ hash_free(zvni->neigh_table); zvni->neigh_table = NULL; @@ -3916,6 +3951,7 @@ static int zvni_send_add_to_client(zebra_vni_t *zvni) stream_putl(s, zvni->vni); stream_put_in_addr(s, &zvni->local_vtep_ip); stream_put(s, &zvni->vrf_id, sizeof(vrf_id_t)); /* tenant vrf */ + stream_put_in_addr(s, &zvni->mcast_grp); /* Write packet size. */ stream_putw_at(s, 0, stream_get_endp(s)); @@ -4038,7 +4074,15 @@ static void zvni_build_hash_table(void) return; } - zvni->local_vtep_ip = vxl->vtep_ip; + if (zvni->local_vtep_ip.s_addr != vxl->vtep_ip.s_addr || + zvni->mcast_grp.s_addr != vxl->mcast_grp.s_addr) { + zebra_vxlan_sg_deref(zvni->local_vtep_ip, + zvni->mcast_grp); + zebra_vxlan_sg_ref(vxl->vtep_ip, + vxl->mcast_grp); + zvni->local_vtep_ip = vxl->vtep_ip; + zvni->mcast_grp = vxl->mcast_grp; + } zvni->vxlan_if = ifp; vlan_if = zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if); @@ -4086,13 +4130,16 @@ static zebra_vtep_t *zvni_vtep_find(zebra_vni_t *zvni, struct in_addr *vtep_ip) /* * Add remote VTEP to VNI hash table. */ -static zebra_vtep_t *zvni_vtep_add(zebra_vni_t *zvni, struct in_addr *vtep_ip) +static zebra_vtep_t *zvni_vtep_add(zebra_vni_t *zvni, struct in_addr *vtep_ip, + int flood_control) + { zebra_vtep_t *zvtep; zvtep = XCALLOC(MTYPE_ZVNI_VTEP, sizeof(zebra_vtep_t)); zvtep->vtep_ip = *vtep_ip; + zvtep->flood_control = flood_control; if (zvni->vteps) zvni->vteps->prev = zvtep; @@ -4142,12 +4189,15 @@ static int zvni_vtep_del_all(zebra_vni_t *zvni, int uninstall) } /* - * Install remote VTEP into the kernel. + * Install remote VTEP into the kernel if the remote VTEP has asked + * for head-end-replication. */ -static int zvni_vtep_install(zebra_vni_t *zvni, struct in_addr *vtep_ip) +static int zvni_vtep_install(zebra_vni_t *zvni, zebra_vtep_t *zvtep) { - if (is_vxlan_flooding_head_end()) - return kernel_add_vtep(zvni->vni, zvni->vxlan_if, vtep_ip); + if (is_vxlan_flooding_head_end() && + (zvtep->flood_control == VXLAN_FLOOD_HEAD_END_REPL)) + return kernel_add_vtep(zvni->vni, zvni->vxlan_if, + &zvtep->vtep_ip); return 0; } @@ -4181,7 +4231,7 @@ static void zvni_handle_flooding_remote_vteps(struct hash_bucket *bucket, for (zvtep = zvni->vteps; zvtep; zvtep = zvtep->next) { if (is_vxlan_flooding_head_end()) - zvni_vtep_install(zvni, &zvtep->vtep_ip); + zvni_vtep_install(zvni, zvtep); else zvni_vtep_uninstall(zvni, &zvtep->vtep_ip); } @@ -5159,7 +5209,8 @@ static void process_remote_macip_add(vni_t vni, */ zvtep = zvni_vtep_find(zvni, &vtep_ip); if (!zvtep) { - if (zvni_vtep_add(zvni, &vtep_ip) == NULL) { + zvtep = zvni_vtep_add(zvni, &vtep_ip, VXLAN_FLOOD_DISABLED); + if (!zvtep) { flog_err( EC_ZEBRA_VTEP_ADD_FAILED, "Failed to add remote VTEP, VNI %u zvni %p upon remote MACIP ADD", @@ -5167,7 +5218,7 @@ static void process_remote_macip_add(vni_t vni, return; } - zvni_vtep_install(zvni, &vtep_ip); + zvni_vtep_install(zvni, zvtep); } sticky = !!CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); @@ -7874,6 +7925,8 @@ void zebra_vxlan_remote_vtep_add(ZAPI_HANDLER_ARGS) zebra_vni_t *zvni; struct interface *ifp; struct zebra_if *zif; + int flood_control; + zebra_vtep_t *zvtep; if (!is_evpn_enabled()) { zlog_debug( @@ -7895,12 +7948,13 @@ void zebra_vxlan_remote_vtep_add(ZAPI_HANDLER_ARGS) STREAM_GETL(s, vni); l += 4; STREAM_GET(&vtep_ip.s_addr, s, IPV4_MAX_BYTELEN); + STREAM_GETL(s, flood_control); l += IPV4_MAX_BYTELEN; if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug("Recv VTEP_ADD %s VNI %u from %s", - inet_ntoa(vtep_ip), vni, - zebra_route_string(client->proto)); + zlog_debug("Recv VTEP_ADD %s VNI %u flood %d from %s", + inet_ntoa(vtep_ip), vni, flood_control, + zebra_route_string(client->proto)); /* Locate VNI hash entry - expected to exist. */ zvni = zvni_lookup(vni); @@ -7927,19 +7981,31 @@ void zebra_vxlan_remote_vtep_add(ZAPI_HANDLER_ARGS) if (!if_is_operative(ifp) || !zif->brslave_info.br_if) continue; - /* If the remote VTEP already exists, - there's nothing more to do. */ - if (zvni_vtep_find(zvni, &vtep_ip)) - continue; - - if (zvni_vtep_add(zvni, &vtep_ip) == NULL) { - flog_err(EC_ZEBRA_VTEP_ADD_FAILED, - "Failed to add remote VTEP, VNI %u zvni %p", - vni, zvni); - continue; + zvtep = zvni_vtep_find(zvni, &vtep_ip); + if (zvtep) { + /* If the remote VTEP already exists check if + * the flood mode has changed + */ + if (zvtep->flood_control != flood_control) { + if (zvtep->flood_control + == VXLAN_FLOOD_DISABLED) + /* old mode was head-end-replication but + * is no longer; get rid of the HER fdb + * entry installed before + */ + zvni_vtep_uninstall(zvni, &vtep_ip); + zvtep->flood_control = flood_control; + zvni_vtep_install(zvni, zvtep); + } + } else { + zvtep = zvni_vtep_add(zvni, &vtep_ip, flood_control); + if (zvtep) + zvni_vtep_install(zvni, zvtep); + else + flog_err(EC_ZEBRA_VTEP_ADD_FAILED, + "Failed to add remote VTEP, VNI %u zvni %p", + vni, zvni); } - - zvni_vtep_install(zvni, &vtep_ip); } stream_failure: @@ -8476,7 +8542,14 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags) zvni_mac_del_all(zvni, 0, 1, DEL_LOCAL_MAC); } - zvni->local_vtep_ip = vxl->vtep_ip; + if (zvni->local_vtep_ip.s_addr != vxl->vtep_ip.s_addr || + zvni->mcast_grp.s_addr != vxl->mcast_grp.s_addr) { + zebra_vxlan_sg_deref(zvni->local_vtep_ip, + zvni->mcast_grp); + zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp); + zvni->local_vtep_ip = vxl->vtep_ip; + zvni->mcast_grp = vxl->mcast_grp; + } zvni->vxlan_if = ifp; /* Take further actions needed. @@ -8488,7 +8561,9 @@ int zebra_vxlan_if_update(struct interface *ifp, uint16_t chgflags) /* Inform BGP, if there is a change of interest. */ if (chgflags - & (ZEBRA_VXLIF_MASTER_CHANGE | ZEBRA_VXLIF_LOCAL_IP_CHANGE)) + & (ZEBRA_VXLIF_MASTER_CHANGE | + ZEBRA_VXLIF_LOCAL_IP_CHANGE | + ZEBRA_VXLIF_MCAST_GRP_CHANGE)) zvni_send_add_to_client(zvni); /* If there is a valid new master or a VLAN mapping change, @@ -8578,7 +8653,14 @@ int zebra_vxlan_if_add(struct interface *ifp) } } - zvni->local_vtep_ip = vxl->vtep_ip; + if (zvni->local_vtep_ip.s_addr != vxl->vtep_ip.s_addr || + zvni->mcast_grp.s_addr != vxl->mcast_grp.s_addr) { + zebra_vxlan_sg_deref(zvni->local_vtep_ip, + zvni->mcast_grp); + zebra_vxlan_sg_ref(vxl->vtep_ip, vxl->mcast_grp); + zvni->local_vtep_ip = vxl->vtep_ip; + zvni->mcast_grp = vxl->mcast_grp; + } zvni->vxlan_if = ifp; vlan_if = zvni_map_to_svi(vxl->access_vlan, zif->brslave_info.br_if); @@ -8589,15 +8671,24 @@ int zebra_vxlan_if_add(struct interface *ifp) listnode_add_sort(zl3vni->l2vnis, zvni); } - if (IS_ZEBRA_DEBUG_VXLAN) + if (IS_ZEBRA_DEBUG_VXLAN) { + char addr_buf1[INET_ADDRSTRLEN]; + char addr_buf2[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, &vxl->vtep_ip, + addr_buf1, INET_ADDRSTRLEN); + inet_ntop(AF_INET, &vxl->mcast_grp, + addr_buf2, INET_ADDRSTRLEN); + zlog_debug( - "Add L2-VNI %u VRF %s intf %s(%u) VLAN %u local IP %s master %u", + "Add L2-VNI %u VRF %s intf %s(%u) VLAN %u local IP %s mcast_grp %s master %u", vni, vlan_if ? vrf_id_to_name(vlan_if->vrf_id) : VRF_DEFAULT_NAME, ifp->name, ifp->ifindex, vxl->access_vlan, - inet_ntoa(vxl->vtep_ip), + addr_buf1, addr_buf2, zif->brslave_info.bridge_ifindex); + } /* If down or not mapped to a bridge, we're done. */ if (!if_is_operative(ifp) || !zif->brslave_info.br_if) @@ -9144,6 +9235,8 @@ void zebra_vxlan_init_tables(struct zebra_vrf *zvrf) return; zvrf->vni_table = hash_create(vni_hash_keymake, vni_hash_cmp, "Zebra VRF VNI Table"); + zvrf->vxlan_sg_table = hash_create(zebra_vxlan_sg_hash_key_make, + zebra_vxlan_sg_hash_eq, "Zebra VxLAN SG Table"); } /* Cleanup VNI info, but don't free the table. */ @@ -9152,6 +9245,7 @@ void zebra_vxlan_cleanup_tables(struct zebra_vrf *zvrf) if (!zvrf) return; hash_iterate(zvrf->vni_table, zvni_cleanup_all, zvrf); + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL); } /* Close all VNI handling */ @@ -9311,3 +9405,221 @@ static int zebra_vxlan_dad_mac_auto_recovery_exp(struct thread *t) return 0; } + +/************************** vxlan SG cache management ************************/ +/* Inform PIM about the mcast group */ +static int zebra_vxlan_sg_send(struct prefix_sg *sg, + char *sg_str, uint16_t cmd) +{ + struct zserv *client = NULL; + struct stream *s = NULL; + + client = zserv_find_client(ZEBRA_ROUTE_PIM, 0); + if (!client) + return 0; + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, cmd, VRF_DEFAULT); + stream_putl(s, IPV4_MAX_BYTELEN); + stream_put(s, &sg->src.s_addr, IPV4_MAX_BYTELEN); + stream_put(s, &sg->grp.s_addr, IPV4_MAX_BYTELEN); + + /* Write packet size. */ + stream_putw_at(s, 0, stream_get_endp(s)); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Send %s %s to %s", + (cmd == ZEBRA_VXLAN_SG_ADD) ? "add" : "del", sg_str, + zebra_route_string(client->proto)); + + if (cmd == ZEBRA_VXLAN_SG_ADD) + client->vxlan_sg_add_cnt++; + else + client->vxlan_sg_del_cnt++; + + return zserv_send_message(client, s); +} + +static unsigned int zebra_vxlan_sg_hash_key_make(void *p) +{ + zebra_vxlan_sg_t *vxlan_sg = p; + + return (jhash_2words(vxlan_sg->sg.src.s_addr, + vxlan_sg->sg.grp.s_addr, 0)); +} + +static bool zebra_vxlan_sg_hash_eq(const void *p1, const void *p2) +{ + const zebra_vxlan_sg_t *sg1 = p1; + const zebra_vxlan_sg_t *sg2 = p2; + + return ((sg1->sg.src.s_addr == sg2->sg.src.s_addr) + && (sg1->sg.grp.s_addr == sg2->sg.grp.s_addr)); +} + +static zebra_vxlan_sg_t *zebra_vxlan_sg_new(struct zebra_vrf *zvrf, + struct prefix_sg *sg) +{ + zebra_vxlan_sg_t *vxlan_sg; + + vxlan_sg = XCALLOC(MTYPE_ZVXLAN_SG, sizeof(*vxlan_sg)); + + vxlan_sg->zvrf = zvrf; + vxlan_sg->sg = *sg; + prefix_sg2str(sg, vxlan_sg->sg_str); + + vxlan_sg = hash_get(zvrf->vxlan_sg_table, vxlan_sg, hash_alloc_intern); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("vxlan SG %s created", vxlan_sg->sg_str); + + return vxlan_sg; +} + +static zebra_vxlan_sg_t *zebra_vxlan_sg_find(struct zebra_vrf *zvrf, + struct prefix_sg *sg) +{ + zebra_vxlan_sg_t lookup; + + lookup.sg = *sg; + return hash_lookup(zvrf->vxlan_sg_table, &lookup); +} + +static zebra_vxlan_sg_t *zebra_vxlan_sg_add(struct zebra_vrf *zvrf, + struct prefix_sg *sg) +{ + zebra_vxlan_sg_t *vxlan_sg; + zebra_vxlan_sg_t *parent = NULL; + struct in_addr sip; + + vxlan_sg = zebra_vxlan_sg_find(zvrf, sg); + if (vxlan_sg) + return vxlan_sg; + + /* create a *G entry for every BUM group implicitly - + * 1. The SG entry is used by pimd to setup the vxlan-origination-mroute + * 2. the XG entry is used by pimd to setup the + * vxlan-termination-mroute + */ + if (sg->src.s_addr) { + memset(&sip, 0, sizeof(sip)); + parent = zebra_vxlan_sg_do_ref(zvrf, sip, sg->grp); + if (!parent) + return NULL; + } + + vxlan_sg = zebra_vxlan_sg_new(zvrf, sg); + if (!vxlan_sg) { + if (parent) + zebra_vxlan_sg_do_deref(zvrf, sip, sg->grp); + return vxlan_sg; + } + + zebra_vxlan_sg_send(sg, vxlan_sg->sg_str, ZEBRA_VXLAN_SG_ADD); + + return vxlan_sg; +} + +static void zebra_vxlan_sg_del(zebra_vxlan_sg_t *vxlan_sg) +{ + struct in_addr sip; + struct zebra_vrf *zvrf; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + + /* On SG entry deletion remove the reference to its parent XG + * entry + */ + if (vxlan_sg->sg.src.s_addr) { + memset(&sip, 0, sizeof(sip)); + zebra_vxlan_sg_do_deref(zvrf, sip, vxlan_sg->sg.grp); + } + + zebra_vxlan_sg_send(&vxlan_sg->sg, vxlan_sg->sg_str, + ZEBRA_VXLAN_SG_DEL); + + hash_release(vxlan_sg->zvrf->vxlan_sg_table, vxlan_sg); + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("VXLAN SG %s deleted", vxlan_sg->sg_str); + + XFREE(MTYPE_ZVXLAN_SG, vxlan_sg); +} + +static void zebra_vxlan_sg_do_deref(struct zebra_vrf *zvrf, + struct in_addr sip, struct in_addr mcast_grp) +{ + zebra_vxlan_sg_t *vxlan_sg; + struct prefix_sg sg; + + sg.family = AF_INET; + sg.prefixlen = IPV4_MAX_BYTELEN; + sg.src = sip; + sg.grp = mcast_grp; + vxlan_sg = zebra_vxlan_sg_find(zvrf, &sg); + if (!vxlan_sg) + return; + + if (vxlan_sg->ref_cnt) + --vxlan_sg->ref_cnt; + + if (!vxlan_sg->ref_cnt) + zebra_vxlan_sg_del(vxlan_sg); +} + +static zebra_vxlan_sg_t *zebra_vxlan_sg_do_ref(struct zebra_vrf *zvrf, + struct in_addr sip, struct in_addr mcast_grp) +{ + zebra_vxlan_sg_t *vxlan_sg; + struct prefix_sg sg; + + sg.family = AF_INET; + sg.prefixlen = IPV4_MAX_BYTELEN; + sg.src = sip; + sg.grp = mcast_grp; + vxlan_sg = zebra_vxlan_sg_add(zvrf, &sg); + if (vxlan_sg) + ++vxlan_sg->ref_cnt; + + return vxlan_sg; +} + +static void zebra_vxlan_sg_deref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp) +{ + struct zebra_vrf *zvrf; + + if (!local_vtep_ip.s_addr || !mcast_grp.s_addr) + return; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + + zebra_vxlan_sg_do_deref(zvrf, local_vtep_ip, mcast_grp); +} + +static void zebra_vxlan_sg_ref(struct in_addr local_vtep_ip, + struct in_addr mcast_grp) +{ + struct zebra_vrf *zvrf; + + if (!local_vtep_ip.s_addr || !mcast_grp.s_addr) + return; + + zvrf = vrf_info_lookup(VRF_DEFAULT); + if (!zvrf) + return; + zebra_vxlan_sg_do_ref(zvrf, local_vtep_ip, mcast_grp); +} + +static void zebra_vxlan_sg_cleanup(struct hash_backet *backet, void *arg) +{ + zebra_vxlan_sg_t *vxlan_sg = (zebra_vxlan_sg_t *)backet->data; + + zebra_vxlan_sg_del(vxlan_sg); +} diff --git a/zebra/zebra_vxlan.h b/zebra/zebra_vxlan.h index 2ff92970d7..f752bdd690 100644 --- a/zebra/zebra_vxlan.h +++ b/zebra/zebra_vxlan.h @@ -60,9 +60,11 @@ is_vxlan_flooding_head_end(void) } /* VxLAN interface change flags of interest. */ -#define ZEBRA_VXLIF_LOCAL_IP_CHANGE 0x1 -#define ZEBRA_VXLIF_MASTER_CHANGE 0x2 -#define ZEBRA_VXLIF_VLAN_CHANGE 0x4 +#define ZEBRA_VXLIF_LOCAL_IP_CHANGE (1 << 0) +#define ZEBRA_VXLIF_MASTER_CHANGE (1 << 1) +#define ZEBRA_VXLIF_VLAN_CHANGE (1 << 2) +#define ZEBRA_VXLIF_MCAST_GRP_CHANGE (1 << 3) + #define VNI_STR_LEN 32 diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index 5081c08d19..9f945442bb 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -52,6 +52,10 @@ struct zebra_vtep_t_ { /* Remote IP. */ /* NOTE: Can only be IPv4 right now. */ struct in_addr vtep_ip; + /* Flood mode (one of enum vxlan_flood_control) based on the PMSI + * tunnel type advertised by the remote VTEP + */ + int flood_control; /* Links. */ struct zebra_vtep_t_ *next; @@ -87,6 +91,9 @@ struct zebra_vni_t_ { /* Local IP */ struct in_addr local_vtep_ip; + /* PIM-SM MDT group for BUM flooding */ + struct in_addr mcast_grp; + /* tenant VRF, if any */ vrf_id_t vrf_id; @@ -427,4 +434,26 @@ struct nh_walk_ctx { } #endif +/* + * Multicast hash table. + * + * This table contains - + * 1. The (S, G) entries used for encapsulating and forwarding BUM traffic. + * S is the local VTEP-IP and G is a BUM mcast group address. + * 2. The (X, G) entries used for terminating a BUM flow. + * Multiple L2-VNIs can share the same MDT hence the need to maintain + * an aggregated table that pimd can consume without much + * re-interpretation. + */ +typedef struct zebra_vxlan_sg_ { + struct zebra_vrf *zvrf; + + struct prefix_sg sg; + char sg_str[PREFIX_SG_STR_LEN]; + + /* For SG - num of L2 VNIs using this entry for sending BUM traffic */ + /* For XG - num of SG using this as parent */ + uint32_t ref_cnt; +} zebra_vxlan_sg_t; + #endif /* _ZEBRA_VXLAN_PRIVATE_H */ diff --git a/zebra/zserv.c b/zebra/zserv.c index 80fdbefcd5..df5f236c04 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -970,6 +970,8 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client) client->v4_nh_watch_add_cnt, 0, client->v4_nh_watch_rem_cnt); vty_out(vty, "NHT v6 %-12d%-12d%-12d\n", client->v6_nh_watch_add_cnt, 0, client->v6_nh_watch_rem_cnt); + vty_out(vty, "VxLAN SG %-12d%-12d%-12d\n", client->vxlan_sg_add_cnt, + 0, client->vxlan_sg_del_cnt); vty_out(vty, "Interface Up Notifications: %d\n", client->ifup_cnt); vty_out(vty, "Interface Down Notifications: %d\n", client->ifdown_cnt); vty_out(vty, "VNI add notifications: %d\n", client->vniadd_cnt); diff --git a/zebra/zserv.h b/zebra/zserv.h index 86863d961c..90fd195712 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -141,6 +141,8 @@ struct zserv { uint32_t v4_nh_watch_rem_cnt; uint32_t v6_nh_watch_add_cnt; uint32_t v6_nh_watch_rem_cnt; + uint32_t vxlan_sg_add_cnt; + uint32_t vxlan_sg_del_cnt; time_t nh_reg_time; time_t nh_dereg_time; |
