From 1fdc9eae2d80ec361e2a22d588944013a5d9b0b7 Mon Sep 17 00:00:00 2001 From: vivek Date: Mon, 17 Oct 2016 12:39:55 -0700 Subject: [PATCH] zebra: Refactor netlink interactions Separate core netlink functions and library functions from route-related interactions and interface-related interactions. Signed-off-by: Vivek Venkatraman Reviewed-by: Donald Sharp Reviewed-by: Don Slice Ticket: CM-13199 Reviewed By: CCR-5254 Testing Done: bgp-min, ospf-min --- configure.ac | 1 + zebra/if_netlink.c | 758 ++++++++++++++++- zebra/if_netlink.h | 35 + zebra/kernel_netlink.c | 738 +++++++++++++++++ zebra/kernel_netlink.h | 53 ++ zebra/rt_netlink.c | 1607 ++----------------------------------- zebra/rt_netlink.h | 18 +- zebra/zebra_fpm_netlink.c | 11 +- 8 files changed, 1669 insertions(+), 1552 deletions(-) create mode 100644 zebra/if_netlink.h create mode 100644 zebra/kernel_netlink.h diff --git a/configure.ac b/configure.ac index 703948553b..e22dab8d04 100755 --- a/configure.ac +++ b/configure.ac @@ -935,6 +935,7 @@ AC_MSG_CHECKING(zebra between kernel interface method) if test x"$opsys" = x"gnu-linux"; then AC_MSG_RESULT(netlink) RT_METHOD=rt_netlink.o + KERNEL_METHOD=kernel_netlink.o AC_DEFINE(HAVE_NETLINK,,netlink) netlink=yes AC_CHECK_DECLS([IFLA_INFO_SLAVE_KIND], [], [], [#include ]) diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index dffa6568ea..069c0dda27 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -21,9 +21,765 @@ */ #include +#include + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "zebra_memory.h" +#include "rib.h" +#include "thread.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "mpls.h" +#include "rtnetlink.h" #include "zebra/zserv.h" -#include "zebra/rt_netlink.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/redistribute.h" +#include "zebra/interface.h" +#include 
"zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/zebra_ptm.h" +#include "zebra/zebra_mpls.h" +#include "zebra/kernel_netlink.h" +#include "zebra/if_netlink.h" + + +/* Note: on netlink systems, there should be a 1-to-1 mapping between interface + names and ifindex values. */ +static void +set_ifindex(struct interface *ifp, ifindex_t ifi_index, struct zebra_ns *zns) +{ + struct interface *oifp; + + if (((oifp = if_lookup_by_index_per_ns (zns, ifi_index)) != NULL) && (oifp != ifp)) + { + if (ifi_index == IFINDEX_INTERNAL) + zlog_err("Netlink is setting interface %s ifindex to reserved " + "internal value %u", ifp->name, ifi_index); + else + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("interface index %d was renamed from %s to %s", + ifi_index, oifp->name, ifp->name); + if (if_is_up(oifp)) + zlog_err("interface rename detected on up interface: index %d " + "was renamed from %s to %s, results are uncertain!", + ifi_index, oifp->name, ifp->name); + if_delete_update(oifp); + } + } + ifp->ifindex = ifi_index; +} + +/* Utility function to parse hardware link-layer address and update ifp */ +static void +netlink_interface_update_hw_addr (struct rtattr **tb, struct interface *ifp) +{ + int i; + + if (tb[IFLA_ADDRESS]) + { + int hw_addr_len; + + hw_addr_len = RTA_PAYLOAD (tb[IFLA_ADDRESS]); + + if (hw_addr_len > INTERFACE_HWADDR_MAX) + zlog_warn ("Hardware address is too large: %d", hw_addr_len); + else + { + ifp->hw_addr_len = hw_addr_len; + memcpy (ifp->hw_addr, RTA_DATA (tb[IFLA_ADDRESS]), hw_addr_len); + + for (i = 0; i < hw_addr_len; i++) + if (ifp->hw_addr[i] != 0) + break; + + if (i == hw_addr_len) + ifp->hw_addr_len = 0; + else + ifp->hw_addr_len = hw_addr_len; + } + } +} + +static enum zebra_link_type +netlink_to_zebra_link_type (unsigned int hwt) +{ + switch (hwt) + { + case ARPHRD_ETHER: return ZEBRA_LLT_ETHER; + case ARPHRD_EETHER: return ZEBRA_LLT_EETHER; + case ARPHRD_AX25: return ZEBRA_LLT_AX25; + case ARPHRD_PRONET: return ZEBRA_LLT_PRONET; + case 
ARPHRD_IEEE802: return ZEBRA_LLT_IEEE802; + case ARPHRD_ARCNET: return ZEBRA_LLT_ARCNET; + case ARPHRD_APPLETLK: return ZEBRA_LLT_APPLETLK; + case ARPHRD_DLCI: return ZEBRA_LLT_DLCI; + case ARPHRD_ATM: return ZEBRA_LLT_ATM; + case ARPHRD_METRICOM: return ZEBRA_LLT_METRICOM; + case ARPHRD_IEEE1394: return ZEBRA_LLT_IEEE1394; + case ARPHRD_EUI64: return ZEBRA_LLT_EUI64; + case ARPHRD_INFINIBAND: return ZEBRA_LLT_INFINIBAND; + case ARPHRD_SLIP: return ZEBRA_LLT_SLIP; + case ARPHRD_CSLIP: return ZEBRA_LLT_CSLIP; + case ARPHRD_SLIP6: return ZEBRA_LLT_SLIP6; + case ARPHRD_CSLIP6: return ZEBRA_LLT_CSLIP6; + case ARPHRD_RSRVD: return ZEBRA_LLT_RSRVD; + case ARPHRD_ADAPT: return ZEBRA_LLT_ADAPT; + case ARPHRD_ROSE: return ZEBRA_LLT_ROSE; + case ARPHRD_X25: return ZEBRA_LLT_X25; + case ARPHRD_PPP: return ZEBRA_LLT_PPP; + case ARPHRD_CISCO: return ZEBRA_LLT_CHDLC; + case ARPHRD_LAPB: return ZEBRA_LLT_LAPB; + case ARPHRD_RAWHDLC: return ZEBRA_LLT_RAWHDLC; + case ARPHRD_TUNNEL: return ZEBRA_LLT_IPIP; + case ARPHRD_TUNNEL6: return ZEBRA_LLT_IPIP6; + case ARPHRD_FRAD: return ZEBRA_LLT_FRAD; + case ARPHRD_SKIP: return ZEBRA_LLT_SKIP; + case ARPHRD_LOOPBACK: return ZEBRA_LLT_LOOPBACK; + case ARPHRD_LOCALTLK: return ZEBRA_LLT_LOCALTLK; + case ARPHRD_FDDI: return ZEBRA_LLT_FDDI; + case ARPHRD_SIT: return ZEBRA_LLT_SIT; + case ARPHRD_IPDDP: return ZEBRA_LLT_IPDDP; + case ARPHRD_IPGRE: return ZEBRA_LLT_IPGRE; + case ARPHRD_PIMREG: return ZEBRA_LLT_PIMREG; + case ARPHRD_HIPPI: return ZEBRA_LLT_HIPPI; + case ARPHRD_ECONET: return ZEBRA_LLT_ECONET; + case ARPHRD_IRDA: return ZEBRA_LLT_IRDA; + case ARPHRD_FCPP: return ZEBRA_LLT_FCPP; + case ARPHRD_FCAL: return ZEBRA_LLT_FCAL; + case ARPHRD_FCPL: return ZEBRA_LLT_FCPL; + case ARPHRD_FCFABRIC: return ZEBRA_LLT_FCFABRIC; + case ARPHRD_IEEE802_TR: return ZEBRA_LLT_IEEE802_TR; + case ARPHRD_IEEE80211: return ZEBRA_LLT_IEEE80211; + case ARPHRD_IEEE802154: return ZEBRA_LLT_IEEE802154; +#ifdef ARPHRD_IP6GRE + case ARPHRD_IP6GRE: return 
ZEBRA_LLT_IP6GRE; +#endif +#ifdef ARPHRD_IEEE802154_PHY + case ARPHRD_IEEE802154_PHY: return ZEBRA_LLT_IEEE802154_PHY; +#endif + + default: return ZEBRA_LLT_UNKNOWN; + } +} + +#define parse_rtattr_nested(tb, max, rta) \ + netlink_parse_rtattr((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta)) + +static void +netlink_vrf_change (struct nlmsghdr *h, struct rtattr *tb, const char *name) +{ + struct ifinfomsg *ifi; + struct rtattr *linkinfo[IFLA_INFO_MAX+1]; + struct rtattr *attr[IFLA_VRF_MAX+1]; + struct vrf *vrf; + struct zebra_vrf *zvrf; + u_int32_t nl_table_id; + + ifi = NLMSG_DATA (h); + + memset (linkinfo, 0, sizeof linkinfo); + parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb); + + if (!linkinfo[IFLA_INFO_DATA]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("%s: IFLA_INFO_DATA missing from VRF message: %s", __func__, name); + return; + } + + memset (attr, 0, sizeof attr); + parse_rtattr_nested(attr, IFLA_VRF_MAX, linkinfo[IFLA_INFO_DATA]); + if (!attr[IFLA_VRF_TABLE]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("%s: IFLA_VRF_TABLE missing from VRF message: %s", __func__, name); + return; + } + + nl_table_id = *(u_int32_t *)RTA_DATA(attr[IFLA_VRF_TABLE]); + + if (h->nlmsg_type == RTM_NEWLINK) + { + /* If VRF already exists, we just return; status changes are handled + * against the VRF "interface". + */ + vrf = vrf_lookup ((vrf_id_t)ifi->ifi_index); + if (vrf && vrf->info) + return; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("RTM_NEWLINK for VRF %s(%u) table %u", + name, ifi->ifi_index, nl_table_id); + + /* + * vrf_get is implied creation if it does not exist + */ + vrf = vrf_get((vrf_id_t)ifi->ifi_index, name); // It would create vrf + if (!vrf) + { + zlog_err ("VRF %s id %u not created", name, ifi->ifi_index); + return; + } + + /* Enable the created VRF. */ + if (!vrf_enable (vrf)) + { + zlog_err ("Failed to enable VRF %s id %u", name, ifi->ifi_index); + return; + } + + /* + * This is the only place that we get the actual kernel table_id + * being used. 
We need it to set the table_id of the routes + * we are passing to the kernel.... And to throw some totally + * awesome parties. that too. + */ + zvrf = (struct zebra_vrf *)vrf->info; + zvrf->table_id = nl_table_id; + } + else //h->nlmsg_type == RTM_DELLINK + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("RTM_DELLINK for VRF %s(%u)", name, ifi->ifi_index); + + vrf = vrf_lookup ((vrf_id_t)ifi->ifi_index); + + if (!vrf) + { + zlog_warn ("%s: vrf not found", __func__); + return; + } + + vrf_delete (vrf); + } +} + +/* Called from interface_lookup_netlink(). This function is only used + during bootstrap. */ +static int +netlink_interface (struct sockaddr_nl *snl, struct nlmsghdr *h, + ns_id_t ns_id) +{ + int len; + struct ifinfomsg *ifi; + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_MAX + 1]; + struct interface *ifp; + char *name = NULL; + char *kind = NULL; + char *slave_kind = NULL; + int vrf_device = 0; + struct zebra_ns *zns; + vrf_id_t vrf_id = VRF_DEFAULT; + + zns = zebra_ns_lookup (ns_id); + ifi = NLMSG_DATA (h); + + if (h->nlmsg_type != RTM_NEWLINK) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg)); + if (len < 0) + return -1; + + if (ifi->ifi_family == AF_BRIDGE) + return 0; + + /* Looking up interface name. 
*/ + memset (tb, 0, sizeof tb); + netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len); + +#ifdef IFLA_WIRELESS + /* check for wireless messages to ignore */ + if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__); + return 0; + } +#endif /* IFLA_WIRELESS */ + + if (tb[IFLA_IFNAME] == NULL) + return -1; + name = (char *) RTA_DATA (tb[IFLA_IFNAME]); + + if (tb[IFLA_LINKINFO]) + { + memset (linkinfo, 0, sizeof linkinfo); + parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); + + if (linkinfo[IFLA_INFO_KIND]) + kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]); + +#if HAVE_DECL_IFLA_INFO_SLAVE_KIND + if (linkinfo[IFLA_INFO_SLAVE_KIND]) + slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]); +#endif + + if (kind && strcmp(kind, "vrf") == 0) + { + vrf_device = 1; + netlink_vrf_change(h, tb[IFLA_LINKINFO], name); + vrf_id = (vrf_id_t)ifi->ifi_index; + } + } + + if (tb[IFLA_MASTER]) + { + if (slave_kind && (strcmp(slave_kind, "vrf") == 0)) + vrf_id = *(u_int32_t *)RTA_DATA(tb[IFLA_MASTER]); + } + + /* Add interface. */ + ifp = if_get_by_name_vrf (name, vrf_id); + set_ifindex(ifp, ifi->ifi_index, zns); + ifp->flags = ifi->ifi_flags & 0x0000fffff; + if (vrf_device) + SET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); + ifp->mtu6 = ifp->mtu = *(uint32_t *) RTA_DATA (tb[IFLA_MTU]); + ifp->metric = 0; + ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; + + /* Hardware type and address. */ + ifp->ll_type = netlink_to_zebra_link_type (ifi->ifi_type); + netlink_interface_update_hw_addr (tb, ifp); + + if_add_update (ifp); + + return 0; +} + +/* Interface lookup by netlink socket. */ +int +interface_lookup_netlink (struct zebra_ns *zns) +{ + int ret; + + /* Get interface information. 
*/ + ret = netlink_request (AF_PACKET, RTM_GETLINK, &zns->netlink_cmd); + if (ret < 0) + return ret; + ret = netlink_parse_info (netlink_interface, &zns->netlink_cmd, zns, 0); + if (ret < 0) + return ret; + + /* Get IPv4 address of the interfaces. */ + ret = netlink_request (AF_INET, RTM_GETADDR, &zns->netlink_cmd); + if (ret < 0) + return ret; + ret = netlink_parse_info (netlink_interface_addr, &zns->netlink_cmd, zns, 0); + if (ret < 0) + return ret; + +#ifdef HAVE_IPV6 + /* Get IPv6 address of the interfaces. */ + ret = netlink_request (AF_INET6, RTM_GETADDR, &zns->netlink_cmd); + if (ret < 0) + return ret; + ret = netlink_parse_info (netlink_interface_addr, &zns->netlink_cmd, zns, 0); + if (ret < 0) + return ret; +#endif /* HAVE_IPV6 */ + + return 0; +} + +/* Interface address modification. */ +static int +netlink_address (int cmd, int family, struct interface *ifp, + struct connected *ifc) +{ + int bytelen; + struct prefix *p; + + struct + { + struct nlmsghdr n; + struct ifaddrmsg ifa; + char buf[NL_PKT_BUF_SIZE]; + } req; + + struct zebra_ns *zns = zebra_ns_lookup (NS_DEFAULT); + + p = ifc->address; + memset (&req, 0, sizeof req - NL_PKT_BUF_SIZE); + + bytelen = (family == AF_INET ? 
4 : 16); + + req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifaddrmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = cmd; + req.ifa.ifa_family = family; + + req.ifa.ifa_index = ifp->ifindex; + req.ifa.ifa_prefixlen = p->prefixlen; + + addattr_l (&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen); + + if (family == AF_INET && cmd == RTM_NEWADDR) + { + if (!CONNECTED_PEER(ifc) && ifc->destination) + { + p = ifc->destination; + addattr_l (&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix, + bytelen); + } + } + + if (CHECK_FLAG (ifc->flags, ZEBRA_IFA_SECONDARY)) + SET_FLAG (req.ifa.ifa_flags, IFA_F_SECONDARY); + + if (ifc->label) + addattr_l (&req.n, sizeof req, IFA_LABEL, ifc->label, + strlen (ifc->label) + 1); + + return netlink_talk (&req.n, &zns->netlink_cmd, zns); +} + +int +kernel_address_add_ipv4 (struct interface *ifp, struct connected *ifc) +{ + return netlink_address (RTM_NEWADDR, AF_INET, ifp, ifc); +} + +int +kernel_address_delete_ipv4 (struct interface *ifp, struct connected *ifc) +{ + return netlink_address (RTM_DELADDR, AF_INET, ifp, ifc); +} + +int +netlink_interface_addr (struct sockaddr_nl *snl, struct nlmsghdr *h, + ns_id_t ns_id) +{ + int len; + struct ifaddrmsg *ifa; + struct rtattr *tb[IFA_MAX + 1]; + struct interface *ifp; + void *addr; + void *broad; + u_char flags = 0; + char *label = NULL; + struct zebra_ns *zns; + + zns = zebra_ns_lookup (ns_id); + ifa = NLMSG_DATA (h); + + if (ifa->ifa_family != AF_INET +#ifdef HAVE_IPV6 + && ifa->ifa_family != AF_INET6 +#endif /* HAVE_IPV6 */ + ) + return 0; + + if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifaddrmsg)); + if (len < 0) + return -1; + + memset (tb, 0, sizeof tb); + netlink_parse_rtattr (tb, IFA_MAX, IFA_RTA (ifa), len); + + ifp = if_lookup_by_index_per_ns (zns, ifa->ifa_index); + if (ifp == NULL) + { + zlog_err ("netlink_interface_addr can't find interface by index %d", + ifa->ifa_index); + 
return -1; + } + + if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */ + { + char buf[BUFSIZ]; + zlog_debug ("netlink_interface_addr %s %s flags 0x%x:", + nl_msg_type_to_str (h->nlmsg_type), ifp->name, + ifa->ifa_flags); + if (tb[IFA_LOCAL]) + zlog_debug (" IFA_LOCAL %s/%d", + inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_LOCAL]), + buf, BUFSIZ), ifa->ifa_prefixlen); + if (tb[IFA_ADDRESS]) + zlog_debug (" IFA_ADDRESS %s/%d", + inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_ADDRESS]), + buf, BUFSIZ), ifa->ifa_prefixlen); + if (tb[IFA_BROADCAST]) + zlog_debug (" IFA_BROADCAST %s/%d", + inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_BROADCAST]), + buf, BUFSIZ), ifa->ifa_prefixlen); + if (tb[IFA_LABEL] && strcmp (ifp->name, RTA_DATA (tb[IFA_LABEL]))) + zlog_debug (" IFA_LABEL %s", (char *)RTA_DATA (tb[IFA_LABEL])); + + if (tb[IFA_CACHEINFO]) + { + struct ifa_cacheinfo *ci = RTA_DATA (tb[IFA_CACHEINFO]); + zlog_debug (" IFA_CACHEINFO pref %d, valid %d", + ci->ifa_prefered, ci->ifa_valid); + } + } + + /* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */ + if (tb[IFA_LOCAL] == NULL) + tb[IFA_LOCAL] = tb[IFA_ADDRESS]; + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + + /* local interface address */ + addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL); + + /* is there a peer address? */ + if (tb[IFA_ADDRESS] && + memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_ADDRESS]))) + { + broad = RTA_DATA(tb[IFA_ADDRESS]); + SET_FLAG (flags, ZEBRA_IFA_PEER); + } + else + /* seeking a broadcast address */ + broad = (tb[IFA_BROADCAST] ? RTA_DATA(tb[IFA_BROADCAST]) : NULL); + + /* addr is primary key, SOL if we don't have one */ + if (addr == NULL) + { + zlog_debug ("%s: NULL address", __func__); + return -1; + } + + /* Flags. 
*/ + if (ifa->ifa_flags & IFA_F_SECONDARY) + SET_FLAG (flags, ZEBRA_IFA_SECONDARY); + + /* Label */ + if (tb[IFA_LABEL]) + label = (char *) RTA_DATA (tb[IFA_LABEL]); + + if (ifp && label && strcmp (ifp->name, label) == 0) + label = NULL; + + /* Register interface address to the interface. */ + if (ifa->ifa_family == AF_INET) + { + if (h->nlmsg_type == RTM_NEWADDR) + connected_add_ipv4 (ifp, flags, + (struct in_addr *) addr, ifa->ifa_prefixlen, + (struct in_addr *) broad, label); + else + connected_delete_ipv4 (ifp, flags, + (struct in_addr *) addr, ifa->ifa_prefixlen, + (struct in_addr *) broad); + } +#ifdef HAVE_IPV6 + if (ifa->ifa_family == AF_INET6) + { + if (h->nlmsg_type == RTM_NEWADDR) + { + /* Only consider valid addresses; we'll not get a notification from + * the kernel till IPv6 DAD has completed, but at init time, Quagga + * does query for and will receive all addresses. + */ + if (!(ifa->ifa_flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))) + connected_add_ipv6 (ifp, flags, (struct in6_addr *) addr, + ifa->ifa_prefixlen, (struct in6_addr *) broad, label); + } + else + connected_delete_ipv6 (ifp, + (struct in6_addr *) addr, ifa->ifa_prefixlen, + (struct in6_addr *) broad); + } +#endif /* HAVE_IPV6 */ + + return 0; +} + +int +netlink_link_change (struct sockaddr_nl *snl, struct nlmsghdr *h, + ns_id_t ns_id) +{ + int len; + struct ifinfomsg *ifi; + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_MAX + 1]; + struct interface *ifp; + char *name = NULL; + char *kind = NULL; + char *slave_kind = NULL; + int vrf_device = 0; + struct zebra_ns *zns; + vrf_id_t vrf_id = VRF_DEFAULT; + + zns = zebra_ns_lookup (ns_id); + ifi = NLMSG_DATA (h); + + if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)) + { + /* If this is not link add/delete message so print warning. 
*/ + zlog_warn ("netlink_link_change: wrong kernel message %d", + h->nlmsg_type); + return 0; + } + + len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg)); + if (len < 0) + return -1; + + if (ifi->ifi_family == AF_BRIDGE) + return 0; + + /* Looking up interface name. */ + memset (tb, 0, sizeof tb); + netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len); + +#ifdef IFLA_WIRELESS + /* check for wireless messages to ignore */ + if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__); + return 0; + } +#endif /* IFLA_WIRELESS */ + + if (tb[IFLA_IFNAME] == NULL) + return -1; + name = (char *) RTA_DATA (tb[IFLA_IFNAME]); + + if (tb[IFLA_LINKINFO]) + { + memset (linkinfo, 0, sizeof linkinfo); + parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); + + if (linkinfo[IFLA_INFO_KIND]) + kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]); + +#if HAVE_DECL_IFLA_INFO_SLAVE_KIND + if (linkinfo[IFLA_INFO_SLAVE_KIND]) + slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]); +#endif + + if (kind && strcmp(kind, "vrf") == 0) + { + vrf_device = 1; + netlink_vrf_change(h, tb[IFLA_LINKINFO], name); + vrf_id = (vrf_id_t)ifi->ifi_index; + } + } + + /* See if interface is present. 
*/ + ifp = if_lookup_by_index_per_ns (zns, ifi->ifi_index); + + if (h->nlmsg_type == RTM_NEWLINK) + { + if (tb[IFLA_MASTER]) + { + if (slave_kind && (strcmp(slave_kind, "vrf") == 0)) + vrf_id = *(u_int32_t *)RTA_DATA(tb[IFLA_MASTER]); + } + + if (ifp == NULL || !CHECK_FLAG (ifp->status, ZEBRA_INTERFACE_ACTIVE)) + { + /* Add interface notification from kernel */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("RTM_NEWLINK for %s(%u) (ifp %p) vrf_id %u flags 0x%x", + name, ifi->ifi_index, ifp, vrf_id, ifi->ifi_flags); + + if (ifp == NULL) + { + /* unknown interface */ + ifp = if_get_by_name_vrf (name, vrf_id); + } + else + { + /* pre-configured interface, learnt now */ + if (ifp->vrf_id != vrf_id) + if_update_vrf (ifp, name, strlen(name), vrf_id); + } + + /* Update interface information. */ + set_ifindex(ifp, ifi->ifi_index, zns); + ifp->flags = ifi->ifi_flags & 0x0000fffff; + if (vrf_device) + SET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); + ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]); + ifp->metric = 0; + ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; + + netlink_interface_update_hw_addr (tb, ifp); + + /* Inform clients, install any configured addresses. */ + if_add_update (ifp); + } + else if (ifp->vrf_id != vrf_id) + { + /* VRF change for an interface. */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("RTM_NEWLINK vrf-change for %s(%u) " + "vrf_id %u -> %u flags 0x%x", + name, ifp->ifindex, ifp->vrf_id, + vrf_id, ifi->ifi_flags); + + if_handle_vrf_change (ifp, vrf_id); + } + else + { + /* Interface status change. 
*/ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("RTM_NEWLINK status for %s(%u) flags 0x%x", + name, ifp->ifindex, ifi->ifi_flags); + + set_ifindex(ifp, ifi->ifi_index, zns); + ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]); + ifp->metric = 0; + + netlink_interface_update_hw_addr (tb, ifp); + + if (if_is_no_ptm_operative (ifp)) + { + ifp->flags = ifi->ifi_flags & 0x0000fffff; + if (!if_is_no_ptm_operative (ifp)) + if_down (ifp); + else if (if_is_operative (ifp)) + /* Must notify client daemons of new interface status. */ + zebra_interface_up_update (ifp); + } + else + { + ifp->flags = ifi->ifi_flags & 0x0000fffff; + if (if_is_operative (ifp)) + if_up (ifp); + } + } + } + else + { + /* Delete interface notification from kernel */ + if (ifp == NULL) + { + zlog_warn ("RTM_DELLINK for unknown interface %s(%u)", + name, ifi->ifi_index); + return 0; + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("RTM_DELLINK for %s(%u)", name, ifp->ifindex); + + UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); + + if (!vrf_device) + if_delete_update (ifp); + } + + return 0; +} /* Interface information read by netlink. */ void diff --git a/zebra/if_netlink.h b/zebra/if_netlink.h new file mode 100644 index 0000000000..aac67916c0 --- /dev/null +++ b/zebra/if_netlink.h @@ -0,0 +1,35 @@ +/* Header file exported by if_netlink.c to zebra. + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_IF_NETLINK_H +#define _ZEBRA_IF_NETLINK_H + +#ifdef HAVE_NETLINK + +extern int netlink_interface_addr (struct sockaddr_nl *snl, + struct nlmsghdr *h, ns_id_t ns_id); +extern int netlink_link_change (struct sockaddr_nl *snl, struct nlmsghdr *h, + ns_id_t ns_id); +extern int interface_lookup_netlink (struct zebra_ns *zns); + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_IF_NETLINK_H */ diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 23b2153ec2..37152c6254 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -18,3 +18,741 @@ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ + +#include + +#include "linklist.h" +#include "if.h" +#include "log.h" +#include "prefix.h" +#include "connected.h" +#include "table.h" +#include "memory.h" +#include "zebra_memory.h" +#include "rib.h" +#include "thread.h" +#include "privs.h" +#include "nexthop.h" +#include "vrf.h" +#include "mpls.h" + +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/debug.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" +#include "zebra/if_netlink.h" + +#ifndef SO_RCVBUFFORCE +#define SO_RCVBUFFORCE (33) +#endif + +/* Hack for GNU libc version 2. 
*/
+#ifndef MSG_TRUNC
+#define MSG_TRUNC      0x20
+#endif /* MSG_TRUNC */
+
+/* Pointer to the first aligned byte past the end of netlink message nmsg. */
+#ifndef NLMSG_TAIL
+#define NLMSG_TAIL(nmsg) \
+        ((struct rtattr *) (((u_char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+#endif
+
+/* Pointer to the first aligned byte past the end of attribute rta. */
+#ifndef RTA_TAIL
+#define RTA_TAIL(rta) \
+        ((struct rtattr *) (((u_char *) (rta)) + RTA_ALIGN((rta)->rta_len)))
+#endif
+
+/* Netlink message type -> printable name, used by nl_msg_type_to_str(). */
+static const struct message nlmsg_str[] = {
+  {RTM_NEWROUTE, "RTM_NEWROUTE"},
+  {RTM_DELROUTE, "RTM_DELROUTE"},
+  {RTM_GETROUTE, "RTM_GETROUTE"},
+  {RTM_NEWLINK,  "RTM_NEWLINK"},
+  {RTM_DELLINK,  "RTM_DELLINK"},
+  {RTM_GETLINK,  "RTM_GETLINK"},
+  {RTM_NEWADDR,  "RTM_NEWADDR"},
+  {RTM_DELADDR,  "RTM_DELADDR"},
+  {RTM_GETADDR,  "RTM_GETADDR"},
+  {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
+  {RTM_DELNEIGH, "RTM_DELNEIGH"},
+  {RTM_GETNEIGH, "RTM_GETNEIGH"},
+  {0, NULL}
+};
+
+/* Route protocol (RTPROT_*) -> printable name, used by nl_rtproto_to_str(). */
+static const struct message rtproto_str[] = {
+  {RTPROT_REDIRECT, "redirect"},
+  {RTPROT_KERNEL,   "kernel"},
+  {RTPROT_BOOT,     "boot"},
+  {RTPROT_STATIC,   "static"},
+  {RTPROT_GATED,    "GateD"},
+  {RTPROT_RA,       "router advertisement"},
+  {RTPROT_MRT,      "MRT"},
+  {RTPROT_ZEBRA,    "Zebra"},
+#ifdef RTPROT_BIRD
+  {RTPROT_BIRD,     "BIRD"},
+#endif /* RTPROT_BIRD */
+  {0, NULL}
+};
+
+extern struct thread_master *master;
+extern u_int32_t nl_rcvbufsize;
+
+extern struct zebra_privs_t zserv_privs;
+
+/* Message callback that accepts nothing: logs a warning naming the message
+ * type and namespace id, then returns 0.
+ */
+static int
+netlink_talk_filter (struct sockaddr_nl *snl, struct nlmsghdr *h,
+    ns_id_t ns_id)
+{
+  zlog_warn ("netlink_talk: ignoring message type 0x%04x NS %u", h->nlmsg_type,
+             ns_id);
+  return 0;
+}
+
+/* Resize the receive buffer of netlink socket nl and log the old and new
+ * sizes as reported by the kernel.
+ *
+ * SO_RCVBUFFORCE is tried first with raised privileges; if that fails the
+ * call falls back to plain SO_RCVBUF.
+ *
+ * NOTE(review): the size requested from the kernel is the global
+ * nl_rcvbufsize.  The newsize argument is never read as the requested size;
+ * it is only reused as storage for the final getsockopt().  Confirm with
+ * callers whether that is intended.
+ *
+ * Returns 0 on success, -1 if any get/setsockopt call fails.
+ */
+static int
+netlink_recvbuf (struct nlsock *nl, uint32_t newsize)
+{
+  u_int32_t oldsize;
+  socklen_t newlen = sizeof(newsize);
+  socklen_t oldlen = sizeof(oldsize);
+  int ret;
+
+  ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
+  if (ret < 0)
+    {
+      zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
+            safe_strerror (errno));
+      return -1;
+    }
+
+  /* Try force option (linux >= 2.6.14) and fall back to normal set */
+  if ( zserv_privs.change (ZPRIVS_RAISE) )
+    zlog_err ("routing_socket: Can't raise privileges");
+  ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE, &nl_rcvbufsize,
+                   sizeof(nl_rcvbufsize));
+  if ( zserv_privs.change (ZPRIVS_LOWER) )
+    zlog_err ("routing_socket: Can't lower privileges");
+  if (ret < 0)
+    ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &nl_rcvbufsize,
+                     sizeof(nl_rcvbufsize));
+  if (ret < 0)
+    {
+      zlog (NULL, LOG_ERR, "Can't set %s receive buffer size: %s", nl->name,
+            safe_strerror (errno));
+      return -1;
+    }
+
+  ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
+  if (ret < 0)
+    {
+      zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
+            safe_strerror (errno));
+      return -1;
+    }
+
+  zlog (NULL, LOG_INFO,
+        "Setting netlink socket receive buffer size: %u -> %u",
+        oldsize, newsize);
+  return 0;
+}
+
+/* Make socket for Linux netlink interface.
+ *
+ * Opens a NETLINK_ROUTE raw socket, binds it to the multicast groups in
+ * `groups`, and stores the bound address and descriptor in nl->snl and
+ * nl->sock.  Privileges are raised for socket()/bind() and are lowered
+ * again on every path out of that section, including socket() failure.
+ * ns_id is currently unused.
+ *
+ * Returns 0 on success, -1 on failure (the descriptor is closed and nl is
+ * left unmodified).
+ */
+static int
+netlink_socket (struct nlsock *nl, unsigned long groups, ns_id_t ns_id)
+{
+  int ret;
+  struct sockaddr_nl snl;
+  int sock;
+  socklen_t namelen;
+  int save_errno;
+
+  if (zserv_privs.change (ZPRIVS_RAISE))
+    {
+      zlog (NULL, LOG_ERR, "Can't raise privileges");
+      return -1;
+    }
+
+  sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+  if (sock < 0)
+    {
+      /* Keep errno before anything else can overwrite it, and never
+       * return to the caller with privileges still raised.
+       */
+      save_errno = errno;
+      if (zserv_privs.change (ZPRIVS_LOWER))
+        zlog (NULL, LOG_ERR, "Can't lower privileges");
+      zlog (NULL, LOG_ERR, "Can't open %s socket: %s", nl->name,
+            safe_strerror (save_errno));
+      return -1;
+    }
+
+  memset (&snl, 0, sizeof snl);
+  snl.nl_family = AF_NETLINK;
+  snl.nl_groups = groups;
+
+  /* Bind the socket to the netlink structure for anything. */
+  ret = bind (sock, (struct sockaddr *) &snl, sizeof snl);
+  save_errno = errno;
+  if (zserv_privs.change (ZPRIVS_LOWER))
+    zlog (NULL, LOG_ERR, "Can't lower privileges");
+
+  if (ret < 0)
+    {
+      zlog (NULL, LOG_ERR, "Can't bind %s socket to group 0x%x: %s",
+            nl->name, snl.nl_groups, safe_strerror (save_errno));
+      close (sock);
+      return -1;
+    }
+
+  /* multiple netlink sockets will have different nl_pid */
+  namelen = sizeof snl;
+  ret = getsockname (sock, (struct sockaddr *) &snl, &namelen);
+  if (ret < 0 || namelen != sizeof snl)
+    {
+      zlog (NULL, LOG_ERR, "Can't get %s socket name: %s", nl->name,
+            safe_strerror (errno));
+      close (sock);
+      return -1;
+    }
+
+  nl->snl = snl;
+  nl->sock = sock;
+  return ret;
+}
+
+/* Dispatch one message received on the listening socket to the route, link
+ * or address handler according to its nlmsg_type.  Messages whose sender
+ * nl_pid is non-zero (i.e. not the kernel) are logged and dropped.
+ *
+ * Returns the handler's result, or 0 for dropped/unknown messages.
+ */
+static int
+netlink_information_fetch (struct sockaddr_nl *snl, struct nlmsghdr *h,
+    ns_id_t ns_id)
+{
+  /* JF: Ignore messages that aren't from the kernel */
+  if ( snl->nl_pid != 0 )
+    {
+      zlog ( NULL, LOG_ERR, "Ignoring message from pid %u", snl->nl_pid );
+      return 0;
+    }
+
+  switch (h->nlmsg_type)
+    {
+    case RTM_NEWROUTE:
+    case RTM_DELROUTE:
+      return netlink_route_change (snl, h, ns_id);
+    case RTM_NEWLINK:
+    case RTM_DELLINK:
+      return netlink_link_change (snl, h, ns_id);
+    case RTM_NEWADDR:
+    case RTM_DELADDR:
+      return netlink_interface_addr (snl, h, ns_id);
+    default:
+      zlog_warn ("Unknown netlink nlmsg_type %d vrf %u\n", h->nlmsg_type,
+                 ns_id);
+      break;
+    }
+  return 0;
+}
+
+/* Read-ready callback for the listening netlink socket of a namespace.
+ * Parses pending messages with a count limit of 5 passed to
+ * netlink_parse_info(), then re-registers itself for the next read event.
+ * The parse result is not propagated; the callback always returns 0.
+ */
+static int
+kernel_read (struct thread *thread)
+{
+  struct zebra_ns *zns = (struct zebra_ns *)THREAD_ARG (thread);
+  netlink_parse_info (netlink_information_fetch, &zns->netlink, zns, 5);
+  zns->t_netlink = thread_add_read (zebrad.master, kernel_read, zns,
+                                     zns->netlink.sock);
+
+  return 0;
+}
+
+/* Filter out messages from self that occur on listener socket,
+ * 
caused by our actions on the command socket
+ */
+static void netlink_install_filter (int sock, __u32 pid)
+{
+  /* Classic BPF program: drop RTM_NEWROUTE/RTM_DELROUTE messages whose
+   * nlmsg_pid equals `pid`, keep everything else.  Jump targets in the
+   * comments are absolute instruction indexes.
+   */
+  struct sock_filter filter[] = {
+    /* 0: ldh [4]             */
+    BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
+    /* 1: jeq 0x18 jt 3 jf 2  */
+    BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 1, 0),
+    /* 2: jeq 0x19 jt 3 jf 6  */
+    BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 0, 3),
+    /* 3: ldw [12]            */
+    BPF_STMT(BPF_LD|BPF_ABS|BPF_W, offsetof(struct nlmsghdr, nlmsg_pid)),
+    /* 4: jeq XX  jt 5 jf 6   */
+    BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htonl(pid), 0, 1),
+    /* 5: ret 0    (skip)     */
+    BPF_STMT(BPF_RET|BPF_K, 0),
+    /* 6: ret 0xffff (keep)   */
+    BPF_STMT(BPF_RET|BPF_K, 0xffff),
+  };
+
+  struct sock_fprog prog = {
+    .len = array_size(filter),
+    .filter = filter,
+  };
+
+  /* Failure to attach is not fatal: it is only logged. */
+  if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
+    zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno));
+}
+
+/* Walk the attribute list starting at rta (len bytes long) and record a
+ * pointer to each attribute in tb[], indexed by attribute type.
+ *
+ * tb must have room for max + 1 entries.  Entries are only written, never
+ * cleared, so the caller zeroes tb beforehand.  Attributes with a type
+ * greater than max are skipped; if a type occurs more than once the last
+ * occurrence wins.
+ */
+void
+netlink_parse_rtattr (struct rtattr **tb, int max, struct rtattr *rta,
+                      int len)
+{
+  while (RTA_OK (rta, len))
+    {
+      if (rta->rta_type <= max)
+        tb[rta->rta_type] = rta;
+      rta = RTA_NEXT (rta, len);
+    }
+}
+
+/* Append an attribute of the given type with alen bytes of payload copied
+ * from data to the end of netlink message n, and grow n->nlmsg_len to
+ * cover it.  maxlen is the total size of the buffer holding n.
+ *
+ * data may be NULL when alen is 0 (used to open a nested attribute).
+ *
+ * Returns 0 on success, -1 if the attribute does not fit in maxlen.
+ */
+int
+addattr_l (struct nlmsghdr *n, unsigned int maxlen, int type, void *data, int alen)
+{
+  int len;
+  struct rtattr *rta;
+
+  len = RTA_LENGTH (alen);
+
+  if (NLMSG_ALIGN (n->nlmsg_len) + RTA_ALIGN (len) > maxlen)
+    return -1;
+
+  rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len));
+  rta->rta_type = type;
+  rta->rta_len = len;
+  /* memcpy() with a NULL source is undefined even for a zero length. */
+  if (alen > 0)
+    memcpy (RTA_DATA (rta), data, alen);
+  n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + RTA_ALIGN (len);
+
+  return 0;
+}
+
+/* Append a sub-attribute of the given type with alen bytes of payload
+ * copied from data to the end of attribute rta, and grow rta->rta_len to
+ * cover it.  maxlen is the total size of the buffer holding rta.
+ *
+ * data may be NULL when alen is 0 (used to open a nested attribute).
+ *
+ * Returns 0 on success, -1 if the sub-attribute does not fit in maxlen.
+ */
+int
+rta_addattr_l (struct rtattr *rta, unsigned int maxlen, int type,
+               void *data, int alen)
+{
+  unsigned int len;
+  struct rtattr *subrta;
+
+  len = RTA_LENGTH (alen);
+
+  if (RTA_ALIGN (rta->rta_len) + RTA_ALIGN (len) > maxlen)
+    return -1;
+
+  subrta = (struct rtattr *) (((char *) rta) + RTA_ALIGN (rta->rta_len));
+  subrta->rta_type = type;
+  subrta->rta_len = len;
+  /* memcpy() with a NULL source is undefined even for a zero length. */
+  if (alen > 0)
+    memcpy (RTA_DATA (subrta), data, alen);
+  /* rta is an attribute, so use the attribute alignment macro, matching
+   * the bounds check and the subrta computation above.
+   */
+  rta->rta_len = RTA_ALIGN (rta->rta_len) + RTA_ALIGN (len);
+
+  return 0;
+}
+
+/* Append a 4-byte attribute holding data to message n; see addattr_l(). */
+int
+addattr32 (struct nlmsghdr *n, unsigned int maxlen, int type, int data)
+{
+  return addattr_l(n, maxlen, type, &data, sizeof(u_int32_t));
+}
+
+/* Open a nested attribute of the given type at the tail of message n and
+ * return a pointer to its header, to be passed to addattr_nest_end() once
+ * the nested attributes have been appended.
+ *
+ * NOTE(review): the result of addattr_l() is not checked, so when the
+ * header does not fit in maxlen the returned pointer refers to space that
+ * was never reserved.  Callers are presumably expected to size the buffer
+ * so this cannot happen -- confirm.
+ */
+struct rtattr *
+addattr_nest(struct nlmsghdr *n, int maxlen, int type)
+{
+  struct rtattr *nest = NLMSG_TAIL(n);
+
+  addattr_l(n, maxlen, type, NULL, 0);
+  return nest;
+}
+
+/* Close a nested attribute opened with addattr_nest(): set its length to
+ * span everything appended to n since.  Returns n->nlmsg_len.
+ */
+int
+addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
+{
+  nest->rta_len = (u_char *)NLMSG_TAIL(n) - (u_char *)nest;
+  return n->nlmsg_len;
+}
+
+/* Open a nested sub-attribute of the given type at the tail of attribute
+ * rta and return a pointer to its header, to be passed to rta_nest_end().
+ * As with addattr_nest(), the result of rta_addattr_l() is not checked.
+ */
+struct rtattr *
+rta_nest(struct rtattr *rta, int maxlen, int type)
+{
+  struct rtattr *nest = RTA_TAIL(rta);
+
+  rta_addattr_l(rta, maxlen, type, NULL, 0);
+  return nest;
+}
+
+/* Close a nested sub-attribute opened with rta_nest(): set its length to
+ * span everything appended to rta since.  Returns rta->rta_len.
+ */
+int
+rta_nest_end(struct rtattr *rta, struct rtattr *nest)
+{
+  nest->rta_len = (u_char *)RTA_TAIL(rta) - (u_char *)nest;
+  return rta->rta_len;
+}
+
+/* Look up the printable name of a netlink message type in nlmsg_str. */
+const char *
+nl_msg_type_to_str (uint16_t msg_type)
+{
+  return lookup (nlmsg_str, msg_type);
+}
+
+/* Look up the printable name of a route protocol in rtproto_str. */
+const char *
+nl_rtproto_to_str (u_char rtproto)
+{
+  return lookup (rtproto_str, rtproto);
+}
+/* Receive message from netlink interface and pass those information
+   to the given function.
*/ +int +netlink_parse_info (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *, + ns_id_t), + struct nlsock *nl, struct zebra_ns *zns, int count) +{ + int status; + int ret = 0; + int error; + int read_in = 0; + + while (1) + { + char buf[NL_PKT_BUF_SIZE]; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof buf + }; + struct sockaddr_nl snl; + struct msghdr msg = { + .msg_name = (void *) &snl, + .msg_namelen = sizeof snl, + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct nlmsghdr *h; + + if (count && read_in >= count) + return 0; + + status = recvmsg (nl->sock, &msg, 0); + if (status < 0) + { + if (errno == EINTR) + continue; + if (errno == EWOULDBLOCK || errno == EAGAIN) + break; + zlog (NULL, LOG_ERR, "%s recvmsg overrun: %s", + nl->name, safe_strerror(errno)); + /* + * In this case we are screwed. + * There is no good way to + * recover zebra at this point. + */ + exit (-1); + continue; + } + + if (status == 0) + { + zlog (NULL, LOG_ERR, "%s EOF", nl->name); + return -1; + } + + if (msg.msg_namelen != sizeof snl) + { + zlog (NULL, LOG_ERR, "%s sender address length error: length %d", + nl->name, msg.msg_namelen); + return -1; + } + + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) + { + zlog_debug("%s: << netlink message dump [recv]", __func__); + zlog_hexdump(&msg, sizeof(msg)); + } + + read_in++; + for (h = (struct nlmsghdr *) buf; NLMSG_OK (h, (unsigned int) status); + h = NLMSG_NEXT (h, status)) + { + /* Finish of reading. */ + if (h->nlmsg_type == NLMSG_DONE) + return ret; + + /* Error handling. 
*/ + if (h->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA (h); + int errnum = err->error; + int msg_type = err->msg.nlmsg_type; + + /* If the error field is zero, then this is an ACK */ + if (err->error == 0) + { + if (IS_ZEBRA_DEBUG_KERNEL) + { + zlog_debug ("%s: %s ACK: type=%s(%u), seq=%u, pid=%u", + __FUNCTION__, nl->name, + nl_msg_type_to_str (err->msg.nlmsg_type), + err->msg.nlmsg_type, err->msg.nlmsg_seq, + err->msg.nlmsg_pid); + } + + /* return if not a multipart message, otherwise continue */ + if (!(h->nlmsg_flags & NLM_F_MULTI)) + return 0; + continue; + } + + if (h->nlmsg_len < NLMSG_LENGTH (sizeof (struct nlmsgerr))) + { + zlog (NULL, LOG_ERR, "%s error: message truncated", + nl->name); + return -1; + } + + /* Deal with errors that occur because of races in link handling */ + if (nl == &zns->netlink_cmd + && ((msg_type == RTM_DELROUTE && + (-errnum == ENODEV || -errnum == ESRCH)) + || (msg_type == RTM_NEWROUTE && -errnum == EEXIST))) + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("%s: error: %s type=%s(%u), seq=%u, pid=%u", + nl->name, safe_strerror (-errnum), + nl_msg_type_to_str (msg_type), + msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); + return 0; + } + + /* We see RTM_DELNEIGH when shutting down an interface with an IPv4 + * link-local. The kernel should have already deleted the neighbor + * so do not log these as an error. + */ + if (msg_type == RTM_DELNEIGH || + (nl == &zns->netlink_cmd && msg_type == RTM_NEWROUTE && + (-errnum == ESRCH || -errnum == ENETUNREACH))) + { + /* This is known to happen in some situations, don't log + * as error. 
+ */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("%s error: %s, type=%s(%u), seq=%u, pid=%u", + nl->name, safe_strerror (-errnum), + nl_msg_type_to_str (msg_type), + msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); + } + else + zlog_err ("%s error: %s, type=%s(%u), seq=%u, pid=%u", + nl->name, safe_strerror (-errnum), + nl_msg_type_to_str (msg_type), + msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); + + return -1; + } + + /* OK we got netlink message. */ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("netlink_parse_info: %s type %s(%u), len=%d, seq=%u, pid=%u", + nl->name, + nl_msg_type_to_str (h->nlmsg_type), h->nlmsg_type, + h->nlmsg_len, h->nlmsg_seq, h->nlmsg_pid); + + /* skip unsolicited messages originating from command socket + * linux sets the originators port-id for {NEW|DEL}ADDR messages, + * so this has to be checked here. */ + if (nl != &zns->netlink_cmd + && h->nlmsg_pid == zns->netlink_cmd.snl.nl_pid + && (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)) + { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("netlink_parse_info: %s packet comes from %s", + zns->netlink_cmd.name, nl->name); + continue; + } + + error = (*filter) (&snl, h, zns->ns_id); + if (error < 0) + { + zlog (NULL, LOG_ERR, "%s filter function error", nl->name); + ret = error; + } + } + + /* After error care. */ + if (msg.msg_flags & MSG_TRUNC) + { + zlog (NULL, LOG_ERR, "%s error: message truncated", nl->name); + continue; + } + if (status) + { + zlog (NULL, LOG_ERR, "%s error: data remnant size %d", nl->name, + status); + return -1; + } + } + return ret; +} + +/* sendmsg() to netlink socket then recvmsg(). 
*/ +int +netlink_talk (struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns) +{ + int status; + struct sockaddr_nl snl; + struct iovec iov = { + .iov_base = (void *) n, + .iov_len = n->nlmsg_len + }; + struct msghdr msg = { + .msg_name = (void *) &snl, + .msg_namelen = sizeof snl, + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int save_errno; + + memset (&snl, 0, sizeof snl); + snl.nl_family = AF_NETLINK; + + n->nlmsg_seq = ++nl->seq; + + /* Request an acknowledgement by setting NLM_F_ACK */ + n->nlmsg_flags |= NLM_F_ACK; + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug ("netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x", + nl->name, + nl_msg_type_to_str (n->nlmsg_type), n->nlmsg_type, + n->nlmsg_len, n->nlmsg_seq, n->nlmsg_flags); + + /* Send message to netlink interface. */ + if (zserv_privs.change (ZPRIVS_RAISE)) + zlog (NULL, LOG_ERR, "Can't raise privileges"); + status = sendmsg (nl->sock, &msg, 0); + save_errno = errno; + if (zserv_privs.change (ZPRIVS_LOWER)) + zlog (NULL, LOG_ERR, "Can't lower privileges"); + + if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) + { + zlog_debug("%s: >> netlink message dump [sent]", __func__); + zlog_hexdump(&msg, sizeof(msg)); + } + + if (status < 0) + { + zlog (NULL, LOG_ERR, "netlink_talk sendmsg() error: %s", + safe_strerror (save_errno)); + return -1; + } + + + /* + * Get reply from netlink socket. + * The reply should either be an acknowlegement or an error. + */ + return netlink_parse_info (netlink_talk_filter, nl, zns, 0); +} + +/* Get type specified information from netlink. */ +int +netlink_request (int family, int type, struct nlsock *nl) +{ + int ret; + struct sockaddr_nl snl; + int save_errno; + + struct + { + struct nlmsghdr nlh; + struct rtgenmsg g; + } req; + + /* Check netlink socket. 
*/ + if (nl->sock < 0) + { + zlog (NULL, LOG_ERR, "%s socket isn't active.", nl->name); + return -1; + } + + memset (&snl, 0, sizeof snl); + snl.nl_family = AF_NETLINK; + + memset (&req, 0, sizeof req); + req.nlh.nlmsg_len = sizeof req; + req.nlh.nlmsg_type = type; + req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.nlh.nlmsg_pid = nl->snl.nl_pid; + req.nlh.nlmsg_seq = ++nl->seq; + req.g.rtgen_family = family; + + /* linux appears to check capabilities on every message + * have to raise caps for every message sent + */ + if (zserv_privs.change (ZPRIVS_RAISE)) + { + zlog (NULL, LOG_ERR, "Can't raise privileges"); + return -1; + } + + ret = sendto (nl->sock, (void *) &req, sizeof req, 0, + (struct sockaddr *) &snl, sizeof snl); + save_errno = errno; + + if (zserv_privs.change (ZPRIVS_LOWER)) + zlog (NULL, LOG_ERR, "Can't lower privileges"); + + if (ret < 0) + { + zlog (NULL, LOG_ERR, "%s sendto failed: %s", nl->name, + safe_strerror (save_errno)); + return -1; + } + + return 0; +} + +/* Exported interface function. This function simply calls + netlink_socket (). */ +void +kernel_init (struct zebra_ns *zns) +{ + unsigned long groups; + + groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR; +#ifdef HAVE_IPV6 + groups |= RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR; +#endif /* HAVE_IPV6 */ + netlink_socket (&zns->netlink, groups, zns->ns_id); + netlink_socket (&zns->netlink_cmd, 0, zns->ns_id); + + /* Register kernel socket. 
*/ + if (zns->netlink.sock > 0) + { + /* Only want non-blocking on the netlink event socket */ + if (fcntl (zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err ("Can't set %s socket flags: %s", zns->netlink.name, + safe_strerror (errno)); + + /* Set receive buffer size if it's set from command line */ + if (nl_rcvbufsize) + netlink_recvbuf (&zns->netlink, nl_rcvbufsize); + + netlink_install_filter (zns->netlink.sock, zns->netlink_cmd.snl.nl_pid); + zns->t_netlink = thread_add_read (zebrad.master, kernel_read, zns, + zns->netlink.sock); + } +} + +void +kernel_terminate (struct zebra_ns *zns) +{ + THREAD_READ_OFF (zns->t_netlink); + + if (zns->netlink.sock >= 0) + { + close (zns->netlink.sock); + zns->netlink.sock = -1; + } + + if (zns->netlink_cmd.sock >= 0) + { + close (zns->netlink_cmd.sock); + zns->netlink_cmd.sock = -1; + } +} diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h new file mode 100644 index 0000000000..0efd89f125 --- /dev/null +++ b/zebra/kernel_netlink.h @@ -0,0 +1,53 @@ +/* Declarations and definitions for kernel interaction over netlink + * Copyright (C) 2016 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#ifndef _ZEBRA_KERNEL_NETLINK_H +#define _ZEBRA_KERNEL_NETLINK_H + +#ifdef HAVE_NETLINK + +#define NL_PKT_BUF_SIZE 8192 + +extern void netlink_parse_rtattr (struct rtattr **tb, int max, + struct rtattr *rta, int len); +extern int addattr_l (struct nlmsghdr *n, unsigned int maxlen, + int type, void *data, int alen); +extern int rta_addattr_l (struct rtattr *rta, unsigned int maxlen, + int type, void *data, int alen); +extern int addattr32 (struct nlmsghdr *n, unsigned int maxlen, + int type, int data); +extern struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type); +extern int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest); +extern struct rtattr * rta_nest(struct rtattr *rta, int maxlen, int type); +extern int rta_nest_end(struct rtattr *rta, struct rtattr *nest); +extern const char * nl_msg_type_to_str (uint16_t msg_type); +extern const char * nl_rtproto_to_str (u_char rtproto); + +extern int netlink_parse_info (int (*filter) (struct sockaddr_nl *, + struct nlmsghdr *, ns_id_t), struct nlsock *nl, + struct zebra_ns *zns, int count); +extern int netlink_talk (struct nlmsghdr *n, struct nlsock *nl, + struct zebra_ns *zns); +extern int netlink_request (int family, int type, struct nlsock *nl); + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_KERNEL_NETLINK_H */ diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index b6bccf2f7b..4d60ceda70 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -52,24 +52,8 @@ #include "zebra/rtadv.h" #include "zebra/zebra_ptm.h" #include "zebra/zebra_mpls.h" - -#include "rt_netlink.h" - -static const struct message nlmsg_str[] = { - {RTM_NEWROUTE, "RTM_NEWROUTE"}, - {RTM_DELROUTE, "RTM_DELROUTE"}, - {RTM_GETROUTE, "RTM_GETROUTE"}, - {RTM_NEWLINK, "RTM_NEWLINK"}, - {RTM_DELLINK, "RTM_DELLINK"}, - {RTM_GETLINK, "RTM_GETLINK"}, - {RTM_NEWADDR, "RTM_NEWADDR"}, - {RTM_DELADDR, "RTM_DELADDR"}, - {RTM_GETADDR, "RTM_GETADDR"}, - {RTM_NEWNEIGH, "RTM_NEWNEIGH"}, - {RTM_DELNEIGH, "RTM_DELNEIGH"}, - 
{RTM_GETNEIGH, "RTM_GETNEIGH"}, - {0, NULL} -}; +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" /* TODO - Temporary definitions, need to refine. */ #ifndef AF_MPLS @@ -101,16 +85,6 @@ static const struct message nlmsg_str[] = { #endif /* End of temporary definitions */ -#ifndef NLMSG_TAIL -#define NLMSG_TAIL(nmsg) \ - ((struct rtattr *) (((u_char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) -#endif - -#ifndef RTA_TAIL -#define RTA_TAIL(rta) \ - ((struct rtattr *) (((u_char *) (rta)) + RTA_ALIGN((rta)->rta_len))) -#endif - struct gw_family_t { u_int16_t filler; @@ -118,204 +92,6 @@ struct gw_family_t union g_addr gate; }; -extern struct zebra_privs_t zserv_privs; - -extern u_int32_t nl_rcvbufsize; - -/* Note: on netlink systems, there should be a 1-to-1 mapping between interface - names and ifindex values. */ -static void -set_ifindex(struct interface *ifp, ifindex_t ifi_index, struct zebra_ns *zns) -{ - struct interface *oifp; - - if (((oifp = if_lookup_by_index_per_ns (zns, ifi_index)) != NULL) && (oifp != ifp)) - { - if (ifi_index == IFINDEX_INTERNAL) - zlog_err("Netlink is setting interface %s ifindex to reserved " - "internal value %u", ifp->name, ifi_index); - else - { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug("interface index %d was renamed from %s to %s", - ifi_index, oifp->name, ifp->name); - if (if_is_up(oifp)) - zlog_err("interface rename detected on up interface: index %d " - "was renamed from %s to %s, results are uncertain!", - ifi_index, oifp->name, ifp->name); - if_delete_update(oifp); - } - } - ifp->ifindex = ifi_index; -} - -#ifndef SO_RCVBUFFORCE -#define SO_RCVBUFFORCE (33) -#endif - -static int -netlink_recvbuf (struct nlsock *nl, uint32_t newsize) -{ - u_int32_t oldsize; - socklen_t newlen = sizeof(newsize); - socklen_t oldlen = sizeof(oldsize); - int ret; - - ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen); - if (ret < 0) - { - zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name, - 
safe_strerror (errno)); - return -1; - } - - /* Try force option (linux >= 2.6.14) and fall back to normal set */ - if ( zserv_privs.change (ZPRIVS_RAISE) ) - zlog_err ("routing_socket: Can't raise privileges"); - ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE, &nl_rcvbufsize, - sizeof(nl_rcvbufsize)); - if ( zserv_privs.change (ZPRIVS_LOWER) ) - zlog_err ("routing_socket: Can't lower privileges"); - if (ret < 0) - ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &nl_rcvbufsize, - sizeof(nl_rcvbufsize)); - if (ret < 0) - { - zlog (NULL, LOG_ERR, "Can't set %s receive buffer size: %s", nl->name, - safe_strerror (errno)); - return -1; - } - - ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen); - if (ret < 0) - { - zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name, - safe_strerror (errno)); - return -1; - } - - zlog (NULL, LOG_INFO, - "Setting netlink socket receive buffer size: %u -> %u", - oldsize, newsize); - return 0; -} - -/* Make socket for Linux netlink interface. */ -static int -netlink_socket (struct nlsock *nl, unsigned long groups, ns_id_t ns_id) -{ - int ret; - struct sockaddr_nl snl; - int sock; - int namelen; - int save_errno; - - if (zserv_privs.change (ZPRIVS_RAISE)) - { - zlog (NULL, LOG_ERR, "Can't raise privileges"); - return -1; - } - - sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (sock < 0) - { - zlog (NULL, LOG_ERR, "Can't open %s socket: %s", nl->name, - safe_strerror (errno)); - return -1; - } - - memset (&snl, 0, sizeof snl); - snl.nl_family = AF_NETLINK; - snl.nl_groups = groups; - - /* Bind the socket to the netlink structure for anything. 
*/ - ret = bind (sock, (struct sockaddr *) &snl, sizeof snl); - save_errno = errno; - if (zserv_privs.change (ZPRIVS_LOWER)) - zlog (NULL, LOG_ERR, "Can't lower privileges"); - - if (ret < 0) - { - zlog (NULL, LOG_ERR, "Can't bind %s socket to group 0x%x: %s", - nl->name, snl.nl_groups, safe_strerror (save_errno)); - close (sock); - return -1; - } - - /* multiple netlink sockets will have different nl_pid */ - namelen = sizeof snl; - ret = getsockname (sock, (struct sockaddr *) &snl, (socklen_t *) &namelen); - if (ret < 0 || namelen != sizeof snl) - { - zlog (NULL, LOG_ERR, "Can't get %s socket name: %s", nl->name, - safe_strerror (errno)); - close (sock); - return -1; - } - - nl->snl = snl; - nl->sock = sock; - return ret; -} - -/* Get type specified information from netlink. */ -static int -netlink_request (int family, int type, struct nlsock *nl) -{ - int ret; - struct sockaddr_nl snl; - int save_errno; - - struct - { - struct nlmsghdr nlh; - struct rtgenmsg g; - } req; - - /* Check netlink socket. 
*/ - if (nl->sock < 0) - { - zlog (NULL, LOG_ERR, "%s socket isn't active.", nl->name); - return -1; - } - - memset (&snl, 0, sizeof snl); - snl.nl_family = AF_NETLINK; - - memset (&req, 0, sizeof req); - req.nlh.nlmsg_len = sizeof req; - req.nlh.nlmsg_type = type; - req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; - req.nlh.nlmsg_pid = nl->snl.nl_pid; - req.nlh.nlmsg_seq = ++nl->seq; - req.g.rtgen_family = family; - - /* linux appears to check capabilities on every message - * have to raise caps for every message sent - */ - if (zserv_privs.change (ZPRIVS_RAISE)) - { - zlog (NULL, LOG_ERR, "Can't raise privileges"); - return -1; - } - - ret = sendto (nl->sock, (void *) &req, sizeof req, 0, - (struct sockaddr *) &snl, sizeof snl); - save_errno = errno; - - if (zserv_privs.change (ZPRIVS_LOWER)) - zlog (NULL, LOG_ERR, "Can't lower privileges"); - - if (ret < 0) - { - zlog (NULL, LOG_ERR, "%s sendto failed: %s", nl->name, - safe_strerror (save_errno)); - return -1; - } - - return 0; -} - /* Pending: create an efficient table_id (in a tree/hash) based lookup) */ @@ -337,634 +113,6 @@ vrf_lookup_by_table (u_int32_t table_id) return VRF_DEFAULT; } -/* Receive message from netlink interface and pass those information - to the given function. 
*/ -static int -netlink_parse_info (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *, - ns_id_t), - struct nlsock *nl, struct zebra_ns *zns, int count) -{ - int status; - int ret = 0; - int error; - int read_in = 0; - - while (1) - { - char buf[NL_PKT_BUF_SIZE]; - struct iovec iov = { - .iov_base = buf, - .iov_len = sizeof buf - }; - struct sockaddr_nl snl; - struct msghdr msg = { - .msg_name = (void *) &snl, - .msg_namelen = sizeof snl, - .msg_iov = &iov, - .msg_iovlen = 1 - }; - struct nlmsghdr *h; - - if (count && read_in >= count) - return 0; - - status = recvmsg (nl->sock, &msg, 0); - if (status < 0) - { - if (errno == EINTR) - continue; - if (errno == EWOULDBLOCK || errno == EAGAIN) - break; - zlog (NULL, LOG_ERR, "%s recvmsg overrun: %s", - nl->name, safe_strerror(errno)); - /* - * In this case we are screwed. - * There is no good way to - * recover zebra at this point. - */ - exit (-1); - continue; - } - - if (status == 0) - { - zlog (NULL, LOG_ERR, "%s EOF", nl->name); - return -1; - } - - if (msg.msg_namelen != sizeof snl) - { - zlog (NULL, LOG_ERR, "%s sender address length error: length %d", - nl->name, msg.msg_namelen); - return -1; - } - - if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) - { - zlog_debug("%s: << netlink message dump [recv]", __func__); - zlog_hexdump(&msg, sizeof(msg)); - } - - read_in++; - for (h = (struct nlmsghdr *) buf; NLMSG_OK (h, (unsigned int) status); - h = NLMSG_NEXT (h, status)) - { - /* Finish of reading. */ - if (h->nlmsg_type == NLMSG_DONE) - return ret; - - /* Error handling. 
*/ - if (h->nlmsg_type == NLMSG_ERROR) - { - struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA (h); - int errnum = err->error; - int msg_type = err->msg.nlmsg_type; - - /* If the error field is zero, then this is an ACK */ - if (err->error == 0) - { - if (IS_ZEBRA_DEBUG_KERNEL) - { - zlog_debug ("%s: %s ACK: type=%s(%u), seq=%u, pid=%u", - __FUNCTION__, nl->name, - lookup (nlmsg_str, err->msg.nlmsg_type), - err->msg.nlmsg_type, err->msg.nlmsg_seq, - err->msg.nlmsg_pid); - } - - /* return if not a multipart message, otherwise continue */ - if (!(h->nlmsg_flags & NLM_F_MULTI)) - return 0; - continue; - } - - if (h->nlmsg_len < NLMSG_LENGTH (sizeof (struct nlmsgerr))) - { - zlog (NULL, LOG_ERR, "%s error: message truncated", - nl->name); - return -1; - } - - /* Deal with errors that occur because of races in link handling */ - if (nl == &zns->netlink_cmd - && ((msg_type == RTM_DELROUTE && - (-errnum == ENODEV || -errnum == ESRCH)) - || (msg_type == RTM_NEWROUTE && -errnum == EEXIST))) - { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("%s: error: %s type=%s(%u), seq=%u, pid=%u", - nl->name, safe_strerror (-errnum), - lookup (nlmsg_str, msg_type), - msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); - return 0; - } - - /* We see RTM_DELNEIGH when shutting down an interface with an IPv4 - * link-local. The kernel should have already deleted the neighbor - * so do not log these as an error. - */ - if (msg_type == RTM_DELNEIGH || - (nl == &zns->netlink_cmd && msg_type == RTM_NEWROUTE && - (-errnum == ESRCH || -errnum == ENETUNREACH))) - { - /* This is known to happen in some situations, don't log - * as error. 
- */ - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("%s error: %s, type=%s(%u), seq=%u, pid=%u", - nl->name, safe_strerror (-errnum), - lookup (nlmsg_str, msg_type), - msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); - } - else - zlog_err ("%s error: %s, type=%s(%u), seq=%u, pid=%u", - nl->name, safe_strerror (-errnum), - lookup (nlmsg_str, msg_type), - msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); - - return -1; - } - - /* OK we got netlink message. */ - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("netlink_parse_info: %s type %s(%u), len=%d, seq=%u, pid=%u", - nl->name, - lookup (nlmsg_str, h->nlmsg_type), h->nlmsg_type, - h->nlmsg_len, h->nlmsg_seq, h->nlmsg_pid); - - /* skip unsolicited messages originating from command socket - * linux sets the originators port-id for {NEW|DEL}ADDR messages, - * so this has to be checked here. */ - if (nl != &zns->netlink_cmd - && h->nlmsg_pid == zns->netlink_cmd.snl.nl_pid - && (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)) - { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("netlink_parse_info: %s packet comes from %s", - zns->netlink_cmd.name, nl->name); - continue; - } - - error = (*filter) (&snl, h, zns->ns_id); - if (error < 0) - { - zlog (NULL, LOG_ERR, "%s filter function error", nl->name); - ret = error; - } - } - - /* After error care. */ - if (msg.msg_flags & MSG_TRUNC) - { - zlog (NULL, LOG_ERR, "%s error: message truncated", nl->name); - continue; - } - if (status) - { - zlog (NULL, LOG_ERR, "%s error: data remnant size %d", nl->name, - status); - return -1; - } - } - return ret; -} - -/* Utility function for parse rtattr. 
*/ -static void -netlink_parse_rtattr (struct rtattr **tb, int max, struct rtattr *rta, - int len) -{ - while (RTA_OK (rta, len)) - { - if (rta->rta_type <= max) - tb[rta->rta_type] = rta; - rta = RTA_NEXT (rta, len); - } -} - -/* Utility function to parse hardware link-layer address and update ifp */ -static void -netlink_interface_update_hw_addr (struct rtattr **tb, struct interface *ifp) -{ - int i; - - if (tb[IFLA_ADDRESS]) - { - int hw_addr_len; - - hw_addr_len = RTA_PAYLOAD (tb[IFLA_ADDRESS]); - - if (hw_addr_len > INTERFACE_HWADDR_MAX) - zlog_warn ("Hardware address is too large: %d", hw_addr_len); - else - { - ifp->hw_addr_len = hw_addr_len; - memcpy (ifp->hw_addr, RTA_DATA (tb[IFLA_ADDRESS]), hw_addr_len); - - for (i = 0; i < hw_addr_len; i++) - if (ifp->hw_addr[i] != 0) - break; - - if (i == hw_addr_len) - ifp->hw_addr_len = 0; - else - ifp->hw_addr_len = hw_addr_len; - } - } -} - -static enum zebra_link_type -netlink_to_zebra_link_type (unsigned int hwt) -{ - switch (hwt) - { - case ARPHRD_ETHER: return ZEBRA_LLT_ETHER; - case ARPHRD_EETHER: return ZEBRA_LLT_EETHER; - case ARPHRD_AX25: return ZEBRA_LLT_AX25; - case ARPHRD_PRONET: return ZEBRA_LLT_PRONET; - case ARPHRD_IEEE802: return ZEBRA_LLT_IEEE802; - case ARPHRD_ARCNET: return ZEBRA_LLT_ARCNET; - case ARPHRD_APPLETLK: return ZEBRA_LLT_APPLETLK; - case ARPHRD_DLCI: return ZEBRA_LLT_DLCI; - case ARPHRD_ATM: return ZEBRA_LLT_ATM; - case ARPHRD_METRICOM: return ZEBRA_LLT_METRICOM; - case ARPHRD_IEEE1394: return ZEBRA_LLT_IEEE1394; - case ARPHRD_EUI64: return ZEBRA_LLT_EUI64; - case ARPHRD_INFINIBAND: return ZEBRA_LLT_INFINIBAND; - case ARPHRD_SLIP: return ZEBRA_LLT_SLIP; - case ARPHRD_CSLIP: return ZEBRA_LLT_CSLIP; - case ARPHRD_SLIP6: return ZEBRA_LLT_SLIP6; - case ARPHRD_CSLIP6: return ZEBRA_LLT_CSLIP6; - case ARPHRD_RSRVD: return ZEBRA_LLT_RSRVD; - case ARPHRD_ADAPT: return ZEBRA_LLT_ADAPT; - case ARPHRD_ROSE: return ZEBRA_LLT_ROSE; - case ARPHRD_X25: return ZEBRA_LLT_X25; - case ARPHRD_PPP: return 
ZEBRA_LLT_PPP; - case ARPHRD_CISCO: return ZEBRA_LLT_CHDLC; - case ARPHRD_LAPB: return ZEBRA_LLT_LAPB; - case ARPHRD_RAWHDLC: return ZEBRA_LLT_RAWHDLC; - case ARPHRD_TUNNEL: return ZEBRA_LLT_IPIP; - case ARPHRD_TUNNEL6: return ZEBRA_LLT_IPIP6; - case ARPHRD_FRAD: return ZEBRA_LLT_FRAD; - case ARPHRD_SKIP: return ZEBRA_LLT_SKIP; - case ARPHRD_LOOPBACK: return ZEBRA_LLT_LOOPBACK; - case ARPHRD_LOCALTLK: return ZEBRA_LLT_LOCALTLK; - case ARPHRD_FDDI: return ZEBRA_LLT_FDDI; - case ARPHRD_SIT: return ZEBRA_LLT_SIT; - case ARPHRD_IPDDP: return ZEBRA_LLT_IPDDP; - case ARPHRD_IPGRE: return ZEBRA_LLT_IPGRE; - case ARPHRD_PIMREG: return ZEBRA_LLT_PIMREG; - case ARPHRD_HIPPI: return ZEBRA_LLT_HIPPI; - case ARPHRD_ECONET: return ZEBRA_LLT_ECONET; - case ARPHRD_IRDA: return ZEBRA_LLT_IRDA; - case ARPHRD_FCPP: return ZEBRA_LLT_FCPP; - case ARPHRD_FCAL: return ZEBRA_LLT_FCAL; - case ARPHRD_FCPL: return ZEBRA_LLT_FCPL; - case ARPHRD_FCFABRIC: return ZEBRA_LLT_FCFABRIC; - case ARPHRD_IEEE802_TR: return ZEBRA_LLT_IEEE802_TR; - case ARPHRD_IEEE80211: return ZEBRA_LLT_IEEE80211; - case ARPHRD_IEEE802154: return ZEBRA_LLT_IEEE802154; -#ifdef ARPHRD_IP6GRE - case ARPHRD_IP6GRE: return ZEBRA_LLT_IP6GRE; -#endif -#ifdef ARPHRD_IEEE802154_PHY - case ARPHRD_IEEE802154_PHY: return ZEBRA_LLT_IEEE802154_PHY; -#endif - - default: return ZEBRA_LLT_UNKNOWN; - } -} - -#define parse_rtattr_nested(tb, max, rta) \ - netlink_parse_rtattr((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta)) - -static void -netlink_vrf_change (struct nlmsghdr *h, struct rtattr *tb, const char *name) -{ - struct ifinfomsg *ifi; - struct rtattr *linkinfo[IFLA_INFO_MAX+1]; - struct rtattr *attr[IFLA_VRF_MAX+1]; - struct vrf *vrf; - struct zebra_vrf *zvrf; - u_int32_t nl_table_id; - - ifi = NLMSG_DATA (h); - - memset (linkinfo, 0, sizeof linkinfo); - parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb); - - if (!linkinfo[IFLA_INFO_DATA]) { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("%s: IFLA_INFO_DATA missing from VRF message: %s", 
__func__, name); - return; - } - - memset (attr, 0, sizeof attr); - parse_rtattr_nested(attr, IFLA_VRF_MAX, linkinfo[IFLA_INFO_DATA]); - if (!attr[IFLA_VRF_TABLE]) { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("%s: IFLA_VRF_TABLE missing from VRF message: %s", __func__, name); - return; - } - - nl_table_id = *(u_int32_t *)RTA_DATA(attr[IFLA_VRF_TABLE]); - - if (h->nlmsg_type == RTM_NEWLINK) - { - /* If VRF already exists, we just return; status changes are handled - * against the VRF "interface". - */ - vrf = vrf_lookup ((vrf_id_t)ifi->ifi_index); - if (vrf && vrf->info) - return; - - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("RTM_NEWLINK for VRF %s(%u) table %u", - name, ifi->ifi_index, nl_table_id); - - /* - * vrf_get is implied creation if it does not exist - */ - vrf = vrf_get((vrf_id_t)ifi->ifi_index, name); // It would create vrf - if (!vrf) - { - zlog_err ("VRF %s id %u not created", name, ifi->ifi_index); - return; - } - - /* Enable the created VRF. */ - if (!vrf_enable (vrf)) - { - zlog_err ("Failed to enable VRF %s id %u", name, ifi->ifi_index); - return; - } - - /* - * This is the only place that we get the actual kernel table_id - * being used. We need it to set the table_id of the routes - * we are passing to the kernel.... And to throw some totally - * awesome parties. that too. - */ - zvrf = (struct zebra_vrf *)vrf->info; - zvrf->table_id = nl_table_id; - } - else //h->nlmsg_type == RTM_DELLINK - { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("RTM_DELLINK for VRF %s(%u)", name, ifi->ifi_index); - - vrf = vrf_lookup ((vrf_id_t)ifi->ifi_index); - - if (!vrf) - { - zlog_warn ("%s: vrf not found", __func__); - return; - } - - vrf_delete (vrf); - } -} - -/* Called from interface_lookup_netlink(). This function is only used - during bootstrap. 
*/ -static int -netlink_interface (struct sockaddr_nl *snl, struct nlmsghdr *h, - ns_id_t ns_id) -{ - int len; - struct ifinfomsg *ifi; - struct rtattr *tb[IFLA_MAX + 1]; - struct rtattr *linkinfo[IFLA_MAX + 1]; - struct interface *ifp; - char *name = NULL; - char *kind = NULL; - char *slave_kind = NULL; - int vrf_device = 0; - struct zebra_ns *zns; - vrf_id_t vrf_id = VRF_DEFAULT; - - zns = zebra_ns_lookup (ns_id); - ifi = NLMSG_DATA (h); - - if (h->nlmsg_type != RTM_NEWLINK) - return 0; - - len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg)); - if (len < 0) - return -1; - - if (ifi->ifi_family == AF_BRIDGE) - return 0; - - /* Looking up interface name. */ - memset (tb, 0, sizeof tb); - netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len); - -#ifdef IFLA_WIRELESS - /* check for wireless messages to ignore */ - if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) - { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__); - return 0; - } -#endif /* IFLA_WIRELESS */ - - if (tb[IFLA_IFNAME] == NULL) - return -1; - name = (char *) RTA_DATA (tb[IFLA_IFNAME]); - - if (tb[IFLA_LINKINFO]) - { - memset (linkinfo, 0, sizeof linkinfo); - parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); - - if (linkinfo[IFLA_INFO_KIND]) - kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]); - -#if HAVE_DECL_IFLA_INFO_SLAVE_KIND - if (linkinfo[IFLA_INFO_SLAVE_KIND]) - slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]); -#endif - - if (kind && strcmp(kind, "vrf") == 0) - { - vrf_device = 1; - netlink_vrf_change(h, tb[IFLA_LINKINFO], name); - vrf_id = (vrf_id_t)ifi->ifi_index; - } - } - - if (tb[IFLA_MASTER]) - { - if (slave_kind && (strcmp(slave_kind, "vrf") == 0)) - vrf_id = *(u_int32_t *)RTA_DATA(tb[IFLA_MASTER]); - } - - /* Add interface. 
*/ - ifp = if_get_by_name_vrf (name, vrf_id); - set_ifindex(ifp, ifi->ifi_index, zns); - ifp->flags = ifi->ifi_flags & 0x0000fffff; - if (vrf_device) - SET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); - ifp->mtu6 = ifp->mtu = *(uint32_t *) RTA_DATA (tb[IFLA_MTU]); - ifp->metric = 0; - ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; - - /* Hardware type and address. */ - ifp->ll_type = netlink_to_zebra_link_type (ifi->ifi_type); - netlink_interface_update_hw_addr (tb, ifp); - - if_add_update (ifp); - - return 0; -} - -/* Lookup interface IPv4/IPv6 address. */ -static int -netlink_interface_addr (struct sockaddr_nl *snl, struct nlmsghdr *h, - ns_id_t ns_id) -{ - int len; - struct ifaddrmsg *ifa; - struct rtattr *tb[IFA_MAX + 1]; - struct interface *ifp; - void *addr; - void *broad; - u_char flags = 0; - char *label = NULL; - struct zebra_ns *zns; - - zns = zebra_ns_lookup (ns_id); - ifa = NLMSG_DATA (h); - - if (ifa->ifa_family != AF_INET -#ifdef HAVE_IPV6 - && ifa->ifa_family != AF_INET6 -#endif /* HAVE_IPV6 */ - ) - return 0; - - if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR) - return 0; - - len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifaddrmsg)); - if (len < 0) - return -1; - - memset (tb, 0, sizeof tb); - netlink_parse_rtattr (tb, IFA_MAX, IFA_RTA (ifa), len); - - ifp = if_lookup_by_index_per_ns (zns, ifa->ifa_index); - if (ifp == NULL) - { - zlog_err ("netlink_interface_addr can't find interface by index %d", - ifa->ifa_index); - return -1; - } - - if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */ - { - char buf[BUFSIZ]; - zlog_debug ("netlink_interface_addr %s %s flags 0x%x:", - lookup (nlmsg_str, h->nlmsg_type), ifp->name, - ifa->ifa_flags); - if (tb[IFA_LOCAL]) - zlog_debug (" IFA_LOCAL %s/%d", - inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_LOCAL]), - buf, BUFSIZ), ifa->ifa_prefixlen); - if (tb[IFA_ADDRESS]) - zlog_debug (" IFA_ADDRESS %s/%d", - inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_ADDRESS]), - buf, 
BUFSIZ), ifa->ifa_prefixlen); - if (tb[IFA_BROADCAST]) - zlog_debug (" IFA_BROADCAST %s/%d", - inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_BROADCAST]), - buf, BUFSIZ), ifa->ifa_prefixlen); - if (tb[IFA_LABEL] && strcmp (ifp->name, RTA_DATA (tb[IFA_LABEL]))) - zlog_debug (" IFA_LABEL %s", (char *)RTA_DATA (tb[IFA_LABEL])); - - if (tb[IFA_CACHEINFO]) - { - struct ifa_cacheinfo *ci = RTA_DATA (tb[IFA_CACHEINFO]); - zlog_debug (" IFA_CACHEINFO pref %d, valid %d", - ci->ifa_prefered, ci->ifa_valid); - } - } - - /* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */ - if (tb[IFA_LOCAL] == NULL) - tb[IFA_LOCAL] = tb[IFA_ADDRESS]; - if (tb[IFA_ADDRESS] == NULL) - tb[IFA_ADDRESS] = tb[IFA_LOCAL]; - - /* local interface address */ - addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL); - - /* is there a peer address? */ - if (tb[IFA_ADDRESS] && - memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_ADDRESS]))) - { - broad = RTA_DATA(tb[IFA_ADDRESS]); - SET_FLAG (flags, ZEBRA_IFA_PEER); - } - else - /* seeking a broadcast address */ - broad = (tb[IFA_BROADCAST] ? RTA_DATA(tb[IFA_BROADCAST]) : NULL); - - /* addr is primary key, SOL if we don't have one */ - if (addr == NULL) - { - zlog_debug ("%s: NULL address", __func__); - return -1; - } - - /* Flags. */ - if (ifa->ifa_flags & IFA_F_SECONDARY) - SET_FLAG (flags, ZEBRA_IFA_SECONDARY); - - /* Label */ - if (tb[IFA_LABEL]) - label = (char *) RTA_DATA (tb[IFA_LABEL]); - - if (ifp && label && strcmp (ifp->name, label) == 0) - label = NULL; - - /* Register interface address to the interface. 
*/ - if (ifa->ifa_family == AF_INET) - { - if (h->nlmsg_type == RTM_NEWADDR) - connected_add_ipv4 (ifp, flags, - (struct in_addr *) addr, ifa->ifa_prefixlen, - (struct in_addr *) broad, label); - else - connected_delete_ipv4 (ifp, flags, - (struct in_addr *) addr, ifa->ifa_prefixlen, - (struct in_addr *) broad); - } -#ifdef HAVE_IPV6 - if (ifa->ifa_family == AF_INET6) - { - if (h->nlmsg_type == RTM_NEWADDR) - { - /* Only consider valid addresses; we'll not get a notification from - * the kernel till IPv6 DAD has completed, but at init time, Quagga - * does query for and will receive all addresses. - */ - if (!(ifa->ifa_flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))) - connected_add_ipv6 (ifp, flags, (struct in6_addr *) addr, - ifa->ifa_prefixlen, (struct in6_addr *) broad, label); - } - else - connected_delete_ipv6 (ifp, - (struct in6_addr *) addr, ifa->ifa_prefixlen, - (struct in6_addr *) broad); - } -#endif /* HAVE_IPV6 */ - - return 0; -} - /* Looking up routing table by netlink interface. */ static int netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, @@ -1154,23 +302,8 @@ netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, return 0; } -static const struct message rtproto_str[] = { - {RTPROT_REDIRECT, "redirect"}, - {RTPROT_KERNEL, "kernel"}, - {RTPROT_BOOT, "boot"}, - {RTPROT_STATIC, "static"}, - {RTPROT_GATED, "GateD"}, - {RTPROT_RA, "router advertisement"}, - {RTPROT_MRT, "MRT"}, - {RTPROT_ZEBRA, "Zebra"}, -#ifdef RTPROT_BIRD - {RTPROT_BIRD, "BIRD"}, -#endif /* RTPROT_BIRD */ - {0, NULL} -}; - /* Routing information change from the kernel. */ -static int +int netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, ns_id_t ns_id) { @@ -1208,7 +341,7 @@ netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE", rtm->rtm_family == AF_INET ? "ipv4" : "ipv6", rtm->rtm_type == RTN_UNICAST ? 
"unicast" : "multicast", - lookup (rtproto_str, rtm->rtm_protocol)); + nl_rtproto_to_str (rtm->rtm_protocol)); if (rtm->rtm_type != RTN_UNICAST) { @@ -1322,346 +455,91 @@ netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, { /* This is a multipath route */ - struct rib *rib; - struct rtnexthop *rtnh = - (struct rtnexthop *) RTA_DATA (tb[RTA_MULTIPATH]); - - len = RTA_PAYLOAD (tb[RTA_MULTIPATH]); - - rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - rib->type = ZEBRA_ROUTE_KERNEL; - rib->distance = 0; - rib->flags = 0; - rib->metric = metric; - rib->mtu = mtu; - rib->vrf_id = vrf_id; - rib->table = table; - rib->nexthop_num = 0; - rib->uptime = time (NULL); - - for (;;) - { - if (len < (int) sizeof (*rtnh) || rtnh->rtnh_len > len) - break; - - index = rtnh->rtnh_ifindex; - gate = 0; - if (rtnh->rtnh_len > sizeof (*rtnh)) - { - memset (tb, 0, sizeof (tb)); - netlink_parse_rtattr (tb, RTA_MAX, RTNH_DATA (rtnh), - rtnh->rtnh_len - sizeof (*rtnh)); - if (tb[RTA_GATEWAY]) - gate = RTA_DATA (tb[RTA_GATEWAY]); - } - - if (gate) - { - if (index) - rib_nexthop_ipv4_ifindex_add (rib, gate, src, index); - else - rib_nexthop_ipv4_add (rib, gate, src); - } - else - rib_nexthop_ifindex_add (rib, index); - - len -= NLMSG_ALIGN(rtnh->rtnh_len); - rtnh = RTNH_NEXT(rtnh); - } - - zserv_nexthop_num_warn(__func__, (const struct prefix *)&p, - rib->nexthop_num); - - if (rib->nexthop_num == 0) - XFREE (MTYPE_RIB, rib); - else - rib_add_multipath (AFI_IP, SAFI_UNICAST, &p, rib); - } - } - else - rib_delete (AFI_IP, SAFI_UNICAST, vrf_id, ZEBRA_ROUTE_KERNEL, 0, zebra_flags, - &p, gate, index, table); - } - - if (rtm->rtm_family == AF_INET6) - { - struct prefix p; - - p.family = AF_INET6; - memcpy (&p.u.prefix6, dest, 16); - p.prefixlen = rtm->rtm_dst_len; - - if (IS_ZEBRA_DEBUG_KERNEL) - { - char buf[PREFIX_STRLEN]; - zlog_debug ("%s %s vrf %u", - h->nlmsg_type == RTM_NEWROUTE ? 
"RTM_NEWROUTE" : "RTM_DELROUTE", - prefix2str (&p, buf, sizeof(buf)), vrf_id); - } - - if (h->nlmsg_type == RTM_NEWROUTE) - rib_add (AFI_IP6, SAFI_UNICAST, vrf_id, ZEBRA_ROUTE_KERNEL, - 0, 0, &p, gate, src, index, - table, metric, mtu, 0); - else - rib_delete (AFI_IP6, SAFI_UNICAST, vrf_id, ZEBRA_ROUTE_KERNEL, - 0, zebra_flags, &p, gate, index, table); - } - - return 0; -} - -static int -netlink_link_change (struct sockaddr_nl *snl, struct nlmsghdr *h, - ns_id_t ns_id) -{ - int len; - struct ifinfomsg *ifi; - struct rtattr *tb[IFLA_MAX + 1]; - struct rtattr *linkinfo[IFLA_MAX + 1]; - struct interface *ifp; - char *name = NULL; - char *kind = NULL; - char *slave_kind = NULL; - int vrf_device = 0; - struct zebra_ns *zns; - vrf_id_t vrf_id = VRF_DEFAULT; - - - zns = zebra_ns_lookup (ns_id); - ifi = NLMSG_DATA (h); - - if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)) - { - /* If this is not link add/delete message so print warning. */ - zlog_warn ("netlink_link_change: wrong kernel message %d", - h->nlmsg_type); - return 0; - } - - len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg)); - if (len < 0) - return -1; - - if (ifi->ifi_family == AF_BRIDGE) - return 0; - - /* Looking up interface name. 
*/ - memset (tb, 0, sizeof tb); - netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len); - -#ifdef IFLA_WIRELESS - /* check for wireless messages to ignore */ - if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0)) - { - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__); - return 0; - } -#endif /* IFLA_WIRELESS */ - - if (tb[IFLA_IFNAME] == NULL) - return -1; - name = (char *) RTA_DATA (tb[IFLA_IFNAME]); - - if (tb[IFLA_LINKINFO]) - { - memset (linkinfo, 0, sizeof linkinfo); - parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); - - if (linkinfo[IFLA_INFO_KIND]) - kind = RTA_DATA(linkinfo[IFLA_INFO_KIND]); - -#if HAVE_DECL_IFLA_INFO_SLAVE_KIND - if (linkinfo[IFLA_INFO_SLAVE_KIND]) - slave_kind = RTA_DATA(linkinfo[IFLA_INFO_SLAVE_KIND]); -#endif - - if (kind && strcmp(kind, "vrf") == 0) - { - vrf_device = 1; - netlink_vrf_change(h, tb[IFLA_LINKINFO], name); - vrf_id = (vrf_id_t)ifi->ifi_index; - } - } - - /* See if interface is present. */ - ifp = if_lookup_by_index_per_ns (zns, ifi->ifi_index); - - if (h->nlmsg_type == RTM_NEWLINK) - { - if (tb[IFLA_MASTER]) - { - if (slave_kind && (strcmp(slave_kind, "vrf") == 0)) - vrf_id = *(u_int32_t *)RTA_DATA(tb[IFLA_MASTER]); - } - - if (ifp == NULL || !CHECK_FLAG (ifp->status, ZEBRA_INTERFACE_ACTIVE)) - { - /* Add interface notification from kernel */ - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("RTM_NEWLINK for %s(%u) (ifp %p) vrf_id %u flags 0x%x", - name, ifi->ifi_index, ifp, vrf_id, ifi->ifi_flags); - - if (ifp == NULL) - { - /* unknown interface */ - ifp = if_get_by_name_vrf (name, vrf_id); - } - else - { - /* pre-configured interface, learnt now */ - if (ifp->vrf_id != vrf_id) - if_update_vrf (ifp, name, strlen(name), vrf_id); - } - - /* Update interface information. 
*/ - set_ifindex(ifp, ifi->ifi_index, zns); - ifp->flags = ifi->ifi_flags & 0x0000fffff; - if (vrf_device) - SET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); - ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]); - ifp->metric = 0; - ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN; - - netlink_interface_update_hw_addr (tb, ifp); - - /* Inform clients, install any configured addresses. */ - if_add_update (ifp); - } - else if (ifp->vrf_id != vrf_id) - { - /* VRF change for an interface. */ - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("RTM_NEWLINK vrf-change for %s(%u) " - "vrf_id %u -> %u flags 0x%x", - name, ifp->ifindex, ifp->vrf_id, - vrf_id, ifi->ifi_flags); - - if_handle_vrf_change (ifp, vrf_id); - } - else - { - /* Interface status change. */ - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("RTM_NEWLINK status for %s(%u) flags 0x%x", - name, ifp->ifindex, ifi->ifi_flags); + struct rib *rib; + struct rtnexthop *rtnh = + (struct rtnexthop *) RTA_DATA (tb[RTA_MULTIPATH]); - set_ifindex(ifp, ifi->ifi_index, zns); - ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]); - ifp->metric = 0; + len = RTA_PAYLOAD (tb[RTA_MULTIPATH]); - netlink_interface_update_hw_addr (tb, ifp); + rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); + rib->type = ZEBRA_ROUTE_KERNEL; + rib->distance = 0; + rib->flags = 0; + rib->metric = metric; + rib->mtu = mtu; + rib->vrf_id = vrf_id; + rib->table = table; + rib->nexthop_num = 0; + rib->uptime = time (NULL); - if (if_is_no_ptm_operative (ifp)) - { - ifp->flags = ifi->ifi_flags & 0x0000fffff; - if (!if_is_no_ptm_operative (ifp)) - if_down (ifp); - else if (if_is_operative (ifp)) - /* Must notify client daemons of new interface status. 
*/ - zebra_interface_up_update (ifp); - } - else - { - ifp->flags = ifi->ifi_flags & 0x0000fffff; - if (if_is_operative (ifp)) - if_up (ifp); - } - } - } - else - { - /* Delete interface notification from kernel */ - if (ifp == NULL) - { - zlog_warn ("RTM_DELLINK for unknown interface %s(%u)", - name, ifi->ifi_index); - return 0; - } + for (;;) + { + if (len < (int) sizeof (*rtnh) || rtnh->rtnh_len > len) + break; - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("RTM_DELLINK for %s(%u)", name, ifp->ifindex); + index = rtnh->rtnh_ifindex; + gate = 0; + if (rtnh->rtnh_len > sizeof (*rtnh)) + { + memset (tb, 0, sizeof (tb)); + netlink_parse_rtattr (tb, RTA_MAX, RTNH_DATA (rtnh), + rtnh->rtnh_len - sizeof (*rtnh)); + if (tb[RTA_GATEWAY]) + gate = RTA_DATA (tb[RTA_GATEWAY]); + } - UNSET_FLAG(ifp->status, ZEBRA_INTERFACE_VRF_LOOPBACK); + if (gate) + { + if (index) + rib_nexthop_ipv4_ifindex_add (rib, gate, src, index); + else + rib_nexthop_ipv4_add (rib, gate, src); + } + else + rib_nexthop_ifindex_add (rib, index); - if (!vrf_device) - if_delete_update (ifp); - } + len -= NLMSG_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } - return 0; -} + zserv_nexthop_num_warn(__func__, (const struct prefix *)&p, + rib->nexthop_num); -static int -netlink_information_fetch (struct sockaddr_nl *snl, struct nlmsghdr *h, - ns_id_t ns_id) -{ - /* JF: Ignore messages that aren't from the kernel */ - if ( snl->nl_pid != 0 ) - { - zlog ( NULL, LOG_ERR, "Ignoring message from pid %u", snl->nl_pid ); - return 0; + if (rib->nexthop_num == 0) + XFREE (MTYPE_RIB, rib); + else + rib_add_multipath (AFI_IP, SAFI_UNICAST, &p, rib); + } + } + else + rib_delete (AFI_IP, SAFI_UNICAST, vrf_id, ZEBRA_ROUTE_KERNEL, 0, zebra_flags, + &p, gate, index, table); } - switch (h->nlmsg_type) + if (rtm->rtm_family == AF_INET6) { - case RTM_NEWROUTE: - return netlink_route_change (snl, h, ns_id); - break; - case RTM_DELROUTE: - return netlink_route_change (snl, h, ns_id); - break; - case RTM_NEWLINK: - return 
netlink_link_change (snl, h, ns_id); - break; - case RTM_DELLINK: - return netlink_link_change (snl, h, ns_id); - break; - case RTM_NEWADDR: - return netlink_interface_addr (snl, h, ns_id); - break; - case RTM_DELADDR: - return netlink_interface_addr (snl, h, ns_id); - break; - default: - zlog_warn ("Unknown netlink nlmsg_type %d vrf %u\n", h->nlmsg_type, - ns_id); - break; - } - return 0; -} - -/* Interface lookup by netlink socket. */ -int -interface_lookup_netlink (struct zebra_ns *zns) -{ - int ret; + struct prefix p; - /* Get interface information. */ - ret = netlink_request (AF_PACKET, RTM_GETLINK, &zns->netlink_cmd); - if (ret < 0) - return ret; - ret = netlink_parse_info (netlink_interface, &zns->netlink_cmd, zns, 0); - if (ret < 0) - return ret; + p.family = AF_INET6; + memcpy (&p.u.prefix6, dest, 16); + p.prefixlen = rtm->rtm_dst_len; - /* Get IPv4 address of the interfaces. */ - ret = netlink_request (AF_INET, RTM_GETADDR, &zns->netlink_cmd); - if (ret < 0) - return ret; - ret = netlink_parse_info (netlink_interface_addr, &zns->netlink_cmd, zns, 0); - if (ret < 0) - return ret; + if (IS_ZEBRA_DEBUG_KERNEL) + { + char buf[PREFIX_STRLEN]; + zlog_debug ("%s %s vrf %u", + h->nlmsg_type == RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE", + prefix2str (&p, buf, sizeof(buf)), vrf_id); + } -#ifdef HAVE_IPV6 - /* Get IPv6 address of the interfaces. 
*/ - ret = netlink_request (AF_INET6, RTM_GETADDR, &zns->netlink_cmd); - if (ret < 0) - return ret; - ret = netlink_parse_info (netlink_interface_addr, &zns->netlink_cmd, zns, 0); - if (ret < 0) - return ret; -#endif /* HAVE_IPV6 */ + if (h->nlmsg_type == RTM_NEWROUTE) + rib_add (AFI_IP6, SAFI_UNICAST, vrf_id, ZEBRA_ROUTE_KERNEL, + 0, 0, &p, gate, src, index, + table, metric, mtu, 0); + else + rib_delete (AFI_IP6, SAFI_UNICAST, vrf_id, ZEBRA_ROUTE_KERNEL, + 0, zebra_flags, &p, gate, index, table); + } return 0; } @@ -1694,160 +572,6 @@ netlink_route_read (struct zebra_ns *zns) return 0; } -/* Utility function comes from iproute2. - Authors: Alexey Kuznetsov, */ -int -addattr_l (struct nlmsghdr *n, unsigned int maxlen, int type, void *data, int alen) -{ - int len; - struct rtattr *rta; - - len = RTA_LENGTH (alen); - - if (NLMSG_ALIGN (n->nlmsg_len) + RTA_ALIGN (len) > maxlen) - return -1; - - rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len)); - rta->rta_type = type; - rta->rta_len = len; - memcpy (RTA_DATA (rta), data, alen); - n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + RTA_ALIGN (len); - - return 0; -} - -int -rta_addattr_l (struct rtattr *rta, unsigned int maxlen, int type, - void *data, int alen) -{ - unsigned int len; - struct rtattr *subrta; - - len = RTA_LENGTH (alen); - - if (RTA_ALIGN (rta->rta_len) + RTA_ALIGN (len) > maxlen) - return -1; - - subrta = (struct rtattr *) (((char *) rta) + RTA_ALIGN (rta->rta_len)); - subrta->rta_type = type; - subrta->rta_len = len; - memcpy (RTA_DATA (subrta), data, alen); - rta->rta_len = NLMSG_ALIGN (rta->rta_len) + RTA_ALIGN (len); - - return 0; -} - -/* Utility function comes from iproute2. 
- Authors: Alexey Kuznetsov, */ -int -addattr32 (struct nlmsghdr *n, unsigned int maxlen, int type, int data) -{ - return addattr_l(n, maxlen, type, &data, sizeof(u_int32_t)); -} - -/* Some more utility functions from iproute2 */ -static struct rtattr * -addattr_nest(struct nlmsghdr *n, int maxlen, int type) -{ - struct rtattr *nest = NLMSG_TAIL(n); - - addattr_l(n, maxlen, type, NULL, 0); - return nest; -} - -static int -addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest) -{ - nest->rta_len = (u_char *)NLMSG_TAIL(n) - (u_char *)nest; - return n->nlmsg_len; -} - -static struct rtattr * -rta_nest(struct rtattr *rta, int maxlen, int type) -{ - struct rtattr *nest = RTA_TAIL(rta); - - rta_addattr_l(rta, maxlen, type, NULL, 0); - return nest; -} - -static int -rta_nest_end(struct rtattr *rta, struct rtattr *nest) -{ - nest->rta_len = (u_char *)RTA_TAIL(rta) - (u_char *)nest; - return rta->rta_len; -} - -static int -netlink_talk_filter (struct sockaddr_nl *snl, struct nlmsghdr *h, - ns_id_t ns_id) -{ - zlog_warn ("netlink_talk: ignoring message type 0x%04x NS %u", h->nlmsg_type, - ns_id); - return 0; -} - -/* sendmsg() to netlink socket then recvmsg(). */ -static int -netlink_talk (struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns) -{ - int status; - struct sockaddr_nl snl; - struct iovec iov = { - .iov_base = (void *) n, - .iov_len = n->nlmsg_len - }; - struct msghdr msg = { - .msg_name = (void *) &snl, - .msg_namelen = sizeof snl, - .msg_iov = &iov, - .msg_iovlen = 1, - }; - int save_errno; - - memset (&snl, 0, sizeof snl); - snl.nl_family = AF_NETLINK; - - n->nlmsg_seq = ++nl->seq; - - /* Request an acknowledgement by setting NLM_F_ACK */ - n->nlmsg_flags |= NLM_F_ACK; - - if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug ("netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x", - nl->name, - lookup (nlmsg_str, n->nlmsg_type), n->nlmsg_type, - n->nlmsg_len, n->nlmsg_seq, n->nlmsg_flags); - - /* Send message to netlink interface. 
*/ - if (zserv_privs.change (ZPRIVS_RAISE)) - zlog (NULL, LOG_ERR, "Can't raise privileges"); - status = sendmsg (nl->sock, &msg, 0); - save_errno = errno; - if (zserv_privs.change (ZPRIVS_LOWER)) - zlog (NULL, LOG_ERR, "Can't lower privileges"); - - if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) - { - zlog_debug("%s: >> netlink message dump [sent]", __func__); - zlog_hexdump(&msg, sizeof(msg)); - } - - if (status < 0) - { - zlog (NULL, LOG_ERR, "netlink_talk sendmsg() error: %s", - safe_strerror (save_errno)); - return -1; - } - - - /* - * Get reply from netlink socket. - * The reply should either be an acknowlegement or an error. - */ - return netlink_parse_info (netlink_talk_filter, nl, zns, 0); -} - static void _netlink_route_nl_add_gateway_info (u_char route_family, u_char gw_family, struct nlmsghdr *nlmsg, @@ -2344,7 +1068,7 @@ _netlink_route_debug( char buf[PREFIX_STRLEN]; zlog_debug ("netlink_route_multipath() (%s): %s %s vrf %u type %s", routedesc, - lookup (nlmsg_str, cmd), + nl_msg_type_to_str (cmd), prefix2str (p, buf, sizeof(buf)), zvrf->vrf_id, (nexthop) ? nexthop_type_to_str (nexthop->type) : "UNK"); } @@ -2358,7 +1082,7 @@ _netlink_mpls_debug( { if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug ("netlink_mpls_multipath() (%s): %s %u/20", - routedesc, lookup (nlmsg_str, cmd), label); + routedesc, nl_msg_type_to_str (cmd), label); } static int @@ -2730,70 +1454,6 @@ kernel_delete_ipv6 (struct prefix *p, struct rib *rib) } #endif /* HAVE_IPV6 */ -/* Interface address modification. */ -static int -netlink_address (int cmd, int family, struct interface *ifp, - struct connected *ifc) -{ - int bytelen; - struct prefix *p; - - struct - { - struct nlmsghdr n; - struct ifaddrmsg ifa; - char buf[NL_PKT_BUF_SIZE]; - } req; - - struct zebra_ns *zns = zebra_ns_lookup (NS_DEFAULT); - - p = ifc->address; - memset (&req, 0, sizeof req - NL_PKT_BUF_SIZE); - - bytelen = (family == AF_INET ? 
4 : 16); - - req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifaddrmsg)); - req.n.nlmsg_flags = NLM_F_REQUEST; - req.n.nlmsg_type = cmd; - req.ifa.ifa_family = family; - - req.ifa.ifa_index = ifp->ifindex; - req.ifa.ifa_prefixlen = p->prefixlen; - - addattr_l (&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen); - - if (family == AF_INET && cmd == RTM_NEWADDR) - { - if (!CONNECTED_PEER(ifc) && ifc->destination) - { - p = ifc->destination; - addattr_l (&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix, - bytelen); - } - } - - if (CHECK_FLAG (ifc->flags, ZEBRA_IFA_SECONDARY)) - SET_FLAG (req.ifa.ifa_flags, IFA_F_SECONDARY); - - if (ifc->label) - addattr_l (&req.n, sizeof req, IFA_LABEL, ifc->label, - strlen (ifc->label) + 1); - - return netlink_talk (&req.n, &zns->netlink_cmd, zns); -} - -int -kernel_address_add_ipv4 (struct interface *ifp, struct connected *ifc) -{ - return netlink_address (RTM_NEWADDR, AF_INET, ifp, ifc); -} - -int -kernel_address_delete_ipv4 (struct interface *ifp, struct connected *ifc) -{ - return netlink_address (RTM_DELADDR, AF_INET, ifp, ifc); -} - int kernel_neigh_update (int add, int ifindex, uint32_t addr, char *lla, int llalen) { @@ -2992,116 +1652,3 @@ clear_nhlfe_installed (zebra_lsp_t *lsp) UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); } } - -extern struct thread_master *master; - -/* Kernel route reflection. 
*/ -static int -kernel_read (struct thread *thread) -{ - struct zebra_ns *zns = (struct zebra_ns *)THREAD_ARG (thread); - netlink_parse_info (netlink_information_fetch, &zns->netlink, zns, 5); - zns->t_netlink = thread_add_read (zebrad.master, kernel_read, zns, - zns->netlink.sock); - - return 0; -} - -/* Filter out messages from self that occur on listener socket, - caused by our actions on the command socket - */ -static void netlink_install_filter (int sock, __u32 pid) -{ - struct sock_filter filter[] = { - /* 0: ldh [4] */ - BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)), - /* 1: jeq 0x18 jt 3 jf 6 */ - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 1, 0), - /* 2: jeq 0x19 jt 3 jf 6 */ - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 0, 3), - /* 3: ldw [12] */ - BPF_STMT(BPF_LD|BPF_ABS|BPF_W, offsetof(struct nlmsghdr, nlmsg_pid)), - /* 4: jeq XX jt 5 jf 6 */ - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htonl(pid), 0, 1), - /* 5: ret 0 (skip) */ - BPF_STMT(BPF_RET|BPF_K, 0), - /* 6: ret 0xffff (keep) */ - BPF_STMT(BPF_RET|BPF_K, 0xffff), - }; - - struct sock_fprog prog = { - .len = array_size(filter), - .filter = filter, - }; - - if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0) - zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno)); -} - -/* Exported interface function. This function simply calls - netlink_socket (). */ -void -kernel_init (struct zebra_ns *zns) -{ - unsigned long groups; - - groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR; -#ifdef HAVE_IPV6 - groups |= RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR; -#endif /* HAVE_IPV6 */ - netlink_socket (&zns->netlink, groups, zns->ns_id); - netlink_socket (&zns->netlink_cmd, 0, zns->ns_id); - - /* Register kernel socket. 
*/ - if (zns->netlink.sock > 0) - { - /* Only want non-blocking on the netlink event socket */ - if (fcntl (zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0) - zlog_err ("Can't set %s socket flags: %s", zns->netlink.name, - safe_strerror (errno)); - - /* Set receive buffer size if it's set from command line */ - if (nl_rcvbufsize) - netlink_recvbuf (&zns->netlink, nl_rcvbufsize); - - netlink_install_filter (zns->netlink.sock, zns->netlink_cmd.snl.nl_pid); - zns->t_netlink = thread_add_read (zebrad.master, kernel_read, zns, - zns->netlink.sock); - } -} - -void -kernel_terminate (struct zebra_ns *zns) -{ - THREAD_READ_OFF (zns->t_netlink); - - if (zns->netlink.sock >= 0) - { - close (zns->netlink.sock); - zns->netlink.sock = -1; - } - - if (zns->netlink_cmd.sock >= 0) - { - close (zns->netlink_cmd.sock); - zns->netlink_cmd.sock = -1; - } -} - -/* - * nl_msg_type_to_str - */ -const char * -nl_msg_type_to_str (uint16_t msg_type) -{ - return lookup (nlmsg_str, msg_type); -} - -/* - * nl_rtproto_to_str - */ -const char * -nl_rtproto_to_str (u_char rtproto) -{ - return lookup (rtproto_str, rtproto); -} diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h index 55af237b64..7183525fba 100644 --- a/zebra/rt_netlink.h +++ b/zebra/rt_netlink.h @@ -26,29 +26,15 @@ #include "zebra/zebra_mpls.h" -#define NL_PKT_BUF_SIZE 8192 #define NL_DEFAULT_ROUTE_METRIC 20 -extern int -addattr32 (struct nlmsghdr *n, unsigned int maxlen, int type, int data); -extern int -addattr_l (struct nlmsghdr *n, unsigned int maxlen, int type, void *data, int alen); - -extern int -rta_addattr_l (struct rtattr *rta, unsigned int maxlen, int type, void *data, int alen); - -extern const char * -nl_msg_type_to_str (uint16_t msg_type); - -extern const char * -nl_rtproto_to_str (u_char rtproto); - extern void clear_nhlfe_installed (zebra_lsp_t *lsp); extern int netlink_mpls_multipath (int cmd, zebra_lsp_t *lsp); -extern int interface_lookup_netlink (struct zebra_ns *zns); +extern int netlink_route_change (struct 
sockaddr_nl *snl, struct nlmsghdr *h, + ns_id_t ns_id); extern int netlink_route_read (struct zebra_ns *zns); #endif /* HAVE_NETLINK */ diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c index f7d0136286..02880586b1 100644 --- a/zebra/zebra_fpm_netlink.c +++ b/zebra/zebra_fpm_netlink.c @@ -28,13 +28,14 @@ #include "log.h" #include "rib.h" -#include "zserv.h" -#include "zebra_ns.h" -#include "zebra_vrf.h" -#include "rt_netlink.h" +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/kernel_netlink.h" +#include "zebra/rt_netlink.h" #include "nexthop.h" -#include "zebra_fpm_private.h" +#include "zebra/zebra_fpm_private.h" /* * addr_to_a -- 2.39.5