diff options
| -rw-r--r-- | bgpd/bgp_route.c | 2 | ||||
| -rw-r--r-- | include/linux/netlink.h | 247 | ||||
| -rw-r--r-- | include/subdir.am | 1 | ||||
| -rw-r--r-- | lib/sockunion.c | 3 | ||||
| -rw-r--r-- | zebra/kernel_netlink.c | 117 |
5 files changed, 363 insertions, 7 deletions
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 084cdfeb4b..0b1deba517 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -1318,6 +1318,8 @@ void bgp_attr_add_gshut_community(struct attr *attr) old = attr->community; gshut = community_str2com("graceful-shutdown"); + assert(gshut); + if (old) { merge = community_merge(community_dup(old), gshut); diff --git a/include/linux/netlink.h b/include/linux/netlink.h new file mode 100644 index 0000000000..0b2c29bd08 --- /dev/null +++ b/include/linux/netlink.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NETLINK_H +#define __LINUX_NETLINK_H + +#include <linux/kernel.h> +#include <linux/socket.h> /* for __kernel_sa_family_t */ +#include <linux/types.h> + +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_UNUSED 1 /* Unused number */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ +#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ +#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ +#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ +#define NETLINK_XFRM 6 /* ipsec */ +#define NETLINK_SELINUX 7 /* SELinux event notifications */ +#define NETLINK_ISCSI 8 /* Open-iSCSI */ +#define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 +#define NETLINK_CONNECTOR 11 +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ +#define NETLINK_IP6_FW 13 +#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ +#define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ +#define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 +#define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_SMC 22 /* SMC monitoring */ + +#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG + +#define MAX_LINKS 32 + +struct sockaddr_nl { + __kernel_sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* zero */ + __u32 nl_pid; /* port ID */ + __u32 nl_groups; /* multicast groups mask */ +}; + +struct nlmsghdr { + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __u32 nlmsg_pid; /* Sending process port ID */ +}; + +/* Flags values */ + +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ + +/* Modifiers to GET request */ +#define NLM_F_ROOT 0x100 /* specify tree root */ +#define NLM_F_MATCH 0x200 /* return all matching */ +#define NLM_F_ATOMIC 0x400 /* atomic GET */ +#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) + +/* Modifiers to NEW request */ +#define NLM_F_REPLACE 0x100 /* Override existing */ +#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ +#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ +#define NLM_F_APPEND 0x800 /* Add to end of list */ + +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ + +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + +/* + 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL + 4.4BSD CHANGE NLM_F_REPLACE + + True CHANGE NLM_F_CREATE|NLM_F_REPLACE + Append NLM_F_CREATE + Check NLM_F_EXCL + */ + +#define NLMSG_ALIGNTO 4U +#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) +#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) +#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ + (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) +#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len <= (len)) +#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) + +#define NLMSG_NOOP 0x1 /* Nothing. */ +#define NLMSG_ERROR 0x2 /* Error */ +#define NLMSG_DONE 0x3 /* End of a dump */ +#define NLMSG_OVERRUN 0x4 /* Data lost */ + +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + +struct nlmsgerr { + int error; + struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 +}; + +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 +#define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 + +struct nl_pktinfo { + __u32 group; +}; + +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) + +#define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +enum { + NETLINK_UNCONNECTED = 0, + NETLINK_CONNECTED, +}; + +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr { + __u16 nla_len; + __u16 nla_type; +}; + +/* + * nla_type (16 bits) + * +---+---+-------------------------------+ + * | N | O | Attribute Type | + * +---+---+-------------------------------+ + * N := Carries nested attributes + * O := Payload stored in network byte order + * + * Note: The N and O flag are mutually exclusive. + */ +#define NLA_F_NESTED (1 << 15) +#define NLA_F_NET_BYTEORDER (1 << 14) +#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; + +#endif /* __LINUX_NETLINK_H */ diff --git a/include/subdir.am b/include/subdir.am index db5ed06c61..731785d4b4 100644 --- a/include/subdir.am +++ b/include/subdir.am @@ -4,6 +4,7 @@ noinst_HEADERS += \ include/linux/lwtunnel.h \ include/linux/mpls_iptunnel.h \ include/linux/neighbour.h \ + include/linux/netlink.h \ include/linux/rtnetlink.h \ include/linux/socket.h \ include/linux/net_namespace.h \ diff --git a/lib/sockunion.c b/lib/sockunion.c index 28a7f647cb..44378b5363 100644 --- a/lib/sockunion.c +++ b/lib/sockunion.c @@ -46,6 +46,9 @@ int str2sockunion(const char *str, union sockunion *su) { int ret; + if (str == NULL) + return -1; + memset(su, 0, sizeof(union sockunion)); ret = inet_pton(AF_INET, str, &su->sin.sin_addr); diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 0e79b82533..6d164cfdab 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -498,6 +498,76 @@ const char *nl_rttype_to_str(uint8_t rttype) return lookup_msg(rttype_str, rttype, ""); } +#define NL_OK(nla, len) \ + ((len) >= (int)sizeof(struct nlattr) \ + && (nla)->nla_len >= sizeof(struct nlattr) \ + && (nla)->nla_len <= (len)) +#define NL_NEXT(nla, attrlen) \ + ((attrlen) -= RTA_ALIGN((nla)->nla_len), \ + (struct nlattr *)(((char *)(nla)) + RTA_ALIGN((nla)->nla_len))) +#define NL_RTA(r) \ + ((struct nlattr *)(((char *)(r)) \ + + NLMSG_ALIGN(sizeof(struct nlmsgerr)))) + +static void netlink_parse_nlattr(struct nlattr **tb, int max, + struct nlattr *nla, int len) +{ + while (NL_OK(nla, len)) { + if (nla->nla_type <= max) + tb[nla->nla_type] = nla; + nla = NL_NEXT(nla, len); + } +} + +static void netlink_parse_extended_ack(struct nlmsghdr *h) +{ + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1]; + const struct nlmsgerr *err = + (const struct nlmsgerr *)((uint8_t *)h + + NLMSG_ALIGN( + sizeof(struct nlmsghdr))); + const struct nlmsghdr *err_nlh = NULL; + uint32_t hlen = sizeof(*err); + const char *msg = NULL; + uint32_t off = 0; + + if (!(h->nlmsg_flags & NLM_F_CAPPED)) + hlen += h->nlmsg_len - NLMSG_ALIGN(sizeof(struct nlmsghdr)); + + memset(tb, 0, sizeof(tb)); + netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, NL_RTA(h), hlen); + + if (tb[NLMSGERR_ATTR_MSG]) + msg = (const char *)RTA_DATA(tb[NLMSGERR_ATTR_MSG]); + + if (tb[NLMSGERR_ATTR_OFFS]) { + off = *(uint32_t *)RTA_DATA(tb[NLMSGERR_ATTR_OFFS]); + + if (off > h->nlmsg_len) { + zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS\n"); + off = 0; + } else if (!(h->nlmsg_flags & NLM_F_CAPPED)) { + /* + * Header of failed message + * we are not doing anything currently with it + * but noticing it for later. + */ + err_nlh = &err->msg; + zlog_warn("%s: Received %d extended Ack", + __PRETTY_FUNCTION__, err_nlh->nlmsg_type); + } + } + + if (msg && *msg != '\0') { + bool is_err = !!err->error; + + if (is_err) + zlog_err("Extended Error: %s", msg); + else + zlog_warn("Extended Warning: %s", msg); + } +} + /* * netlink_parse_info * @@ -582,6 +652,23 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), int errnum = err->error; int msg_type = err->msg.nlmsg_type; + if (h->nlmsg_len + < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + zlog_err("%s error: message truncated", + nl->name); + return -1; + } + + /* + * Parse the extended information before + * we actually handle it. + * At this point in time we do not + * do anything other than report the + * issue. + */ + if (h->nlmsg_flags & NLM_F_ACK_TLVS) + netlink_parse_extended_ack(h); + /* If the error field is zero, then this is an * ACK */ if (err->error == 0) { @@ -603,13 +690,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), continue; } - if (h->nlmsg_len - < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { - zlog_err("%s error: message truncated", - nl->name); - return -1; - } - /* Deal with errors that occur because of races * in link handling */ if (nl == &zns->netlink_cmd @@ -836,6 +916,9 @@ int netlink_request(struct nlsock *nl, struct nlmsghdr *n) void kernel_init(struct zebra_ns *zns) { unsigned long groups; +#if defined SOL_NETLINK + int one, ret; +#endif /* * Initialize netlink sockets @@ -866,6 +949,25 @@ void kernel_init(struct zebra_ns *zns) zns->netlink_cmd.sock = -1; netlink_socket(&zns->netlink_cmd, 0, zns->ns_id); + /* + * SOL_NETLINK is not available on all platforms yet + * apparently. It's in bits/socket.h which I am not + * sure that we want to pull into our build system. + */ +#if defined SOL_NETLINK + /* + * Let's tell the kernel that we want to receive extended + * ACKS over our command socket + */ + one = 1; + ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended ACK failed : %d %s", + errno, safe_strerror(errno)); +#endif + /* Register kernel socket. */ if (zns->netlink.sock > 0) { /* Only want non-blocking on the netlink event socket */ @@ -880,6 +982,7 @@ void kernel_init(struct zebra_ns *zns) netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid); zns->t_netlink = NULL; + thread_add_read(zebrad.master, kernel_read, zns, zns->netlink.sock, &zns->t_netlink); } |
