diff options
Diffstat (limited to 'zebra/rt_netlink.c')
| -rw-r--r-- | zebra/rt_netlink.c | 1089 |
1 files changed, 844 insertions, 245 deletions
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 43e44cad16..640802fe31 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -27,6 +27,7 @@ #include <linux/mpls_iptunnel.h> #include <linux/neighbour.h> #include <linux/rtnetlink.h> +#include <linux/nexthop.h> /* Hack for GNU libc version 2. */ #ifndef MSG_TRUNC @@ -49,6 +50,7 @@ #include "vty.h" #include "mpls.h" #include "vxlan.h" +#include "printfrr.h" #include "zebra/zapi_msg.h" #include "zebra/zebra_ns.h" @@ -62,6 +64,7 @@ #include "zebra/zebra_mpls.h" #include "zebra/kernel_netlink.h" #include "zebra/rt_netlink.h" +#include "zebra/zebra_nhg.h" #include "zebra/zebra_mroute.h" #include "zebra/zebra_vxlan.h" #include "zebra/zebra_errors.h" @@ -72,6 +75,8 @@ static vlanid_t filter_vlan = 0; +static bool supports_nh; + struct gw_family_t { uint16_t filler; uint16_t family; @@ -186,6 +191,7 @@ static inline int zebra2proto(int proto) proto = RTPROT_OPENFABRIC; break; case ZEBRA_ROUTE_TABLE: + case ZEBRA_ROUTE_NHG: proto = RTPROT_ZEBRA; break; default: @@ -205,7 +211,7 @@ static inline int zebra2proto(int proto) return proto; } -static inline int proto2zebra(int proto, int family) +static inline int proto2zebra(int proto, int family, bool is_nexthop) { switch (proto) { case RTPROT_BABEL: @@ -249,6 +255,12 @@ static inline int proto2zebra(int proto, int family) case RTPROT_OPENFABRIC: proto = ZEBRA_ROUTE_OPENFABRIC; break; + case RTPROT_ZEBRA: + if (is_nexthop) { + proto = ZEBRA_ROUTE_NHG; + break; + } + /* Intentional fall thru */ default: /* * When a user adds a new protocol this will show up @@ -319,6 +331,169 @@ static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels) return num_labels; } +static struct nexthop +parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb, + enum blackhole_type bh_type, int index, void *prefsrc, + void *gate, afi_t afi, vrf_id_t vrf_id) +{ + struct interface *ifp = NULL; + struct nexthop nh = {0}; + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + int num_labels = 0; + + vrf_id_t nh_vrf_id = vrf_id; + size_t sz = (afi == AFI_IP) ? 4 : 16; + + if (bh_type == BLACKHOLE_UNSPEC) { + if (index && !gate) + nh.type = NEXTHOP_TYPE_IFINDEX; + else if (index && gate) + nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX + : NEXTHOP_TYPE_IPV6_IFINDEX; + else if (!index && gate) + nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4 + : NEXTHOP_TYPE_IPV6; + else { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = bh_type; + } + } else { + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = bh_type; + } + nh.ifindex = index; + if (prefsrc) + memcpy(&nh.src, prefsrc, sz); + if (gate) + memcpy(&nh.gate, gate, sz); + + if (index) { + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index); + if (ifp) + nh_vrf_id = ifp->vrf_id; + } + nh.vrf_id = nh_vrf_id; + + if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_MPLS) { + num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels); + } + + if (rtm->rtm_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels); + + return nh; +} + +static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id, + struct route_entry *re, + struct rtmsg *rtm, + struct rtnexthop *rtnh, + struct rtattr **tb, + void *prefsrc, vrf_id_t vrf_id) +{ + void *gate = NULL; + struct interface *ifp = NULL; + int index = 0; + /* MPLS labels */ + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + int num_labels = 0; + struct rtattr *rtnh_tb[RTA_MAX + 1] = {}; + + int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); + vrf_id_t nh_vrf_id = vrf_id; + + re->ng = nexthop_group_new(); + + for (;;) { + struct nexthop *nh = NULL; + + if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len) + break; + + index = rtnh->rtnh_ifindex; + if (index) { + /* + * Yes we are looking this up + * for every nexthop and just + * using the last one looked + * up right now + */ + ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), + index); + if (ifp) + nh_vrf_id = ifp->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT", + __PRETTY_FUNCTION__, index); + nh_vrf_id = VRF_DEFAULT; + } + } else + nh_vrf_id = vrf_id; + + if (rtnh->rtnh_len > sizeof(*rtnh)) { + memset(rtnh_tb, 0, sizeof(rtnh_tb)); + + netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh), + rtnh->rtnh_len - sizeof(*rtnh)); + if (rtnh_tb[RTA_GATEWAY]) + gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]); + if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE] + && *(uint16_t *)RTA_DATA(rtnh_tb[RTA_ENCAP_TYPE]) + == LWTUNNEL_ENCAP_MPLS) { + num_labels = parse_encap_mpls( + rtnh_tb[RTA_ENCAP], labels); + } + } + + if (gate && rtm->rtm_family == AF_INET) { + if (index) + nh = route_entry_nexthop_ipv4_ifindex_add( + re, gate, prefsrc, index, nh_vrf_id); + else + nh = route_entry_nexthop_ipv4_add( + re, gate, prefsrc, nh_vrf_id); + } else if (gate && rtm->rtm_family == AF_INET6) { + if (index) + nh = route_entry_nexthop_ipv6_ifindex_add( + re, gate, index, nh_vrf_id); + else + nh = route_entry_nexthop_ipv6_add(re, gate, + nh_vrf_id); + } else + nh = route_entry_nexthop_ifindex_add(re, index, + nh_vrf_id); + + if (nh) { + if (num_labels) + nexthop_add_labels(nh, ZEBRA_LSP_STATIC, + num_labels, labels); + + if (rtnh->rtnh_flags & RTNH_F_ONLINK) + SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK); + } + + if (rtnh->rtnh_len == 0) + break; + + len -= NLMSG_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } + + uint8_t nhop_num = nexthop_group_nexthop_num(re->ng); + + if (!nhop_num) + nexthop_group_delete(&re->ng); + + return nhop_num; +} + /* Looking up routing table by netlink interface. */ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, int startup) @@ -340,6 +515,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, uint32_t mtu = 0; uint8_t distance = 0; route_tag_t tag = 0; + uint32_t nhe_id = 0; void *dest = NULL; void *gate = NULL; @@ -347,10 +523,6 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, void *src = NULL; /* IPv6 srcdest source prefix */ enum blackhole_type bh_type = BLACKHOLE_UNSPEC; - /* MPLS labels */ - mpls_label_t labels[MPLS_MAX_LABELS] = {0}; - int num_labels = 0; - rtm = NLMSG_DATA(h); if (startup && h->nlmsg_type != RTM_NEWROUTE) @@ -423,7 +595,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, /* Route which inserted by Zebra. */ if (is_selfroute(rtm->rtm_protocol)) { flags |= ZEBRA_FLAG_SELFROUTE; - proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family); + proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false); } if (tb[RTA_OIF]) index = *(int *)RTA_DATA(tb[RTA_OIF]); @@ -444,6 +616,9 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, if (tb[RTA_GATEWAY]) gate = RTA_DATA(tb[RTA_GATEWAY]); + if (tb[RTA_NH_ID]) + nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]); + if (tb[RTA_PRIORITY]) metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]); @@ -547,75 +722,24 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, afi = AFI_IP6; if (h->nlmsg_type == RTM_NEWROUTE) { - struct interface *ifp; - vrf_id_t nh_vrf_id = vrf_id; if (!tb[RTA_MULTIPATH]) { - struct nexthop nh; - size_t sz = (afi == AFI_IP) ? 4 : 16; - - memset(&nh, 0, sizeof(nh)); - - if (bh_type == BLACKHOLE_UNSPEC) { - if (index && !gate) - nh.type = NEXTHOP_TYPE_IFINDEX; - else if (index && gate) - nh.type = - (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4_IFINDEX - : NEXTHOP_TYPE_IPV6_IFINDEX; - else if (!index && gate) - nh.type = (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4 - : NEXTHOP_TYPE_IPV6; - else { - nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = bh_type; - } - } else { - nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = bh_type; - } - nh.ifindex = index; - if (prefsrc) - memcpy(&nh.src, prefsrc, sz); - if (gate) - memcpy(&nh.gate, gate, sz); - - if (index) { - ifp = if_lookup_by_index_per_ns( - zebra_ns_lookup(ns_id), - index); - if (ifp) - nh_vrf_id = ifp->vrf_id; - } - nh.vrf_id = nh_vrf_id; + struct nexthop nh = {0}; - if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] - && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE]) - == LWTUNNEL_ENCAP_MPLS) { - num_labels = - parse_encap_mpls(tb[RTA_ENCAP], labels); + if (!nhe_id) { + nh = parse_nexthop_unicast( + ns_id, rtm, tb, bh_type, index, prefsrc, + gate, afi, vrf_id); } - - if (rtm->rtm_flags & RTNH_F_ONLINK) - SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); - - if (num_labels) - nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, - num_labels, labels); - rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p, - &src_p, &nh, table, metric, mtu, distance, tag); + &src_p, &nh, nhe_id, table, metric, mtu, + distance, tag); } else { /* This is a multipath route */ - struct route_entry *re; struct rtnexthop *rtnh = (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]); - len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); - re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); re->type = proto; re->distance = distance; @@ -624,148 +748,73 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, re->mtu = mtu; re->vrf_id = vrf_id; re->table = table; - re->nexthop_num = 0; re->uptime = monotime(NULL); re->tag = tag; + re->nhe_id = nhe_id; - for (;;) { - struct nexthop *nh = NULL; - - if (len < (int)sizeof(*rtnh) - || rtnh->rtnh_len > len) - break; - - index = rtnh->rtnh_ifindex; - if (index) { - /* - * Yes we are looking this up - * for every nexthop and just - * using the last one looked - * up right now - */ - ifp = if_lookup_by_index_per_ns( - zebra_ns_lookup(ns_id), - index); - if (ifp) - nh_vrf_id = ifp->vrf_id; - else { - flog_warn( - EC_ZEBRA_UNKNOWN_INTERFACE, - "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT", - __PRETTY_FUNCTION__, - index); - nh_vrf_id = VRF_DEFAULT; - } - } else - nh_vrf_id = vrf_id; - - gate = 0; - if (rtnh->rtnh_len > sizeof(*rtnh)) { - memset(tb, 0, sizeof(tb)); - netlink_parse_rtattr( - tb, RTA_MAX, RTNH_DATA(rtnh), - rtnh->rtnh_len - sizeof(*rtnh)); - if (tb[RTA_GATEWAY]) - gate = RTA_DATA( - tb[RTA_GATEWAY]); - if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE] - && *(uint16_t *)RTA_DATA( - tb[RTA_ENCAP_TYPE]) - == LWTUNNEL_ENCAP_MPLS) { - num_labels = parse_encap_mpls( - tb[RTA_ENCAP], labels); - } - } - - if (gate) { - if (rtm->rtm_family == AF_INET) { - if (index) - nh = route_entry_nexthop_ipv4_ifindex_add( - re, gate, - prefsrc, index, - nh_vrf_id); - else - nh = route_entry_nexthop_ipv4_add( - re, gate, - prefsrc, - nh_vrf_id); - } else if (rtm->rtm_family - == AF_INET6) { - if (index) - nh = route_entry_nexthop_ipv6_ifindex_add( - re, gate, index, - nh_vrf_id); - else - nh = route_entry_nexthop_ipv6_add( - re, gate, - nh_vrf_id); - } - } else - nh = route_entry_nexthop_ifindex_add( - re, index, nh_vrf_id); + if (!nhe_id) { + uint8_t nhop_num = + parse_multipath_nexthops_unicast( + ns_id, re, rtm, rtnh, tb, + prefsrc, vrf_id); - if (nh && num_labels) - nexthop_add_labels(nh, ZEBRA_LSP_STATIC, - num_labels, labels); - - if (nh && (rtnh->rtnh_flags & RTNH_F_ONLINK)) - SET_FLAG(nh->flags, - NEXTHOP_FLAG_ONLINK); - - if (rtnh->rtnh_len == 0) - break; - - len -= NLMSG_ALIGN(rtnh->rtnh_len); - rtnh = RTNH_NEXT(rtnh); + zserv_nexthop_num_warn( + __func__, (const struct prefix *)&p, + nhop_num); } - zserv_nexthop_num_warn(__func__, - (const struct prefix *)&p, - re->nexthop_num); - if (re->nexthop_num == 0) - XFREE(MTYPE_RE, re); - else + if (nhe_id || re->ng) rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p, re); + else + XFREE(MTYPE_RE, re); } } else { - if (!tb[RTA_MULTIPATH]) { - struct nexthop nh; - size_t sz = (afi == AFI_IP) ? 4 : 16; - - memset(&nh, 0, sizeof(nh)); - if (bh_type == BLACKHOLE_UNSPEC) { - if (index && !gate) - nh.type = NEXTHOP_TYPE_IFINDEX; - else if (index && gate) - nh.type = - (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4_IFINDEX - : NEXTHOP_TYPE_IPV6_IFINDEX; - else if (!index && gate) - nh.type = (afi == AFI_IP) - ? NEXTHOP_TYPE_IPV4 - : NEXTHOP_TYPE_IPV6; - else { + if (nhe_id) { + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, + &p, &src_p, NULL, nhe_id, table, metric, + distance, true); + } else { + if (!tb[RTA_MULTIPATH]) { + struct nexthop nh; + size_t sz = (afi == AFI_IP) ? 4 : 16; + + memset(&nh, 0, sizeof(nh)); + if (bh_type == BLACKHOLE_UNSPEC) { + if (index && !gate) + nh.type = NEXTHOP_TYPE_IFINDEX; + else if (index && gate) + nh.type = + (afi == AFI_IP) + ? NEXTHOP_TYPE_IPV4_IFINDEX + : NEXTHOP_TYPE_IPV6_IFINDEX; + else if (!index && gate) + nh.type = + (afi == AFI_IP) + ? NEXTHOP_TYPE_IPV4 + : NEXTHOP_TYPE_IPV6; + else { + nh.type = + NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_UNSPEC; + } + } else { nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = BLACKHOLE_UNSPEC; + nh.bh_type = bh_type; } + nh.ifindex = index; + if (gate) + memcpy(&nh.gate, gate, sz); + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, + flags, &p, &src_p, &nh, 0, table, + metric, distance, true); } else { - nh.type = NEXTHOP_TYPE_BLACKHOLE; - nh.bh_type = bh_type; + /* XXX: need to compare the entire list of + * nexthops here for NLM_F_APPEND stupidity */ + rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, + flags, &p, &src_p, NULL, 0, table, + metric, distance, true); } - nh.ifindex = index; - if (gate) - memcpy(&nh.gate, gate, sz); - rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, - &p, &src_p, &nh, table, metric, distance, - true); - } else { - /* XXX: need to compare the entire list of nexthops - * here for NLM_F_APPEND stupidity */ - rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, - &p, &src_p, NULL, table, metric, distance, - true); } } @@ -1023,6 +1072,35 @@ static void _netlink_route_rta_add_gateway_info(uint8_t route_family, } } +static int build_label_stack(struct mpls_label_stack *nh_label, + mpls_lse_t *out_lse, char *label_buf, + size_t label_buf_size) +{ + char label_buf1[20]; + int num_labels = 0; + + for (int i = 0; nh_label && i < nh_label->num_labels; i++) { + if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) + continue; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (!num_labels) + sprintf(label_buf, "label %u", + nh_label->label[i]); + else { + sprintf(label_buf1, "/%u", nh_label->label[i]); + strlcat(label_buf, label_buf1, label_buf_size); + } + } + + out_lse[num_labels] = + mpls_lse_encode(nh_label->label[i], 0, 0, 0); + num_labels++; + } + + return num_labels; +} + /* This function takes a nexthop as argument and adds * the appropriate netlink attributes to an existing * netlink message. @@ -1040,10 +1118,12 @@ static void _netlink_route_build_singlepath(const char *routedesc, int bytelen, struct rtmsg *rtmsg, size_t req_size, int cmd) { - struct mpls_label_stack *nh_label; + mpls_lse_t out_lse[MPLS_MAX_LABELS]; - int num_labels = 0; char label_buf[256]; + int num_labels = 0; + + assert(nexthop); /* * label_buf is *only* currently used within debugging. @@ -1053,30 +1133,8 @@ static void _netlink_route_build_singlepath(const char *routedesc, int bytelen, */ label_buf[0] = '\0'; - assert(nexthop); - char label_buf1[20]; - - nh_label = nexthop->nh_label; - - for (int i = 0; nh_label && i < nh_label->num_labels; i++) { - if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) - continue; - - if (IS_ZEBRA_DEBUG_KERNEL) { - if (!num_labels) - sprintf(label_buf, "label %u", - nh_label->label[i]); - else { - sprintf(label_buf1, "/%u", nh_label->label[i]); - strlcat(label_buf, label_buf1, - sizeof(label_buf)); - } - } - - out_lse[num_labels] = - mpls_lse_encode(nh_label->label[i], 0, 0, 0); - num_labels++; - } + num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf, + sizeof(label_buf)); if (num_labels) { /* Set the BoS bit */ @@ -1221,16 +1279,17 @@ static void _netlink_route_build_multipath(const char *routedesc, int bytelen, struct rtmsg *rtmsg, const union g_addr **src) { - struct mpls_label_stack *nh_label; mpls_lse_t out_lse[MPLS_MAX_LABELS]; - int num_labels = 0; char label_buf[256]; + int num_labels = 0; rtnh->rtnh_len = sizeof(*rtnh); rtnh->rtnh_flags = 0; rtnh->rtnh_hops = 0; rta->rta_len += rtnh->rtnh_len; + assert(nexthop); + /* * label_buf is *only* currently used within debugging. * As such when we assign it we are guarding it inside @@ -1239,30 +1298,8 @@ static void _netlink_route_build_multipath(const char *routedesc, int bytelen, */ label_buf[0] = '\0'; - assert(nexthop); - char label_buf1[20]; - - nh_label = nexthop->nh_label; - - for (int i = 0; nh_label && i < nh_label->num_labels; i++) { - if (nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL) - continue; - - if (IS_ZEBRA_DEBUG_KERNEL) { - if (!num_labels) - sprintf(label_buf, "label %u", - nh_label->label[i]); - else { - sprintf(label_buf1, "/%u", nh_label->label[i]); - strlcat(label_buf, label_buf1, - sizeof(label_buf)); - } - } - - out_lse[num_labels] = - mpls_lse_encode(nh_label->label[i], 0, 0, 0); - num_labels++; - } + num_labels = build_label_stack(nexthop->nh_label, out_lse, label_buf, + sizeof(label_buf)); if (num_labels) { /* Set the BoS bit */ @@ -1430,6 +1467,13 @@ static void _netlink_route_debug(int cmd, const struct prefix *p, } } +static void _netlink_nexthop_debug(int cmd, uint32_t id) +{ + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("netlink_nexthop(): %s, id=%u", + nl_msg_type_to_str(cmd), id); +} + static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc) { if (IS_ZEBRA_DEBUG_KERNEL) @@ -1595,6 +1639,13 @@ static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) RTA_PAYLOAD(rta)); } + if (supports_nh) { + /* Kernel supports nexthop objects */ + addattr32(&req.n, sizeof(req), RTA_NH_ID, + dplane_ctx_get_nhe_id(ctx)); + goto skip; + } + /* Count overall nexthops so we can decide whether to use singlepath * or multipath case. */ @@ -1842,6 +1893,262 @@ int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) return suc; } +/* Char length to debug ID with */ +#define ID_LENGTH 10 + +static void _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size, + uint32_t id, + const struct nh_grp *z_grp, + const uint8_t count) +{ + struct nexthop_grp grp[count]; + /* Need space for max group size, "/", and null term */ + char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1]; + char buf1[ID_LENGTH + 2]; + + buf[0] = '\0'; + + memset(grp, 0, sizeof(grp)); + + if (count) { + for (int i = 0; i < count; i++) { + grp[i].id = z_grp[i].id; + grp[i].weight = z_grp[i].weight; + + if (IS_ZEBRA_DEBUG_KERNEL) { + if (i == 0) + snprintf(buf, sizeof(buf1), "group %u", + grp[i].id); + else { + snprintf(buf1, sizeof(buf1), "/%u", + grp[i].id); + strlcat(buf, buf1, sizeof(buf)); + } + } + } + addattr_l(n, req_size, NHA_GROUP, grp, count * sizeof(*grp)); + } + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s: ID (%u): %s", __func__, id, buf); +} + +/** + * netlink_nexthop() - Nexthop change via the netlink interface + * + * @ctx: Dataplane ctx + * + * Return: Result status + */ +static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[NL_PKT_BUF_SIZE]; + } req; + + mpls_lse_t out_lse[MPLS_MAX_LABELS]; + char label_buf[256]; + int num_labels = 0; + size_t req_size = sizeof(req); + + /* Nothing to do if the kernel doesn't support nexthop objects */ + if (!supports_nh) + return 0; + + label_buf[0] = '\0'; + + memset(&req, 0, req_size); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + + if (cmd == RTM_NEWNEXTHOP) + req.n.nlmsg_flags |= NLM_F_REPLACE; + + req.n.nlmsg_type = cmd; + req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; + + req.nhm.nh_family = AF_UNSPEC; + /* TODO: Scope? */ + + uint32_t id = dplane_ctx_get_nhe_id(ctx); + + if (!id) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Failed trying to update a nexthop group in the kernel that does not have an ID"); + return -1; + } + + addattr32(&req.n, req_size, NHA_ID, id); + + if (cmd == RTM_NEWNEXTHOP) { + if (dplane_ctx_get_nhe_nh_grp_count(ctx)) + _netlink_nexthop_build_group( + &req.n, req_size, id, + dplane_ctx_get_nhe_nh_grp(ctx), + dplane_ctx_get_nhe_nh_grp_count(ctx)); + else { + const struct nexthop *nh = + dplane_ctx_get_nhe_ng(ctx)->nexthop; + afi_t afi = dplane_ctx_get_nhe_afi(ctx); + + if (afi == AFI_IP) + req.nhm.nh_family = AF_INET; + else if (afi == AFI_IP6) + req.nhm.nh_family = AF_INET6; + + switch (nh->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + addattr_l(&req.n, req_size, NHA_GATEWAY, + &nh->gate.ipv4, IPV4_MAX_BYTELEN); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + addattr_l(&req.n, req_size, NHA_GATEWAY, + &nh->gate.ipv6, IPV6_MAX_BYTELEN); + break; + case NEXTHOP_TYPE_BLACKHOLE: + addattr_l(&req.n, req_size, NHA_BLACKHOLE, NULL, + 0); + /* Blackhole shouldn't have anymore attributes + */ + goto nexthop_done; + case NEXTHOP_TYPE_IFINDEX: + /* Don't need anymore info for this */ + break; + } + + if (!nh->ifindex) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update without an interface"); + return -1; + } + + addattr32(&req.n, req_size, NHA_OIF, nh->ifindex); + + num_labels = + build_label_stack(nh->nh_label, out_lse, + label_buf, sizeof(label_buf)); + + if (num_labels) { + /* Set the BoS bit */ + out_lse[num_labels - 1] |= + htonl(1 << MPLS_LS_S_SHIFT); + + /* + * TODO: MPLS unsupported for now in kernel. + */ + if (req.nhm.nh_family == AF_MPLS) + goto nexthop_done; +#if 0 + addattr_l(&req.n, req_size, NHA_NEWDST, + &out_lse, + num_labels + * sizeof(mpls_lse_t)); +#endif + else { + struct rtattr *nest; + uint16_t encap = LWTUNNEL_ENCAP_MPLS; + + addattr_l(&req.n, req_size, + NHA_ENCAP_TYPE, &encap, + sizeof(uint16_t)); + nest = addattr_nest(&req.n, req_size, + NHA_ENCAP); + addattr_l(&req.n, req_size, + MPLS_IPTUNNEL_DST, &out_lse, + num_labels + * sizeof(mpls_lse_t)); + addattr_nest_end(&req.n, nest); + } + } + + nexthop_done: + if (IS_ZEBRA_DEBUG_KERNEL) { + char buf[NEXTHOP_STRLEN]; + + snprintfrr(buf, sizeof(buf), "%pNHv", nh); + zlog_debug("%s: ID (%u): %s (%u) %s ", __func__, + id, buf, nh->vrf_id, label_buf); + } + } + + req.nhm.nh_protocol = zebra2proto(dplane_ctx_get_nhe_type(ctx)); + + } else if (cmd != RTM_DELNEXTHOP) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Nexthop group kernel update command (%d) does not exist", + cmd); + return -1; + } + + _netlink_nexthop_debug(cmd, id); + + return netlink_talk_info(netlink_talk_filter, &req.n, + dplane_ctx_get_ns(ctx), 0); +} + +/** + * kernel_nexthop_update() - Update/delete a nexthop from the kernel + * + * @ctx: Dataplane context + * + * Return: Dataplane result flag + */ +enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx) +{ + int cmd = 0; + int ret = 0; + + switch (dplane_ctx_get_op(ctx)) { + case DPLANE_OP_NH_DELETE: + cmd = RTM_DELNEXTHOP; + break; + case DPLANE_OP_NH_INSTALL: + case DPLANE_OP_NH_UPDATE: + cmd = RTM_NEWNEXTHOP; + break; + case DPLANE_OP_ROUTE_INSTALL: + case DPLANE_OP_ROUTE_UPDATE: + case DPLANE_OP_ROUTE_DELETE: + case DPLANE_OP_ROUTE_NOTIFY: + case DPLANE_OP_LSP_INSTALL: + case DPLANE_OP_LSP_UPDATE: + case DPLANE_OP_LSP_DELETE: + case DPLANE_OP_LSP_NOTIFY: + case DPLANE_OP_PW_INSTALL: + case DPLANE_OP_PW_UNINSTALL: + case DPLANE_OP_SYS_ROUTE_ADD: + case DPLANE_OP_SYS_ROUTE_DELETE: + case DPLANE_OP_ADDR_INSTALL: + case DPLANE_OP_ADDR_UNINSTALL: + case DPLANE_OP_MAC_INSTALL: + case DPLANE_OP_MAC_DELETE: + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + case DPLANE_OP_VTEP_ADD: + case DPLANE_OP_VTEP_DELETE: + case DPLANE_OP_NONE: + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Context received for kernel nexthop update with incorrect OP code (%u)", + dplane_ctx_get_op(ctx)); + return ZEBRA_DPLANE_REQUEST_FAILURE; + } + + ret = netlink_nexthop(cmd, ctx); + + return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS + : ZEBRA_DPLANE_REQUEST_FAILURE); +} + /* * Update or delete a prefix from the kernel, * using info from a dataplane context. @@ -1919,6 +2226,298 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); } +/** + * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop + * + * @tb: Netlink RTA data + * @family: Address family in the nhmsg + * @ifp: Interface connected - this should be NULL, we fill it in + * @ns_id: Namspace id + * + * Return: New nexthop + */ +static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb, + unsigned char family, + struct interface **ifp, + ns_id_t ns_id) +{ + struct nexthop nh = {}; + void *gate = NULL; + enum nexthop_types_t type = 0; + int if_index = 0; + size_t sz = 0; + + if_index = *(int *)RTA_DATA(tb[NHA_OIF]); + + + if (tb[NHA_GATEWAY]) { + switch (family) { + case AF_INET: + type = NEXTHOP_TYPE_IPV4_IFINDEX; + sz = 4; + break; + case AF_INET6: + type = NEXTHOP_TYPE_IPV6_IFINDEX; + sz = 16; + break; + default: + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop gateway with bad address family (%d) received from kernel", + family); + return nh; + } + gate = RTA_DATA(tb[NHA_GATEWAY]); + } else + type = NEXTHOP_TYPE_IFINDEX; + + if (type) + nh.type = type; + + if (gate) + memcpy(&(nh.gate), gate, sz); + + if (if_index) + nh.ifindex = if_index; + + *ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex); + if (ifp) + nh.vrf_id = (*ifp)->vrf_id; + else { + flog_warn( + EC_ZEBRA_UNKNOWN_INTERFACE, + "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT", + __PRETTY_FUNCTION__, nh.ifindex); + + nh.vrf_id = VRF_DEFAULT; + } + + if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) { + uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]); + int num_labels = 0; + + mpls_label_t labels[MPLS_MAX_LABELS] = {0}; + + if (encap_type == LWTUNNEL_ENCAP_MPLS) + num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels); + + if (num_labels) + nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, + labels); + } + + return nh; +} + +static int netlink_nexthop_process_group(struct rtattr **tb, + struct nh_grp *z_grp, int z_grp_size) +{ + uint8_t count = 0; + /* linux/nexthop.h group struct */ + struct nexthop_grp *n_grp = NULL; + + n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]); + count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp)); + + if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) { + flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid nexthop group received from the kernel"); + return count; + } + +#if 0 + // TODO: Need type for something? + zlog_debug("Nexthop group type: %d", + *((uint16_t *)RTA_DATA(tb[NHA_GROUP_TYPE]))); + +#endif + + for (int i = 0; ((i < count) && (i < z_grp_size)); i++) { + z_grp[i].id = n_grp[i].id; + z_grp[i].weight = n_grp[i].weight; + } + return count; +} + +/** + * netlink_nexthop_change() - Read in change about nexthops from the kernel + * + * @h: Netlink message header + * @ns_id: Namspace id + * @startup: Are we reading under startup conditions? + * + * Return: Result status + */ +int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) +{ + int len; + /* nexthop group id */ + uint32_t id; + unsigned char family; + int type; + afi_t afi = AFI_UNSPEC; + vrf_id_t vrf_id = 0; + struct interface *ifp = NULL; + struct nhmsg *nhm = NULL; + struct nexthop nh = {}; + struct nh_grp grp[MULTIPATH_NUM] = {}; + /* Count of nexthops in group array */ + uint8_t grp_count = 0; + struct rtattr *tb[NHA_MAX + 1] = {}; + + nhm = NLMSG_DATA(h); + + if (startup && h->nlmsg_type != RTM_NEWNEXTHOP) + return 0; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg)); + if (len < 0) { + zlog_warn( + "%s: Message received from netlink is of a broken size %d %zu", + __PRETTY_FUNCTION__, h->nlmsg_len, + (size_t)NLMSG_LENGTH(sizeof(struct nhmsg))); + return -1; + } + + netlink_parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); + + + if (!tb[NHA_ID]) { + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Nexthop group without an ID received from the kernel"); + return -1; + } + + /* We use the ID key'd nhg table for kernel updates */ + id = *((uint32_t *)RTA_DATA(tb[NHA_ID])); + + family = nhm->nh_family; + afi = family2afi(family); + + type = proto2zebra(nhm->nh_protocol, 0, true); + + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("%s ID (%u) %s NS %u", + nl_msg_type_to_str(h->nlmsg_type), id, + nl_family_to_str(family), ns_id); + + + if (h->nlmsg_type == RTM_NEWNEXTHOP) { + if (tb[NHA_GROUP]) { + /** + * If this is a group message its only going to have + * an array of nexthop IDs associated with it + */ + grp_count = netlink_nexthop_process_group( + tb, grp, array_size(grp)); + } else { + if (tb[NHA_BLACKHOLE]) { + /** + * This nexthop is just for blackhole-ing + * traffic, it should not have an OIF, GATEWAY, + * or ENCAP + */ + nh.type = NEXTHOP_TYPE_BLACKHOLE; + nh.bh_type = BLACKHOLE_UNSPEC; + } else if (tb[NHA_OIF]) + /** + * This is a true new nexthop, so we need + * to parse the gateway and device info + */ + nh = netlink_nexthop_process_nh(tb, family, + &ifp, ns_id); + else { + + flog_warn( + EC_ZEBRA_BAD_NHG_MESSAGE, + "Invalid Nexthop message received from the kernel with ID (%u)", + id); + return -1; + } + SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE); + if (nhm->nh_flags & RTNH_F_ONLINK) + SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK); + vrf_id = nh.vrf_id; + } + + if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi, + type, startup)) + return -1; + + } else if (h->nlmsg_type == RTM_DELNEXTHOP) + zebra_nhg_kernel_del(id); + + return 0; +} + +#if 0 /* Force off kernel nexthop group installs for now */ +/** + * netlink_request_nexthop() - Request nextop information from the kernel + * @zns: Zebra namespace + * @family: AF_* netlink family + * @type: RTM_* route type + * + * Return: Result status + */ +static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + } req; + + /* Form the request, specifying filter (rtattr) if needed. */ + memset(&req, 0, sizeof(req)); + req.n.nlmsg_type = type; + req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req.nhm.nh_family = family; + + return netlink_request(&zns->netlink_cmd, &req.n); +} + + +/** + * netlink_nexthop_read() - Nexthop read function using netlink interface + * + * @zns: Zebra name space + * + * Return: Result status + * Only called at bootstrap time. + */ +int netlink_nexthop_read(struct zebra_ns *zns) +{ + int ret; + struct zebra_dplane_info dp_info; + + zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/); + + /* Get nexthop objects */ + ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP); + if (ret < 0) + return ret; + ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd, + &dp_info, 0, 1); + + if (!ret) + /* If we succesfully read in nexthop objects, + * this kernel must support them. + */ + supports_nh = true; + else if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("Nexthop objects not supported on this kernel"); + + return ret; +} +#else +int netlink_nexthop_read(struct zebra_ns *zns) +{ + return 0; +} +#endif + + int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, int llalen, ns_id_t ns_id) { @@ -1951,7 +2550,7 @@ static int netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, req.n.nlmsg_type = cmd; req.ndm.ndm_family = PF_BRIDGE; req.ndm.ndm_state = NUD_NOARP | NUD_PERMANENT; - req.ndm.ndm_flags |= NTF_SELF; // Handle by "self", not "master" + req.ndm.ndm_flags |= NTF_SELF; /* Handle by "self", not "master" */ addattr_l(&req.n, sizeof(req), |
