diff options
Diffstat (limited to 'zebra/rt_netlink.c')
| -rw-r--r-- | zebra/rt_netlink.c | 387 |
1 files changed, 386 insertions, 1 deletions
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
index 795ee2703a..bbaf89a7c0 100644
--- a/zebra/rt_netlink.c
+++ b/zebra/rt_netlink.c
@@ -1651,7 +1651,7 @@ static int netlink_route_multipath(int cmd, const struct prefix *p,
 				addattr_l(&req.n, sizeof req, RTA_PREFSRC,
 					  &src.ipv6, bytelen);
 		}
-	} else {
+	} else { /* Multipath case */
 		char buf[NL_PKT_BUF_SIZE];
 		struct rtattr *rta = (void *)buf;
 		struct rtnexthop *rtnh;
@@ -1768,6 +1768,346 @@ skip:
 			    0);
 }
 
+/*
+ * Routing table change via netlink interface, using a dataplane context
+ * object. 'cmd' is the netlink message type (RTM_NEWROUTE or RTM_DELROUTE)
+ * and 'ctx' supplies the prefix, table, route type and nexthop group.
+ *
+ * Returns the result of netlink_talk_info(), or 0 without sending anything
+ * when no usable nexthop was found. For RTM_DELROUTE no nexthop attributes
+ * are added to the request.
+ */
+static int netlink_route_multipath_ctx(int cmd, dplane_ctx_h ctx)
+{
+	int bytelen;
+	struct nexthop *nexthop = NULL;
+	unsigned int nexthop_num;
+	int family;
+	const char *routedesc;
+	int setsrc = 0;
+	union g_addr src;
+	const struct prefix *p, *src_p;
+	uint32_t table_id;
+
+	struct {
+		struct nlmsghdr n;
+		struct rtmsg r;
+		char buf[NL_PKT_BUF_SIZE];
+	} req;
+
+	p = dplane_ctx_get_dest(ctx);
+	src_p = dplane_ctx_get_src(ctx);
+
+	family = PREFIX_FAMILY(p);
+
+	memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE);
+
+	bytelen = (family == AF_INET ? 4 : 16);
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+	if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
+		if ((p->family == AF_INET) || v6_rr_semantics)
+			req.n.nlmsg_flags |= NLM_F_REPLACE;
+	}
+
+	req.n.nlmsg_type = cmd;
+
+	req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
+
+	req.r.rtm_family = family;
+	req.r.rtm_dst_len = p->prefixlen;
+	req.r.rtm_src_len = src_p ? src_p->prefixlen : 0;
+	req.r.rtm_scope = RT_SCOPE_UNIVERSE;
+
+	if (cmd == RTM_DELROUTE) {
+		req.r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
+	} else {
+		req.r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
+	}
+
+	/*
+	 * blackhole routes are not RTN_UNICAST, they are
+	 * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
+	 * so setting this value as a RTN_UNICAST would
+	 * cause the route lookup of just the prefix
+	 * to fail. So no need to specify this for
+	 * the RTM_DELROUTE case
+	 */
+	if (cmd != RTM_DELROUTE)
+		req.r.rtm_type = RTN_UNICAST;
+
+	addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
+	if (src_p)
+		addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix,
+			  bytelen);
+
+	/*
+	 * Metric: hardcode the metric for all routes coming from zebra.
+	 * Metric isn't used either by the kernel or by zebra; it's purely
+	 * for calculating best path(s) by the routing protocol and for
+	 * communicating with protocol peers.
+	 */
+	addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC);
+
+#if defined(SUPPORT_REALMS)
+	{
+		route_tag_t tag;
+
+		if (cmd == RTM_DELROUTE) {
+			tag = dplane_ctx_get_old_tag(ctx);
+		} else {
+			tag = dplane_ctx_get_tag(ctx);
+		}
+
+		if (tag > 0 && tag <= 255)
+			addattr32(&req.n, sizeof req, RTA_FLOW, tag);
+	}
+#endif
+	/* Table corresponding to this route. */
+	table_id = dplane_ctx_get_table(ctx);
+	if (table_id < 256)
+		req.r.rtm_table = table_id;
+	else {
+		req.r.rtm_table = RT_TABLE_UNSPEC;
+		addattr32(&req.n, sizeof req, RTA_TABLE, table_id);
+	}
+
+	_netlink_route_debug(cmd, p, family, dplane_ctx_get_vrf(ctx), table_id);
+
+	/*
+	 * If we are not updating the route and we have received
+	 * a route delete, then all we need to fill in is the
+	 * prefix information to tell the kernel to schwack
+	 * it.
+	 */
+	if (cmd == RTM_DELROUTE)
+		goto skip;
+
+	if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
+		char buf[NL_PKT_BUF_SIZE];
+		struct rtattr *rta = (void *)buf;
+		uint32_t mtu = dplane_ctx_get_mtu(ctx);
+		uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
+		if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
+			mtu = nexthop_mtu;
+		rta->rta_type = RTA_METRICS;
+		rta->rta_len = RTA_LENGTH(0);
+		rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
+		addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta),
+			  RTA_PAYLOAD(rta));
+	}
+
+	/* Count overall nexthops so we can decide whether to use singlepath
+	 * or multipath case. */
+	nexthop_num = 0;
+	for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
+		if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
+			continue;
+		if (cmd == RTM_NEWROUTE && !NEXTHOP_IS_ACTIVE(nexthop->flags))
+			continue;
+
+		nexthop_num++;
+	}
+
+	/* Singlepath case. */
+	if (nexthop_num == 1 || multipath_num == 1) {
+		nexthop_num = 0;
+		for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
+			/*
+			 * So we want to cover 2 types of blackhole
+			 * routes here:
+			 * 1) A normal blackhole route (a la from a static
+			 *    install).
+			 * 2) A recursively resolved blackhole route
+			 */
+			if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
+				switch (nexthop->bh_type) {
+				case BLACKHOLE_ADMINPROHIB:
+					req.r.rtm_type = RTN_PROHIBIT;
+					break;
+				case BLACKHOLE_REJECT:
+					req.r.rtm_type = RTN_UNREACHABLE;
+					break;
+				default:
+					req.r.rtm_type = RTN_BLACKHOLE;
+					break;
+				}
+				goto skip;
+			}
+			if (CHECK_FLAG(nexthop->flags,
+				       NEXTHOP_FLAG_RECURSIVE)) {
+				if (!setsrc) {
+					if (family == AF_INET) {
+						if (nexthop->rmap_src.ipv4
+							    .s_addr
+						    != 0) {
+							src.ipv4 =
+								nexthop->rmap_src
+									.ipv4;
+							setsrc = 1;
+						} else if (nexthop->src.ipv4
+								   .s_addr
+							   != 0) {
+							src.ipv4 =
+								nexthop->src
+									.ipv4;
+							setsrc = 1;
+						}
+					} else if (family == AF_INET6) {
+						if (!IN6_IS_ADDR_UNSPECIFIED(
+							    &nexthop->rmap_src
+								     .ipv6)) {
+							src.ipv6 =
+								nexthop->rmap_src
+									.ipv6;
+							setsrc = 1;
+						} else if (
+							!IN6_IS_ADDR_UNSPECIFIED(
+								&nexthop->src
+									 .ipv6)) {
+							src.ipv6 =
+								nexthop->src
+									.ipv6;
+							setsrc = 1;
+						}
+					}
+				}
+				continue;
+			}
+
+			if ((cmd == RTM_NEWROUTE
+			     && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
+				routedesc = nexthop->rparent
+						    ? "recursive, single-path"
+						    : "single-path";
+
+				_netlink_route_build_singlepath(
+					routedesc, bytelen, nexthop, &req.n,
+					&req.r, sizeof req, cmd);
+				nexthop_num++;
+				break;
+			}
+		}
+		if (setsrc && (cmd == RTM_NEWROUTE)) {
+			if (family == AF_INET)
+				addattr_l(&req.n, sizeof req, RTA_PREFSRC,
+					  &src.ipv4, bytelen);
+			else if (family == AF_INET6)
+				addattr_l(&req.n, sizeof req, RTA_PREFSRC,
+					  &src.ipv6, bytelen);
+		}
+	} else { /* Multipath case */
+		char buf[NL_PKT_BUF_SIZE];
+		struct rtattr *rta = (void *)buf;
+		struct rtnexthop *rtnh;
+		union g_addr *src1 = NULL;
+
+		rta->rta_type = RTA_MULTIPATH;
+		rta->rta_len = RTA_LENGTH(0);
+		rtnh = RTA_DATA(rta);
+
+		nexthop_num = 0;
+		for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
+			if (nexthop_num >= multipath_num)
+				break;
+
+			if (CHECK_FLAG(nexthop->flags,
+				       NEXTHOP_FLAG_RECURSIVE)) {
+				/* Pick up a preferred source address */
+				if (!setsrc) {
+					if (family == AF_INET) {
+						if (nexthop->rmap_src.ipv4
+							    .s_addr
+						    != 0) {
+							src.ipv4 =
+								nexthop->rmap_src
+									.ipv4;
+							setsrc = 1;
+						} else if (nexthop->src.ipv4
+								   .s_addr
+							   != 0) {
+							src.ipv4 =
+								nexthop->src
+									.ipv4;
+							setsrc = 1;
+						}
+					} else if (family == AF_INET6) {
+						if (!IN6_IS_ADDR_UNSPECIFIED(
+							    &nexthop->rmap_src
+								     .ipv6)) {
+							src.ipv6 =
+								nexthop->rmap_src
+									.ipv6;
+							setsrc = 1;
+						} else if (
+							!IN6_IS_ADDR_UNSPECIFIED(
+								&nexthop->src
+									 .ipv6)) {
+							src.ipv6 =
+								nexthop->src
+									.ipv6;
+							setsrc = 1;
+						}
+					}
+				}
+				continue;
+			}
+
+			if ((cmd == RTM_NEWROUTE
+			     && NEXTHOP_IS_ACTIVE(nexthop->flags))) {
+				routedesc = nexthop->rparent
+						    ? "recursive, multipath"
+						    : "multipath";
+				nexthop_num++;
+
+				_netlink_route_build_multipath(
+					routedesc, bytelen, nexthop, rta, rtnh,
+					&req.r, &src1);
+				rtnh = RTNH_NEXT(rtnh);
+
+				if (!setsrc && src1) {
+					if (family == AF_INET)
+						src.ipv4 = src1->ipv4;
+					else if (family == AF_INET6)
+						src.ipv6 = src1->ipv6;
+
+					setsrc = 1;
+				}
+			}
+		}
+		if (setsrc && (cmd == RTM_NEWROUTE)) {
+			if (family == AF_INET)
+				addattr_l(&req.n, sizeof req, RTA_PREFSRC,
+					  &src.ipv4, bytelen);
+			else if (family == AF_INET6)
+				addattr_l(&req.n, sizeof req, RTA_PREFSRC,
+					  &src.ipv6, bytelen);
+			if (IS_ZEBRA_DEBUG_KERNEL)
+				zlog_debug("Setting source");
+		}
+
+		if (rta->rta_len > RTA_LENGTH(0))
+			addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH,
+				  RTA_DATA(rta), RTA_PAYLOAD(rta));
+	}
+
+	/* If there is no useful nexthop then return. */
+	if (nexthop_num == 0) {
+		if (IS_ZEBRA_DEBUG_KERNEL)
+			zlog_debug(
+				"netlink_route_multipath_ctx(): No useful nexthop.");
+		return 0;
+	}
+
+skip:
+
+	/* Talk to netlink socket. */
+	return netlink_talk_info(netlink_talk_filter, &req.n,
+				 dplane_ctx_get_ns(ctx), 0);
+}
+
 int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
 {
 	uint32_t actual_table;
@@ -1871,6 +2211,51 @@ enum zebra_dplane_result kernel_route_rib(struct route_node *rn,
 	return ZEBRA_DPLANE_REQUEST_SUCCESS;
 }
 
+/*
+ * Install, update, or delete a prefix in the kernel, using info from a
+ * dataplane context. Returns ZEBRA_DPLANE_REQUEST_SUCCESS when the final
+ * netlink request returns 0, and ZEBRA_DPLANE_REQUEST_FAILURE otherwise
+ * (including for context operations other than route install/update/delete).
+ */
+enum zebra_dplane_result kernel_route_update(dplane_ctx_h ctx)
+{
+	int cmd, ret;
+	const struct prefix *p = dplane_ctx_get_dest(ctx);
+
+	if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
+		cmd = RTM_DELROUTE;
+	} else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
+		cmd = RTM_NEWROUTE;
+	} else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
+		if (p->family == AF_INET || v6_rr_semantics) {
+			/* Single 'replace' operation */
+			cmd = RTM_NEWROUTE;
+		} else {
+			/*
+			 * So v6 route replace semantics are not in
+			 * the kernel at this point as I understand it.
+			 * so let's do a delete then an add.
+			 * In the future once v6 route replace semantics
+			 * are in we can figure out what to do here to
+			 * allow working with old and new kernels.
+			 *
+			 * I'm also intentionally ignoring the failure case
+			 * of the route delete. If that happens yeah we're
+			 * screwed.
+			 */
+			(void)netlink_route_multipath_ctx(RTM_DELROUTE, ctx);
+			cmd = RTM_NEWROUTE;
+		}
+	} else {
+		return ZEBRA_DPLANE_REQUEST_FAILURE;
+	}
+
+	ret = netlink_route_multipath_ctx(cmd, ctx);
+
+	return (ret == 0 ?
+		ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE);
+}
+
 int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla,
 			int llalen, ns_id_t ns_id)
 {
