diff options
35 files changed, 918 insertions, 279 deletions
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index e8151d94ed..cd377b32ca 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -4706,8 +4706,9 @@ static void bgp_cleanup_table(struct bgp *bgp, struct bgp_table *table, if (bgp_fibupd_safi(safi)) bgp_zebra_withdraw(p, pi, bgp, safi); - bgp_path_info_reap(rn, pi); } + + bgp_path_info_reap(rn, pi); } } diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 9890a3f071..51a9684235 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -16234,6 +16234,10 @@ void bgp_vty_init(void) &no_neighbor_route_reflector_client_cmd); install_element(BGP_EVPN_NODE, &neighbor_route_reflector_client_cmd); install_element(BGP_EVPN_NODE, &no_neighbor_route_reflector_client_cmd); + install_element(BGP_EVPN_NODE, &neighbor_nexthop_self_cmd); + install_element(BGP_EVPN_NODE, &no_neighbor_nexthop_self_cmd); + install_element(BGP_EVPN_NODE, &neighbor_nexthop_self_force_cmd); + install_element(BGP_EVPN_NODE, &no_neighbor_nexthop_self_force_cmd); /* "neighbor route-server" commands.*/ install_element(BGP_NODE, &neighbor_route_server_client_hidden_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index cca3f4aaa3..b7c255f16f 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -1143,7 +1143,8 @@ static bool update_ipv6nh_for_route_install(int nh_othervrf, struct bgp *nh_bgp, api_nh->ifindex = 0; } } - api_nh->gate.ipv6 = *nexthop; + if (nexthop) + api_nh->gate.ipv6 = *nexthop; return true; } diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index cf6335d373..faee7dad4a 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1227,10 +1227,6 @@ struct peer *peer_new(struct bgp *bgp) peer->addpath_type[afi][safi] = BGP_ADDPATH_NONE; } - /* set nexthop-unchanged for l2vpn evpn by default */ - SET_FLAG(peer->af_flags[AFI_L2VPN][SAFI_EVPN], - PEER_FLAG_NEXTHOP_UNCHANGED); - SET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN); /* Initialize per peer bgp GR FSM */ diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst index c056b39889..7c86cac11c 100644 --- a/doc/user/bgp.rst +++ b/doc/user/bgp.rst @@ -971,7 +971,7 @@ Route Aggregation-IPv4 Address Family .. clicmd:: aggregate-address A.B.C.D/M summary-only This command specifies an aggregate address. Aggregated routes will - not be announce. + not be announced. .. index:: no aggregate-address A.B.C.D/M .. clicmd:: no aggregate-address A.B.C.D/M @@ -1023,7 +1023,7 @@ Route Aggregation-IPv6 Address Family .. clicmd:: aggregate-address X:X::X:X/M summary-only This command specifies an aggregate address. Aggregated routes will - not be announce. + not be announced. .. index:: no aggregate-address X:X::X:X/M .. clicmd:: no aggregate-address X:X::X:X/M @@ -1776,7 +1776,7 @@ In :ref:`route-map` we can match on or set the BGP communities attribute. Using this feature network operator can implement their network policy based on BGP communities attribute. -The ollowing commands can be used in route maps: +The following commands can be used in route maps: .. index:: match community WORD exact-match [exact-match] .. clicmd:: match community WORD exact-match [exact-match] @@ -2391,6 +2391,27 @@ the same behavior of using same next-hop and RMAC values. Enables or disables advertise-pip feature, specifiy system-IP and/or system-MAC parameters. +Support with VRF network namespace backend +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +It is possible to separate overlay networks contained in VXLAN interfaces from +underlay networks by using VRFs. VRF-lite and VRF-netns backends can be used for +that. In the latter case, this is necessary to set both bridge and vxlan interface +on the same network namespace, as below example illustrates: + +.. code-block:: shell + + # linux shell + ip netns add vrf1 + ip link add name vxlan101 type vxlan id 101 dstport 4789 dev eth0 local 10.1.1.1 + ip link set dev vxlan101 netns vrf1 + ip netns exec vrf1 ip link set dev lo up + ip netns exec vrf1 brctl addbr bridge101 + ip netns exec vrf1 brctl addif bridge101 vxlan101 + +This makes possible to separate not only layer 3 networks like VRF-lite networks. +Also, VRF netns based make possible to separate layer 2 networks on separate VRF +instances. + .. _bgp-cisco-compatibility: Cisco Compatibility @@ -2840,8 +2861,8 @@ Displaying Routes by AS Path Displaying Update Group Information ----------------------------------- -..index:: show bgp update-groups SUBGROUP-ID [advertise-queue|advertised-routes|packet-queue] -..clicmd:: show bgp update-groups [advertise-queue|advertised-routes|packet-queue] +.. index:: show bgp update-groups SUBGROUP-ID [advertise-queue|advertised-routes|packet-queue] +.. clicmd:: show bgp update-groups [advertise-queue|advertised-routes|packet-queue] Display Information about each individual update-group being used. If SUBGROUP-ID is specified only display about that particular group. If @@ -2850,8 +2871,8 @@ Displaying Update Group Information the list of routes we have sent to the peers in the update-group and packet-queue specifies the list of packets in the queue to be sent. -..index:: show bgp update-groups statistics -..clicmd:: show bgp update-groups statistics +.. index:: show bgp update-groups statistics +.. clicmd:: show bgp update-groups statistics Display Information about update-group events in FRR. diff --git a/doc/user/ospf_fundamentals.rst b/doc/user/ospf_fundamentals.rst index da348b02d2..b0eb018107 100644 --- a/doc/user/ospf_fundamentals.rst +++ b/doc/user/ospf_fundamentals.rst @@ -83,8 +83,8 @@ sharing a link, for example: - DR/BDR election results. - Any optional capabilities supported by each router. -The Hello protocol is comparatively trivial and will not be explored in greater -detail than here. +The Hello protocol is comparatively trivial and will not be explored in more +detail. .. index:: OSPF LSA overview .. _ospf-lsas: @@ -233,7 +233,7 @@ called `intra-area routes`. Cost The output cost of that interface, scaled inversely to some commonly known - reference value, :clicmd:`auto-cost reference-bandwidth (1-4294967`. + reference value, :clicmd:`auto-cost reference-bandwidth (1-4294967)`. Link Type Transit Network diff --git a/doc/user/wecmp_linkbw.rst b/doc/user/wecmp_linkbw.rst index 0d2fe9d756..8176aaffcb 100644 --- a/doc/user/wecmp_linkbw.rst +++ b/doc/user/wecmp_linkbw.rst @@ -14,7 +14,7 @@ across these next hops. In practice, flow-based hashing is used so that all traffic associated with a particular flow uses the same next hop, and by extension, the same path across the network. -Weigted ECMP using BGP link bandwidth introduces support for network-wide +Weighted ECMP using BGP link bandwidth introduces support for network-wide unequal cost multipathing (UCMP) to an IP destination. The unequal cost load balancing is implemented by the forwarding plane based on the weights associated with the next hops of the IP prefix. These weights are computed diff --git a/doc/user/zebra.rst b/doc/user/zebra.rst index f3b4ca7d03..f105bd72bc 100644 --- a/doc/user/zebra.rst +++ b/doc/user/zebra.rst @@ -870,6 +870,20 @@ FPM Commands will not attempt to connect to it anymore. +.. index:: fpm use-next-hop-groups +.. clicmd:: fpm use-next-hop-groups + + Use the new netlink messages ``RTM_NEWNEXTHOP`` / ``RTM_DELNEXTHOP`` to + group repeated route next hop information. + + +.. index:: no fpm use-next-hop-groups +.. clicmd:: no fpm use-next-hop-groups + + Use the old known FPM behavior of including next hop information in the + route (e.g. ``RTM_NEWROUTE``) messages. + + .. index:: show fpm counters [json] .. clicmd:: show fpm counters [json] @@ -985,15 +999,15 @@ zebra Terminal Mode Commands .. index:: show ip protocol .. clicmd:: show ip protocol -.. index:: show ipforward -.. clicmd:: show ipforward +.. index:: show ip forward +.. clicmd:: show ip forward Display whether the host's IP forwarding function is enabled or not. Almost any UNIX kernel can be configured with IP forwarding disabled. If so, the box can't work as a router. -.. index:: show ipv6forward -.. clicmd:: show ipv6forward +.. index:: show ipv6 forward +.. clicmd:: show ipv6 forward Display whether the host's IP v6 forwarding is enabled or not. diff --git a/include/linux/net_namespace.h b/include/linux/net_namespace.h index 0187c74d88..0ed9dd61d3 100644 --- a/include/linux/net_namespace.h +++ b/include/linux/net_namespace.h @@ -16,6 +16,7 @@ enum { NETNSA_NSID, NETNSA_PID, NETNSA_FD, + NETNSA_TARGET_NSID, __NETNSA_MAX, }; diff --git a/lib/netns_linux.c b/lib/netns_linux.c index 98f359401e..e1c0159fc5 100644 --- a/lib/netns_linux.c +++ b/lib/netns_linux.c @@ -379,12 +379,20 @@ struct ns *ns_lookup(ns_id_t ns_id) return ns_lookup_internal(ns_id); } -void ns_walk_func(int (*func)(struct ns *)) +void ns_walk_func(int (*func)(struct ns *, + void *param_in, + void **param_out), + void *param_in, + void **param_out) { struct ns *ns = NULL; + int ret; - RB_FOREACH (ns, ns_head, &ns_tree) - func(ns); + RB_FOREACH (ns, ns_head, &ns_tree) { + ret = func(ns, param_in, param_out); + if (ret == NS_WALK_STOP) + return; + } } const char *ns_get_name(struct ns *ns) @@ -584,9 +592,33 @@ int ns_socket(int domain, int type, int protocol, ns_id_t ns_id) return ret; } +/* if relative link_nsid matches default netns, + * then return default absolute netns value + * otherwise, return NS_UNKNOWN + */ +ns_id_t ns_id_get_absolute(ns_id_t ns_id_reference, ns_id_t link_nsid) +{ + struct ns *ns; + + ns = ns_lookup(ns_id_reference); + if (!ns) + return NS_UNKNOWN; + if (ns->relative_default_ns != link_nsid) + return NS_UNKNOWN; + ns = ns_get_default(); + assert(ns); + return ns->ns_id; +} + ns_id_t ns_get_default_id(void) { if (default_ns) return default_ns->ns_id; return NS_DEFAULT_INTERNAL; } + +struct ns *ns_get_default(void) +{ + return default_ns; +} + @@ -53,6 +53,11 @@ struct ns { /* Identifier, mapped on the NSID value */ ns_id_t internal_ns_id; + /* Identifier, value of NSID of default netns, + * relative value in that local netns + */ + ns_id_t relative_default_ns; + /* Name */ char *name; @@ -120,7 +125,14 @@ int ns_socket(int domain, int type, int protocol, ns_id_t ns_id); extern char *ns_netns_pathname(struct vty *vty, const char *name); /* Parse and execute a function on all the NETNS */ -extern void ns_walk_func(int (*func)(struct ns *)); +#define NS_WALK_CONTINUE 0 +#define NS_WALK_STOP 1 + +extern void ns_walk_func(int (*func)(struct ns *, + void *, + void **), + void *param_in, + void **param_out); /* API to get the NETNS name, from the ns pointer */ extern const char *ns_get_name(struct ns *ns); @@ -174,7 +186,9 @@ extern struct ns *ns_lookup_name(const char *name); */ extern int ns_enable(struct ns *ns, void (*func)(ns_id_t, void *)); extern struct ns *ns_get_created(struct ns *ns, char *name, ns_id_t ns_id); +extern ns_id_t ns_id_get_absolute(ns_id_t ns_id_reference, ns_id_t link_nsid); extern void ns_disable(struct ns *ns); +extern struct ns *ns_get_default(void); #ifdef __cplusplus } @@ -652,7 +652,8 @@ int vrf_handler_create(struct vty *vty, const char *vrfname, } int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname, - ns_id_t ns_id, ns_id_t internal_ns_id) + ns_id_t ns_id, ns_id_t internal_ns_id, + ns_id_t rel_def_ns_id) { struct ns *ns = NULL; @@ -700,6 +701,7 @@ int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname, } ns = ns_get_created(ns, pathname, ns_id); ns->internal_ns_id = internal_ns_id; + ns->relative_default_ns = rel_def_ns_id; ns->vrf_ctxt = (void *)vrf; vrf->ns_ctxt = (void *)ns; /* update VRF netns NAME */ @@ -797,7 +799,9 @@ DEFUN_NOSH (vrf_netns, frr_with_privs(vrf_daemon_privs) { ret = vrf_netns_handler_create(vty, vrf, pathname, - NS_UNKNOWN, NS_UNKNOWN); + NS_UNKNOWN, + NS_UNKNOWN, + NS_UNKNOWN); } return ret; } @@ -315,7 +315,7 @@ extern int vrf_handler_create(struct vty *vty, const char *name, */ extern int vrf_netns_handler_create(struct vty *vty, struct vrf *vrf, char *pathname, ns_id_t ext_ns_id, - ns_id_t ns_id); + ns_id_t ns_id, ns_id_t rel_def_ns_id); /* used internally to enable or disable VRF. * Notify a change in the VRF ID of the VRF diff --git a/ospf6d/ospf6_asbr.c b/ospf6d/ospf6_asbr.c index 6e71a21bd5..cea4dd93e5 100644 --- a/ospf6d/ospf6_asbr.c +++ b/ospf6d/ospf6_asbr.c @@ -47,6 +47,9 @@ #include "ospf6_flood.h" #include "ospf6d.h" +static void ospf6_asbr_redistribute_set(int type); +static void ospf6_asbr_redistribute_unset(int type); + unsigned char conf_debug_ospf6_asbr = 0; #define ZROUTE_NAME(x) zebra_route_string(x) @@ -935,18 +938,35 @@ static void ospf6_asbr_routemap_update(const char *mapname) ospf6->rmap[type].map = route_map_lookup_by_name( ospf6->rmap[type].name); - if (mapname && ospf6->rmap[type].map + if (mapname && (strcmp(ospf6->rmap[type].name, mapname) == 0)) { - if (IS_OSPF6_DEBUG_ASBR) - zlog_debug( - "%s: route-map %s update, reset redist %s", - __func__, mapname, - ZROUTE_NAME(type)); + if (ospf6->rmap[type].map) { + if (IS_OSPF6_DEBUG_ASBR) + zlog_debug( + "%s: route-map %s update, reset redist %s", + __func__, mapname, + ZROUTE_NAME(type)); - route_map_counter_increment( - ospf6->rmap[type].map); + route_map_counter_increment( + ospf6->rmap[type].map); - ospf6_asbr_distribute_list_update(type); + ospf6_asbr_distribute_list_update(type); + } else { + /* + * if the mapname matches a route-map on + * ospf6 but the map doesn't exist, it + * is being deleted. flush and then + * readvertise + */ + if (IS_OSPF6_DEBUG_ASBR) + zlog_debug( + "%s: route-map %s deleted, reset redist %s", + __func__, mapname, + ZROUTE_NAME(type)); + ospf6_asbr_redistribute_unset(type); + ospf6_asbr_routemap_set(type, mapname); + ospf6_asbr_redistribute_set(type); + } } } else ospf6->rmap[type].map = NULL; @@ -1061,6 +1081,7 @@ void ospf6_asbr_redistribute_add(int type, ifindex_t ifindex, if (IS_OSPF6_DEBUG_ASBR) zlog_debug("Denied by route-map \"%s\"", ospf6->rmap[type].name); + ospf6_asbr_redistribute_remove(type, ifindex, prefix); return; } } diff --git a/tools/frrcommon.sh.in b/tools/frrcommon.sh.in index 9dc8cea609..2955f74ce3 100644 --- a/tools/frrcommon.sh.in +++ b/tools/frrcommon.sh.in @@ -92,7 +92,7 @@ daemon_list() { continue fi debug "$daemon enabled" - enabled="$enabled $daemon" + if [ -n "$inst" ]; then debug "$daemon multi-instance $inst" oldifs="${IFS}" @@ -101,6 +101,8 @@ daemon_list() { enabled="$enabled $daemon-$i" done IFS="${oldifs}" + else + enabled="$enabled $daemon" fi else debug "$daemon disabled" diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index 32b7faaad7..88d5ab5cec 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -74,6 +74,7 @@ struct fpm_nl_ctx { bool connecting; bool rib_complete; bool rmac_complete; + bool use_nhg; struct sockaddr_storage addr; /* data plane buffers. */ @@ -99,6 +100,8 @@ struct fpm_nl_ctx { struct thread *t_dequeue; /* zebra events. */ + struct thread *t_nhgreset; + struct thread *t_nhgwalk; struct thread *t_ribreset; struct thread *t_ribwalk; struct thread *t_rmacreset; @@ -144,6 +147,8 @@ enum fpm_nl_events { FNE_DISABLE, /* Reset counters. */ FNE_RESET_COUNTERS, + /* Toggle next hop group feature. */ + FNE_TOGGLE_NHG, }; /* @@ -151,6 +156,8 @@ enum fpm_nl_events { */ static int fpm_process_event(struct thread *t); static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx); +static int fpm_nhg_send(struct thread *t); +static int fpm_nhg_reset(struct thread *t); static int fpm_rib_send(struct thread *t); static int fpm_rib_reset(struct thread *t); static int fpm_rmac_send(struct thread *t); @@ -255,6 +262,37 @@ DEFUN(no_fpm_set_address, no_fpm_set_address_cmd, return CMD_SUCCESS; } +DEFUN(fpm_use_nhg, fpm_use_nhg_cmd, + "fpm use-next-hop-groups", + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already enabled. */ + if (gfnc->use_nhg) + return CMD_SUCCESS; + + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_event); + + return CMD_SUCCESS; +} + +DEFUN(no_fpm_use_nhg, no_fpm_use_nhg_cmd, + "no fpm use-next-hop-groups", + NO_STR + FPM_STR + "Use netlink next hop groups feature.\n") +{ + /* Already disabled. */ + if (!gfnc->use_nhg) + return CMD_SUCCESS; + + thread_add_event(gfnc->fthread->master, fpm_process_event, gfnc, + FNE_TOGGLE_NHG, &gfnc->t_event); + + return CMD_SUCCESS; +} + DEFUN(fpm_reset_counters, fpm_reset_counters_cmd, "clear fpm counters", CLEAR_STR @@ -368,6 +406,11 @@ static int fpm_write_config(struct vty *vty) break; } + if (!gfnc->use_nhg) { + vty_out(vty, "no fpm use-next-hop-groups\n"); + written = 1; + } + return written; } @@ -399,14 +442,12 @@ static void fpm_reconnect(struct fpm_nl_ctx *fnc) THREAD_OFF(fnc->t_read); THREAD_OFF(fnc->t_write); - if (fnc->t_ribreset) - thread_cancel_async(zrouter.master, &fnc->t_ribreset, NULL); - if (fnc->t_ribwalk) - thread_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL); - if (fnc->t_rmacreset) - thread_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL); - if (fnc->t_rmacwalk) - thread_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL); + thread_cancel_async(zrouter.master, &fnc->t_nhgreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_nhgwalk, NULL); + thread_cancel_async(zrouter.master, &fnc->t_ribreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL); + thread_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL); + thread_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL); /* FPM is disabled, don't attempt to connect. */ if (fnc->disabled) @@ -490,9 +531,25 @@ static int fpm_write(struct thread *t) fnc->connecting = false; - /* Ask zebra main thread to start walking the RIB table. */ - thread_add_timer(zrouter.master, fpm_rib_send, fnc, 0, - &fnc->t_ribwalk); + /* + * Walk the route tables to send old information before starting + * to send updated information. + * + * NOTE 1: + * RIB table walk is called after the next group table walk + * ends. + * + * NOTE 2: + * Don't attempt to go through next hop group table if we were + * explictly told to not use it. + */ + if (fnc->use_nhg) + thread_add_timer(zrouter.master, fpm_nhg_send, fnc, 0, + &fnc->t_nhgwalk); + else + thread_add_timer(zrouter.master, fpm_rib_send, fnc, 0, + &fnc->t_ribwalk); + thread_add_timer(zrouter.master, fpm_rmac_send, fnc, 0, &fnc->t_rmacwalk); } @@ -610,6 +667,8 @@ static int fpm_connect(struct thread *t) &fnc->t_write); /* Mark all routes as unsent. */ + thread_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0, + &fnc->t_nhgreset); thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0, &fnc->t_ribreset); thread_add_timer(zrouter.master, fpm_rmac_reset, fnc, 0, @@ -632,16 +691,27 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) size_t nl_buf_len; ssize_t rv; uint64_t obytes, obytes_peak; + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + /* + * If we were configured to not use next hop groups, then quit as soon + * as possible. + */ + if ((!fnc->use_nhg) + && (op == DPLANE_OP_NH_DELETE || op == DPLANE_OP_NH_INSTALL + || op == DPLANE_OP_NH_UPDATE)) + return 0; nl_buf_len = 0; frr_mutex_lock_autounlock(&fnc->obuf_mutex); - switch (dplane_ctx_get_op(ctx)) { + switch (op) { case DPLANE_OP_ROUTE_UPDATE: case DPLANE_OP_ROUTE_DELETE: rv = netlink_route_multipath(RTM_DELROUTE, ctx, nl_buf, - sizeof(nl_buf), true); + sizeof(nl_buf), true, + fnc->use_nhg); if (rv <= 0) { zlog_err("%s: netlink_route_multipath failed", __func__); @@ -651,14 +721,14 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) nl_buf_len = (size_t)rv; /* UPDATE operations need a INSTALL, otherwise just quit. */ - if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) + if (op == DPLANE_OP_ROUTE_DELETE) break; /* FALL THROUGH */ case DPLANE_OP_ROUTE_INSTALL: - rv = netlink_route_multipath(RTM_NEWROUTE, ctx, - &nl_buf[nl_buf_len], - sizeof(nl_buf) - nl_buf_len, true); + rv = netlink_route_multipath( + RTM_NEWROUTE, ctx, &nl_buf[nl_buf_len], + sizeof(nl_buf) - nl_buf_len, true, fnc->use_nhg); if (rv <= 0) { zlog_err("%s: netlink_route_multipath failed", __func__); @@ -680,9 +750,28 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) nl_buf_len = (size_t)rv; break; + case DPLANE_OP_NH_DELETE: + rv = netlink_nexthop_encode(RTM_DELNEXTHOP, ctx, nl_buf, + sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_encode failed", __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; case DPLANE_OP_NH_INSTALL: case DPLANE_OP_NH_UPDATE: - case DPLANE_OP_NH_DELETE: + rv = netlink_nexthop_encode(RTM_NEWNEXTHOP, ctx, nl_buf, + sizeof(nl_buf)); + if (rv <= 0) { + zlog_err("%s: netlink_nexthop_encode failed", __func__); + return 0; + } + + nl_buf_len = (size_t)rv; + break; + case DPLANE_OP_LSP_INSTALL: case DPLANE_OP_LSP_UPDATE: case DPLANE_OP_LSP_DELETE: @@ -762,6 +851,65 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) return 0; } +/* + * Next hop walk/send functions. + */ +struct fpm_nhg_arg { + struct zebra_dplane_ctx *ctx; + struct fpm_nl_ctx *fnc; + bool complete; +}; + +static int fpm_nhg_send_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + struct fpm_nhg_arg *fna = arg; + + /* This entry was already sent, skip it. */ + if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_FPM)) + return HASHWALK_CONTINUE; + + /* Reset ctx to reuse allocated memory, take a snapshot and send it. */ + dplane_ctx_reset(fna->ctx); + dplane_ctx_nexthop_init(fna->ctx, DPLANE_OP_NH_INSTALL, nhe); + if (fpm_nl_enqueue(fna->fnc, fna->ctx) == -1) { + /* Our buffers are full, lets give it some cycles. */ + fna->complete = false; + return HASHWALK_ABORT; + } + + /* Mark group as sent, so it doesn't get sent again. */ + SET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); + + return HASHWALK_CONTINUE; +} + +static int fpm_nhg_send(struct thread *t) +{ + struct fpm_nl_ctx *fnc = THREAD_ARG(t); + struct fpm_nhg_arg fna; + + fna.fnc = fnc; + fna.ctx = dplane_ctx_alloc(); + fna.complete = true; + + /* Send next hops. */ + hash_walk(zrouter.nhgs_id, fpm_nhg_send_cb, &fna); + + /* `free()` allocated memory. */ + dplane_ctx_fini(&fna.ctx); + + /* We are done sending next hops, lets install the routes now. */ + if (fna.complete) + thread_add_timer(zrouter.master, fpm_rib_send, fnc, 0, + &fnc->t_ribwalk); + else /* Otherwise reschedule next hop group again. */ + thread_add_timer(zrouter.master, fpm_nhg_send, fnc, 0, + &fnc->t_nhgwalk); + + return 0; +} + /** * Send all RIB installed routes to the connected data plane. */ @@ -876,6 +1024,23 @@ static int fpm_rmac_send(struct thread *t) return 0; } +/* + * Resets the next hop FPM flags so we send all next hops again. + */ +static void fpm_nhg_reset_cb(struct hash_bucket *bucket, void *arg) +{ + struct nhg_hash_entry *nhe = bucket->data; + + /* Unset FPM installation flag so it gets installed again. */ + UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_FPM); +} + +static int fpm_nhg_reset(struct thread *t) +{ + hash_iterate(zrouter.nhgs_id, fpm_nhg_reset_cb, NULL); + return 0; +} + /** * Resets the RIB FPM flags so we send all routes again. */ @@ -999,6 +1164,12 @@ static int fpm_process_event(struct thread *t) memset(&fnc->counters, 0, sizeof(fnc->counters)); break; + case FNE_TOGGLE_NHG: + zlog_info("%s: toggle next hop groups support", __func__); + fnc->use_nhg = !fnc->use_nhg; + fpm_reconnect(fnc); + break; + default: if (IS_ZEBRA_DEBUG_FPM) zlog_debug("%s: unhandled event %d", __func__, event); @@ -1027,12 +1198,17 @@ static int fpm_nl_start(struct zebra_dplane_provider *prov) TAILQ_INIT(&fnc->ctxqueue); pthread_mutex_init(&fnc->ctxqueue_mutex, NULL); + /* Set default values. */ + fnc->use_nhg = true; + return 0; } static int fpm_nl_finish_early(struct fpm_nl_ctx *fnc) { /* Disable all events and close socket. */ + THREAD_OFF(fnc->t_nhgreset); + THREAD_OFF(fnc->t_nhgwalk); THREAD_OFF(fnc->t_ribreset); THREAD_OFF(fnc->t_ribwalk); THREAD_OFF(fnc->t_rmacreset); @@ -1147,6 +1323,8 @@ static int fpm_nl_new(struct thread_master *tm) install_element(ENABLE_NODE, &fpm_reset_counters_cmd); install_element(CONFIG_NODE, &fpm_set_address_cmd); install_element(CONFIG_NODE, &no_fpm_set_address_cmd); + install_element(CONFIG_NODE, &fpm_use_nhg_cmd); + install_element(CONFIG_NODE, &no_fpm_use_nhg_cmd); return 0; } diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index 429bb968a5..55bcda8182 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -482,6 +482,7 @@ static int netlink_extract_vxlan_info(struct rtattr *link_data, struct rtattr *attr[IFLA_VXLAN_MAX + 1]; vni_t vni_in_msg; struct in_addr vtep_ip_in_msg; + ifindex_t ifindex_link; memset(vxl_info, 0, sizeof(*vxl_info)); memset(attr, 0, sizeof(attr)); @@ -510,6 +511,15 @@ static int netlink_extract_vxlan_info(struct rtattr *link_data, *(struct in_addr *)RTA_DATA(attr[IFLA_VXLAN_GROUP]); } + if (!attr[IFLA_VXLAN_LINK]) { + if (IS_ZEBRA_DEBUG_KERNEL) + zlog_debug("IFLA_VXLAN_LINK missing " + "from VXLAN IF message"); + } else { + ifindex_link = + *(ifindex_t *)RTA_DATA(attr[IFLA_VXLAN_LINK]); + vxl_info->ifindex_link = ifindex_link; + } return 0; } @@ -519,7 +529,8 @@ static int netlink_extract_vxlan_info(struct rtattr *link_data, * its members. Likewise, for VxLAN interface. */ static void netlink_interface_update_l2info(struct interface *ifp, - struct rtattr *link_data, int add) + struct rtattr *link_data, int add, + ns_id_t link_nsid) { if (!link_data) return; @@ -538,7 +549,12 @@ static void netlink_interface_update_l2info(struct interface *ifp, struct zebra_l2info_vxlan vxlan_info; netlink_extract_vxlan_info(link_data, &vxlan_info); + vxlan_info.link_nsid = link_nsid; zebra_l2_vxlanif_add_update(ifp, &vxlan_info, add); + if (link_nsid != NS_UNKNOWN && + vxlan_info.ifindex_link) + zebra_if_update_link(ifp, vxlan_info.ifindex_link, + link_nsid); } } @@ -622,6 +638,7 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) ifindex_t link_ifindex = IFINDEX_INTERNAL; ifindex_t bond_ifindex = IFINDEX_INTERNAL; struct zebra_if *zif; + ns_id_t link_nsid = ns_id; zns = zebra_ns_lookup(ns_id); ifi = NLMSG_DATA(h); @@ -705,6 +722,11 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) if (tb[IFLA_LINK]) link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]); + if (tb[IFLA_LINK_NETNSID]) { + link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]); + link_nsid = ns_id_get_absolute(ns_id, link_nsid); + } + /* Add interface. * We add by index first because in some cases such as the master * interface, we have the index before we have the name. Fixing @@ -749,9 +771,10 @@ static int netlink_interface(struct nlmsghdr *h, ns_id_t ns_id, int startup) /* Extract and save L2 interface information, take additional actions. */ - netlink_interface_update_l2info(ifp, linkinfo[IFLA_INFO_DATA], 1); + netlink_interface_update_l2info(ifp, linkinfo[IFLA_INFO_DATA], + 1, link_nsid); if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) - zebra_l2if_update_bridge_slave(ifp, bridge_ifindex); + zebra_l2if_update_bridge_slave(ifp, bridge_ifindex, ns_id); else if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) zebra_l2if_update_bond_slave(ifp, bond_ifindex); @@ -1168,6 +1191,7 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) ifindex_t link_ifindex = IFINDEX_INTERNAL; uint8_t old_hw_addr[INTERFACE_HWADDR_MAX]; struct zebra_if *zif; + ns_id_t link_nsid = ns_id; zns = zebra_ns_lookup(ns_id); ifi = NLMSG_DATA(h); @@ -1235,6 +1259,10 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) if (tb[IFLA_LINK]) link_ifindex = *(ifindex_t *)RTA_DATA(tb[IFLA_LINK]); + if (tb[IFLA_LINK_NETNSID]) { + link_nsid = *(ns_id_t *)RTA_DATA(tb[IFLA_LINK_NETNSID]); + link_nsid = ns_id_get_absolute(ns_id, link_nsid); + } if (tb[IFLA_IFALIAS]) { desc = (char *)RTA_DATA(tb[IFLA_IFALIAS]); } @@ -1319,10 +1347,12 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) /* Extract and save L2 interface information, take * additional actions. */ netlink_interface_update_l2info( - ifp, linkinfo[IFLA_INFO_DATA], 1); + ifp, linkinfo[IFLA_INFO_DATA], + 1, link_nsid); if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp)) zebra_l2if_update_bridge_slave(ifp, - bridge_ifindex); + bridge_ifindex, + ns_id); else if (IS_ZEBRA_IF_BOND_SLAVE(ifp)) zebra_l2if_update_bond_slave(ifp, bond_ifindex); } else if (ifp->vrf_id != vrf_id) { @@ -1421,10 +1451,12 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) /* Extract and save L2 interface information, take * additional actions. */ netlink_interface_update_l2info( - ifp, linkinfo[IFLA_INFO_DATA], 0); + ifp, linkinfo[IFLA_INFO_DATA], + 0, link_nsid); if (IS_ZEBRA_IF_BRIDGE_SLAVE(ifp) || was_bridge_slave) zebra_l2if_update_bridge_slave(ifp, - bridge_ifindex); + bridge_ifindex, + ns_id); else if (IS_ZEBRA_IF_BOND_SLAVE(ifp) || was_bond_slave) zebra_l2if_update_bond_slave(ifp, bond_ifindex); } diff --git a/zebra/interface.c b/zebra/interface.c index 9a248ba5d1..9d1f70609b 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -1236,6 +1236,23 @@ static void nbr_connected_dump_vty(struct vty *vty, vty_out(vty, "\n"); } +static const char *zebra_zifslavetype_2str(zebra_slave_iftype_t zif_slave_type) +{ + switch (zif_slave_type) { + case ZEBRA_IF_SLAVE_BRIDGE: + return "Bridge"; + case ZEBRA_IF_SLAVE_VRF: + return "Vrf"; + case ZEBRA_IF_SLAVE_BOND: + return "Bond"; + case ZEBRA_IF_SLAVE_OTHER: + return "Other"; + case ZEBRA_IF_SLAVE_NONE: + return "None"; + } + return "None"; +} + static const char *zebra_ziftype_2str(zebra_iftype_t zif_type) { switch (zif_type) { @@ -1463,6 +1480,9 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp) vty_out(vty, " Interface Type %s\n", zebra_ziftype_2str(zebra_if->zif_type)); + vty_out(vty, " Interface Slave Type %s\n", + zebra_zifslavetype_2str(zebra_if->zif_slave_type)); + if (IS_ZEBRA_IF_BRIDGE(ifp)) { struct zebra_l2info_bridge *bridge_info; @@ -1488,6 +1508,17 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp) if (vxlan_info->mcast_grp.s_addr != INADDR_ANY) vty_out(vty, " Mcast Group %s", inet_ntoa(vxlan_info->mcast_grp)); + if (vxlan_info->ifindex_link && + (vxlan_info->link_nsid != NS_UNKNOWN)) { + struct interface *ifp; + + ifp = if_lookup_by_index_per_ns( + zebra_ns_lookup(vxlan_info->link_nsid), + vxlan_info->ifindex_link); + vty_out(vty, " Link Interface %s", + ifp == NULL ? "Unknown" : + ifp->name); + } vty_out(vty, "\n"); } diff --git a/zebra/main.c b/zebra/main.c index e230a744f6..f447e9aa07 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -177,7 +177,7 @@ static void sigint(void) vrf_terminate(); rtadv_terminate(); - ns_walk_func(zebra_ns_early_shutdown); + ns_walk_func(zebra_ns_early_shutdown, NULL, NULL); zebra_ns_notify_close(); access_list_reset(); @@ -208,7 +208,7 @@ int zebra_finalize(struct thread *dummy) zlog_info("Zebra final shutdown"); /* Final shutdown of ns resources */ - ns_walk_func(zebra_ns_final_shutdown); + ns_walk_func(zebra_ns_final_shutdown, NULL, NULL); /* Stop dplane thread and finish any cleanup */ zebra_dplane_shutdown(); diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 89d9f4c973..2e6cc7cd06 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -1552,7 +1552,8 @@ static void netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen, * Routing table change via netlink interface, using a dataplane context object */ ssize_t netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx, - uint8_t *data, size_t datalen, bool fpm) + uint8_t *data, size_t datalen, bool fpm, + bool force_nhg) { int bytelen; struct nexthop *nexthop = NULL; @@ -1674,7 +1675,7 @@ ssize_t netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx, RTA_PAYLOAD(rta)); } - if (kernel_nexthops_supported()) { + if (kernel_nexthops_supported() || force_nhg) { /* Kernel supports nexthop objects */ if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug( @@ -1954,43 +1955,42 @@ static void _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size, } /** - * netlink_nexthop() - Nexthop change via the netlink interface + * Next hop packet encoding helper function. * - * @ctx: Dataplane ctx + * \param[in] cmd netlink command. + * \param[in] ctx dataplane context (information snapshot). + * \param[out] buf buffer to hold the packet. + * \param[in] buflen amount of buffer bytes. * - * Return: Result status + * \returns -1 on failure or the number of bytes written to buf. */ -static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) +ssize_t netlink_nexthop_encode(uint16_t cmd, const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen) { struct { struct nlmsghdr n; struct nhmsg nhm; - char buf[NL_PKT_BUF_SIZE]; - } req; + char buf[]; + } *req = buf; mpls_lse_t out_lse[MPLS_MAX_LABELS]; char label_buf[256]; int num_labels = 0; - size_t req_size = sizeof(req); - - /* Nothing to do if the kernel doesn't support nexthop objects */ - if (!kernel_nexthops_supported()) - return 0; label_buf[0] = '\0'; - memset(&req, 0, req_size); + memset(req, 0, buflen); - req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); - req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; if (cmd == RTM_NEWNEXTHOP) - req.n.nlmsg_flags |= NLM_F_REPLACE; + req->n.nlmsg_flags |= NLM_F_REPLACE; - req.n.nlmsg_type = cmd; - req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; + req->n.nlmsg_type = cmd; + req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; - req.nhm.nh_family = AF_UNSPEC; + req->nhm.nh_family = AF_UNSPEC; /* TODO: Scope? */ uint32_t id = dplane_ctx_get_nhe_id(ctx); @@ -2002,7 +2002,7 @@ static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) return -1; } - addattr32(&req.n, req_size, NHA_ID, id); + addattr32(&req->n, buflen, NHA_ID, id); if (cmd == RTM_NEWNEXTHOP) { /* @@ -2013,7 +2013,7 @@ static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) */ if (dplane_ctx_get_nhe_nh_grp_count(ctx)) _netlink_nexthop_build_group( - &req.n, req_size, id, + &req->n, buflen, id, dplane_ctx_get_nhe_nh_grp(ctx), dplane_ctx_get_nhe_nh_grp_count(ctx)); else { @@ -2022,23 +2022,23 @@ static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) afi_t afi = dplane_ctx_get_nhe_afi(ctx); if (afi == AFI_IP) - req.nhm.nh_family = AF_INET; + req->nhm.nh_family = AF_INET; else if (afi == AFI_IP6) - req.nhm.nh_family = AF_INET6; + req->nhm.nh_family = AF_INET6; switch (nh->type) { case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: - addattr_l(&req.n, req_size, NHA_GATEWAY, + addattr_l(&req->n, buflen, NHA_GATEWAY, &nh->gate.ipv4, IPV4_MAX_BYTELEN); break; case NEXTHOP_TYPE_IPV6: case NEXTHOP_TYPE_IPV6_IFINDEX: - addattr_l(&req.n, req_size, NHA_GATEWAY, + addattr_l(&req->n, buflen, NHA_GATEWAY, &nh->gate.ipv6, IPV6_MAX_BYTELEN); break; case NEXTHOP_TYPE_BLACKHOLE: - addattr_l(&req.n, req_size, NHA_BLACKHOLE, NULL, + addattr_l(&req->n, buflen, NHA_BLACKHOLE, NULL, 0); /* Blackhole shouldn't have anymore attributes */ @@ -2055,10 +2055,10 @@ static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) return -1; } - addattr32(&req.n, req_size, NHA_OIF, nh->ifindex); + addattr32(&req->n, buflen, NHA_OIF, nh->ifindex); if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK)) - req.nhm.nh_flags |= RTNH_F_ONLINK; + req->nhm.nh_flags |= RTNH_F_ONLINK; num_labels = build_label_stack(nh->nh_label, out_lse, @@ -2072,10 +2072,10 @@ static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) /* * TODO: MPLS unsupported for now in kernel. */ - if (req.nhm.nh_family == AF_MPLS) + if (req->nhm.nh_family == AF_MPLS) goto nexthop_done; #if 0 - addattr_l(&req.n, req_size, NHA_NEWDST, + addattr_l(&req->n, buflen, NHA_NEWDST, &out_lse, num_labels * sizeof(mpls_lse_t)); @@ -2084,16 +2084,16 @@ static int netlink_nexthop(int cmd, struct zebra_dplane_ctx *ctx) struct rtattr *nest; uint16_t encap = LWTUNNEL_ENCAP_MPLS; - addattr_l(&req.n, req_size, + addattr_l(&req->n, buflen, NHA_ENCAP_TYPE, &encap, sizeof(uint16_t)); - nest = addattr_nest(&req.n, req_size, + nest = addattr_nest(&req->n, buflen, NHA_ENCAP); - addattr_l(&req.n, req_size, + addattr_l(&req->n, buflen, MPLS_IPTUNNEL_DST, &out_lse, num_labels * sizeof(mpls_lse_t)); - addattr_nest_end(&req.n, nest); + addattr_nest_end(&req->n, nest); } } @@ -2106,7 +2106,8 @@ nexthop_done: nh->vrf_id, label_buf); } - req.nhm.nh_protocol = zebra2proto(dplane_ctx_get_nhe_type(ctx)); + req->nhm.nh_protocol = + zebra2proto(dplane_ctx_get_nhe_type(ctx)); } else if (cmd != RTM_DELNEXTHOP) { flog_err( @@ -2120,8 +2121,7 @@ nexthop_done: zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd), id); - return netlink_talk_info(netlink_talk_filter, &req.n, - dplane_ctx_get_ns(ctx), 0); + return NLMSG_ALIGN(req->n.nlmsg_len); } /** @@ -2136,6 +2136,7 @@ enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx) enum dplane_op_e op; int cmd = 0; int ret = 0; + char buf[NL_PKT_BUF_SIZE]; op = dplane_ctx_get_op(ctx); if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE) @@ -2149,7 +2150,15 @@ enum zebra_dplane_result kernel_nexthop_update(struct zebra_dplane_ctx *ctx) return ZEBRA_DPLANE_REQUEST_FAILURE; } - ret = netlink_nexthop(cmd, ctx); + /* Nothing to do if the kernel doesn't support nexthop objects */ + if (!kernel_nexthops_supported()) + return ZEBRA_DPLANE_REQUEST_SUCCESS; + + if (netlink_nexthop_encode(cmd, ctx, buf, sizeof(buf)) > 0) + ret = netlink_talk_info(netlink_talk_filter, (void *)&buf, + dplane_ctx_get_ns(ctx), 0); + else + ret = 0; return (ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); @@ -2188,7 +2197,7 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) { netlink_route_multipath(RTM_DELROUTE, ctx, nl_pkt, sizeof(nl_pkt), - false); + false, false); netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt, dplane_ctx_get_ns(ctx), 0); @@ -2209,7 +2218,7 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx))) { netlink_route_multipath(RTM_DELROUTE, ctx, nl_pkt, sizeof(nl_pkt), - false); + false, false); netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt, dplane_ctx_get_ns(ctx), 0); @@ -2222,7 +2231,7 @@ enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) } if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))) { - netlink_route_multipath(cmd, ctx, nl_pkt, sizeof(nl_pkt), + netlink_route_multipath(cmd, ctx, nl_pkt, sizeof(nl_pkt), false, false); ret = netlink_talk_info(netlink_talk_filter, (struct nlmsghdr *)nl_pkt, diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h index a364d305c5..c09609f47c 100644 --- a/zebra/rt_netlink.h +++ b/zebra/rt_netlink.h @@ -67,8 +67,8 @@ void rt_netlink_init(void); extern int netlink_mpls_multipath(int cmd, struct zebra_dplane_ctx *ctx); extern ssize_t netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx, - uint8_t *data, size_t datalen, - bool fpm); + uint8_t *data, size_t datalen, bool fpm, + bool force_nhg); extern ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, uint8_t *data, size_t datalen); @@ -78,6 +78,9 @@ extern int netlink_route_read(struct zebra_ns *zns); extern int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup); extern int netlink_nexthop_read(struct zebra_ns *zns); +extern ssize_t netlink_nexthop_encode(uint16_t cmd, + const struct zebra_dplane_ctx *ctx, + void *buf, size_t buflen); extern int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id); extern int netlink_macfdb_read(struct zebra_ns *zns); diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 092b5dd3c2..5db4555284 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -1451,10 +1451,6 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, &api_nh->gate.ipv4, NULL, api_nh->ifindex, api_nh->vrf_id); - ifp = if_lookup_by_index(api_nh->ifindex, api_nh->vrf_id); - if (ifp && connected_is_unnumbered(ifp)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK); - /* Special handling for IPv4 routes sourced from EVPN: * the nexthop and associated MAC need to be installed. */ @@ -1516,8 +1512,16 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, goto done; } + /* Mark nexthop as onlink either if client has explicitly told us + * to or if the nexthop is on an 'unnumbered' interface. + */ if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_ONLINK)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK); + else if (api_nh->type == NEXTHOP_TYPE_IPV4_IFINDEX) { + ifp = if_lookup_by_index(api_nh->ifindex, api_nh->vrf_id); + if (ifp && connected_is_unnumbered(ifp)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK); + } if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_WEIGHT)) nexthop->weight = api_nh->weight; diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index cc8cab1ff5..278e894d06 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -1658,9 +1658,8 @@ done: * * Return: Result status */ -static int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, - enum dplane_op_e op, - struct nhg_hash_entry *nhe) +int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct nhg_hash_entry *nhe) { struct zebra_vrf *zvrf = NULL; struct zebra_ns *zns = NULL; diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index f01ca2e84c..9ce542944d 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -499,6 +499,10 @@ enum zebra_dplane_result dplane_vtep_delete(const struct interface *ifp, int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, struct route_node *rn, struct route_entry *re); +/* Encode next hop information into data plane context. */ +int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, + struct nhg_hash_entry *nhe); + /* Retrieve the limit on the number of pending, unprocessed updates. */ uint32_t dplane_get_in_queue_limit(void); diff --git a/zebra/zebra_l2.c b/zebra/zebra_l2.c index e549d80a5c..a214494492 100644 --- a/zebra/zebra_l2.c +++ b/zebra/zebra_l2.c @@ -53,7 +53,13 @@ static void map_slaves_to_bridge(struct interface *br_if, int link) { struct vrf *vrf; struct interface *ifp; + struct zebra_vrf *zvrf; + struct zebra_ns *zns; + zvrf = zebra_vrf_lookup_by_id(br_if->vrf_id); + assert(zvrf); + zns = zvrf->zns; + assert(zns); RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { FOR_ALL_INTERFACES (vrf, ifp) { struct zebra_if *zif; @@ -72,7 +78,8 @@ static void map_slaves_to_bridge(struct interface *br_if, int link) br_slave = &zif->brslave_info; if (link) { - if (br_slave->bridge_ifindex == br_if->ifindex) + if (br_slave->bridge_ifindex == br_if->ifindex && + br_slave->ns_id == zns->ns_id) br_slave->br_if = br_if; } else { if (br_slave->br_if == br_if) @@ -83,12 +90,14 @@ static void map_slaves_to_bridge(struct interface *br_if, int link) } /* Public functions */ -void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave) +void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave, + struct zebra_ns *zns) { struct interface *br_if; /* TODO: Handle change of master */ - br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(NS_DEFAULT), + assert(zns); + br_if = if_lookup_by_index_per_ns(zebra_ns_lookup(zns->ns_id), br_slave->bridge_ifindex); if (br_if) br_slave->br_if = br_if; @@ -237,23 +246,32 @@ void zebra_l2_vxlanif_del(struct interface *ifp) * from a bridge before it can be mapped to another bridge. */ void zebra_l2if_update_bridge_slave(struct interface *ifp, - ifindex_t bridge_ifindex) + ifindex_t bridge_ifindex, + ns_id_t ns_id) { struct zebra_if *zif; ifindex_t old_bridge_ifindex; + ns_id_t old_ns_id; + struct zebra_vrf *zvrf; zif = ifp->info; assert(zif); + zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id); + if (!zvrf) + return; + old_bridge_ifindex = zif->brslave_info.bridge_ifindex; - if (old_bridge_ifindex == bridge_ifindex) + old_ns_id = zif->brslave_info.ns_id; + if (old_bridge_ifindex == bridge_ifindex && + old_ns_id == zif->brslave_info.ns_id) return; + zif->brslave_info.ns_id = ns_id; zif->brslave_info.bridge_ifindex = bridge_ifindex; - /* Set up or remove link with master */ if (bridge_ifindex != IFINDEX_INTERNAL) { - zebra_l2_map_slave_to_bridge(&zif->brslave_info); + zebra_l2_map_slave_to_bridge(&zif->brslave_info, zvrf->zns); /* In the case of VxLAN, invoke the handler for EVPN. */ if (zif->zif_type == ZEBRA_IF_VXLAN) zebra_vxlan_if_update(ifp, ZEBRA_VXLIF_MASTER_CHANGE); diff --git a/zebra/zebra_l2.h b/zebra/zebra_l2.h index d9f0eec3f8..a3c780ee09 100644 --- a/zebra/zebra_l2.h +++ b/zebra/zebra_l2.h @@ -37,6 +37,7 @@ extern "C" { struct zebra_l2info_brslave { ifindex_t bridge_ifindex; /* Bridge Master */ struct interface *br_if; /* Pointer to master */ + ns_id_t ns_id; /* network namespace where bridge is */ }; /* zebra L2 interface information - bridge interface */ @@ -55,6 +56,10 @@ struct zebra_l2info_vxlan { struct in_addr vtep_ip; /* Local tunnel IP */ vlanid_t access_vlan; /* Access VLAN - for VLAN-aware bridge. */ struct in_addr mcast_grp; + ifindex_t ifindex_link; /* Interface index of interface + * linked with VXLAN + */ + ns_id_t link_nsid; }; struct zebra_l2info_bondslave { @@ -77,7 +82,8 @@ union zebra_l2if_info { #define IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) ((zif)->l2info.br.vlan_aware == 1) -extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave); +extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave, + struct zebra_ns *zns); extern void zebra_l2_unmap_slave_from_bridge(struct zebra_l2info_brslave *br_slave); extern void @@ -97,7 +103,8 @@ extern void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp, vlanid_t access_vlan); extern void zebra_l2_vxlanif_del(struct interface *ifp); extern void zebra_l2if_update_bridge_slave(struct interface *ifp, - ifindex_t bridge_ifindex); + ifindex_t bridge_ifindex, + ns_id_t ns_id); extern void zebra_l2if_update_bond_slave(struct interface *ifp, ifindex_t bond_ifindex); diff --git a/zebra/zebra_netns_id.c b/zebra/zebra_netns_id.c index 77a9a7c368..0d86421b9e 100644 --- a/zebra/zebra_netns_id.c +++ b/zebra/zebra_netns_id.c @@ -159,27 +159,34 @@ static ns_id_t extract_nsid(struct nlmsghdr *nlh, char *buf) return ns_id; } -ns_id_t zebra_ns_id_get(const char *netnspath) +/* fd_param = -1 is ignored. + * netnspath set to null is ignored. + * one of the 2 params is mandatory. netnspath is looked in priority + */ +ns_id_t zebra_ns_id_get(const char *netnspath, int fd_param) { int ns_id = -1; struct sockaddr_nl snl; - int fd, sock, ret; + int fd = -1, sock, ret; unsigned int seq; ns_id_t return_nsid = NS_UNKNOWN; /* netns path check */ - if (!netnspath) - return NS_UNKNOWN; - fd = open(netnspath, O_RDONLY); - if (fd == -1) + if (!netnspath && fd_param == -1) return NS_UNKNOWN; - + if (netnspath) { + fd = open(netnspath, O_RDONLY); + if (fd == -1) + return NS_UNKNOWN; + } else if (fd_param != -1) + fd = fd_param; /* netlink socket */ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock < 0) { flog_err_sys(EC_LIB_SOCKET, "netlink( %u) socket() error: %s", sock, safe_strerror(errno)); - close(fd); + if (fd_param == -1) + close(fd); return NS_UNKNOWN; } memset(&snl, 0, sizeof(snl)); @@ -192,7 +199,8 @@ ns_id_t zebra_ns_id_get(const char *netnspath) "netlink( %u) socket() bind error: %s", sock, safe_strerror(errno)); close(sock); - close(fd); + if (fd_param == -1) + close(fd); return NS_UNKNOWN; } @@ -214,7 +222,8 @@ ns_id_t zebra_ns_id_get(const char *netnspath) ret = send_receive(sock, nlh, seq, buf); if (ret < 0) { close(sock); - close(fd); + if (fd_param == -1) + close(fd); return NS_UNKNOWN; } nlh = (struct nlmsghdr *)buf; @@ -258,7 +267,8 @@ ns_id_t zebra_ns_id_get(const char *netnspath) "netlink( %u) recvfrom() error 2 when reading: %s", fd, safe_strerror(errno)); close(sock); - close(fd); + if (fd_param == -1) + close(fd); if (errno == ENOTSUP) { zlog_debug("NEWNSID locally generated"); return zebra_ns_id_get_fallback(netnspath); @@ -278,7 +288,8 @@ ns_id_t zebra_ns_id_get(const char *netnspath) ret = send_receive(sock, nlh, seq, buf); if (ret < 0) { close(sock); - close(fd); + if (fd_param == -1) + close(fd); return NS_UNKNOWN; } nlh = (struct nlmsghdr *)buf; @@ -309,16 +320,18 @@ ns_id_t zebra_ns_id_get(const char *netnspath) } while (len != 0 && ret == 0); } - close(fd); + if (fd_param == -1) + close(fd); close(sock); return return_nsid; } #else -ns_id_t zebra_ns_id_get(const char *netnspath) +ns_id_t zebra_ns_id_get(const char *netnspath, int fd __attribute__ ((unused))) { return zebra_ns_id_get_fallback(netnspath); } + #endif /* ! defined(HAVE_NETLINK) */ #ifdef HAVE_NETNS @@ -354,7 +367,7 @@ ns_id_t zebra_ns_id_get_default(void) return NS_DEFAULT_INTERNAL; } close(fd); - return zebra_ns_id_get((char *)NS_DEFAULT_NAME); + return zebra_ns_id_get((char *)NS_DEFAULT_NAME, -1); #else /* HAVE_NETNS */ return NS_DEFAULT_INTERNAL; #endif /* !HAVE_NETNS */ diff --git a/zebra/zebra_netns_id.h b/zebra/zebra_netns_id.h index 7a5f6851f4..dd9eab18e0 100644 --- a/zebra/zebra_netns_id.h +++ b/zebra/zebra_netns_id.h @@ -24,7 +24,7 @@ extern "C" { #endif -extern ns_id_t zebra_ns_id_get(const char *netnspath); +extern ns_id_t zebra_ns_id_get(const char *netnspath, int fd); extern ns_id_t zebra_ns_id_get_default(void); #ifdef __cplusplus diff --git a/zebra/zebra_netns_notify.c b/zebra/zebra_netns_notify.c index c5d11f1837..72e4fd0055 100644 --- a/zebra/zebra_netns_notify.c +++ b/zebra/zebra_netns_notify.c @@ -72,13 +72,14 @@ static void zebra_ns_notify_create_context_from_entry_name(const char *name) char *netnspath = ns_netns_pathname(NULL, name); struct vrf *vrf; int ret; - ns_id_t ns_id, ns_id_external; + ns_id_t ns_id, ns_id_external, ns_id_relative = NS_UNKNOWN; + struct ns *default_ns; if (netnspath == NULL) return; frr_with_privs(&zserv_privs) { - ns_id = zebra_ns_id_get(netnspath); + ns_id = zebra_ns_id_get(netnspath, -1); } if (ns_id == NS_UNKNOWN) return; @@ -97,9 +98,21 @@ static void zebra_ns_notify_create_context_from_entry_name(const char *name) ns_map_nsid_with_external(ns_id, false); return; } + + default_ns = ns_get_default(); + + /* force kernel ns_id creation in that new vrf */ + frr_with_privs(&zserv_privs) { + ns_switch_to_netns(netnspath); + ns_id_relative = zebra_ns_id_get(NULL, default_ns->fd); + ns_switchback_to_initial(); + } + frr_with_privs(&zserv_privs) { ret = vrf_netns_handler_create(NULL, vrf, netnspath, - ns_id_external, ns_id); + ns_id_external, + ns_id, + ns_id_relative); } if (ret != CMD_SUCCESS) { flog_warn(EC_ZEBRA_NS_VRF_CREATION_FAILED, diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index f24552c80b..fdbeac88e1 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -1776,33 +1776,24 @@ static int nexthop_active(afi_t afi, struct route_entry *re, return 1; /* - * Check to see if we should trust the passed in information - * for UNNUMBERED interfaces as that we won't find the GW - * address in the routing table. - * This check should suffice to handle IPv4 or IPv6 routes - * sourced from EVPN routes which are installed with the - * next hop as the remote VTEP IP. + * If the nexthop has been marked as 'onlink' we just need to make + * sure the nexthop's interface is known and is operational. */ if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) { ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id); if (!ifp) { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - " %s: Onlink and interface: %u[%u] does not exist", - __func__, nexthop->ifindex, - nexthop->vrf_id); + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("nexthop %pNHv marked onlink but nhif %u doesn't exist", + nexthop, nexthop->ifindex); return 0; } - - if (if_is_operative(ifp)) - return 1; - else { - if (IS_ZEBRA_DEBUG_RIB_DETAILED) - zlog_debug( - " %s: Onlink and interface %s is not operative", - __func__, ifp->name); + if (!if_is_operative(ifp)) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("nexthop %pNHv marked onlink but nhif %s is not operational", + nexthop, ifp->name); return 0; } + return 1; } if ((top->p.family == AF_INET && top->p.prefixlen == 32 diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h index 5792584d0f..785ce20b75 100644 --- a/zebra/zebra_nhg.h +++ b/zebra/zebra_nhg.h @@ -117,6 +117,10 @@ struct nhg_hash_entry { */ #define NEXTHOP_GROUP_BACKUP (1 << 5) +/* + * Track FPM installation status.. + */ +#define NEXTHOP_GROUP_FPM (1 << 6) }; /* Was this one we created, either this session or previously? */ diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c index 4e51437337..6462daf687 100644 --- a/zebra/zebra_ns.c +++ b/zebra/zebra_ns.c @@ -153,20 +153,25 @@ static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete) /* During zebra shutdown, do partial cleanup while the async dataplane * is still running. */ -int zebra_ns_early_shutdown(struct ns *ns) +int zebra_ns_early_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))) { struct zebra_ns *zns = ns->info; if (zns == NULL) return 0; - return zebra_ns_disable_internal(zns, false); + zebra_ns_disable_internal(zns, false); + return NS_WALK_CONTINUE; } /* During zebra shutdown, do final cleanup * after all dataplane work is complete. */ -int zebra_ns_final_shutdown(struct ns *ns) +int zebra_ns_final_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))) { struct zebra_ns *zns = ns->info; @@ -175,7 +180,7 @@ int zebra_ns_final_shutdown(struct ns *ns) kernel_terminate(zns, true); - return 0; + return NS_WALK_CONTINUE; } int zebra_ns_init(const char *optional_default_name) @@ -183,12 +188,16 @@ int zebra_ns_init(const char *optional_default_name) struct ns *default_ns; ns_id_t ns_id; ns_id_t ns_id_external; + struct ns *ns; frr_with_privs(&zserv_privs) { ns_id = zebra_ns_id_get_default(); } ns_id_external = ns_map_nsid_with_external(ns_id, true); ns_init_management(ns_id_external, ns_id); + ns = ns_get_default(); + if (ns) + ns->relative_default_ns = ns_id; default_ns = ns_lookup(ns_get_default_id()); if (!default_ns) { diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h index dc79a83db0..f7d1f40782 100644 --- a/zebra/zebra_ns.h +++ b/zebra/zebra_ns.h @@ -67,9 +67,12 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id); int zebra_ns_init(const char *optional_default_name); int zebra_ns_enable(ns_id_t ns_id, void **info); int zebra_ns_disabled(struct ns *ns); -int zebra_ns_early_shutdown(struct ns *ns); -int zebra_ns_final_shutdown(struct ns *ns); - +int zebra_ns_early_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))); +int zebra_ns_final_shutdown(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))); int zebra_ns_config_write(struct vty *vty, struct ns *ns); #ifdef __cplusplus diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index d85f48e570..998c035656 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -1194,7 +1194,7 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) ifindex_t ifindex; ifindex = mac->fwd_info.local.ifindex; - zns = zebra_ns_lookup(NS_DEFAULT); + zns = zebra_ns_lookup(mac->fwd_info.local.ns_id); ifp = if_lookup_by_index_per_ns(zns, ifindex); if (!ifp) return; @@ -1276,7 +1276,7 @@ static void zvni_print_mac(zebra_mac_t *mac, void *ctxt, json_object *json) ifindex_t ifindex; ifindex = mac->fwd_info.local.ifindex; - zns = zebra_ns_lookup(NS_DEFAULT); + zns = zebra_ns_lookup(mac->fwd_info.local.ns_id); ifp = if_lookup_by_index_per_ns(zns, ifindex); if (!ifp) return; @@ -1368,7 +1368,7 @@ static void zvni_print_mac_hash(struct hash_bucket *bucket, void *ctxt) if (wctx->flags & SHOW_REMOTE_MAC_FROM_VTEP) return; - zns = zebra_ns_lookup(NS_DEFAULT); + zns = zebra_ns_lookup(mac->fwd_info.local.ns_id); ifindex = mac->fwd_info.local.ifindex; ifp = if_lookup_by_index_per_ns(zns, ifindex); if (!ifp) // unexpected @@ -2731,7 +2731,12 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, zebra_mac_t *mac = NULL; struct zebra_if *zif = NULL; struct zebra_l2info_vxlan *vxl = NULL; + struct zebra_vrf *zvrf; + ns_id_t local_ns_id = NS_DEFAULT; + zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id); + if (zvrf && zvrf->zns) + local_ns_id = zvrf->zns->ns_id; zif = zvni->vxlan_if->info; if (!zif) return -1; @@ -2756,6 +2761,7 @@ static int zvni_gw_macip_add(struct interface *ifp, zebra_vni_t *zvni, SET_FLAG(mac->flags, ZEBRA_MAC_DEF_GW); memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.ns_id = local_ns_id; mac->fwd_info.local.vid = vxl->access_vlan; n = zvni_neigh_lookup(zvni, ip); @@ -3522,6 +3528,68 @@ static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr) 0 /* seq */, ZEBRA_NEIGH_ACTIVE, ZEBRA_MACIP_DEL); } +struct zvni_from_svi_param { + struct interface *br_if; + struct interface *svi_if; + struct zebra_if *zif; + uint8_t bridge_vlan_aware; + vlanid_t vid; +}; + +static int zvni_map_vlan_ns(struct ns *ns, + void *_in_param, + void **_p_zvni) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct interface *br_if; + zebra_vni_t **p_zvni = (zebra_vni_t **)_p_zvni; + zebra_vni_t *zvni; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl = NULL; + struct zvni_from_svi_param *in_param = + (struct zvni_from_svi_param *)_in_param; + int found = 0; + + if (!in_param) + return NS_WALK_STOP; + br_if = in_param->br_if; + zif = in_param->zif; + assert(zif); + assert(br_if); + + /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ + /* TODO: Optimize with a hash. */ + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + tmp_if = (struct interface *)rn->info; + if (!tmp_if) + continue; + zif = tmp_if->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + if (!if_is_operative(tmp_if)) + continue; + vxl = &zif->l2info.vxl; + + if (zif->brslave_info.br_if != br_if) + continue; + + if (!in_param->bridge_vlan_aware + || vxl->access_vlan == in_param->vid) { + found = 1; + break; + } + } + if (!found) + return NS_WALK_CONTINUE; + + zvni = zvni_lookup(vxl->vni); + if (p_zvni) + *p_zvni = zvni; + return NS_WALK_STOP; +} + /* * Map port or (port, VLAN) to a VNI. This is invoked upon getting MAC * notifications, to see if they are of interest. @@ -3529,25 +3597,51 @@ static int zvni_mac_send_del_to_client(vni_t vni, struct ethaddr *macaddr) static zebra_vni_t *zvni_map_vlan(struct interface *ifp, struct interface *br_if, vlanid_t vid) { - struct zebra_ns *zns; - struct route_node *rn; - struct interface *tmp_if = NULL; struct zebra_if *zif; struct zebra_l2info_bridge *br; - struct zebra_l2info_vxlan *vxl = NULL; - uint8_t bridge_vlan_aware; - zebra_vni_t *zvni; - int found = 0; + zebra_vni_t **p_zvni; + zebra_vni_t *zvni = NULL; + struct zvni_from_svi_param in_param; /* Determine if bridge is VLAN-aware or not */ zif = br_if->info; assert(zif); br = &zif->l2info.br; - bridge_vlan_aware = br->vlan_aware; + in_param.bridge_vlan_aware = br->vlan_aware; + in_param.vid = vid; + in_param.br_if = br_if; + in_param.zif = zif; + p_zvni = &zvni; + + ns_walk_func(zvni_map_vlan_ns, + (void *)&in_param, + (void **)p_zvni); + return zvni; +} + +static int zvni_from_svi_ns(struct ns *ns, + void *_in_param, + void **_p_zvni) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct interface *br_if; + zebra_vni_t **p_zvni = (zebra_vni_t **)_p_zvni; + zebra_vni_t *zvni; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl = NULL; + struct zvni_from_svi_param *in_param = + (struct zvni_from_svi_param *)_in_param; + int found = 0; + + if (!in_param) + return NS_WALK_STOP; + br_if = in_param->br_if; + zif = in_param->zif; + assert(zif); - /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ /* TODO: Optimize with a hash. */ - zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { tmp_if = (struct interface *)rn->info; if (!tmp_if) @@ -3562,17 +3656,20 @@ static zebra_vni_t *zvni_map_vlan(struct interface *ifp, if (zif->brslave_info.br_if != br_if) continue; - if (!bridge_vlan_aware || vxl->access_vlan == vid) { + if (!in_param->bridge_vlan_aware + || vxl->access_vlan == !in_param->vid) { found = 1; break; } } if (!found) - return NULL; + return NS_WALK_CONTINUE; zvni = zvni_lookup(vxl->vni); - return zvni; + if (p_zvni) + *p_zvni = zvni; + return NS_WALK_STOP; } /* @@ -3582,16 +3679,11 @@ static zebra_vni_t *zvni_map_vlan(struct interface *ifp, static zebra_vni_t *zvni_from_svi(struct interface *ifp, struct interface *br_if) { - struct zebra_ns *zns; - struct route_node *rn; - struct interface *tmp_if = NULL; - struct zebra_if *zif; struct zebra_l2info_bridge *br; - struct zebra_l2info_vxlan *vxl = NULL; - uint8_t bridge_vlan_aware; - vlanid_t vid = 0; - zebra_vni_t *zvni; - int found = 0; + zebra_vni_t *zvni = NULL; + zebra_vni_t **p_zvni; + struct zebra_if *zif; + struct zvni_from_svi_param in_param; if (!br_if) return NULL; @@ -3604,8 +3696,10 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp, zif = br_if->info; assert(zif); br = &zif->l2info.br; - bridge_vlan_aware = br->vlan_aware; - if (bridge_vlan_aware) { + in_param.bridge_vlan_aware = br->vlan_aware; + in_param.vid = 0; + + if (in_param.bridge_vlan_aware) { struct zebra_l2info_vlan *vl; if (!IS_ZEBRA_IF_VLAN(ifp)) @@ -3614,37 +3708,54 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp, zif = ifp->info; assert(zif); vl = &zif->l2info.vl; - vid = vl->vid; + in_param.vid = vl->vid; } + in_param.br_if = br_if; + in_param.zif = zif; + p_zvni = &zvni; /* See if this interface (or interface plus VLAN Id) maps to a VxLAN */ + ns_walk_func(zvni_from_svi_ns, + (void *)&in_param, + (void **)p_zvni); + return zvni; +} + +static int zvni_map_to_svi_ns(struct ns *ns, + void *_in_param, + void **_p_ifp) +{ + struct zebra_ns *zns = ns->info; + struct route_node *rn; + struct zvni_from_svi_param *in_param = + (struct zvni_from_svi_param *)_in_param; + struct zebra_l2info_vlan *vl; + struct interface *tmp_if = NULL; + struct interface **p_ifp = (struct interface **)_p_ifp; + struct zebra_if *zif; + + if (!in_param) + return NS_WALK_STOP; + /* TODO: Optimize with a hash. */ - zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { tmp_if = (struct interface *)rn->info; - if (!tmp_if) + /* Check oper status of the SVI. */ + if (!tmp_if || !if_is_operative(tmp_if)) continue; zif = tmp_if->info; - if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) - continue; - if (!if_is_operative(tmp_if)) - continue; - vxl = &zif->l2info.vxl; - - if (zif->brslave_info.br_if != br_if) + if (!zif || zif->zif_type != ZEBRA_IF_VLAN + || zif->link != in_param->br_if) continue; + vl = (struct zebra_l2info_vlan *)&zif->l2info.vl; - if (!bridge_vlan_aware || vxl->access_vlan == vid) { - found = 1; - break; + if (vl->vid == in_param->vid) { + if (p_ifp) + *p_ifp = tmp_if; + return NS_WALK_STOP; } } - - if (!found) - return NULL; - - zvni = zvni_lookup(vxl->vni); - return zvni; + return NS_WALK_CONTINUE; } /* Map to SVI on bridge corresponding to specified VLAN. This can be one @@ -3656,15 +3767,11 @@ static zebra_vni_t *zvni_from_svi(struct interface *ifp, */ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) { - struct zebra_ns *zns; - struct route_node *rn; struct interface *tmp_if = NULL; struct zebra_if *zif; struct zebra_l2info_bridge *br; - struct zebra_l2info_vlan *vl; - uint8_t bridge_vlan_aware; - int found = 0; - + struct zvni_from_svi_param in_param; + struct interface **p_ifp; /* Defensive check, caller expected to invoke only with valid bridge. */ if (!br_if) return NULL; @@ -3673,33 +3780,56 @@ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) zif = br_if->info; assert(zif); br = &zif->l2info.br; - bridge_vlan_aware = br->vlan_aware; - + in_param.bridge_vlan_aware = br->vlan_aware; /* Check oper status of the SVI. */ - if (!bridge_vlan_aware) + if (!in_param.bridge_vlan_aware) return if_is_operative(br_if) ? br_if : NULL; + in_param.vid = vid; + in_param.br_if = br_if; + in_param.zif = NULL; + p_ifp = &tmp_if; + /* Identify corresponding VLAN interface. */ + ns_walk_func(zvni_map_to_svi_ns, + (void *)&in_param, + (void **)p_ifp); + return tmp_if; +} + +static int zvni_map_to_macvlan_ns(struct ns *ns, + void *_in_param, + void **_p_ifp) +{ + struct zebra_ns *zns = ns->info; + struct zvni_from_svi_param *in_param = + (struct zvni_from_svi_param *)_in_param; + struct interface **p_ifp = (struct interface **)_p_ifp; + struct route_node *rn; + struct interface *tmp_if = NULL; + struct zebra_if *zif; + + if (!in_param) + return NS_WALK_STOP; + /* Identify corresponding VLAN interface. */ - /* TODO: Optimize with a hash. */ - zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { tmp_if = (struct interface *)rn->info; /* Check oper status of the SVI. */ if (!tmp_if || !if_is_operative(tmp_if)) continue; zif = tmp_if->info; - if (!zif || zif->zif_type != ZEBRA_IF_VLAN - || zif->link != br_if) + + if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN) continue; - vl = &zif->l2info.vl; - if (vl->vid == vid) { - found = 1; - break; + if (zif->link == in_param->svi_if) { + if (p_ifp) + *p_ifp = tmp_if; + return NS_WALK_STOP; } } - return found ? tmp_if : NULL; + return NS_WALK_CONTINUE; } /* Map to MAC-VLAN interface corresponding to specified SVI interface. @@ -3707,11 +3837,10 @@ static struct interface *zvni_map_to_svi(vlanid_t vid, struct interface *br_if) static struct interface *zvni_map_to_macvlan(struct interface *br_if, struct interface *svi_if) { - struct zebra_ns *zns; - struct route_node *rn; struct interface *tmp_if = NULL; struct zebra_if *zif; - int found = 0; + struct interface **p_ifp; + struct zvni_from_svi_param in_param; /* Defensive check, caller expected to invoke only with valid bridge. */ if (!br_if) @@ -3726,28 +3855,19 @@ static struct interface *zvni_map_to_macvlan(struct interface *br_if, zif = br_if->info; assert(zif); - /* Identify corresponding VLAN interface. */ - zns = zebra_ns_lookup(NS_DEFAULT); - for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { - tmp_if = (struct interface *)rn->info; - /* Check oper status of the SVI. */ - if (!tmp_if || !if_is_operative(tmp_if)) - continue; - zif = tmp_if->info; - - if (!zif || zif->zif_type != ZEBRA_IF_MACVLAN) - continue; - - if (zif->link == svi_if) { - found = 1; - break; - } - } + in_param.vid = 0; + in_param.br_if = br_if; + in_param.zif = NULL; + in_param.svi_if = svi_if; + p_ifp = &tmp_if; - return found ? tmp_if : NULL; + /* Identify corresponding VLAN interface. */ + ns_walk_func(zvni_map_to_macvlan_ns, + (void *)&in_param, + (void **)p_ifp); + return tmp_if; } - /* * Install remote MAC into the forwarding plane. */ @@ -3900,6 +4020,7 @@ static void zvni_deref_ip2mac(zebra_vni_t *zvni, zebra_mac_t *mac) static void zvni_read_mac_neigh(zebra_vni_t *zvni, struct interface *ifp) { struct zebra_ns *zns; + struct zebra_vrf *zvrf; struct zebra_if *zif; struct interface *vlan_if; struct zebra_l2info_vxlan *vxl; @@ -3907,7 +4028,10 @@ static void zvni_read_mac_neigh(zebra_vni_t *zvni, struct interface *ifp) zif = ifp->info; vxl = &zif->l2info.vxl; - zns = zebra_ns_lookup(NS_DEFAULT); + zvrf = zebra_vrf_lookup_by_id(zvni->vrf_id); + if (!zvrf || !zvrf->zns) + return; + zns = zvrf->zns; if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( @@ -4115,18 +4239,21 @@ static int zvni_send_del_to_client(vni_t vni) return zserv_send_message(client, s); } -/* - * Build the VNI hash table by going over the VxLAN interfaces. This - * is called when EVPN (advertise-all-vni) is enabled. - */ -static void zvni_build_hash_table(void) +static int zvni_build_hash_table_ns(struct ns *ns, + void *param_in __attribute__((unused)), + void **param_out __attribute__((unused))) { - struct zebra_ns *zns; + struct zebra_ns *zns = ns->info; struct route_node *rn; struct interface *ifp; + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + + if (!zvrf) + return NS_WALK_STOP; /* Walk VxLAN interfaces and create VNI hash. */ - zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { vni_t vni; zebra_vni_t *zvni = NULL; @@ -4143,7 +4270,15 @@ static void zvni_build_hash_table(void) vxl = &zif->l2info.vxl; vni = vxl->vni; - + /* link of VXLAN interface should be in zebra_evpn_vrf */ + if (zvrf->zns->ns_id != vxl->link_nsid) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Intf %s(%u) VNI %u, link not in same " + "namespace than BGP EVPN core instance ", + ifp->name, ifp->ifindex, vni); + continue; + } /* L3-VNI and L2-VNI are handled seperately */ zl3vni = zl3vni_lookup(vni); if (zl3vni) { @@ -4212,7 +4347,7 @@ static void zvni_build_hash_table(void) zlog_debug( "Failed to add VNI hash, IF %s(%u) L2-VNI %u", ifp->name, ifp->ifindex, vni); - return; + return NS_WALK_CONTINUE; } if (zvni->local_vtep_ip.s_addr != @@ -4249,6 +4384,19 @@ static void zvni_build_hash_table(void) } } } + return NS_WALK_CONTINUE; +} + +/* + * Build the VNI hash table by going over the VxLAN interfaces. This + * is called when EVPN (advertise-all-vni) is enabled. + */ + +static void zvni_build_hash_table(void) +{ + ns_walk_func(zvni_build_hash_table_ns, + (void *)NULL, + (void **)NULL); } /* @@ -5033,14 +5181,22 @@ static int zl3vni_del(zebra_l3vni_t *zl3vni) return 0; } -struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) +static int zl3vni_map_to_vxlan_if_ns(struct ns *ns, + void *_zl3vni, + void **_pifp) { - struct zebra_ns *zns = NULL; + struct zebra_ns *zns = ns->info; + zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)_zl3vni; struct route_node *rn = NULL; struct interface *ifp = NULL; + struct zebra_vrf *zvrf; + + zvrf = zebra_vrf_get_evpn(); + + if (!zvrf) + return NS_WALK_STOP; /* loop through all vxlan-interface */ - zns = zebra_ns_lookup(NS_DEFAULT); for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { struct zebra_if *zif = NULL; @@ -5055,13 +5211,39 @@ struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) continue; vxl = &zif->l2info.vxl; - if (vxl->vni == zl3vni->vni) { - zl3vni->local_vtep_ip = vxl->vtep_ip; - return ifp; + if (vxl->vni != zl3vni->vni) + continue; + + /* link of VXLAN interface should be in zebra_evpn_vrf */ + if (zvrf->zns->ns_id != vxl->link_nsid) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug( + "Intf %s(%u) VNI %u, link not in same " + "namespace than BGP EVPN core instance ", + ifp->name, ifp->ifindex, vxl->vni); + continue; } + + + zl3vni->local_vtep_ip = vxl->vtep_ip; + if (_pifp) + *_pifp = (void *)ifp; + return NS_WALK_STOP; } - return NULL; + return NS_WALK_CONTINUE; +} + +struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) +{ + struct interface **p_ifp; + struct interface *ifp = NULL; + + p_ifp = &ifp; + + ns_walk_func(zl3vni_map_to_vxlan_if_ns, + (void *)zl3vni, (void **)p_ifp); + return ifp; } struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni) @@ -5537,7 +5719,7 @@ static void process_remote_macip_add(vni_t vni, return; } - zvrf = vrf_info_lookup(zvni->vxlan_if->vrf_id); + zvrf = zebra_vrf_get_evpn(); if (!zvrf) return; @@ -8018,6 +8200,11 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, bool upd_neigh = false; bool is_dup_detect = false; struct in_addr vtep_ip = {.s_addr = 0}; + ns_id_t local_ns_id = NS_DEFAULT; + + zvrf = zebra_vrf_lookup_by_id(ifp->vrf_id); + if (zvrf && zvrf->zns) + local_ns_id = zvrf->zns->ns_id; /* We are interested in MACs only on ports or (port, VLAN) that * map to a VNI. @@ -8041,11 +8228,10 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, return -1; } - zvrf = vrf_info_lookup(zvni->vxlan_if->vrf_id); + zvrf = zebra_vrf_get_evpn(); if (!zvrf) { if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug(" No Vrf found for vrf_id: %d", - zvni->vxlan_if->vrf_id); + zlog_debug(" No Evpn Global Vrf found"); return -1; } @@ -8070,6 +8256,7 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, } SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.ns_id = local_ns_id; mac->fwd_info.local.vid = vid; if (sticky) SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); @@ -8094,6 +8281,7 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, */ if (mac_sticky == sticky && mac->fwd_info.local.ifindex == ifp->ifindex + && mac->fwd_info.local.ns_id == local_ns_id && mac->fwd_info.local.vid == vid) { if (IS_ZEBRA_DEBUG_VXLAN) zlog_debug( @@ -8118,6 +8306,7 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.ns_id = local_ns_id; mac->fwd_info.local.vid = vid; } else if (CHECK_FLAG(mac->flags, ZEBRA_MAC_REMOTE) || @@ -8155,6 +8344,7 @@ int zebra_vxlan_local_mac_add_update(struct interface *ifp, SET_FLAG(mac->flags, ZEBRA_MAC_LOCAL); memset(&mac->fwd_info, 0, sizeof(mac->fwd_info)); mac->fwd_info.local.ifindex = ifp->ifindex; + mac->fwd_info.local.ns_id = local_ns_id; mac->fwd_info.local.vid = vid; if (sticky) SET_FLAG(mac->flags, ZEBRA_MAC_STICKY); @@ -9620,6 +9810,25 @@ stream_failure: return; } +static int macfdb_read_ns(struct ns *ns, + void *_in_param __attribute__((unused)), + void **out_param __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + + macfdb_read(zns); + return NS_WALK_CONTINUE; +} + +static int neigh_read_ns(struct ns *ns, + void *_in_param __attribute__((unused)), + void **out_param __attribute__((unused))) +{ + struct zebra_ns *zns = ns->info; + + neigh_read(zns); + return NS_WALK_CONTINUE; +} /* * Handle message from client to learn (or stop learning) about VNIs and MACs. @@ -9669,10 +9878,10 @@ void zebra_vxlan_advertise_all_vni(ZAPI_HANDLER_ARGS) NULL); /* Read the MAC FDB */ - macfdb_read(zvrf->zns); + ns_walk_func(macfdb_read_ns, NULL, NULL); /* Read neighbors */ - neigh_read(zvrf->zns); + ns_walk_func(neigh_read_ns, NULL, NULL); } else { /* Cleanup VTEPs for all VNIs - uninstall from * kernel and free entries. diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index 0a46fb2075..d2b02df2ad 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -310,6 +310,7 @@ struct zebra_mac_t_ { union { struct { ifindex_t ifindex; + ns_id_t ns_id; vlanid_t vid; } local; |
