diff options
57 files changed, 3378 insertions, 524 deletions
diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index a2880b7b94..8a410adca1 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -87,7 +87,7 @@ libbgp_a_SOURCES = \ bgp_encap_tlv.c $(BGP_VNC_RFAPI_SRC) bgp_attr_evpn.c \ bgp_evpn.c bgp_evpn_vty.c bgp_vpn.c bgp_label.c bgp_rd.c \ bgp_keepalives.c bgp_io.c bgp_flowspec.c bgp_flowspec_util.c \ - bgp_flowspec_vty.c bgp_labelpool.c + bgp_flowspec_vty.c bgp_labelpool.c bgp_pbr.c noinst_HEADERS = \ bgp_memory.h \ @@ -101,7 +101,7 @@ noinst_HEADERS = \ $(BGP_VNC_RFAPI_HD) bgp_attr_evpn.h bgp_evpn.h bgp_evpn_vty.h \ bgp_vpn.h bgp_label.h bgp_rd.h bgp_evpn_private.h bgp_keepalives.h \ bgp_io.h bgp_flowspec.h bgp_flowspec_private.h bgp_flowspec_util.h \ - bgp_labelpool.h + bgp_labelpool.h bgp_pbr.h bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a $(BGP_VNC_RFP_LIB) ../lib/libfrr.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_attr_evpn.c b/bgpd/bgp_attr_evpn.c index d2a61b93fe..14ff01ada5 100644 --- a/bgpd/bgp_attr_evpn.c +++ b/bgpd/bgp_attr_evpn.c @@ -227,16 +227,18 @@ extern int bgp_build_evpn_prefix(int evpn_type, uint32_t eth_tag, dst->family = AF_EVPN; p_evpn_p->route_type = evpn_type; if (evpn_type == BGP_EVPN_IP_PREFIX_ROUTE) { - p_evpn_p->eth_tag = eth_tag; - p_evpn_p->ip_prefix_length = p2.prefixlen; + p_evpn_p->prefix_addr.eth_tag = eth_tag; + p_evpn_p->prefix_addr.ip_prefix_length = p2.prefixlen; if (src->family == AF_INET) { - SET_IPADDR_V4(&p_evpn_p->ip); - memcpy(&p_evpn_p->ip.ipaddr_v4, &src->u.prefix4, + SET_IPADDR_V4(&p_evpn_p->prefix_addr.ip); + memcpy(&p_evpn_p->prefix_addr.ip.ipaddr_v4, + &src->u.prefix4, sizeof(struct in_addr)); dst->prefixlen = (uint8_t)PREFIX_LEN_ROUTE_TYPE_5_IPV4; } else { - SET_IPADDR_V6(&p_evpn_p->ip); - memcpy(&p_evpn_p->ip.ipaddr_v6, &src->u.prefix6, + SET_IPADDR_V6(&p_evpn_p->prefix_addr.ip); + memcpy(&p_evpn_p->prefix_addr.ip.ipaddr_v6, + &src->u.prefix6, sizeof(struct in6_addr)); dst->prefixlen = (uint8_t)PREFIX_LEN_ROUTE_TYPE_5_IPV6; } diff --git a/bgpd/bgp_debug.c 
b/bgpd/bgp_debug.c index 29ac5f520d..3e3fbcbfe8 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -59,6 +59,7 @@ unsigned long conf_bgp_debug_update_groups; unsigned long conf_bgp_debug_vpn; unsigned long conf_bgp_debug_flowspec; unsigned long conf_bgp_debug_labelpool; +unsigned long conf_bgp_debug_pbr; unsigned long term_bgp_debug_as4; unsigned long term_bgp_debug_neighbor_events; @@ -75,6 +76,7 @@ unsigned long term_bgp_debug_update_groups; unsigned long term_bgp_debug_vpn; unsigned long term_bgp_debug_flowspec; unsigned long term_bgp_debug_labelpool; +unsigned long term_bgp_debug_pbr; struct list *bgp_debug_neighbor_events_peers = NULL; struct list *bgp_debug_keepalive_peers = NULL; @@ -1653,7 +1655,40 @@ DEFUN (no_debug_bgp_vpn, if (vty->node != CONFIG_NODE) vty_out(vty, "disabled debug bgp vpn %s\n", argv[idx]->text); + return CMD_SUCCESS; +} +/* debug bgp pbr */ +DEFUN (debug_bgp_pbr, + debug_bgp_pbr_cmd, + "debug bgp pbr", + DEBUG_STR + BGP_STR + "BGP policy based routing\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_ON(pbr, PBR); + else { + TERM_DEBUG_ON(pbr, PBR); + vty_out(vty, "BGP policy based routing is on\n"); + } + return CMD_SUCCESS; +} + +DEFUN (no_debug_bgp_pbr, + no_debug_bgp_pbr_cmd, + "no debug bgp pbr", + NO_STR + DEBUG_STR + BGP_STR + "BGP policy based routing\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_OFF(pbr, PBR); + else { + TERM_DEBUG_OFF(pbr, PBR); + vty_out(vty, "BGP policy based routing is off\n"); + } return CMD_SUCCESS; } @@ -1733,6 +1768,7 @@ DEFUN (no_debug_bgp, TERM_DEBUG_OFF(vpn, VPN_LEAK_LABEL); TERM_DEBUG_OFF(flowspec, FLOWSPEC); TERM_DEBUG_OFF(labelpool, LABELPOOL); + TERM_DEBUG_OFF(pbr, PBR); vty_out(vty, "All possible debugging has been turned off\n"); return CMD_SUCCESS; @@ -1808,6 +1844,9 @@ DEFUN_NOSH (show_debugging_bgp, if (BGP_DEBUG(labelpool, LABELPOOL)) vty_out(vty, " BGP labelpool debugging is on\n"); + if (BGP_DEBUG(pbr, PBR)) + vty_out(vty, " BGP policy based routing debugging is on\n"); + vty_out(vty, 
"\n"); return CMD_SUCCESS; } @@ -1865,6 +1904,9 @@ int bgp_debug_count(void) if (BGP_DEBUG(labelpool, LABELPOOL)) ret++; + if (BGP_DEBUG(pbr, PBR)) + ret++; + return ret; } @@ -1966,6 +2008,10 @@ static int bgp_config_write_debug(struct vty *vty) write++; } + if (CONF_BGP_DEBUG(pbr, PBR)) { + vty_out(vty, "debug bgp pbr\n"); + write++; + } return write; } @@ -2069,6 +2115,13 @@ void bgp_debug_init(void) install_element(CONFIG_NODE, &debug_bgp_labelpool_cmd); install_element(ENABLE_NODE, &no_debug_bgp_labelpool_cmd); install_element(CONFIG_NODE, &no_debug_bgp_labelpool_cmd); + + /* debug bgp pbr */ + install_element(ENABLE_NODE, &debug_bgp_pbr_cmd); + install_element(CONFIG_NODE, &debug_bgp_pbr_cmd); + install_element(ENABLE_NODE, &no_debug_bgp_pbr_cmd); + install_element(CONFIG_NODE, &no_debug_bgp_pbr_cmd); + } /* Return true if this prefix is on the per_prefix_list of prefixes to debug diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h index ad476ee918..d5d8fbb505 100644 --- a/bgpd/bgp_debug.h +++ b/bgpd/bgp_debug.h @@ -75,6 +75,7 @@ extern unsigned long conf_bgp_debug_update_groups; extern unsigned long conf_bgp_debug_vpn; extern unsigned long conf_bgp_debug_flowspec; extern unsigned long conf_bgp_debug_labelpool; +extern unsigned long conf_bgp_debug_pbr; extern unsigned long term_bgp_debug_as4; extern unsigned long term_bgp_debug_neighbor_events; @@ -89,6 +90,7 @@ extern unsigned long term_bgp_debug_update_groups; extern unsigned long term_bgp_debug_vpn; extern unsigned long term_bgp_debug_flowspec; extern unsigned long term_bgp_debug_labelpool; +extern unsigned long term_bgp_debug_pbr; extern struct list *bgp_debug_neighbor_events_peers; extern struct list *bgp_debug_keepalive_peers; @@ -123,6 +125,8 @@ struct bgp_debug_filter { #define BGP_DEBUG_VPN_LEAK_LABEL 0x08 #define BGP_DEBUG_FLOWSPEC 0x01 #define BGP_DEBUG_LABELPOOL 0x01 +#define BGP_DEBUG_PBR 0x01 +#define BGP_DEBUG_PBR_ERROR 0x02 #define BGP_DEBUG_PACKET_SEND 0x01 #define BGP_DEBUG_PACKET_SEND_DETAIL 
0x02 diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c index 8eb0222a1b..85b9ffd8ca 100644 --- a/bgpd/bgp_ecommunity.c +++ b/bgpd/bgp_ecommunity.c @@ -34,6 +34,7 @@ #include "bgpd/bgp_lcommunity.h" #include "bgpd/bgp_aspath.h" #include "bgpd/bgp_flowspec_private.h" +#include "bgpd/bgp_pbr.h" /* struct used to dump the rate contained in FS set traffic-rate EC */ union traffic_rate { @@ -931,3 +932,52 @@ int ecommunity_del_val(struct ecommunity *ecom, struct ecommunity_val *eval) ecom->val = p; return 1; } + +int ecommunity_fill_pbr_action(struct ecommunity_val *ecom_eval, + struct bgp_pbr_entry_action *api) +{ + if (ecom_eval->val[1] == ECOMMUNITY_TRAFFIC_RATE) { + api->action = ACTION_TRAFFICRATE; + api->u.r.rate_info[3] = ecom_eval->val[4]; + api->u.r.rate_info[2] = ecom_eval->val[5]; + api->u.r.rate_info[1] = ecom_eval->val[6]; + api->u.r.rate_info[0] = ecom_eval->val[7]; + } else if (ecom_eval->val[1] == ECOMMUNITY_TRAFFIC_ACTION) { + api->action = ACTION_TRAFFIC_ACTION; + /* else distribute code is set by default */ + if (ecom_eval->val[5] & (1 << FLOWSPEC_TRAFFIC_ACTION_TERMINAL)) + api->u.za.filter |= TRAFFIC_ACTION_TERMINATE; + else + api->u.za.filter |= TRAFFIC_ACTION_DISTRIBUTE; + if (ecom_eval->val[5] == 1 << FLOWSPEC_TRAFFIC_ACTION_SAMPLE) + api->u.za.filter |= TRAFFIC_ACTION_SAMPLE; + + } else if (ecom_eval->val[1] == ECOMMUNITY_TRAFFIC_MARKING) { + api->action = ACTION_MARKING; + api->u.marking_dscp = ecom_eval->val[7]; + } else if (ecom_eval->val[1] == ECOMMUNITY_REDIRECT_VRF) { + /* must use external function */ + return 0; + } else if (ecom_eval->val[1] == ECOMMUNITY_REDIRECT_IP_NH) { + /* see draft-ietf-idr-flowspec-redirect-ip-02 + * Q1: how come a ext. community can host ipv6 address + * Q2 : from cisco documentation: + * Announces the reachability of one or more flowspec NLRI. 
+ * When a BGP speaker receives an UPDATE message with the + * redirect-to-IP extended community, it is expected to + * create a traffic filtering rule for every flow-spec + * NLRI in the message that has this path as its best + * path. The filter entry matches the IP packets + * described in the NLRI field and redirects them or + * copies them towards the IPv4 or IPv6 address specified + * in the 'Network Address of Next- Hop' + * field of the associated MP_REACH_NLRI. + */ + struct ecommunity_ip *ip_ecom = (struct ecommunity_ip *) + ecom_eval + 2; + + api->u.zr.redirect_ip_v4 = ip_ecom->ip; + } else + return -1; + return 0; +} diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h index 3aeb458dc6..88bdb5e2ae 100644 --- a/bgpd/bgp_ecommunity.h +++ b/bgpd/bgp_ecommunity.h @@ -172,4 +172,8 @@ extern int ecommunity_strip(struct ecommunity *ecom, uint8_t type, extern struct ecommunity *ecommunity_new(void); extern int ecommunity_del_val(struct ecommunity *ecom, struct ecommunity_val *eval); +struct bgp_pbr_entry_action; +extern int ecommunity_fill_pbr_action(struct ecommunity_val *ecom_eval, + struct bgp_pbr_entry_action *api); + #endif /* _QUAGGA_BGP_ECOMMUNITY_H */ diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 49808e7cdd..ad45da84eb 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -510,15 +510,15 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, s, add ? ZEBRA_REMOTE_MACIP_ADD : ZEBRA_REMOTE_MACIP_DEL, bgp->vrf_id); stream_putl(s, vpn->vni); - stream_put(s, &p->prefix.mac.octet, ETH_ALEN); /* Mac Addr */ + stream_put(s, &p->prefix.macip_addr.mac.octet, ETH_ALEN); /* Mac Addr */ /* IP address length and IP address, if any. */ - if (IS_EVPN_PREFIX_IPADDR_NONE(p)) + if (is_evpn_prefix_ipaddr_none(p)) stream_putl(s, 0); else { - ipa_len = IS_EVPN_PREFIX_IPADDR_V4(p) ? IPV4_MAX_BYTELEN + ipa_len = is_evpn_prefix_ipaddr_v4(p) ? 
IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN; stream_putl(s, ipa_len); - stream_put(s, &p->prefix.ip.ip.addr, ipa_len); + stream_put(s, &p->prefix.macip_addr.ip.ip.addr, ipa_len); } stream_put_in_addr(s, &remote_vtep_ip); @@ -532,8 +532,10 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, zlog_debug( "Tx %s MACIP, VNI %u MAC %s IP %s (flags: 0x%x) remote VTEP %s", add ? "ADD" : "DEL", vpn->vni, - prefix_mac2str(&p->prefix.mac, buf1, sizeof(buf1)), - ipaddr2str(&p->prefix.ip, buf3, sizeof(buf3)), flags, + prefix_mac2str(&p->prefix.macip_addr.mac, + buf1, sizeof(buf1)), + ipaddr2str(&p->prefix.macip_addr.ip, + buf3, sizeof(buf3)), flags, inet_ntop(AF_INET, &remote_vtep_ip, buf2, sizeof(buf2))); @@ -563,9 +565,9 @@ static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, s, add ? ZEBRA_REMOTE_VTEP_ADD : ZEBRA_REMOTE_VTEP_DEL, bgp->vrf_id); stream_putl(s, vpn->vni); - if (IS_EVPN_PREFIX_IPADDR_V4(p)) - stream_put_in_addr(s, &p->prefix.ip.ipaddr_v4); - else if (IS_EVPN_PREFIX_IPADDR_V6(p)) { + if (is_evpn_prefix_ipaddr_v4(p)) + stream_put_in_addr(s, &p->prefix.imet_addr.ip.ipaddr_v4); + else if (is_evpn_prefix_ipaddr_v6(p)) { zlog_err( "Bad remote IP when trying to %s remote VTEP for VNI %u", add ? "ADD" : "DEL", vpn->vni); @@ -577,7 +579,7 @@ static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn, if (bgp_debug_zebra(NULL)) zlog_debug("Tx %s Remote VTEP, VNI %u remote VTEP %s", add ? "ADD" : "DEL", vpn->vni, - inet_ntoa(p->prefix.ip.ipaddr_v4)); + inet_ntoa(p->prefix.imet_addr.ip.ipaddr_v4)); return zclient_send_message(zclient); } @@ -1293,8 +1295,8 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, * these routes. 
*/ if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE && - (IS_EVPN_PREFIX_IPADDR_V4(p) || - !IN6_IS_ADDR_LINKLOCAL(&p->prefix.ip.ipaddr_v6)) && + (is_evpn_prefix_ipaddr_v4(p) || + !IN6_IS_ADDR_LINKLOCAL(&p->prefix.macip_addr.ip.ipaddr_v6)) && CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS)) add_l3_ecomm = 1; @@ -1539,8 +1541,8 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) continue; - if (IS_EVPN_PREFIX_IPADDR_V6(evp) && - IN6_IS_ADDR_LINKLOCAL(&evp->prefix.ip.ipaddr_v6)) + if (is_evpn_prefix_ipaddr_v6(evp) && + IN6_IS_ADDR_LINKLOCAL(&evp->prefix.macip_addr.ip.ipaddr_v6)) update_evpn_route_entry(bgp, vpn, afi, safi, rn, &attr_ip6_ll, 0, 1, &ri, 0); else { @@ -1792,10 +1794,7 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, char buf1[PREFIX_STRLEN]; memset(pp, 0, sizeof(struct prefix)); - if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) - ip_prefix_from_type2_prefix(evp, pp); - else if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) - ip_prefix_from_type5_prefix(evp, pp); + ip_prefix_from_evpn_prefix(evp, pp); if (bgp_debug_zebra(NULL)) { zlog_debug( @@ -1807,11 +1806,11 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, /* Create (or fetch) route within the VRF. */ /* NOTE: There is no RD here. 
*/ - if (IS_EVPN_PREFIX_IPADDR_V4(evp)) { + if (is_evpn_prefix_ipaddr_v4(evp)) { afi = AFI_IP; safi = SAFI_UNICAST; rn = bgp_node_get(bgp_vrf->rib[afi][safi], pp); - } else if (IS_EVPN_PREFIX_IPADDR_V6(evp)) { + } else if (is_evpn_prefix_ipaddr_v6(evp)) { afi = AFI_IP6; safi = SAFI_UNICAST; rn = bgp_node_get(bgp_vrf->rib[afi][safi], pp); @@ -1970,10 +1969,7 @@ static int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, char buf1[PREFIX_STRLEN]; memset(pp, 0, sizeof(struct prefix)); - if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) - ip_prefix_from_type2_prefix(evp, pp); - else if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) - ip_prefix_from_type5_prefix(evp, pp); + ip_prefix_from_evpn_prefix(evp, pp); if (bgp_debug_zebra(NULL)) { zlog_debug( @@ -1985,7 +1981,7 @@ static int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, /* Locate route within the VRF. */ /* NOTE: There is no RD here. */ - if (IS_EVPN_PREFIX_IPADDR_V4(evp)) { + if (is_evpn_prefix_ipaddr_v4(evp)) { afi = AFI_IP; safi = SAFI_UNICAST; rn = bgp_node_lookup(bgp_vrf->rib[afi][safi], pp); @@ -2232,8 +2228,8 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, int install) continue; /* if not a mac+ip route skip this route */ - if (!(IS_EVPN_PREFIX_IPADDR_V4(evp) - || IS_EVPN_PREFIX_IPADDR_V6(evp))) + if (!(is_evpn_prefix_ipaddr_v4(evp) + || is_evpn_prefix_ipaddr_v6(evp))) continue; for (ri = rn->info; ri; ri = ri->next) { @@ -2423,8 +2419,8 @@ static int install_uninstall_route_in_vrfs(struct bgp *bgp_def, afi_t afi, /* if it is type-2 route and not a mac+ip route skip this route */ if ((evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) - && !(IS_EVPN_PREFIX_IPADDR_V4(evp) - || IS_EVPN_PREFIX_IPADDR_V6(evp))) + && !(is_evpn_prefix_ipaddr_v4(evp) + || is_evpn_prefix_ipaddr_v6(evp))) return 0; for (ALL_LIST_ELEMENTS(vrfs, node, nnode, bgp_vrf)) { @@ -2850,7 +2846,7 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi, /* Copy Ethernet Tag */ 
memcpy(&eth_tag, pfx, 4); - p.prefix.eth_tag = ntohl(eth_tag); + p.prefix.macip_addr.eth_tag = ntohl(eth_tag); pfx += 4; /* Get the MAC Addr len */ @@ -2858,7 +2854,7 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi, /* Get the MAC Addr */ if (macaddr_len == (ETH_ALEN * 8)) { - memcpy(&p.prefix.mac.octet, pfx, ETH_ALEN); + memcpy(&p.prefix.macip_addr.mac.octet, pfx, ETH_ALEN); pfx += ETH_ALEN; } else { zlog_err( @@ -2880,10 +2876,10 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi, if (ipaddr_len) { ipaddr_len /= 8; /* Convert to bytes. */ - p.prefix.ip.ipa_type = (ipaddr_len == IPV4_MAX_BYTELEN) + p.prefix.macip_addr.ip.ipa_type = (ipaddr_len == IPV4_MAX_BYTELEN) ? IPADDR_V4 : IPADDR_V6; - memcpy(&p.prefix.ip.ip.addr, pfx, ipaddr_len); + memcpy(&p.prefix.macip_addr.ip.ip.addr, pfx, ipaddr_len); } pfx += ipaddr_len; @@ -2965,14 +2961,14 @@ static int process_type3_route(struct peer *peer, afi_t afi, safi_t safi, /* Copy Ethernet Tag */ memcpy(&eth_tag, pfx, 4); - p.prefix.eth_tag = ntohl(eth_tag); + p.prefix.imet_addr.eth_tag = ntohl(eth_tag); pfx += 4; /* Get the IP. */ ipaddr_len = *pfx++; if (ipaddr_len == IPV4_MAX_BITLEN) { - p.prefix.ip.ipa_type = IPADDR_V4; - memcpy(&p.prefix.ip.ip.addr, pfx, IPV4_MAX_BYTELEN); + p.prefix.imet_addr.ip.ipa_type = IPADDR_V4; + memcpy(&p.prefix.imet_addr.ip.ip.addr, pfx, IPV4_MAX_BYTELEN); } else { zlog_err( "%u:%s - Rx EVPN Type-3 NLRI with unsupported IP address length %d", @@ -3039,7 +3035,7 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi, /* Fetch Ethernet Tag. */ memcpy(&eth_tag, pfx, 4); - p.prefix.eth_tag = ntohl(eth_tag); + p.prefix.prefix_addr.eth_tag = ntohl(eth_tag); pfx += 4; /* Fetch IP prefix length. 
*/ @@ -3050,21 +3046,21 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi, peer->bgp->vrf_id, peer->host, ippfx_len); return -1; } - p.prefix.ip_prefix_length = ippfx_len; + p.prefix.prefix_addr.ip_prefix_length = ippfx_len; /* Determine IPv4 or IPv6 prefix */ /* Since the address and GW are from the same family, this just becomes * a simple check on the total size. */ if (psize == 34) { - SET_IPADDR_V4(&p.prefix.ip); - memcpy(&p.prefix.ip.ipaddr_v4, pfx, 4); + SET_IPADDR_V4(&p.prefix.prefix_addr.ip); + memcpy(&p.prefix.prefix_addr.ip.ipaddr_v4, pfx, 4); pfx += 4; memcpy(&evpn.gw_ip.ipv4, pfx, 4); pfx += 4; } else { - SET_IPADDR_V6(&p.prefix.ip); - memcpy(&p.prefix.ip.ipaddr_v6, pfx, 16); + SET_IPADDR_V6(&p.prefix.prefix_addr.ip); + memcpy(&p.prefix.prefix_addr.ip.ipaddr_v6, pfx, 16); pfx += 16; memcpy(&evpn.gw_ip.ipv6, pfx, 16); pfx += 16; @@ -3109,7 +3105,7 @@ static void evpn_mpattr_encode_type5(struct stream *s, struct prefix *p, /* len denites the total len of IP and GW-IP in the route IP and GW-IP have to be both ipv4 or ipv6 */ - if (IS_IPADDR_V4(&p_evpn_p->ip)) + if (IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip)) len = 8; /* IP and GWIP are both ipv4 */ else len = 32; /* IP and GWIP are both ipv6 */ @@ -3120,20 +3116,20 @@ static void evpn_mpattr_encode_type5(struct stream *s, struct prefix *p, stream_put(s, &(attr->evpn_overlay.eth_s_id), 10); else stream_put(s, &temp, 10); - stream_putl(s, p_evpn_p->eth_tag); - stream_putc(s, p_evpn_p->ip_prefix_length); - if (IS_IPADDR_V4(&p_evpn_p->ip)) - stream_put_ipv4(s, p_evpn_p->ip.ipaddr_v4.s_addr); + stream_putl(s, p_evpn_p->prefix_addr.eth_tag); + stream_putc(s, p_evpn_p->prefix_addr.ip_prefix_length); + if (IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip)) + stream_put_ipv4(s, p_evpn_p->prefix_addr.ip.ipaddr_v4.s_addr); else - stream_put(s, &p_evpn_p->ip.ipaddr_v6, 16); + stream_put(s, &p_evpn_p->prefix_addr.ip.ipaddr_v6, 16); if (attr) { - if (IS_IPADDR_V4(&p_evpn_p->ip)) + if 
(IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip)) stream_put_ipv4(s, attr->evpn_overlay.gw_ip.ipv4.s_addr); else stream_put(s, &(attr->evpn_overlay.gw_ip.ipv6), 16); } else { - if (IS_IPADDR_V4(&p_evpn_p->ip)) + if (IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip)) stream_put_ipv4(s, 0); else stream_put(s, &temp, 16); @@ -3583,44 +3579,49 @@ void bgp_evpn_route2json(struct prefix_evpn *p, json_object *json) if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) { json_object_int_add(json, "routeType", p->prefix.route_type); - json_object_int_add(json, "ethTag", p->prefix.eth_tag); + json_object_int_add(json, "ethTag", + p->prefix.imet_addr.eth_tag); json_object_int_add(json, "ipLen", - IS_EVPN_PREFIX_IPADDR_V4(p) + is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN); json_object_string_add(json, "ip", - inet_ntoa(p->prefix.ip.ipaddr_v4)); + inet_ntoa(p->prefix.imet_addr.ip.ipaddr_v4)); } else if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { - if (IS_EVPN_PREFIX_IPADDR_NONE(p)) { + if (is_evpn_prefix_ipaddr_none(p)) { json_object_int_add(json, "routeType", p->prefix.route_type); - json_object_int_add(json, "ethTag", p->prefix.eth_tag); + json_object_int_add(json, "ethTag", + p->prefix.macip_addr.eth_tag); json_object_int_add(json, "macLen", 8 * ETH_ALEN); json_object_string_add(json, "mac", - prefix_mac2str(&p->prefix.mac, + prefix_mac2str(&p->prefix.macip_addr.mac, buf1, sizeof(buf1))); } else { uint8_t family; - family = IS_EVPN_PREFIX_IPADDR_V4(p) ? AF_INET + family = is_evpn_prefix_ipaddr_v4(p) ? 
AF_INET : AF_INET6; json_object_int_add(json, "routeType", p->prefix.route_type); - json_object_int_add(json, "ethTag", p->prefix.eth_tag); + json_object_int_add(json, "ethTag", + p->prefix.macip_addr.eth_tag); json_object_int_add(json, "macLen", 8 * ETH_ALEN); json_object_string_add(json, "mac", - prefix_mac2str(&p->prefix.mac, + prefix_mac2str(&p->prefix.macip_addr.mac, buf1, sizeof(buf1))); json_object_int_add(json, "ipLen", - IS_EVPN_PREFIX_IPADDR_V4(p) + is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN); json_object_string_add( json, "ip", - inet_ntop(family, &p->prefix.ip.ip.addr, buf2, + inet_ntop(family, + &p->prefix.macip_addr.ip.ip.addr, + buf2, PREFIX2STR_BUFFER)); } } else { @@ -3639,42 +3640,44 @@ char *bgp_evpn_route2str(struct prefix_evpn *p, char *buf, int len) if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) { snprintf(buf, len, "[%d]:[%d]:[%d]:[%s]", p->prefix.route_type, - p->prefix.eth_tag, - IS_EVPN_PREFIX_IPADDR_V4(p) ? IPV4_MAX_BITLEN + p->prefix.imet_addr.eth_tag, + is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN, - inet_ntoa(p->prefix.ip.ipaddr_v4)); + inet_ntoa(p->prefix.imet_addr.ip.ipaddr_v4)); } else if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { - if (IS_EVPN_PREFIX_IPADDR_NONE(p)) + if (is_evpn_prefix_ipaddr_none(p)) snprintf(buf, len, "[%d]:[%d]:[%d]:[%s]", p->prefix.route_type, - p->prefix.eth_tag, + p->prefix.macip_addr.eth_tag, 8 * ETH_ALEN, - prefix_mac2str(&p->prefix.mac, buf1, + prefix_mac2str(&p->prefix.macip_addr.mac, buf1, sizeof(buf1))); else { uint8_t family; - family = IS_EVPN_PREFIX_IPADDR_V4(p) ? AF_INET + family = is_evpn_prefix_ipaddr_v4(p) ? AF_INET : AF_INET6; snprintf(buf, len, "[%d]:[%d]:[%d]:[%s]:[%d]:[%s]", p->prefix.route_type, - p->prefix.eth_tag, + p->prefix.macip_addr.eth_tag, 8 * ETH_ALEN, - prefix_mac2str(&p->prefix.mac, buf1, + prefix_mac2str(&p->prefix.macip_addr.mac, buf1, sizeof(buf1)), family == AF_INET ? 
IPV4_MAX_BITLEN : IPV6_MAX_BITLEN, - inet_ntop(family, &p->prefix.ip.ip.addr, buf2, + inet_ntop(family, + &p->prefix.macip_addr.ip.ip.addr, + buf2, PREFIX2STR_BUFFER)); } } else if (p->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) { snprintf(buf, len, "[%d]:[%d]:[%d]:[%s]", p->prefix.route_type, - p->prefix.eth_tag, - p->prefix.ip_prefix_length, - IS_EVPN_PREFIX_IPADDR_V4(p) - ? inet_ntoa(p->prefix.ip.ipaddr_v4) - : inet6_ntoa(p->prefix.ip.ipaddr_v6)); + p->prefix.prefix_addr.eth_tag, + p->prefix.prefix_addr.ip_prefix_length, + is_evpn_prefix_ipaddr_v4(p) + ? inet_ntoa(p->prefix.prefix_addr.ip.ipaddr_v4) + : inet6_ntoa(p->prefix.prefix_addr.ip.ipaddr_v6)); } else { /* For EVPN route types not supported yet. */ snprintf(buf, len, "(unsupported route type %d)", @@ -3703,9 +3706,9 @@ void bgp_evpn_encode_prefix(struct stream *s, struct prefix *p, switch (evp->prefix.route_type) { case BGP_EVPN_MAC_IP_ROUTE: - if (IS_EVPN_PREFIX_IPADDR_V4(evp)) + if (is_evpn_prefix_ipaddr_v4(evp)) ipa_len = IPV4_MAX_BYTELEN; - else if (IS_EVPN_PREFIX_IPADDR_V6(evp)) + else if (is_evpn_prefix_ipaddr_v6(evp)) ipa_len = IPV6_MAX_BYTELEN; /* RD, ESI, EthTag, MAC+len, IP len, [IP], 1 VNI */ len = 8 + 10 + 4 + 1 + 6 + 1 + ipa_len + 3; @@ -3717,12 +3720,13 @@ void bgp_evpn_encode_prefix(struct stream *s, struct prefix *p, stream_put(s, &attr->evpn_overlay.eth_s_id, ESI_LEN); else stream_put(s, 0, 10); - stream_putl(s, evp->prefix.eth_tag); /* Ethernet Tag ID */ + stream_putl(s, evp->prefix.macip_addr.eth_tag); /* Ethernet Tag ID */ stream_putc(s, 8 * ETH_ALEN); /* Mac Addr Len - bits */ - stream_put(s, evp->prefix.mac.octet, 6); /* Mac Addr */ - stream_putc(s, 8 * ipa_len); /* IP address Length */ - if (ipa_len) /* IP */ - stream_put(s, &evp->prefix.ip.ip.addr, ipa_len); + stream_put(s, evp->prefix.macip_addr.mac.octet, 6); /* Mac Addr */ + stream_putc(s, 8 * ipa_len); /* IP address Length */ + if (ipa_len) /* IP */ + stream_put(s, &evp->prefix.macip_addr.ip.ip.addr, + ipa_len); /* 1st label 
is the L2 VNI */ stream_put(s, label, BGP_LABEL_BYTES); /* Include 2nd label (L3 VNI) if advertising MAC+IP */ @@ -3733,10 +3737,10 @@ void bgp_evpn_encode_prefix(struct stream *s, struct prefix *p, case BGP_EVPN_IMET_ROUTE: stream_putc(s, 17); // TODO: length - assumes IPv4 address stream_put(s, prd->val, 8); /* RD */ - stream_putl(s, evp->prefix.eth_tag); /* Ethernet Tag ID */ + stream_putl(s, evp->prefix.imet_addr.eth_tag); /* Ethernet Tag ID */ stream_putc(s, IPV4_MAX_BITLEN); /* IP address Length - bits */ /* Originating Router's IP Addr */ - stream_put_in_addr(s, &evp->prefix.ip.ipaddr_v4); + stream_put_in_addr(s, &evp->prefix.imet_addr.ip.ipaddr_v4); break; case BGP_EVPN_IP_PREFIX_ROUTE: diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index 1eecb9ecf7..1efde3a719 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -272,15 +272,15 @@ static inline void ip_prefix_from_type5_prefix(struct prefix_evpn *evp, struct prefix *ip) { memset(ip, 0, sizeof(struct prefix)); - if (IS_EVPN_PREFIX_IPADDR_V4(evp)) { + if (is_evpn_prefix_ipaddr_v4(evp)) { ip->family = AF_INET; - ip->prefixlen = evp->prefix.ip_prefix_length; - memcpy(&(ip->u.prefix4), &(evp->prefix.ip.ip), + ip->prefixlen = evp->prefix.prefix_addr.ip_prefix_length; + memcpy(&(ip->u.prefix4), &(evp->prefix.prefix_addr.ip.ip), IPV4_MAX_BYTELEN); - } else if (IS_EVPN_PREFIX_IPADDR_V6(evp)) { + } else if (is_evpn_prefix_ipaddr_v6(evp)) { ip->family = AF_INET6; - ip->prefixlen = evp->prefix.ip_prefix_length; - memcpy(&(ip->u.prefix6), &(evp->prefix.ip.ip), + ip->prefixlen = evp->prefix.prefix_addr.ip_prefix_length; + memcpy(&(ip->u.prefix6), &(evp->prefix.prefix_addr.ip.ip), IPV6_MAX_BYTELEN); } } @@ -290,26 +290,36 @@ static inline int is_evpn_prefix_default(struct prefix *evp) if (evp->family != AF_EVPN) return 0; - return ((evp->u.prefix_evpn.ip_prefix_length == 0) ? 1 : 0); + return ((evp->u.prefix_evpn.prefix_addr.ip_prefix_length == 0) ? 
+ 1 : 0); } static inline void ip_prefix_from_type2_prefix(struct prefix_evpn *evp, struct prefix *ip) { memset(ip, 0, sizeof(struct prefix)); - if (IS_EVPN_PREFIX_IPADDR_V4(evp)) { + if (is_evpn_prefix_ipaddr_v4(evp)) { ip->family = AF_INET; ip->prefixlen = IPV4_MAX_BITLEN; - memcpy(&(ip->u.prefix4), &(evp->prefix.ip.ip), + memcpy(&(ip->u.prefix4), &(evp->prefix.macip_addr.ip.ip), IPV4_MAX_BYTELEN); - } else if (IS_EVPN_PREFIX_IPADDR_V6(evp)) { + } else if (is_evpn_prefix_ipaddr_v6(evp)) { ip->family = AF_INET6; ip->prefixlen = IPV6_MAX_BITLEN; - memcpy(&(ip->u.prefix6), &(evp->prefix.ip.ip), + memcpy(&(ip->u.prefix6), &(evp->prefix.macip_addr.ip.ip), IPV6_MAX_BYTELEN); } } +static inline void ip_prefix_from_evpn_prefix(struct prefix_evpn *evp, + struct prefix *ip) +{ + if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) + ip_prefix_from_type2_prefix(evp, ip); + else if (evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) + ip_prefix_from_type5_prefix(evp, ip); +} + static inline void build_evpn_type2_prefix(struct prefix_evpn *p, struct ethaddr *mac, struct ipaddr *ip) @@ -318,10 +328,10 @@ static inline void build_evpn_type2_prefix(struct prefix_evpn *p, p->family = AF_EVPN; p->prefixlen = EVPN_TYPE_2_ROUTE_PREFIXLEN; p->prefix.route_type = BGP_EVPN_MAC_IP_ROUTE; - memcpy(&p->prefix.mac.octet, mac->octet, ETH_ALEN); - p->prefix.ip.ipa_type = IPADDR_NONE; + memcpy(&p->prefix.macip_addr.mac.octet, mac->octet, ETH_ALEN); + p->prefix.macip_addr.ip.ipa_type = IPADDR_NONE; if (ip) - memcpy(&p->prefix.ip, ip, sizeof(*ip)); + memcpy(&p->prefix.macip_addr.ip, ip, sizeof(*ip)); } static inline void build_type5_prefix_from_ip_prefix(struct prefix_evpn *evp, @@ -343,10 +353,10 @@ static inline void build_type5_prefix_from_ip_prefix(struct prefix_evpn *evp, memset(evp, 0, sizeof(struct prefix_evpn)); evp->family = AF_EVPN; evp->prefixlen = EVPN_TYPE_5_ROUTE_PREFIXLEN; - evp->prefix.ip_prefix_length = ip_prefix->prefixlen; evp->prefix.route_type = BGP_EVPN_IP_PREFIX_ROUTE; - 
evp->prefix.ip.ipa_type = ip.ipa_type; - memcpy(&evp->prefix.ip, &ip, sizeof(struct ipaddr)); + evp->prefix.prefix_addr.ip_prefix_length = ip_prefix->prefixlen; + evp->prefix.prefix_addr.ip.ipa_type = ip.ipa_type; + memcpy(&evp->prefix.prefix_addr.ip, &ip, sizeof(struct ipaddr)); } static inline void build_evpn_type3_prefix(struct prefix_evpn *p, @@ -356,8 +366,8 @@ static inline void build_evpn_type3_prefix(struct prefix_evpn *p, p->family = AF_EVPN; p->prefixlen = EVPN_TYPE_3_ROUTE_PREFIXLEN; p->prefix.route_type = BGP_EVPN_IMET_ROUTE; - p->prefix.ip.ipa_type = IPADDR_V4; - p->prefix.ip.ipaddr_v4 = originator_ip; + p->prefix.imet_addr.ip.ipa_type = IPADDR_V4; + p->prefix.imet_addr.ip.ipaddr_v4 = originator_ip; } static inline int evpn_default_originate_set(struct bgp *bgp, afi_t afi, diff --git a/bgpd/bgp_flowspec_util.c b/bgpd/bgp_flowspec_util.c index 007b27f17e..956cf28c21 100644 --- a/bgpd/bgp_flowspec_util.c +++ b/bgpd/bgp_flowspec_util.c @@ -25,6 +25,7 @@ #include "bgp_table.h" #include "bgp_flowspec_util.h" #include "bgp_flowspec_private.h" +#include "bgp_pbr.h" static void hex2bin(uint8_t *hex, int *bin) { @@ -50,6 +51,109 @@ static int hexstr2num(uint8_t *hexstr, int len) return num; } +/* call bgp_flowspec_op_decode + * returns offset + */ +static int bgp_flowspec_call_non_opaque_decode(uint8_t *nlri_content, int len, + struct bgp_pbr_match_val *mval, + uint8_t *match_num, int *error) +{ + int ret; + + ret = bgp_flowspec_op_decode( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content, + len, + mval, error); + if (*error < 0) + zlog_err("%s: flowspec_op_decode error %d", + __func__, *error); + else + *match_num = *error; + return ret; +} + +static bool bgp_flowspec_contains_prefix(struct prefix *pfs, + struct prefix *input, + int prefix_check) +{ + uint32_t offset = 0; + int type; + int ret = 0, error = 0; + uint8_t *nlri_content = (uint8_t *)pfs->u.prefix_flowspec.ptr; + size_t len = pfs->u.prefix_flowspec.prefixlen; + struct prefix compare; + + error = 
0; + while (offset < len-1 && error >= 0) { + type = nlri_content[offset]; + offset++; + switch (type) { + case FLOWSPEC_DEST_PREFIX: + case FLOWSPEC_SRC_PREFIX: + memset(&compare, 0, sizeof(struct prefix)); + ret = bgp_flowspec_ip_address( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content+offset, + len - offset, + &compare, &error); + if (ret <= 0) + break; + if (prefix_check && + compare.prefixlen != input->prefixlen) + break; + if (compare.family != input->family) + break; + if ((input->family == AF_INET) && + IPV4_ADDR_SAME(&input->u.prefix4, + &compare.u.prefix4)) + return true; + if ((input->family == AF_INET6) && + IPV6_ADDR_SAME(&input->u.prefix6.s6_addr, + &compare.u.prefix6.s6_addr)) + return true; + break; + case FLOWSPEC_IP_PROTOCOL: + case FLOWSPEC_PORT: + case FLOWSPEC_DEST_PORT: + case FLOWSPEC_SRC_PORT: + case FLOWSPEC_ICMP_TYPE: + case FLOWSPEC_ICMP_CODE: + ret = bgp_flowspec_op_decode(BGP_FLOWSPEC_VALIDATE_ONLY, + nlri_content+offset, + len - offset, + NULL, &error); + break; + case FLOWSPEC_TCP_FLAGS: + ret = bgp_flowspec_tcpflags_decode( + BGP_FLOWSPEC_VALIDATE_ONLY, + nlri_content+offset, + len - offset, + NULL, &error); + break; + case FLOWSPEC_PKT_LEN: + case FLOWSPEC_DSCP: + ret = bgp_flowspec_op_decode( + BGP_FLOWSPEC_VALIDATE_ONLY, + nlri_content + offset, + len - offset, NULL, + &error); + break; + case FLOWSPEC_FRAGMENT: + ret = bgp_flowspec_fragment_type_decode( + BGP_FLOWSPEC_VALIDATE_ONLY, + nlri_content + offset, + len - offset, NULL, + &error); + break; + default: + error = -1; + break; + } + offset += ret; + } + return false; +} /* * handle the flowspec address src/dst or generic address NLRI @@ -122,9 +226,12 @@ int bgp_flowspec_op_decode(enum bgp_flowspec_util_nlri_t type, uint32_t offset = 0; int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; int len_written; + struct bgp_pbr_match_val *mval = (struct bgp_pbr_match_val *)result; *error = 0; do { + if (loop > BGP_PBR_MATCH_VAL_MAX) + *error = -2; hex2bin(&nlri_ptr[offset], op); 
offset++; len = 2*op[2]+op[3]; @@ -168,7 +275,24 @@ int bgp_flowspec_op_decode(enum bgp_flowspec_util_nlri_t type, ptr += len_written; break; case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - /* TODO : FS OPAQUE */ + /* limitation: stop converting */ + if (*error == -2) + break; + mval->value = value; + if (op[5] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_LESS_THAN; + if (op[6] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_GREATER_THAN; + if (op[7] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_EQUAL_TO; + if (op[1] == 1) + mval->unary_operator = OPERATOR_UNARY_AND; + else + mval->unary_operator = OPERATOR_UNARY_OR; + mval++; break; case BGP_FLOWSPEC_VALIDATE_ONLY: default: @@ -203,12 +327,15 @@ int bgp_flowspec_tcpflags_decode(enum bgp_flowspec_util_nlri_t type, int op[8]; int len, value_size, loop = 0, value; char *ptr = (char *)result; /* for return_string */ + struct bgp_pbr_match_val *mval = (struct bgp_pbr_match_val *)result; uint32_t offset = 0; int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; int len_written; *error = 0; do { + if (loop > BGP_PBR_MATCH_VAL_MAX) + *error = -2; hex2bin(&nlri_ptr[offset], op); /* if first element, AND bit can not be set */ if (op[1] == 1 && loop == 0) @@ -252,7 +379,29 @@ int bgp_flowspec_tcpflags_decode(enum bgp_flowspec_util_nlri_t type, ptr += len_written; break; case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - /* TODO : FS OPAQUE */ + /* limitation: stop converting */ + if (*error == -2) + break; + mval->value = value; + if (op[6] == 1) { + /* different from */ + mval->compare_operator |= + OPERATOR_COMPARE_LESS_THAN; + mval->compare_operator |= + OPERATOR_COMPARE_GREATER_THAN; + } else + mval->compare_operator |= + OPERATOR_COMPARE_EQUAL_TO; + if (op[7] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_EXACT_MATCH; + if (op[1] == 1) + mval->unary_operator = + OPERATOR_UNARY_AND; + else + mval->unary_operator = + OPERATOR_UNARY_OR; + mval++; break; case BGP_FLOWSPEC_VALIDATE_ONLY: default: @@ -284,6 +433,8 @@ 
int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, int op[8]; int len, value, value_size, loop = 0; char *ptr = (char *)result; /* for return_string */ + struct bgp_pbr_fragment_val *mval = + (struct bgp_pbr_fragment_val *)result; uint32_t offset = 0; int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; int len_written; @@ -340,7 +491,7 @@ int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, } break; case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - /* TODO : FS OPAQUE */ + mval->bitmask = (uint8_t)value; break; case BGP_FLOWSPEC_VALIDATE_ONLY: default: @@ -354,89 +505,158 @@ int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, return offset; } - -static bool bgp_flowspec_contains_prefix(struct prefix *pfs, - struct prefix *input, - int prefix_check) +int bgp_flowspec_match_rules_fill(uint8_t *nlri_content, int len, + struct bgp_pbr_entry_main *bpem) { - uint32_t offset = 0; - int type; - int ret = 0, error = 0; - uint8_t *nlri_content = (uint8_t *)pfs->u.prefix_flowspec.ptr; - size_t len = pfs->u.prefix_flowspec.prefixlen; - struct prefix compare; + int offset = 0, error = 0; + struct prefix *prefix; + struct bgp_pbr_match_val *mval; + uint8_t *match_num; + uint8_t bitmask = 0; + int ret = 0, type; - error = 0; - while (offset < len-1 && error >= 0) { + while (offset < len - 1 && error >= 0) { type = nlri_content[offset]; offset++; switch (type) { case FLOWSPEC_DEST_PREFIX: case FLOWSPEC_SRC_PREFIX: - memset(&compare, 0, sizeof(struct prefix)); + bitmask = 0; + if (type == FLOWSPEC_DEST_PREFIX) { + bitmask |= PREFIX_DST_PRESENT; + prefix = &bpem->dst_prefix; + } else { + bitmask |= PREFIX_SRC_PRESENT; + prefix = &bpem->src_prefix; + } ret = bgp_flowspec_ip_address( BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, - nlri_content+offset, + nlri_content + offset, len - offset, - &compare, &error); - if (ret <= 0) - break; - if (prefix_check && - compare.prefixlen != input->prefixlen) - break; - if (compare.family != 
input->family) - break; - if ((input->family == AF_INET) && - IPV4_ADDR_SAME(&input->u.prefix4, - &compare.u.prefix4)) - return true; - if ((input->family == AF_INET6) && - IPV6_ADDR_SAME(&input->u.prefix6.s6_addr, - &compare.u.prefix6.s6_addr)) - return true; + prefix, &error); + if (error < 0) + zlog_err("%s: flowspec_ip_address error %d", + __func__, error); + else + bpem->match_bitmask |= bitmask; + offset += ret; break; case FLOWSPEC_IP_PROTOCOL: + match_num = &(bpem->match_protocol_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->protocol); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; case FLOWSPEC_PORT: + match_num = &(bpem->match_port_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->port); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; case FLOWSPEC_DEST_PORT: + match_num = &(bpem->match_dst_port_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->dst_port); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; case FLOWSPEC_SRC_PORT: + match_num = &(bpem->match_src_port_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->src_port); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; case FLOWSPEC_ICMP_TYPE: - case FLOWSPEC_ICMP_CODE: - ret = bgp_flowspec_op_decode(BGP_FLOWSPEC_VALIDATE_ONLY, - nlri_content+offset, - len - offset, - NULL, &error); + match_num = &(bpem->match_icmp_type_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->icmp_type); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); break; - case FLOWSPEC_TCP_FLAGS: - ret = bgp_flowspec_tcpflags_decode( - BGP_FLOWSPEC_VALIDATE_ONLY, - nlri_content+offset, - len - offset, - NULL, &error); + case 
FLOWSPEC_ICMP_CODE: + match_num = &(bpem->match_icmp_code_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->icmp_code); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); break; case FLOWSPEC_PKT_LEN: + match_num = + &(bpem->match_packet_length_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->packet_length); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; case FLOWSPEC_DSCP: - ret = bgp_flowspec_op_decode( - BGP_FLOWSPEC_VALIDATE_ONLY, - nlri_content + offset, - len - offset, NULL, - &error); + match_num = &(bpem->match_dscp_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->dscp); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_TCP_FLAGS: + ret = bgp_flowspec_tcpflags_decode( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content + offset, + len - offset, + &bpem->tcpflags, &error); + if (error < 0) + zlog_err("%s: flowspec_tcpflags_decode error %d", + __func__, error); + else + bpem->match_tcpflags_num = error; + /* contains the number of slots used */ + offset += ret; break; case FLOWSPEC_FRAGMENT: ret = bgp_flowspec_fragment_type_decode( - BGP_FLOWSPEC_VALIDATE_ONLY, - nlri_content + offset, - len - offset, NULL, - &error); + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content + offset, + len - offset, &bpem->fragment, + &error); + if (error < 0) + zlog_err("%s: flowspec_fragment_type_decode error %d", + __func__, error); + else + bpem->match_bitmask |= FRAGMENT_PRESENT; + offset += ret; break; default: - error = -1; - break; + zlog_err("%s: unknown type %d\n", __func__, type); } - offset += ret; } - return false; + return error; } + struct bgp_node *bgp_flowspec_get_match_per_ip(afi_t afi, struct bgp_table *rib, struct prefix *match, diff --git a/bgpd/bgp_flowspec_util.h b/bgpd/bgp_flowspec_util.h index 
aa21461102..e4454ab4db 100644 --- a/bgpd/bgp_flowspec_util.h +++ b/bgpd/bgp_flowspec_util.h @@ -50,6 +50,9 @@ extern int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, uint8_t *nlri_ptr, uint32_t max_len, void *result, int *error); +struct bgp_pbr_entry_main; +extern int bgp_flowspec_match_rules_fill(uint8_t *nlri_content, int len, + struct bgp_pbr_entry_main *bpem); extern struct bgp_node *bgp_flowspec_get_match_per_ip(afi_t afi, struct bgp_table *rib, diff --git a/bgpd/bgp_pbr.c b/bgpd/bgp_pbr.c new file mode 100644 index 0000000000..04d6314fd7 --- /dev/null +++ b/bgpd/bgp_pbr.c @@ -0,0 +1,1140 @@ +/* + * BGP pbr + * Copyright (C) 6WIND + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "zebra.h" +#include "prefix.h" +#include "zclient.h" +#include "jhash.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_pbr.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_flowspec_util.h" +#include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_mplsvpn.h" + +DEFINE_MTYPE_STATIC(BGPD, PBR_MATCH_ENTRY, "PBR match entry") +DEFINE_MTYPE_STATIC(BGPD, PBR_MATCH, "PBR match") +DEFINE_MTYPE_STATIC(BGPD, PBR_ACTION, "PBR action") + +static int bgp_pbr_match_counter_unique; +static int bgp_pbr_match_entry_counter_unique; +static int bgp_pbr_action_counter_unique; +static int bgp_pbr_match_iptable_counter_unique; + +struct bgp_pbr_match_iptable_unique { + uint32_t unique; + struct bgp_pbr_match *bpm_found; +}; + +struct bgp_pbr_match_entry_unique { + uint32_t unique; + struct bgp_pbr_match_entry *bpme_found; +}; + +struct bgp_pbr_action_unique { + uint32_t unique; + struct bgp_pbr_action *bpa_found; +}; + +static int bgp_pbr_action_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_action *bpa = (struct bgp_pbr_action *)backet->data; + struct bgp_pbr_action_unique *bpau = (struct bgp_pbr_action_unique *) + arg; + uint32_t unique = bpau->unique; + + if (bpa->unique == unique) { + bpau->bpa_found = bpa; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static int bgp_pbr_match_entry_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match_entry *bpme = + (struct bgp_pbr_match_entry *)backet->data; + struct bgp_pbr_match_entry_unique *bpmeu = + (struct bgp_pbr_match_entry_unique *)arg; + uint32_t unique = bpmeu->unique; + + if (bpme->unique == unique) { + bpmeu->bpme_found = bpme; + return 
HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +struct bgp_pbr_match_ipsetname { + char *ipsetname; + struct bgp_pbr_match *bpm_found; +}; + +static int bgp_pbr_match_pername_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_ipsetname *bpmi = + (struct bgp_pbr_match_ipsetname *)arg; + char *ipset_name = bpmi->ipsetname; + + if (!strncmp(ipset_name, bpm->ipset_name, + ZEBRA_IPSET_NAME_SIZE)) { + bpmi->bpm_found = bpm; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static int bgp_pbr_match_iptable_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_iptable_unique *bpmiu = + (struct bgp_pbr_match_iptable_unique *)arg; + uint32_t unique = bpmiu->unique; + + if (bpm->unique2 == unique) { + bpmiu->bpm_found = bpm; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +struct bgp_pbr_match_unique { + uint32_t unique; + struct bgp_pbr_match *bpm_found; +}; + +static int bgp_pbr_match_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_unique *bpmu = (struct bgp_pbr_match_unique *) + arg; + uint32_t unique = bpmu->unique; + + if (bpm->unique == unique) { + bpmu->bpm_found = bpm; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static int sprintf_bgp_pbr_match_val(char *str, struct bgp_pbr_match_val *mval, + const char *prepend) +{ + char *ptr = str; + + if (prepend) + ptr += sprintf(ptr, "%s", prepend); + else { + if (mval->unary_operator & OPERATOR_UNARY_OR) + ptr += sprintf(ptr, ", or "); + if (mval->unary_operator & OPERATOR_UNARY_AND) + ptr += sprintf(ptr, ", and "); + } + if (mval->compare_operator & OPERATOR_COMPARE_LESS_THAN) + ptr += sprintf(ptr, "<"); + if (mval->compare_operator & OPERATOR_COMPARE_GREATER_THAN) + ptr += sprintf(ptr, ">"); + if 
(mval->compare_operator & OPERATOR_COMPARE_EQUAL_TO) + ptr += sprintf(ptr, "="); + if (mval->compare_operator & OPERATOR_COMPARE_EXACT_MATCH) + ptr += sprintf(ptr, "match"); + ptr += sprintf(ptr, " %u", mval->value); + return (int)(ptr - str); +} + +#define INCREMENT_DISPLAY(_ptr, _cnt) do { \ + if (_cnt) \ + (_ptr) += sprintf((_ptr), "; "); \ + _cnt++; \ + } while (0) + +/* return 1 if OK, 0 if validation should stop) */ +static int bgp_pbr_validate_policy_route(struct bgp_pbr_entry_main *api) +{ + /* because bgp pbr entry may contain unsupported + * combinations, a message will be displayed here if + * not supported. + * for now, only match/set supported is + * - combination src/dst => redirect nexthop [ + rate] + * - combination src/dst => redirect VRF [ + rate] + * - combination src/dst => drop + */ + if (api->match_src_port_num || api->match_dst_port_num + || api->match_port_num || api->match_protocol_num + || api->match_icmp_type_num || api->match_icmp_type_num + || api->match_packet_length_num || api->match_dscp_num + || api->match_tcpflags_num) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_debug("BGP: some SET actions not supported by Zebra. ignoring."); + } + return 0; + } + if (!(api->match_bitmask & PREFIX_SRC_PRESENT) && + !(api->match_bitmask & PREFIX_DST_PRESENT)) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_debug("BGP: match actions without src" + " or dst address can not operate." 
+ " ignoring."); + } + return 0; + } + return 1; +} + +/* return -1 if build or validation failed */ +static int bgp_pbr_build_and_validate_entry(struct prefix *p, + struct bgp_info *info, + struct bgp_pbr_entry_main *api) +{ + int ret; + int i, action_count = 0; + struct ecommunity *ecom; + struct ecommunity_val *ecom_eval; + struct bgp_pbr_entry_action *api_action; + struct prefix *src = NULL, *dst = NULL; + int valid_prefix = 0; + afi_t afi = AFI_IP; + + /* extract match from flowspec entries */ + ret = bgp_flowspec_match_rules_fill((uint8_t *)p->u.prefix_flowspec.ptr, + p->u.prefix_flowspec.prefixlen, api); + if (ret < 0) + return -1; + /* extract actiosn from flowspec ecom list */ + if (info && info->attr && info->attr->ecommunity) { + ecom = info->attr->ecommunity; + for (i = 0; i < ecom->size; i++) { + ecom_eval = (struct ecommunity_val *) + ecom->val + (i * ECOMMUNITY_SIZE); + + if (action_count > ACTIONS_MAX_NUM) { + if (BGP_DEBUG(pbr, PBR_ERROR)) + zlog_err("%s: flowspec actions exceeds limit (max %u)", + __func__, action_count); + break; + } + api_action = &api->actions[action_count]; + + if ((ecom_eval->val[1] == + (char)ECOMMUNITY_REDIRECT_VRF) && + (ecom_eval->val[0] == + (char)ECOMMUNITY_ENCODE_TRANS_EXP || + ecom_eval->val[0] == + (char)ECOMMUNITY_EXTENDED_COMMUNITY_PART_2 || + ecom_eval->val[0] == + (char)ECOMMUNITY_EXTENDED_COMMUNITY_PART_3)) { + struct ecommunity *eckey = ecommunity_new(); + struct ecommunity_val ecom_copy; + + memcpy(&ecom_copy, ecom_eval, + sizeof(struct ecommunity_val)); + ecom_copy.val[0] &= + ~ECOMMUNITY_ENCODE_TRANS_EXP; + ecom_copy.val[1] = ECOMMUNITY_ROUTE_TARGET; + ecommunity_add_val(eckey, &ecom_copy); + + api_action->action = ACTION_REDIRECT; + api_action->u.redirect_vrf = + get_first_vrf_for_redirect_with_rt( + eckey); + ecommunity_free(&eckey); + } else if ((ecom_eval->val[0] == + (char)ECOMMUNITY_ENCODE_REDIRECT_IP_NH) && + (ecom_eval->val[1] == + (char)ECOMMUNITY_REDIRECT_IP_NH)) { + api_action->action = 
ACTION_REDIRECT_IP; + api_action->u.zr.redirect_ip_v4.s_addr = + info->attr->nexthop.s_addr; + api_action->u.zr.duplicate = ecom_eval->val[7]; + } else { + if (ecom_eval->val[0] != + (char)ECOMMUNITY_ENCODE_TRANS_EXP) + continue; + ret = ecommunity_fill_pbr_action(ecom_eval, + api_action); + if (ret != 0) + continue; + } + api->action_num++; + } + } + + /* validate if incoming matc/action is compatible + * with our policy routing engine + */ + if (!bgp_pbr_validate_policy_route(api)) + return -1; + + /* check inconsistency in the match rule */ + if (api->match_bitmask & PREFIX_SRC_PRESENT) { + src = &api->src_prefix; + afi = family2afi(src->family); + valid_prefix = 1; + } + if (api->match_bitmask & PREFIX_DST_PRESENT) { + dst = &api->dst_prefix; + if (valid_prefix && afi != family2afi(dst->family)) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_debug("%s: inconsistency:" + " no match for afi src and dst (%u/%u)", + __func__, afi, family2afi(dst->family)); + } + return -1; + } + } + return 0; +} + +static void bgp_pbr_match_entry_free(void *arg) +{ + struct bgp_pbr_match_entry *bpme; + + bpme = (struct bgp_pbr_match_entry *)arg; + + if (bpme->installed) { + bgp_send_pbr_ipset_entry_match(bpme, false); + bpme->installed = false; + bpme->backpointer = NULL; + } + XFREE(MTYPE_PBR_MATCH_ENTRY, bpme); +} + +static void bgp_pbr_match_free(void *arg) +{ + struct bgp_pbr_match *bpm; + + bpm = (struct bgp_pbr_match *)arg; + + hash_clean(bpm->entry_hash, bgp_pbr_match_entry_free); + + if (hashcount(bpm->entry_hash) == 0) { + /* delete iptable entry first */ + /* then delete ipset match */ + if (bpm->installed) { + if (bpm->installed_in_iptable) { + bgp_send_pbr_iptable(bpm->action, + bpm, false); + bpm->installed_in_iptable = false; + bpm->action->refcnt--; + } + bgp_send_pbr_ipset_match(bpm, false); + bpm->installed = false; + bpm->action = NULL; + } + } + hash_free(bpm->entry_hash); + + XFREE(MTYPE_PBR_MATCH, bpm); +} + +static void 
*bgp_pbr_match_alloc_intern(void *arg) +{ + struct bgp_pbr_match *bpm, *new; + + bpm = (struct bgp_pbr_match *)arg; + + new = XCALLOC(MTYPE_PBR_MATCH, sizeof(*new)); + memcpy(new, bpm, sizeof(*bpm)); + + return new; +} + +static void bgp_pbr_action_free(void *arg) +{ + struct bgp_pbr_action *bpa; + + bpa = (struct bgp_pbr_action *)arg; + + if (bpa->refcnt == 0) { + if (bpa->installed && bpa->table_id != 0) { + bgp_send_pbr_rule_action(bpa, false); + bgp_zebra_announce_default(bpa->bgp, &(bpa->nh), + AFI_IP, + bpa->table_id, + false); + } + } + XFREE(MTYPE_PBR_ACTION, bpa); +} + +static void *bgp_pbr_action_alloc_intern(void *arg) +{ + struct bgp_pbr_action *bpa, *new; + + bpa = (struct bgp_pbr_action *)arg; + + new = XCALLOC(MTYPE_PBR_ACTION, sizeof(*new)); + + memcpy(new, bpa, sizeof(*bpa)); + + return new; +} + +static void *bgp_pbr_match_entry_alloc_intern(void *arg) +{ + struct bgp_pbr_match_entry *bpme, *new; + + bpme = (struct bgp_pbr_match_entry *)arg; + + new = XCALLOC(MTYPE_PBR_MATCH_ENTRY, sizeof(*new)); + + memcpy(new, bpme, sizeof(*bpme)); + + return new; +} + +uint32_t bgp_pbr_match_hash_key(void *arg) +{ + struct bgp_pbr_match *pbm = (struct bgp_pbr_match *)arg; + uint32_t key; + + key = jhash_1word(pbm->vrf_id, 0x4312abde); + key = jhash_1word(pbm->flags, key); + return jhash_1word(pbm->type, key); +} + +int bgp_pbr_match_hash_equal(const void *arg1, const void *arg2) +{ + const struct bgp_pbr_match *r1, *r2; + + r1 = (const struct bgp_pbr_match *)arg1; + r2 = (const struct bgp_pbr_match *)arg2; + + if (r1->vrf_id != r2->vrf_id) + return 0; + + if (r1->type != r2->type) + return 0; + + if (r1->flags != r2->flags) + return 0; + + if (r1->action != r2->action) + return 0; + + return 1; +} + +uint32_t bgp_pbr_match_entry_hash_key(void *arg) +{ + struct bgp_pbr_match_entry *pbme; + uint32_t key; + + pbme = (struct bgp_pbr_match_entry *)arg; + key = prefix_hash_key(&pbme->src); + key = jhash_1word(prefix_hash_key(&pbme->dst), key); + + return key; +} + 
+int bgp_pbr_match_entry_hash_equal(const void *arg1, const void *arg2) +{ + const struct bgp_pbr_match_entry *r1, *r2; + + r1 = (const struct bgp_pbr_match_entry *)arg1; + r2 = (const struct bgp_pbr_match_entry *)arg2; + + /* on updates, comparing + * backpointer is not necessary + */ + + /* unique value is self calculated + */ + + /* rate is ignored for now + */ + + if (!prefix_same(&r1->src, &r2->src)) + return 0; + + if (!prefix_same(&r1->dst, &r2->dst)) + return 0; + + return 1; +} + +uint32_t bgp_pbr_action_hash_key(void *arg) +{ + struct bgp_pbr_action *pbra; + uint32_t key; + + pbra = (struct bgp_pbr_action *)arg; + key = jhash_1word(pbra->table_id, 0x4312abde); + key = jhash_1word(pbra->fwmark, key); + return key; +} + +int bgp_pbr_action_hash_equal(const void *arg1, const void *arg2) +{ + const struct bgp_pbr_action *r1, *r2; + + r1 = (const struct bgp_pbr_action *)arg1; + r2 = (const struct bgp_pbr_action *)arg2; + + /* unique value is self calculated + * table and fwmark is self calculated + */ + if (r1->rate != r2->rate) + return 0; + + if (r1->vrf_id != r2->vrf_id) + return 0; + + if (memcmp(&r1->nh, &r2->nh, sizeof(struct nexthop))) + return 0; + return 1; +} + +struct bgp_pbr_action *bgp_pbr_action_rule_lookup(vrf_id_t vrf_id, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_action_unique bpau; + + if (!bgp || unique == 0) + return NULL; + bpau.unique = unique; + bpau.bpa_found = NULL; + hash_walk(bgp->pbr_action_hash, bgp_pbr_action_walkcb, &bpau); + return bpau.bpa_found; +} + +struct bgp_pbr_match *bgp_pbr_match_ipset_lookup(vrf_id_t vrf_id, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_match_unique bpmu; + + if (!bgp || unique == 0) + return NULL; + bpmu.unique = unique; + bpmu.bpm_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_match_walkcb, &bpmu); + return bpmu.bpm_found; +} + +struct bgp_pbr_match_entry *bgp_pbr_match_ipset_entry_lookup(vrf_id_t vrf_id, 
+ char *ipset_name, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_match_entry_unique bpmeu; + struct bgp_pbr_match_ipsetname bpmi; + + if (!bgp || unique == 0) + return NULL; + bpmi.ipsetname = XCALLOC(MTYPE_TMP, ZEBRA_IPSET_NAME_SIZE); + snprintf(bpmi.ipsetname, ZEBRA_IPSET_NAME_SIZE, "%s", ipset_name); + bpmi.bpm_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_match_pername_walkcb, &bpmi); + XFREE(MTYPE_TMP, bpmi.ipsetname); + if (!bpmi.bpm_found) + return NULL; + bpmeu.bpme_found = NULL; + bpmeu.unique = unique; + hash_walk(bpmi.bpm_found->entry_hash, + bgp_pbr_match_entry_walkcb, &bpmeu); + return bpmeu.bpme_found; +} + +struct bgp_pbr_match *bgp_pbr_match_iptable_lookup(vrf_id_t vrf_id, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_match_iptable_unique bpmiu; + + if (!bgp || unique == 0) + return NULL; + bpmiu.unique = unique; + bpmiu.bpm_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_match_iptable_walkcb, &bpmiu); + return bpmiu.bpm_found; +} + +void bgp_pbr_cleanup(struct bgp *bgp) +{ + if (bgp->pbr_match_hash) { + hash_clean(bgp->pbr_match_hash, bgp_pbr_match_free); + hash_free(bgp->pbr_match_hash); + bgp->pbr_match_hash = NULL; + } + if (bgp->pbr_action_hash) { + hash_clean(bgp->pbr_action_hash, bgp_pbr_action_free); + hash_free(bgp->pbr_action_hash); + bgp->pbr_action_hash = NULL; + } +} + +void bgp_pbr_init(struct bgp *bgp) +{ + bgp->pbr_match_hash = + hash_create_size(8, bgp_pbr_match_hash_key, + bgp_pbr_match_hash_equal, + "Match Hash"); + bgp->pbr_action_hash = + hash_create_size(8, bgp_pbr_action_hash_key, + bgp_pbr_action_hash_equal, + "Match Hash Entry"); +} + +void bgp_pbr_print_policy_route(struct bgp_pbr_entry_main *api) +{ + int i = 0; + char return_string[512]; + char *ptr = return_string; + char buff[64]; + int nb_items = 0; + + ptr += sprintf(ptr, "MATCH : "); + if (api->match_bitmask & PREFIX_SRC_PRESENT) { + struct prefix *p = 
&(api->src_prefix); + + ptr += sprintf(ptr, "@src %s", prefix2str(p, buff, 64)); + INCREMENT_DISPLAY(ptr, nb_items); + } + if (api->match_bitmask & PREFIX_DST_PRESENT) { + struct prefix *p = &(api->dst_prefix); + + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@dst %s", prefix2str(p, buff, 64)); + } + + if (api->match_protocol_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_protocol_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->protocol[i], + i > 0 ? NULL : "@proto "); + + if (api->match_src_port_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_src_port_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->src_port[i], + i > 0 ? NULL : "@srcport "); + + if (api->match_dst_port_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_dst_port_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->dst_port[i], + i > 0 ? NULL : "@dstport "); + + if (api->match_port_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_port_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->port[i], + i > 0 ? NULL : "@port "); + + if (api->match_icmp_type_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_icmp_type_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->icmp_type[i], + i > 0 ? NULL : "@icmptype "); + + if (api->match_icmp_code_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_icmp_code_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->icmp_code[i], + i > 0 ? NULL : "@icmpcode "); + + if (api->match_packet_length_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_packet_length_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->packet_length[i], + i > 0 ? NULL : "@plen "); + + if (api->match_dscp_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_dscp_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->dscp[i], + i > 0 ? 
NULL : "@dscp "); + + if (api->match_tcpflags_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_tcpflags_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->tcpflags[i], + i > 0 ? NULL : "@tcpflags "); + + if (api->match_bitmask & FRAGMENT_PRESENT) { + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@fragment %u", api->fragment.bitmask); + } + if (!nb_items) + ptr = return_string; + else + ptr += sprintf(ptr, "; "); + if (api->action_num) + ptr += sprintf(ptr, "SET : "); + nb_items = 0; + for (i = 0; i < api->action_num; i++) { + switch (api->actions[i].action) { + case ACTION_TRAFFICRATE: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@set rate %f", + api->actions[i].u.r.rate); + break; + case ACTION_TRAFFIC_ACTION: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@action "); + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_TERMINATE) + ptr += sprintf(ptr, + " terminate (apply filter(s))"); + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_DISTRIBUTE) + ptr += sprintf(ptr, " distribute"); + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_SAMPLE) + ptr += sprintf(ptr, " sample"); + break; + case ACTION_REDIRECT_IP: + INCREMENT_DISPLAY(ptr, nb_items); + char local_buff[INET_ADDRSTRLEN]; + + if (inet_ntop(AF_INET, + &api->actions[i].u.zr.redirect_ip_v4, + local_buff, INET_ADDRSTRLEN) != NULL) + ptr += sprintf(ptr, + "@redirect ip nh %s", local_buff); + break; + case ACTION_REDIRECT: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@redirect vrf %u", + api->actions[i].u.redirect_vrf); + break; + case ACTION_MARKING: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@set dscp %u", + api->actions[i].u.marking_dscp); + break; + default: + break; + } + } + zlog_info("%s", return_string); +} + +static void bgp_pbr_flush_entry(struct bgp *bgp, struct bgp_pbr_action *bpa, + struct bgp_pbr_match *bpm, + struct bgp_pbr_match_entry *bpme) +{ + /* if bpme is null, bpm is also null + */ + if (bpme 
== NULL) + return; + /* ipset del entry */ + if (bpme->installed) { + bgp_send_pbr_ipset_entry_match(bpme, false); + bpme->installed = false; + bpme->backpointer = NULL; + } + hash_release(bpm->entry_hash, bpme); + if (hashcount(bpm->entry_hash) == 0) { + /* delete iptable entry first */ + /* then delete ipset match */ + if (bpm->installed) { + if (bpm->installed_in_iptable) { + bgp_send_pbr_iptable(bpm->action, + bpm, false); + bpm->installed_in_iptable = false; + bpm->action->refcnt--; + } + bgp_send_pbr_ipset_match(bpm, false); + bpm->installed = false; + bpm->action = NULL; + } + hash_release(bgp->pbr_match_hash, bpm); + /* XXX release pbr_match_action if not used + * note that drop does not need to call send_pbr_action + */ + } + if (bpa->refcnt == 0) { + if (bpa->installed && bpa->table_id != 0) { + bgp_send_pbr_rule_action(bpa, false); + bgp_zebra_announce_default(bpa->bgp, &(bpa->nh), + AFI_IP, + bpa->table_id, + false); + } + } +} + +struct bgp_pbr_match_entry_remain { + struct bgp_pbr_match_entry *bpme_to_match; + struct bgp_pbr_match_entry *bpme_found; +}; + +static int bgp_pbr_get_remaining_entry(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_entry_remain *bpmer = + (struct bgp_pbr_match_entry_remain *)arg; + struct bgp_pbr_match *bpm_temp; + struct bgp_pbr_match_entry *bpme = bpmer->bpme_to_match; + + if (!bpme->backpointer || + bpm == bpme->backpointer || + bpme->backpointer->action == bpm->action) + return HASHWALK_CONTINUE; + /* ensure bpm other characteristics are equal */ + bpm_temp = bpme->backpointer; + if (bpm_temp->vrf_id != bpm->vrf_id || + bpm_temp->type != bpm->type || + bpm_temp->flags != bpm->flags) + return HASHWALK_CONTINUE; + + /* look for remaining bpme */ + bpmer->bpme_found = hash_lookup(bpm->entry_hash, bpme); + if (!bpmer->bpme_found) + return HASHWALK_CONTINUE; + return HASHWALK_ABORT; +} + +static void 
bgp_pbr_policyroute_remove_from_zebra(struct bgp *bgp, + struct bgp_info *binfo, + vrf_id_t vrf_id, + struct prefix *src, + struct prefix *dst) +{ + struct bgp_pbr_match temp; + struct bgp_pbr_match_entry temp2; + struct bgp_pbr_match *bpm; + struct bgp_pbr_match_entry *bpme; + struct bgp_pbr_match_entry_remain bpmer; + + /* as we don't know information from EC + * look for bpm that have the bpm + * with vrf_id characteristics + */ + memset(&temp2, 0, sizeof(temp2)); + memset(&temp, 0, sizeof(temp)); + if (src) { + temp.flags |= MATCH_IP_SRC_SET; + prefix_copy(&temp2.src, src); + } else + temp2.src.family = AF_INET; + if (dst) { + temp.flags |= MATCH_IP_DST_SET; + prefix_copy(&temp2.dst, dst); + } else + temp2.dst.family = AF_INET; + + if (src == NULL || dst == NULL) + temp.type = IPSET_NET; + else + temp.type = IPSET_NET_NET; + if (vrf_id == VRF_UNKNOWN) /* XXX case BGP destroy */ + temp.vrf_id = 0; + else + temp.vrf_id = vrf_id; + bpme = &temp2; + bpm = &temp; + bpme->backpointer = bpm; + /* right now, a previous entry may already exist + * flush previous entry if necessary + */ + bpmer.bpme_to_match = bpme; + bpmer.bpme_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_get_remaining_entry, &bpmer); + if (bpmer.bpme_found) { + static struct bgp_pbr_match *local_bpm; + static struct bgp_pbr_action *local_bpa; + + local_bpm = bpmer.bpme_found->backpointer; + local_bpa = local_bpm->action; + bgp_pbr_flush_entry(bgp, local_bpa, + local_bpm, bpmer.bpme_found); + } +} + +static void bgp_pbr_policyroute_add_to_zebra(struct bgp *bgp, + struct bgp_info *binfo, + vrf_id_t vrf_id, + struct prefix *src, + struct prefix *dst, + struct nexthop *nh, + float *rate) +{ + struct bgp_pbr_match temp; + struct bgp_pbr_match_entry temp2; + struct bgp_pbr_match *bpm; + struct bgp_pbr_match_entry *bpme = NULL; + struct bgp_pbr_action temp3; + struct bgp_pbr_action *bpa = NULL; + struct bgp_pbr_match_entry_remain bpmer; + + /* look for bpa first */ + memset(&temp3, 0, 
sizeof(temp3)); + if (rate) + temp3.rate = *rate; + if (nh) + memcpy(&temp3.nh, nh, sizeof(struct nexthop)); + temp3.vrf_id = vrf_id; + bpa = hash_get(bgp->pbr_action_hash, &temp3, + bgp_pbr_action_alloc_intern); + + if (bpa->fwmark == 0) { + /* drop is handled by iptable */ + if (nh && nh->type == NEXTHOP_TYPE_BLACKHOLE) { + bpa->table_id = 0; + bpa->installed = true; + } else { + bpa->fwmark = bgp_zebra_tm_get_id(); + bpa->table_id = bpa->fwmark; + bpa->installed = false; + } + bpa->bgp = bgp; + bpa->unique = ++bgp_pbr_action_counter_unique; + /* 0 value is forbidden */ + bpa->install_in_progress = false; + } + + /* then look for bpm */ + memset(&temp, 0, sizeof(temp)); + if (src == NULL || dst == NULL) + temp.type = IPSET_NET; + else + temp.type = IPSET_NET_NET; + temp.vrf_id = vrf_id; + if (src) + temp.flags |= MATCH_IP_SRC_SET; + if (dst) + temp.flags |= MATCH_IP_DST_SET; + temp.action = bpa; + bpm = hash_get(bgp->pbr_match_hash, &temp, + bgp_pbr_match_alloc_intern); + + /* new, then self allocate ipset_name and unique */ + if (bpm && bpm->unique == 0) { + bpm->unique = ++bgp_pbr_match_counter_unique; + /* 0 value is forbidden */ + sprintf(bpm->ipset_name, "match%p", bpm); + bpm->entry_hash = hash_create_size(8, + bgp_pbr_match_entry_hash_key, + bgp_pbr_match_entry_hash_equal, + "Match Entry Hash"); + bpm->installed = false; + + /* unique2 should be updated too */ + bpm->unique2 = ++bgp_pbr_match_iptable_counter_unique; + bpm->installed_in_iptable = false; + bpm->install_in_progress = false; + bpm->install_iptable_in_progress = false; + } + + memset(&temp2, 0, sizeof(temp2)); + if (src) + prefix_copy(&temp2.src, src); + else + temp2.src.family = AF_INET; + if (dst) + prefix_copy(&temp2.dst, dst); + else + temp2.dst.family = AF_INET; + if (bpm) + bpme = hash_get(bpm->entry_hash, &temp2, + bgp_pbr_match_entry_alloc_intern); + if (bpme && bpme->unique == 0) { + bpme->unique = ++bgp_pbr_match_entry_counter_unique; + /* 0 value is forbidden */ + bpme->backpointer = 
bpm; + bpme->installed = false; + bpme->install_in_progress = false; + } + + /* BGP FS: append entry to zebra + * - policies are not routing entries and as such + * route replace semantics don't necessarily follow + * through to policy entries + * - because of that, not all policing information will be stored + * into zebra. and non selected policies will be suppressed from zebra + * - as consequence, in order to bring consistency + * a policy will be added, then if an ecmp policy exists, + * it will be suppressed subsequently + */ + /* ip rule add */ + if (!bpa->installed) { + bgp_send_pbr_rule_action(bpa, true); + bgp_zebra_announce_default(bgp, nh, + AFI_IP, bpa->table_id, true); + } + + /* ipset create */ + if (bpm && !bpm->installed) + bgp_send_pbr_ipset_match(bpm, true); + /* ipset add */ + if (bpme && !bpme->installed) + bgp_send_pbr_ipset_entry_match(bpme, true); + + /* iptables */ + if (bpm && !bpm->installed_in_iptable) + bgp_send_pbr_iptable(bpa, bpm, true); + + /* A previous entry may already exist + * flush previous entry if necessary + */ + bpmer.bpme_to_match = bpme; + bpmer.bpme_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_get_remaining_entry, &bpmer); + if (bpmer.bpme_found) { + static struct bgp_pbr_match *local_bpm; + static struct bgp_pbr_action *local_bpa; + + local_bpm = bpmer.bpme_found->backpointer; + local_bpa = local_bpm->action; + bgp_pbr_flush_entry(bgp, local_bpa, + local_bpm, bpmer.bpme_found); + } + + +} + +static void bgp_pbr_handle_entry(struct bgp *bgp, + struct bgp_info *binfo, + struct bgp_pbr_entry_main *api, + bool add) +{ + struct nexthop nh; + int i = 0; + int continue_loop = 1; + float rate = 0; + struct prefix *src = NULL, *dst = NULL; + + if (api->match_bitmask & PREFIX_SRC_PRESENT) + src = &api->src_prefix; + if (api->match_bitmask & PREFIX_DST_PRESENT) + dst = &api->dst_prefix; + memset(&nh, 0, sizeof(struct nexthop)); + nh.vrf_id = VRF_UNKNOWN; + + if (!add) + return bgp_pbr_policyroute_remove_from_zebra(bgp, 
binfo, + api->vrf_id, src, dst); + /* no action for add = true */ + for (i = 0; i < api->action_num; i++) { + switch (api->actions[i].action) { + case ACTION_TRAFFICRATE: + /* drop packet */ + if (api->actions[i].u.r.rate == 0) { + nh.vrf_id = api->vrf_id; + nh.type = NEXTHOP_TYPE_BLACKHOLE; + bgp_pbr_policyroute_add_to_zebra(bgp, binfo, + api->vrf_id, src, dst, + &nh, &rate); + } else { + /* update rate. can be reentrant */ + rate = api->actions[i].u.r.rate; + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: ignoring Set action rate %f", + api->actions[i].u.r.rate); + } + } + break; + case ACTION_TRAFFIC_ACTION: + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_SAMPLE) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: Sample action Ignored"); + } + } +#if 0 + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_DISTRIBUTE) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: Distribute action Applies"); + } + continue_loop = 0; + /* continue forwarding entry as before + * no action + */ + } +#endif /* XXX to confirm behaviour of traffic action. 
for now , ignore */ + /* terminate action: run other filters + */ + break; + case ACTION_REDIRECT_IP: + nh.type = NEXTHOP_TYPE_IPV4; + nh.gate.ipv4.s_addr = + api->actions[i].u.zr.redirect_ip_v4.s_addr; + nh.vrf_id = api->vrf_id; + bgp_pbr_policyroute_add_to_zebra(bgp, binfo, + api->vrf_id, + src, dst, + &nh, &rate); + /* XXX combination with REDIRECT_VRF + * + REDIRECT_NH_IP not done + */ + continue_loop = 0; + break; + case ACTION_REDIRECT: + nh.vrf_id = api->actions[i].u.redirect_vrf; + nh.type = NEXTHOP_TYPE_IPV4; + bgp_pbr_policyroute_add_to_zebra(bgp, binfo, + api->vrf_id, + src, dst, + &nh, &rate); + continue_loop = 0; + break; + case ACTION_MARKING: + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: Set DSCP %u Ignored", + api->actions[i].u.marking_dscp); + } + break; + default: + break; + } + if (continue_loop == 0) + break; + } +} + +void bgp_pbr_update_entry(struct bgp *bgp, struct prefix *p, + struct bgp_info *info, afi_t afi, safi_t safi, + bool nlri_update) +{ + struct bgp_pbr_entry_main api; + + if (afi == AFI_IP6) + return; /* IPv6 not supported */ + if (safi != SAFI_FLOWSPEC) + return; /* not supported */ + /* Make Zebra API structure. */ + memset(&api, 0, sizeof(api)); + api.vrf_id = bgp->vrf_id; + api.afi = afi; + + if (bgp_pbr_build_and_validate_entry(p, info, &api) < 0) { + if (BGP_DEBUG(pbr, PBR_ERROR)) + zlog_err("%s: cancel updating entry in bgp pbr", + __func__); + return; + } + bgp_pbr_handle_entry(bgp, info, &api, nlri_update); +} diff --git a/bgpd/bgp_pbr.h b/bgpd/bgp_pbr.h new file mode 100644 index 0000000000..5129ada37b --- /dev/null +++ b/bgpd/bgp_pbr.h @@ -0,0 +1,256 @@ +/* + * BGP pbr + * Copyright (C) 6WIND + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __BGP_PBR_H__ +#define __BGP_PBR_H__ + +#include "nexthop.h" +#include "zclient.h" + +/* flowspec case: 0 to 3 actions maximum: + * 1 redirect + * 1 set dscp + * 1 set traffic rate + */ +#define ACTIONS_MAX_NUM 4 +enum bgp_pbr_action_enum { + ACTION_TRAFFICRATE = 1, + ACTION_TRAFFIC_ACTION = 2, + ACTION_REDIRECT = 3, + ACTION_MARKING = 4, + ACTION_REDIRECT_IP = 5 +}; + +#define TRAFFIC_ACTION_SAMPLE (1 << 0) +#define TRAFFIC_ACTION_TERMINATE (1 << 1) +#define TRAFFIC_ACTION_DISTRIBUTE (1 << 2) + +#define OPERATOR_COMPARE_LESS_THAN (1<<1) +#define OPERATOR_COMPARE_GREATER_THAN (1<<2) +#define OPERATOR_COMPARE_EQUAL_TO (1<<3) +#define OPERATOR_COMPARE_EXACT_MATCH (1<<4) + +#define OPERATOR_UNARY_OR (1<<1) +#define OPERATOR_UNARY_AND (1<<2) + +/* struct used to store values [0;65535] + * this can be used for port number of protocol + */ +#define BGP_PBR_MATCH_VAL_MAX 5 + +struct bgp_pbr_match_val { + uint16_t value; + uint8_t compare_operator; + uint8_t unary_operator; +} bgp_pbr_value_t; + +#define FRAGMENT_DONT 1 +#define FRAGMENT_IS 2 +#define FRAGMENT_FIRST 4 +#define FRAGMENT_LAST 8 + +struct bgp_pbr_fragment_val { + uint8_t bitmask; +}; + +struct bgp_pbr_entry_action { + /* used to store enum bgp_pbr_action_enum enumerate */ + uint8_t action; + union { + union { + uint8_t rate_info[4]; /* IEEE.754.1985 */ + float rate; + } r __attribute__((aligned(8))); + struct _pbr_action { + uint8_t do_sample; + uint8_t filter; + } za; + vrf_id_t redirect_vrf; + struct _pbr_redirect_ip { + 
struct in_addr redirect_ip_v4; + uint8_t duplicate; + } zr; + uint8_t marking_dscp; + } u __attribute__((aligned(8))); +}; + +/* BGP Policy Route structure */ +struct bgp_pbr_entry_main { + uint8_t type; + uint16_t instance; + + uint32_t flags; + + uint8_t message; + + /* + * This is an enum but we are going to treat it as a uint8_t + * for purpose of encoding/decoding + */ + afi_t afi; + safi_t safi; + +#define PREFIX_SRC_PRESENT (1 << 0) +#define PREFIX_DST_PRESENT (1 << 1) +#define FRAGMENT_PRESENT (1 << 2) + uint8_t match_bitmask; + + uint8_t match_src_port_num; + uint8_t match_dst_port_num; + uint8_t match_port_num; + uint8_t match_protocol_num; + uint8_t match_icmp_type_num; + uint8_t match_icmp_code_num; + uint8_t match_packet_length_num; + uint8_t match_dscp_num; + uint8_t match_tcpflags_num; + + struct prefix src_prefix; + struct prefix dst_prefix; + + struct bgp_pbr_match_val protocol[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val src_port[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val dst_port[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val port[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val icmp_type[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val icmp_code[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val packet_length[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val dscp[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val tcpflags[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_fragment_val fragment; + + uint16_t action_num; + struct bgp_pbr_entry_action actions[ACTIONS_MAX_NUM]; + + uint8_t distance; + + uint32_t metric; + + route_tag_t tag; + + uint32_t mtu; + + vrf_id_t vrf_id; +}; + +struct bgp_pbr_match { + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; + + /* mapped on enum ipset_type + */ + uint32_t type; + +#define MATCH_IP_SRC_SET (1 << 0) +#define MATCH_IP_DST_SET (1 << 1) + uint32_t flags; + + vrf_id_t vrf_id; + + /* unique identifier for ipset create transaction + */ + uint32_t unique; + + /* unique identifier for iptable add 
transaction + */ + uint32_t unique2; + + bool installed; + bool install_in_progress; + + bool installed_in_iptable; + bool install_iptable_in_progress; + + struct hash *entry_hash; + + struct bgp_pbr_action *action; + +}; + +struct bgp_pbr_match_entry { + struct bgp_pbr_match *backpointer; + + uint32_t unique; + + struct prefix src; + struct prefix dst; + + bool installed; + bool install_in_progress; +}; + +struct bgp_pbr_action { + + /* + * The Unique identifier of this specific pbrms + */ + uint32_t unique; + + uint32_t fwmark; + + uint32_t table_id; + + float rate; + + /* + * nexthop information, or drop information + * contains src vrf_id and nh contains dest vrf_id + */ + vrf_id_t vrf_id; + struct nexthop nh; + + bool installed; + bool install_in_progress; + uint32_t refcnt; + struct bgp *bgp; +}; + +extern struct bgp_pbr_action *bgp_pbr_action_rule_lookup(vrf_id_t vrf_id, + uint32_t unique); + +extern struct bgp_pbr_match *bgp_pbr_match_ipset_lookup(vrf_id_t vrf_id, + uint32_t unique); + +extern struct bgp_pbr_match_entry *bgp_pbr_match_ipset_entry_lookup( + vrf_id_t vrf_id, char *name, + uint32_t unique); +extern struct bgp_pbr_match *bgp_pbr_match_iptable_lookup(vrf_id_t vrf_id, + uint32_t unique); + +extern void bgp_pbr_cleanup(struct bgp *bgp); +extern void bgp_pbr_init(struct bgp *bgp); + +extern uint32_t bgp_pbr_action_hash_key(void *arg); +extern int bgp_pbr_action_hash_equal(const void *arg1, + const void *arg2); +extern uint32_t bgp_pbr_match_entry_hash_key(void *arg); +extern int bgp_pbr_match_entry_hash_equal(const void *arg1, + const void *arg2); +extern uint32_t bgp_pbr_match_hash_key(void *arg); +extern int bgp_pbr_match_hash_equal(const void *arg1, + const void *arg2); + +void bgp_pbr_print_policy_route(struct bgp_pbr_entry_main *api); + +struct bgp_node; +struct bgp_info; +extern void bgp_pbr_update_entry(struct bgp *bgp, struct prefix *p, + struct bgp_info *new_select, + afi_t afi, safi_t safi, + bool nlri_update); + +#endif /* __BGP_PBR_H__ 
*/ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 299486128c..cfaa04a8c9 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -75,6 +75,7 @@ #include "bgpd/bgp_evpn_vty.h" #include "bgpd/bgp_flowspec.h" #include "bgpd/bgp_flowspec_util.h" +#include "bgpd/bgp_pbr.h" #ifndef VTYSH_EXTRACT_PL #include "bgpd/bgp_route_clippy.c" @@ -2231,7 +2232,6 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_node *rn, /* If best route remains the same and this is not due to user-initiated * clear, see exactly what needs to be done. */ - if (old_select && old_select == new_select && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR) && !CHECK_FLAG(old_select->flags, BGP_INFO_ATTR_CHANGED) @@ -4626,7 +4626,7 @@ static void bgp_static_update_safi(struct bgp *bgp, struct prefix *p, if (bgp_static->encap_tunneltype == BGP_ENCAP_TYPE_VXLAN) { struct bgp_encap_type_vxlan bet; memset(&bet, 0, sizeof(struct bgp_encap_type_vxlan)); - bet.vnid = p->u.prefix_evpn.eth_tag; + bet.vnid = p->u.prefix_evpn.prefix_addr.eth_tag; bgp_encap_type_vxlan_to_tlv(&bet, &attr); } if (bgp_static->router_mac) { @@ -5101,10 +5101,10 @@ int bgp_static_set_safi(afi_t afi, safi_t safi, struct vty *vty, return CMD_WARNING_CONFIG_FAILED; } if ((gw_ip.family == AF_INET - && IS_EVPN_PREFIX_IPADDR_V6( + && is_evpn_prefix_ipaddr_v6( (struct prefix_evpn *)&p)) || (gw_ip.family == AF_INET6 - && IS_EVPN_PREFIX_IPADDR_V4( + && is_evpn_prefix_ipaddr_v4( (struct prefix_evpn *)&p))) { vty_out(vty, "%% GatewayIp family differs with IP prefix\n"); @@ -7129,10 +7129,10 @@ void route_vty_out_overlay(struct vty *vty, struct prefix *p, vty_out(vty, "%s", str); XFREE(MTYPE_TMP, str); - if (IS_EVPN_PREFIX_IPADDR_V4((struct prefix_evpn *)p)) { + if (is_evpn_prefix_ipaddr_v4((struct prefix_evpn *)p)) { vty_out(vty, "/%s", inet_ntoa(attr->evpn_overlay.gw_ip.ipv4)); - } else if (IS_EVPN_PREFIX_IPADDR_V6((struct prefix_evpn *)p)) { + } else if (is_evpn_prefix_ipaddr_v6((struct prefix_evpn *)p)) { vty_out(vty, "/%s", 
inet_ntop(AF_INET6, &(attr->evpn_overlay.gw_ip.ipv6), buf, @@ -11398,14 +11398,15 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp, prefix_rd2str(prd, rdbuf, sizeof(rdbuf)); if (p->u.prefix_evpn.route_type == 5) { char local_buf[PREFIX_STRLEN]; - uint8_t family = IS_EVPN_PREFIX_IPADDR_V4(( + uint8_t family = is_evpn_prefix_ipaddr_v4(( struct prefix_evpn *)p) ? AF_INET : AF_INET6; - inet_ntop(family, &p->u.prefix_evpn.ip.ip.addr, + inet_ntop(family, + &p->u.prefix_evpn.prefix_addr.ip.ip.addr, local_buf, PREFIX_STRLEN); sprintf(buf, "%s/%u", local_buf, - p->u.prefix_evpn.ip_prefix_length); + p->u.prefix_evpn.prefix_addr.ip_prefix_length); } else { prefix2str(p, buf, sizeof(buf)); } @@ -11417,7 +11418,8 @@ static void bgp_config_write_network_evpn(struct vty *vty, struct bgp *bgp, sizeof(buf2)); vty_out(vty, " network %s rd %s ethtag %u label %u esi %s gwip %s routermac %s\n", - buf, rdbuf, p->u.prefix_evpn.eth_tag, + buf, rdbuf, + p->u.prefix_evpn.prefix_addr.eth_tag, decode_label(&bgp_static->label), esi, buf2, macrouter); diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 89b8eb70cd..00e5677fe0 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -320,7 +320,8 @@ static inline void bgp_bump_version(struct bgp_node *node) static inline int bgp_fibupd_safi(safi_t safi) { if (safi == SAFI_UNICAST || safi == SAFI_MULTICAST - || safi == SAFI_LABELED_UNICAST) + || safi == SAFI_LABELED_UNICAST + || safi == SAFI_FLOWSPEC) return 1; return 0; } diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c index 4cc889286e..63400f7d31 100644 --- a/bgpd/bgp_routemap.c +++ b/bgpd/bgp_routemap.c @@ -635,7 +635,7 @@ static route_map_result_t route_match_mac_address(void *rule, p.family = AF_ETHERNET; p.prefixlen = ETH_ALEN * 8; - p.u.prefix_eth = prefix->u.prefix_evpn.mac; + p.u.prefix_eth = prefix->u.prefix_evpn.macip_addr.mac; return (access_list_apply(alist, &p) == FILTER_DENY ? 
RMAP_NOMATCH diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 581b4e6f5b..93a509c219 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -56,6 +56,7 @@ #include "bgpd/bgp_evpn.h" #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_labelpool.h" +#include "bgpd/bgp_pbr.h" /* All information about zebra. */ struct zclient *zclient = NULL; @@ -997,6 +998,9 @@ static int bgp_table_map_apply(struct route_map *map, struct prefix *p, static struct thread *bgp_tm_thread_connect; static bool bgp_tm_status_connected; +static bool bgp_tm_chunk_obtained; +#define BGP_FLOWSPEC_TABLE_CHUNK 100000 +static uint32_t bgp_tm_min, bgp_tm_max, bgp_tm_chunk_size; static int bgp_zebra_tm_connect(struct thread *t) { @@ -1017,12 +1021,27 @@ static int bgp_zebra_tm_connect(struct thread *t) if (!bgp_tm_status_connected) zlog_debug("Connecting to table manager. Success"); bgp_tm_status_connected = true; + if (!bgp_tm_chunk_obtained) { + if (bgp_zebra_get_table_range(bgp_tm_chunk_size, + &bgp_tm_min, + &bgp_tm_max) >= 0) + bgp_tm_chunk_obtained = true; + } } thread_add_timer(bm->master, bgp_zebra_tm_connect, zclient, delay, &bgp_tm_thread_connect); return 0; } +uint32_t bgp_zebra_tm_get_id(void) +{ + static int table_id; + + if (!bgp_tm_chunk_obtained) + return ++table_id; + return bgp_tm_min++; +} + void bgp_zebra_init_tm_connect(void) { int delay = 1; @@ -1032,6 +1051,9 @@ void bgp_zebra_init_tm_connect(void) if (bgp_tm_thread_connect != NULL) return; bgp_tm_status_connected = false; + bgp_tm_chunk_obtained = false; + bgp_tm_min = bgp_tm_max = 0; + bgp_tm_chunk_size = BGP_FLOWSPEC_TABLE_CHUNK; thread_add_timer(bm->master, bgp_zebra_tm_connect, zclient, delay, &bgp_tm_thread_connect); } @@ -1173,6 +1195,10 @@ void bgp_zebra_announce(struct bgp_node *rn, struct prefix *p, if (bgp_debug_zebra(p)) prefix2str(&api.prefix, buf_prefix, sizeof(buf_prefix)); + if (safi == SAFI_FLOWSPEC) + return bgp_pbr_update_entry(bgp, &rn->p, + info, afi, safi, true); + /* * vrf leaking support (will have 
only one nexthop) */ @@ -1459,6 +1485,7 @@ void bgp_zebra_withdraw(struct prefix *p, struct bgp_info *info, struct bgp *bgp, safi_t safi) { struct zapi_route api; + struct peer *peer; /* Don't try to install if we're not connected to Zebra or Zebra doesn't * know of this instance. @@ -1466,6 +1493,12 @@ void bgp_zebra_withdraw(struct prefix *p, struct bgp_info *info, if (!bgp_install_info_to_zebra(bgp)) return; + if (safi == SAFI_FLOWSPEC) { + peer = info->peer; + return bgp_pbr_update_entry(peer->bgp, p, + info, AFI_IP, safi, false); + } + memset(&api, 0, sizeof(api)); memcpy(&api.rmac, &(info->attr->rmac), sizeof(struct ethaddr)); api.vrf_id = bgp->vrf_id; @@ -1908,6 +1941,271 @@ int bgp_zebra_advertise_all_vni(struct bgp *bgp, int advertise) return zclient_send_message(zclient); } +static int rule_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t seqno, priority, unique; + enum zapi_rule_notify_owner note; + struct bgp_pbr_action *bgp_pbra; + ifindex_t ifi; + + if (!zapi_rule_notify_decode(zclient->ibuf, &seqno, &priority, &unique, + &ifi, &note)) + return -1; + + bgp_pbra = bgp_pbr_action_rule_lookup(vrf_id, unique); + if (!bgp_pbra) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP rule (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + + switch (note) { + case ZAPI_RULE_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received RULE_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgp_pbra->installed = false; + bgp_pbra->install_in_progress = false; + break; + case ZAPI_RULE_INSTALLED: + bgp_pbra->installed = true; + bgp_pbra->install_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received RULE_INSTALLED", + __PRETTY_FUNCTION__); + break; + case ZAPI_RULE_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received RULE REMOVED", + __PRETTY_FUNCTION__); + break; + } + + return 0; +} + +static int ipset_notify_owner(int command, struct zclient 
*zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t unique; + enum zapi_ipset_notify_owner note; + struct bgp_pbr_match *bgp_pbim; + + if (!zapi_ipset_notify_decode(zclient->ibuf, + &unique, + &note)) + return -1; + + bgp_pbim = bgp_pbr_match_ipset_lookup(vrf_id, unique); + if (!bgp_pbim) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP match (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + + switch (note) { + case ZAPI_IPSET_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgp_pbim->installed = false; + bgp_pbim->install_in_progress = false; + break; + case ZAPI_IPSET_INSTALLED: + bgp_pbim->installed = true; + bgp_pbim->install_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_INSTALLED", + __PRETTY_FUNCTION__); + break; + case ZAPI_IPSET_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET REMOVED", + __PRETTY_FUNCTION__); + break; + } + + return 0; +} + +static int ipset_entry_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t unique; + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; + enum zapi_ipset_entry_notify_owner note; + struct bgp_pbr_match_entry *bgp_pbime; + + if (!zapi_ipset_entry_notify_decode( + zclient->ibuf, + &unique, + ipset_name, + &note)) + return -1; + bgp_pbime = bgp_pbr_match_ipset_entry_lookup(vrf_id, + ipset_name, + unique); + if (!bgp_pbime) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP match entry (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + + switch (note) { + case ZAPI_IPSET_ENTRY_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_ENTRY_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgp_pbime->installed = false; + bgp_pbime->install_in_progress = false; + break; + case ZAPI_IPSET_ENTRY_INSTALLED: + bgp_pbime->installed = true; + bgp_pbime->install_in_progress = false; + if 
(BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_ENTRY_INSTALLED", + __PRETTY_FUNCTION__); + break; + case ZAPI_IPSET_ENTRY_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_ENTRY_REMOVED", + __PRETTY_FUNCTION__); + break; + } + return 0; +} + +static int iptable_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t unique; + enum zapi_iptable_notify_owner note; + struct bgp_pbr_match *bgpm; + + if (!zapi_iptable_notify_decode( + zclient->ibuf, + &unique, + &note)) + return -1; + bgpm = bgp_pbr_match_iptable_lookup(vrf_id, unique); + if (!bgpm) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP iptable (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + switch (note) { + case ZAPI_IPTABLE_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPTABLE_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgpm->installed_in_iptable = false; + bgpm->install_iptable_in_progress = false; + break; + case ZAPI_IPTABLE_INSTALLED: + bgpm->installed_in_iptable = true; + bgpm->install_iptable_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPTABLE_INSTALLED", + __PRETTY_FUNCTION__); + bgpm->action->refcnt++; + break; + case ZAPI_IPTABLE_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPTABLE REMOVED", + __PRETTY_FUNCTION__); + break; + } + return 0; +} + +static void bgp_encode_pbr_rule_action(struct stream *s, + struct bgp_pbr_action *pbra) +{ + struct prefix any; + + stream_putl(s, 0); /* seqno unused */ + stream_putl(s, 0); /* ruleno unused */ + + stream_putl(s, pbra->unique); + + memset(&any, 0, sizeof(any)); + any.family = AF_INET; + stream_putc(s, any.family); + stream_putc(s, any.prefixlen); + stream_put(s, &any.u.prefix, prefix_blen(&any)); + + stream_putw(s, 0); /* src port */ + + stream_putc(s, any.family); + stream_putc(s, any.prefixlen); + stream_put(s, &any.u.prefix, prefix_blen(&any)); + + stream_putw(s, 0); 
/* dst port */ + + stream_putl(s, pbra->fwmark); /* fwmark */ + + stream_putl(s, pbra->table_id); + + stream_putl(s, 0); /* ifindex unused */ +} + +static void bgp_encode_pbr_ipset_match(struct stream *s, + struct bgp_pbr_match *pbim) +{ + stream_putl(s, pbim->unique); + stream_putl(s, pbim->type); + + stream_put(s, pbim->ipset_name, + ZEBRA_IPSET_NAME_SIZE); + + +} + +static void bgp_encode_pbr_ipset_entry_match(struct stream *s, + struct bgp_pbr_match_entry *pbime) +{ + stream_putl(s, pbime->unique); + /* check that back pointer is not null */ + stream_put(s, pbime->backpointer->ipset_name, + ZEBRA_IPSET_NAME_SIZE); + + stream_putc(s, pbime->src.family); + stream_putc(s, pbime->src.prefixlen); + stream_put(s, &pbime->src.u.prefix, prefix_blen(&pbime->src)); + + stream_putc(s, pbime->dst.family); + stream_putc(s, pbime->dst.prefixlen); + stream_put(s, &pbime->dst.u.prefix, prefix_blen(&pbime->dst)); +} + +static void bgp_encode_pbr_iptable_match(struct stream *s, + struct bgp_pbr_action *bpa, + struct bgp_pbr_match *pbm) +{ + stream_putl(s, pbm->unique2); + + stream_putl(s, pbm->type); + + stream_putl(s, pbm->flags); + + /* TODO: correlate with what is contained + * into bgp_pbr_action. + * currently only forward supported + */ + if (bpa->nh.type == NEXTHOP_TYPE_BLACKHOLE) + stream_putl(s, ZEBRA_IPTABLES_DROP); + else + stream_putl(s, ZEBRA_IPTABLES_FORWARD); + stream_putl(s, bpa->fwmark); + stream_put(s, pbm->ipset_name, + ZEBRA_IPSET_NAME_SIZE); +} + /* BGP has established connection with Zebra. 
*/ static void bgp_zebra_connected(struct zclient *zclient) { @@ -2167,6 +2465,10 @@ void bgp_zebra_init(struct thread_master *master) zclient->local_ip_prefix_add = bgp_zebra_process_local_ip_prefix; zclient->local_ip_prefix_del = bgp_zebra_process_local_ip_prefix; zclient->label_chunk = bgp_zebra_process_label_chunk; + zclient->rule_notify_owner = rule_notify_owner; + zclient->ipset_notify_owner = ipset_notify_owner; + zclient->ipset_entry_notify_owner = ipset_entry_notify_owner; + zclient->iptable_notify_owner = iptable_notify_owner; } void bgp_zebra_destroy(void) @@ -2182,3 +2484,176 @@ int bgp_zebra_num_connects(void) { return zclient_num_connects; } + +void bgp_send_pbr_rule_action(struct bgp_pbr_action *pbra, bool install) +{ + struct stream *s; + + if (pbra->install_in_progress) + return; + zlog_debug("%s: table %d fwmark %d %d", __PRETTY_FUNCTION__, + pbra->table_id, pbra->fwmark, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? ZEBRA_RULE_ADD : ZEBRA_RULE_DELETE, + VRF_DEFAULT); + stream_putl(s, 1); /* send one pbr action */ + + bgp_encode_pbr_rule_action(s, pbra); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) + pbra->install_in_progress = true; +} + +void bgp_send_pbr_ipset_match(struct bgp_pbr_match *pbrim, bool install) +{ + struct stream *s; + + if (pbrim->install_in_progress) + return; + zlog_debug("%s: name %s type %d %d", __PRETTY_FUNCTION__, + pbrim->ipset_name, pbrim->type, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? 
ZEBRA_IPSET_CREATE : + ZEBRA_IPSET_DESTROY, + VRF_DEFAULT); + + stream_putl(s, 1); /* send one pbr action */ + + bgp_encode_pbr_ipset_match(s, pbrim); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) + pbrim->install_in_progress = true; +} + +void bgp_send_pbr_ipset_entry_match(struct bgp_pbr_match_entry *pbrime, + bool install) +{ + struct stream *s; + + if (pbrime->install_in_progress) + return; + zlog_debug("%s: name %s %d %d", __PRETTY_FUNCTION__, + pbrime->backpointer->ipset_name, + pbrime->unique, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? ZEBRA_IPSET_ENTRY_ADD : + ZEBRA_IPSET_ENTRY_DELETE, + VRF_DEFAULT); + + stream_putl(s, 1); /* send one pbr action */ + + bgp_encode_pbr_ipset_entry_match(s, pbrime); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) + pbrime->install_in_progress = true; +} + +void bgp_send_pbr_iptable(struct bgp_pbr_action *pba, + struct bgp_pbr_match *pbm, + bool install) +{ + struct stream *s; + + if (pbm->install_iptable_in_progress) + return; + zlog_debug("%s: name %s type %d mark %d %d", __PRETTY_FUNCTION__, + pbm->ipset_name, pbm->type, pba->fwmark, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? 
ZEBRA_IPTABLE_ADD : + ZEBRA_IPTABLE_DELETE, + VRF_DEFAULT); + + bgp_encode_pbr_iptable_match(s, pba, pbm); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) { + pbm->install_iptable_in_progress = true; + pba->refcnt++; + } +} + +/* inject in table <table_id> a default route to: + * - if nexthop IP is present : to this nexthop + * - if vrf is different from local : to the matching VRF + */ +void bgp_zebra_announce_default(struct bgp *bgp, struct nexthop *nh, + afi_t afi, uint32_t table_id, bool announce) +{ + struct zapi_nexthop *api_nh; + struct zapi_route api; + struct prefix p; + + if (!nh || nh->type != NEXTHOP_TYPE_IPV4 + || nh->vrf_id == VRF_UNKNOWN) + return; + memset(&p, 0, sizeof(struct prefix)); + /* default route */ + if (afi != AFI_IP) + return; + p.family = AF_INET; + memset(&api, 0, sizeof(api)); + api.vrf_id = bgp->vrf_id; + api.type = ZEBRA_ROUTE_BGP; + api.safi = SAFI_UNICAST; + api.prefix = p; + api.tableid = table_id; + api.nexthop_num = 1; + SET_FLAG(api.message, ZAPI_MESSAGE_TABLEID); + SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP); + api_nh = &api.nexthops[0]; + + /* redirect IP */ + if (nh->gate.ipv4.s_addr) { + char buff[PREFIX_STRLEN]; + + api_nh->vrf_id = nh->vrf_id; + api_nh->gate.ipv4 = nh->gate.ipv4; + api_nh->type = NEXTHOP_TYPE_IPV4; + + inet_ntop(AF_INET, &(nh->gate.ipv4), buff, INET_ADDRSTRLEN); + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_info("BGP: sending default route to %s table %d (redirect IP)", + buff, table_id); + zclient_route_send(announce ? 
ZEBRA_ROUTE_ADD + : ZEBRA_ROUTE_DELETE, + zclient, &api); + } else if (nh->vrf_id != bgp->vrf_id) { + struct vrf *vrf; + struct interface *ifp; + + vrf = vrf_lookup_by_id(nh->vrf_id); + if (!vrf) + return; + /* create default route with interface <VRF> + * with nexthop-vrf <VRF> + */ + ifp = if_lookup_by_name_all_vrf(vrf->name); + if (!ifp) + return; + api_nh->vrf_id = nh->vrf_id; + api_nh->type = NEXTHOP_TYPE_IFINDEX; + api_nh->ifindex = ifp->ifindex; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_info("BGP: sending default route to %s table %d (redirect VRF)", + vrf->name, table_id); + zclient_route_send(announce ? ZEBRA_ROUTE_ADD + : ZEBRA_ROUTE_DELETE, + zclient, &api); + return; + } +} diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 7263317b6f..7ac40fecff 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -25,6 +25,7 @@ extern void bgp_zebra_init(struct thread_master *master); extern void bgp_zebra_init_tm_connect(void); +extern uint32_t bgp_zebra_tm_get_id(void); extern void bgp_zebra_destroy(void); extern int bgp_zebra_get_table_range(uint32_t chunk_size, uint32_t *start, uint32_t *end); @@ -70,4 +71,20 @@ extern int bgp_zebra_advertise_all_vni(struct bgp *, int); extern int bgp_zebra_num_connects(void); +struct bgp_pbr_action; +struct bgp_pbr_match; +struct bgp_pbr_match_entry; +extern void bgp_send_pbr_rule_action(struct bgp_pbr_action *pbra, + bool install); +extern void bgp_send_pbr_ipset_match(struct bgp_pbr_match *pbrim, + bool install); +extern void bgp_send_pbr_ipset_entry_match(struct bgp_pbr_match_entry *pbrime, + bool install); +extern void bgp_send_pbr_iptable(struct bgp_pbr_action *pba, + struct bgp_pbr_match *pbm, + bool install); + +extern void bgp_zebra_announce_default(struct bgp *bgp, struct nexthop *nh, + afi_t afi, uint32_t table_id, bool announce); + #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index ccfa90419b..a331fad5d4 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -83,6 +83,7 @@ #include 
"bgpd/bgp_ecommunity.h" #include "bgpd/bgp_flowspec.h" #include "bgpd/bgp_labelpool.h" +#include "bgpd/bgp_pbr.h" DEFINE_MTYPE_STATIC(BGPD, PEER_TX_SHUTDOWN_MSG, "Peer shutdown message (TX)"); DEFINE_QOBJ_TYPE(bgp_master) @@ -3006,6 +3007,7 @@ static struct bgp *bgp_create(as_t *as, const char *name, bf_assign_index(bm->rd_idspace, bgp->vrf_rd_id); bgp_evpn_init(bgp); + bgp_pbr_init(bgp); return bgp; } @@ -3401,7 +3403,7 @@ void bgp_free(struct bgp *bgp) bf_release_index(bm->rd_idspace, bgp->vrf_rd_id); bgp_evpn_cleanup(bgp); - + bgp_pbr_cleanup(bgp); if (bgp->name) XFREE(MTYPE_BGP, bgp->name); if (bgp->name_pretty) diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 576c89f25e..470fd10850 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -400,6 +400,25 @@ struct bgp { /* Allocate MPLS labels */ uint8_t allocate_mpls_labels[AFI_MAX][SAFI_MAX]; + /* Allocate hash entries to store policy routing information + * The hash are used to host pbr rules somewhere. + * Actually, pbr will only be used by flowspec + * those hash elements will have relationship together as + * illustrated in below diagram: + * + * pbr_action a <----- pbr_match i <--- pbr_match_entry 1..n + * <----- pbr_match j <--- pbr_match_entry 1..m + * + * - here in BGP structure, the list of match and actions will + * stand for the list of ipset sets, and table_ids in the kernel + * - the arrow above between pbr_match and pbr_action indicate + * that a backpointer permits match to find the action + * - the arrow betwen match_entry and match is a hash list + * contained in match, that lists the whole set of entries + */ + struct hash *pbr_match_hash; + struct hash *pbr_action_hash; + /* timer to re-evaluate neighbor default-originate route-maps */ struct thread *t_rmap_def_originate_eval; #define RMAP_DEFAULT_ORIGINATE_EVAL_TIMER 5 diff --git a/doc/developer/index.rst b/doc/developer/index.rst index 17a7e1c0ba..2f4b96bc1f 100644 --- a/doc/developer/index.rst +++ b/doc/developer/index.rst @@ -6,6 +6,7 @@ Welcome to 
FRR's documentation! workflow building + process-architecture library bgpd ospf diff --git a/doc/developer/library.rst b/doc/developer/library.rst index c5ce1f5982..f6efa33051 100644 --- a/doc/developer/library.rst +++ b/doc/developer/library.rst @@ -1,3 +1,5 @@ +.. _libfrr: + *************************** Library Facilities (libfrr) *************************** diff --git a/doc/developer/process-architecture.rst b/doc/developer/process-architecture.rst new file mode 100644 index 0000000000..806afa644c --- /dev/null +++ b/doc/developer/process-architecture.rst @@ -0,0 +1,320 @@ +.. _process-architecture: + +Process Architecture +==================== + +FRR inherited its overall design architecture from Quagga. The chosen model for +Quagga is that of a suite of independent daemons that do IPC via Unix domain +sockets. Within each daemon, the architecture follows the event-driven model. +FRR has inherited this model as well. As FRR is deployed at larger scales and +gains ever more features, each adding to the overall processing workload, we +are approaching the saturation point for a single thread per daemon. In light +of this, there are ongoing efforts to introduce multithreading to various +components of FRR. This document aims to describe the current design choices +and overall model for integrating the event-driven and multithreaded +architectures into a cohesive whole. + +Terminology +----------- +Because this document describes the architecture for true kernel threads as +well as the event system, a digression on terminology is in order here. + +Historically Quagga's event system was viewed as an implementation of userspace +threading. Because of this design choice, the names for various datastructures +within the event system are variations on the term "thread". The primary +context datastructure in this system is called a "threadmaster". 
What would +today be called an 'event' or 'task' in systems such as libevent are called +"threads" and the datastructure for them is ``struct thread``. To add to the +confusion, these "threads" have various types, one of which is "event". To +hopefully avoid some of this confusion, this document refers to these "threads" +as a 'task' except where the datastructures are explicitly named. When they are +explicitly named, they will be formatted ``like this`` to differentiate from +the conceptual names. When speaking of kernel threads, the term used will be +"pthread" since FRR's kernel threading implementation is POSIX threads. + +.. This should be broken into its document under :ref:`libfrr` +.. _event-architecture: + +Event Architecture +------------------ +This section presents a brief overview of the event model as currently +implemented in FRR. This doc should be expanded and broken off into its own +section. For now it provides basic information necessary to understand the +interplay between the event system and kernel threads. + +The core event system is implemented in :file:`lib/thread.[ch]`. The primary +structure is ``struct thread_master``, hereafter referred to as a +``threadmaster``. A ``threadmaster`` is a global state object, or context, that +holds all the tasks currently pending execution as well as statistics on tasks +that have already executed. The event system is driven by adding tasks to this +data structure and then calling a function to retrieve the next task to +execute. At initialization, a daemon will typically create one +``threadmaster``, add a small set of initial tasks, and then run a loop to +fetch each task and execute it. + +These tasks have various types corresponding to their general action. The types +are given by integer macros in :file:`thread.h` and are: + +``THREAD_READ`` + Task which waits for a file descriptor to become ready for reading and then + executes. 
+ +``THREAD_WRITE`` + Task which waits for a file descriptor to become ready for writing and then + executes. + +``THREAD_TIMER`` + Task which executes after a certain amount of time has passed since it was + scheduled. + +``THREAD_EVENT`` + Generic task that executes with high priority and carries an arbitrary + integer indicating the event type to its handler. These are commonly used to + implement the finite state machines typically found in routing protocols. + +``THREAD_READY`` + Type used internally for tasks on the ready queue. + +``THREAD_UNUSED`` + Type used internally for ``struct thread`` objects that aren't being used. + The event system pools ``struct thread`` to avoid heap allocations; this is + the type they have when they're in the pool. + +``THREAD_EXECUTE`` + Just before a task is run its type is changed to this. This is used to show + ``X`` as the type in the output of :clicmd:`show thread cpu`. + +The programmer never has to work with these types explicitly. Each type of task +is created and queued via special-purpose functions (actually macros, but +irrelevant for the time being) for the specific type. For example, to add a +``THREAD_READ`` task, you would call + +:: + + thread_add_read(struct thread_master *master, int (*handler)(struct thread *), void *arg, int fd, struct thread **ref); + +The ``struct thread`` is then created and added to the appropriate internal +datastructure within the ``threadmaster``. + +The Event Loop +^^^^^^^^^^^^^^ +To use the event system, after creating a ``threadmaster`` the program adds an +initial set of tasks. As these tasks execute, they add more tasks that execute +at some point in the future. This sequence of tasks drives the lifecycle of the +program. When no more tasks are available, the program dies. Typically at +startup the first task added is an I/O task for VTYSH as well as any network +sockets needed for peerings or IPC. + +To retrieve the next task to run the program calls ``thread_fetch()``. 
+``thread_fetch()`` internally computes which task to execute next based on +rudimentary priority logic. Events (type ``THREAD_EVENT``) execute with the +highest priority, followed by expired timers and finally I/O tasks (type +``THREAD_READ`` and ``THREAD_WRITE``). When scheduling a task, a function and an +arbitrary argument are provided. The task returned from ``thread_fetch()`` is +then executed with ``thread_call()``. + +The following diagram illustrates a simplified version of this infrastructure. + +.. todo: replace these with SVG +.. figure:: ../figures/threadmaster-single.png + :align: center + + Lifecycle of a program using a single threadmaster. + +The series of "task" boxes represents the current ready task queue. The various +other queues for other types are not shown. The fetch-execute loop is +illustrated at the bottom. + +Mapping the general names used in the figure to specific FRR functions: + +- ``task`` is ``struct thread *`` +- ``fetch`` is ``thread_fetch()`` +- ``exec()`` is ``thread_call()`` +- ``cancel()`` is ``thread_cancel()`` +- ``schedule()`` is any of the various task-specific ``thread_add_*`` functions + +Adding tasks is done with various task-specific function-like macros. These +macros wrap underlying functions in :file:`thread.c` to provide additional +information added at compile time, such as the line number the task was +scheduled from, that can be accessed at runtime for debugging, logging and +informational purposes. Each task type has its own specific scheduling function +that follows the naming convention ``thread_add_<type>``; see :file:`thread.h` +for details. + +There are some gotchas to keep in mind: + +- I/O tasks are keyed off the file descriptor associated with the I/O + operation. This means that for any given file descriptor, only one of each + type of I/O task (``THREAD_READ`` and ``THREAD_WRITE``) can be scheduled.
For + example, scheduling two write tasks one after the other will overwrite the + first task with the second, resulting in total loss of the first task and + difficult bugs. + +- Timer tasks are only as accurate as the monotonic clock provided by the + underlying operating system. + +- Memory management of the arbitrary handler argument passed in the schedule + call is the responsibility of the caller. + + +Kernel Thread Architecture +-------------------------- +Efforts have begun to introduce kernel threads into FRR to improve performance +and stability. Naturally a kernel thread architecture has long been seen as +orthogonal to an event-driven architecture, and the two do have significant +overlap in terms of design choices. Since the event model is tightly integrated +into FRR, careful thought has been put into how pthreads are introduced, what +role they fill, and how they will interoperate with the event model. + +Design Overview +^^^^^^^^^^^^^^^ +Each kernel thread behaves as a lightweight process within FRR, sharing the +same process memory space. On the other hand, the event system is designed to +run in a single process and drive serial execution of a set of tasks. With this +consideration, a natural choice is to implement the event system within each +kernel thread. This allows us to leverage the event-driven execution model with +the currently existing task and context primitives. In this way the familiar +execution model of FRR gains the ability to execute tasks simultaneously while +preserving the existing model for concurrency. + +The following figure illustrates the architecture with multiple pthreads, each +running their own ``threadmaster``-based event loop. + +.. todo: replace these with SVG +.. 
figure:: ../figures/threadmaster-multiple.png + :align: center + + Lifecycle of a program using multiple pthreads, each running their own + ``threadmaster`` + +Each roundrect represents a single pthread running the same event loop +described under :ref:`event-architecture`. Note the arrow from the ``exec()`` +box on the right to the ``schedule()`` box in the middle pthread. This +illustrates code running in one pthread scheduling a task onto another +pthread's threadmaster. A global lock for each ``threadmaster`` is used to +synchronize these operations. The pthread names are examples. + + +.. This should be broken into its document under :ref:`libfrr` +.. _kernel-thread-wrapper: + +Kernel Thread Wrapper +^^^^^^^^^^^^^^^^^^^^^ +The basis for the integration of pthreads and the event system is a lightweight +wrapper for both systems implemented in :file:`lib/frr_pthread.[ch]`. The +header provides a core datastructure, ``struct frr_pthread``, that encapsulates +structures from both POSIX threads and :file:`thread.[ch]`. In particular, this +datastructure has a pointer to a ``threadmaster`` that runs within the pthread. +It also has fields for a name as well as start and stop functions that have +signatures similar to the POSIX arguments for ``pthread_create()``. + +Calling ``frr_pthread_new()`` creates and registers a new ``frr_pthread``. The +returned structure has a pre-initialized ``threadmaster``, and its ``start`` +and ``stop`` functions are initialized to defaults that will run a basic event +loop with the given threadmaster. Calling ``frr_pthread_run`` starts the thread +with the ``start`` function. From there, the model is the same as the regular +event model. To schedule tasks on a particular pthread, simply use the regular +:file:`thread.c` functions as usual and provide the ``threadmaster`` pointed to +from the ``frr_pthread``. As part of implementing the wrapper, the +:file:`thread.c` functions were made thread-safe. 
Consequently, it is safe to +schedule events on a ``threadmaster`` belonging both to the calling thread as +well as *any other pthread*. This serves as the basis for inter-thread +communication and boils down to a slightly more complicated method of message +passing, where the messages are the regular task events as used in the +event-driven model. The only difference is thread cancellation, which requires +calling ``thread_cancel_async()`` instead of ``thread_cancel`` to cancel a task +currently scheduled on a ``threadmaster`` belonging to a different pthread. +This is necessary to avoid race conditions in the specific case where one +pthread wants to guarantee that a task on another pthread is cancelled before +proceeding. + +In addition, the existing commands to show statistics and other information for +tasks within the event driven model have been expanded to handle multiple +pthreads; running :clicmd:`show thread cpu` will display the usual event +breakdown, but it will do so for each pthread running in the program. 
For +example, :ref:`bgpd` runs a dedicated I/O pthread and shows the following +output for :clicmd:`show thread cpu`: + +:: + + frr# show thread cpu + + Thread statistics for bgpd: + + Showing statistics for pthread main + ------------------------------------ + CPU (user+system): Real (wall-clock): + Active Runtime(ms) Invoked Avg uSec Max uSecs Avg uSec Max uSecs Type Thread + 0 1389.000 10 138900 248000 135549 255349 T subgroup_coalesce_timer + 0 0.000 1 0 0 18 18 T bgp_startup_timer_expire + 0 850.000 18 47222 222000 47795 233814 T work_queue_run + 0 0.000 10 0 0 6 14 T update_subgroup_merge_check_thread_cb + 0 0.000 8 0 0 117 160 W zclient_flush_data + 2 2.000 1 2000 2000 831 831 R bgp_accept + 0 1.000 1 1000 1000 2832 2832 E zclient_connect + 1 42082.000 240574 174 37000 178 72810 R vtysh_read + 1 152.000 1885 80 2000 96 6292 R zclient_read + 0 549346.000 2997298 183 7000 153 20242 E bgp_event + 0 2120.000 300 7066 14000 6813 22046 T (bgp_holdtime_timer) + 0 0.000 2 0 0 57 59 T update_group_refresh_default_originate_route_map + 0 90.000 1 90000 90000 73729 73729 T bgp_route_map_update_timer + 0 1417.000 9147 154 48000 132 61998 T bgp_process_packet + 300 71807.000 2995200 23 3000 24 11066 T (bgp_connect_timer) + 0 1894.000 12713 148 45000 112 33606 T (bgp_generate_updgrp_packets) + 0 0.000 1 0 0 105 105 W vtysh_write + 0 52.000 599 86 2000 138 6992 T (bgp_start_timer) + 1 1.000 8 125 1000 164 593 R vtysh_accept + 0 15.000 600 25 2000 15 153 T (bgp_routeadv_timer) + 0 11.000 299 36 3000 53 3128 RW bgp_connect_check + + + Showing statistics for pthread BGP I/O thread + ---------------------------------------------- + CPU (user+system): Real (wall-clock): + Active Runtime(ms) Invoked Avg uSec Max uSecs Avg uSec Max uSecs Type Thread + 0 1611.000 9296 173 13000 188 13685 R bgp_process_reads + 0 2995.000 11753 254 26000 182 29355 W bgp_process_writes + + + Showing statistics for pthread BGP Keepalives thread + ----------------------------------------------------- + 
CPU (user+system): Real (wall-clock): + Active Runtime(ms) Invoked Avg uSec Max uSecs Avg uSec Max uSecs Type Thread + No data to display yet. + +Attentive readers will notice that there is a third thread, the Keepalives +thread. This thread is responsible for -- surprise -- generating keepalives for +peers. However, there are no statistics showing for that thread. Although the +pthread uses the ``frr_pthread`` wrapper, it opts not to use the embedded +``threadmaster`` facilities. Instead it replaces the ``start`` and ``stop`` +functions with custom functions. This was done because the ``threadmaster`` +facilities introduce a small but significant amount of overhead relative to the +pthread's task. In this case since the pthread does not need the event-driven +model and does not need to receive tasks from other pthreads, it is simpler and +more efficient to implement it outside of the provided event facilities. The +point to take away from this example is that while the facilities to make using +pthreads within FRR easy are already implemented, the wrapper is flexible and +allows usage of other models while still integrating with the rest of the FRR +core infrastructure. Starting and stopping this pthread works the same as it +does for any other ``frr_pthread``; the only difference is that event +statistics are not collected for it, because there are no events. + +Notes on Design and Documentation +--------------------------------- +Because of the choice to embed the existing event system into each pthread +within FRR, at this time there is not integrated support for other models of +pthread use such as divide and conquer. Similarly, there is no explicit support +for thread pooling or similar higher level constructs. The currently existing +infrastructure is designed around the concept of long-running worker threads +responsible for specific jobs within each daemon. This is not to say that +divide and conquer, thread pooling, etc. 
could not be implemented in the +future. However, designs in this direction must be very careful to take into +account the existing codebase. Introducing kernel threads into programs that +have been written under the assumption of a single thread of execution must be +done very carefully to avoid insidious errors and to ensure the program remains +understandable and maintainable. + +In keeping with these goals, future work on kernel threading should be +extensively documented here and FRR developers should be very careful with +their design choices, as poor choices tightly integrated can prove to be +catastrophic for development efforts in the future. diff --git a/doc/figures/threadmaster-multiple.png b/doc/figures/threadmaster-multiple.png Binary files differnew file mode 100644 index 0000000000..2ded50c4cb --- /dev/null +++ b/doc/figures/threadmaster-multiple.png diff --git a/doc/figures/threadmaster-single.png b/doc/figures/threadmaster-single.png Binary files differnew file mode 100644 index 0000000000..a068389b2a --- /dev/null +++ b/doc/figures/threadmaster-single.png diff --git a/doc/figures/threadmaster.svg b/doc/figures/threadmaster.svg new file mode 100644 index 0000000000..a8d2c6adfe --- /dev/null +++ b/doc/figures/threadmaster.svg @@ -0,0 +1,42 @@ +<svg width="640" height="480" xmlns="http://www.w3.org/2000/svg" xmlns:svg="http://www.w3.org/2000/svg"> + <!-- Created with SVG-edit - http://svg-edit.googlecode.com/ --> + <g> + <title>Layer 1</title> + <rect stroke="#000000" id="svg_14" height="209.999998" width="78.999998" y="42.000002" x="44" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" fill="#ffaaaa"/> + <rect stroke="#000000" id="svg_7" height="391.000012" width="278.000011" y="20" x="24.999992" fill="#e5e5e5"/> + <rect stroke="#000000" id="svg_8" height="371.999978" width="259.000006" y="27.5" x="34.5" fill="#aad4ff"/> + <text stroke="#000000" 
transform="matrix(0.6990958452224731,0,0,0.7454545497894287,7.038336245343089,56.89090812206268) " opacity="0.95" xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" id="svg_19" y="259.756097" x="80.760476" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">I/O</text> + <text id="svg_35" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="139.834812" x="60.305235" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task3</text> + <text id="svg_36" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="143.062739" x="62.867657" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task3</text> + <rect stroke="#000000" id="svg_9" height="196.000002" width="53" y="35" x="44" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" fill="#ffaaaa"/> + <text id="svg_40" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="144.681173" x="161.373439" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <rect id="svg_15" stroke="#000000" height="196.000002" width="53" y="35" x="104" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" fill="#ffaaaa"/> + <rect id="svg_16" stroke="#000000" height="196.000002" width="53" y="35" x="164" stroke-linecap="null" stroke-linejoin="null" 
stroke-dasharray="null" stroke-width="null" fill="#ffaaaa"/> + <rect id="svg_17" stroke="#000000" height="196.000002" width="53" y="35" x="225" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" fill="#a2f9a2"/> + <text id="svg_20" stroke="#000000" transform="matrix(0.5625633135745076,0,0,0.5774844344081629,24.86726517334988,96.24168390657653) " opacity="0.95" xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="264.951043" x="181.542341" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">Timer</text> + <text id="svg_21" stroke="#000000" transform="matrix(0.5749753656942643,0,0,0.6334746061662456,38.254388934566045,86.97425370192815) " opacity="0.95" xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="256.836039" x="263.041264" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">Event</text> + <text id="svg_22" stroke="#000000" transform="matrix(0.5985540174236015,0,0,0.6031454293478589,42.9067138136931,90.27170546109113) " opacity="0.95" xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="263.072047" x="346.818415" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">Ready</text> + <line id="svg_24" y2="62" x2="278.034186" y1="62" x1="44" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" stroke="#000000" fill="none"/> + <line id="svg_25" y2="90" x2="278.034186" y1="90" x1="44" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" stroke="#000000" fill="none"/> + <line id="svg_26" y2="118.5" x2="278.034186" y1="118.5" x1="44" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" stroke="#000000" fill="none"/> + <rect id="svg_27" 
height="45" width="182" y="287" x="103" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" stroke="#000000" fill="#cccccc"/> + <rect id="svg_28" height="45" width="182" y="342" x="103" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" stroke="#000000" fill="#cccccc"/> + <text stroke="#000000" transform="matrix(0.791700541973114,0,0,0.7857142686843872,21.471117571927607,58.28571891784668) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" id="svg_31" y="325.818183" x="222.3407" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">thread_fetch()</text> + <text stroke="#000000" transform="matrix(0.8357433386864528,0,0,0.7633649135312126,6.654719490831912,79.40761438745744) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" id="svg_32" y="380.181209" x="228.949154" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">thread_call()</text> + <text stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" id="svg_33" y="47" x="56" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task1</text> + <text style="cursor: move;" id="svg_34" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="94.51867" x="58.592092" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task2</text> + <text style="cursor: move;" id="svg_37" stroke="#000000" 
transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="141.444306" x="58.584825" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task9</text> + <text style="cursor: move;" id="svg_38" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="50.812022" x="163.086583" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task4</text> + <text style="cursor: move;" id="svg_39" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="92.891296" x="164.799726" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">task7</text> + <text id="svg_41" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="138.207438" x="158.803698" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <text id="svg_42" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="54.046518" x="265.881445" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <text id="svg_43" stroke="#000000" 
transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="96.125793" x="264.168301" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <text id="svg_44" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="143.060369" x="259.02887" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <text id="svg_45" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="50.809651" x="370.383203" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <text id="svg_46" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="99.36266" x="372.096346" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <text id="svg_47" stroke="#000000" transform="matrix(0.5837222373092742,0,0,0.6178813716933291,36.307626092448714,21.493443100305285) " xml:space="preserve" text-anchor="middle" font-family="Monospace" font-size="24" y="141.441935" x="372.096346" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="0" fill="#000000">...</text> + <line stroke="#000000" id="svg_54" y2="286.009435" x2="275" y1="230.500001" x1="275" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" 
stroke-dasharray="null" stroke-width="null" fill="none"/> + <line stroke="#000000" id="svg_57" y2="278.49643" x2="267.246429" y1="286" x1="275.5" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" fill="none"/> + <line id="svg_58" y2="285.754309" x2="274.995691" y1="278.5" x1="282.25" opacity="0.95" stroke-linecap="null" stroke-linejoin="null" stroke-dasharray="null" stroke-width="null" stroke="#000000" fill="none"/> + </g> +</svg> diff --git a/doc/user/installation.rst b/doc/user/installation.rst index 8501054fdb..cd56cbcf50 100644 --- a/doc/user/installation.rst +++ b/doc/user/installation.rst @@ -176,6 +176,14 @@ customize the build to include or exclude specific features and dependencies. With this option, we provide a way to strip out these characters for APK dev package builds. +.. option:: --enable-multipath=X + + Compile FRR with up to X way ECMP supported. This number can be from 0-999. + For backwards compatibility with older configure options when setting X = 0, + we will build FRR with 64 way ECMP. This is needed because there are + hardcoded arrays that FRR builds towards, so we need to know how big to + make these arrays at build time. + You may specify any combination of the above options to the configure script. By default, the executables are placed in :file:`/usr/local/sbin` and the configuration files in :file:`/usr/local/etc`. The :file:`/usr/local/` diff --git a/doc/user/ospf6d.rst b/doc/user/ospf6d.rst index 56f95e64b0..71bc047720 100644 --- a/doc/user/ospf6d.rst +++ b/doc/user/ospf6d.rst @@ -101,7 +101,7 @@ OSPF6 interface .. index:: ipv6 ospf6 hello-interval HELLOINTERVAL .. clicmd:: ipv6 ospf6 hello-interval HELLOINTERVAL - Sets interface's Hello Interval. Default 40 + Sets interface's Hello Interval. Default 10 .. index:: ipv6 ospf6 dead-interval DEADINTERVAL ..
clicmd:: ipv6 ospf6 dead-interval DEADINTERVAL diff --git a/doc/user/zebra.rst b/doc/user/zebra.rst index 0927a4dbe9..f1d194b1a3 100644 --- a/doc/user/zebra.rst +++ b/doc/user/zebra.rst @@ -31,6 +31,13 @@ Besides the common invocation options (:ref:`common-invocation-options`), the When program terminates, retain routes added by zebra. +.. option:: -e X, --ecmp X + + Run zebra with a limited ecmp ability compared to what it is compiled to. + If you are running zebra on hardware with limited functionality, you can + force zebra to limit the maximum ecmp allowed to X. This number + is bounded by what you compiled FRR with as the maximum number. + .. program:: configure .. _interface-commands: diff --git a/eigrpd/eigrp_hello.c b/eigrpd/eigrp_hello.c index 6d74670514..d9e89357ca 100644 --- a/eigrpd/eigrp_hello.c +++ b/eigrpd/eigrp_hello.c @@ -630,7 +630,7 @@ static struct eigrp_packet *eigrp_hello_encode(struct eigrp_interface *ei, uint16_t length = EIGRP_HEADER_LEN; // allocate a new packet to be sent - ep = eigrp_packet_new(ei->ifp->mtu, NULL); + ep = eigrp_packet_new(EIGRP_PACKET_MTU(ei->ifp->mtu), NULL); if (ep) { // encode common header feilds diff --git a/eigrpd/eigrp_macros.h b/eigrpd/eigrp_macros.h index eea7a26425..b30e19a867 100644 --- a/eigrpd/eigrp_macros.h +++ b/eigrpd/eigrp_macros.h @@ -35,6 +35,8 @@ //-------------------------------------------------------------------------- +#define EIGRP_PACKET_MTU(mtu) ((mtu) - (sizeof(struct ip))) + /* Topology Macros */ diff --git a/eigrpd/eigrp_packet.c b/eigrpd/eigrp_packet.c index 990d1dc08e..59864532cf 100644 --- a/eigrpd/eigrp_packet.c +++ b/eigrpd/eigrp_packet.c @@ -51,6 +51,7 @@ #include "eigrpd/eigrp_zebra.h" #include "eigrpd/eigrp_vty.h" #include "eigrpd/eigrp_dump.h" +#include "eigrpd/eigrp_macros.h" #include "eigrpd/eigrp_network.h" #include "eigrpd/eigrp_topology.h" #include "eigrpd/eigrp_fsm.h" @@ -1088,7 +1089,7 @@ struct eigrp_packet *eigrp_packet_duplicate(struct eigrp_packet *old, { struct eigrp_packet
*new; - new = eigrp_packet_new(nbr->ei->ifp->mtu, nbr); + new = eigrp_packet_new(EIGRP_PACKET_MTU(nbr->ei->ifp->mtu), nbr); new->length = old->length; new->retrans_counter = old->retrans_counter; new->dst = old->dst; diff --git a/eigrpd/eigrp_query.c b/eigrpd/eigrp_query.c index 00234bb35c..dd4231fa00 100644 --- a/eigrpd/eigrp_query.c +++ b/eigrpd/eigrp_query.c @@ -167,6 +167,7 @@ void eigrp_send_query(struct eigrp_interface *ei) struct eigrp_prefix_entry *pe; bool has_tlv = false; bool new_packet = true; + uint16_t eigrp_mtu = EIGRP_PACKET_MTU(ei->ifp->mtu); for (ALL_LIST_ELEMENTS(ei->eigrp->topology_changes_internalIPV4, node, nnode, pe)) { @@ -174,7 +175,7 @@ void eigrp_send_query(struct eigrp_interface *ei) continue; if (new_packet) { - ep = eigrp_packet_new(ei->ifp->mtu, NULL); + ep = eigrp_packet_new(eigrp_mtu, NULL); /* Prepare EIGRP INIT UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_QUERY, ei->eigrp, @@ -197,7 +198,7 @@ void eigrp_send_query(struct eigrp_interface *ei) listnode_add(pe->rij, nbr); } - if (length + EIGRP_TLV_MAX_IPV4_BYTE > (uint16_t)ei->ifp->mtu) { + if (length + EIGRP_TLV_MAX_IPV4_BYTE > eigrp_mtu) { if ((ei->params.auth_type == EIGRP_AUTH_TYPE_MD5) && ei->params.auth_keychain != NULL) { eigrp_make_md5_digest(ei, ep->s, diff --git a/eigrpd/eigrp_reply.c b/eigrpd/eigrp_reply.c index a702c1fbd1..b7490cd492 100644 --- a/eigrpd/eigrp_reply.c +++ b/eigrpd/eigrp_reply.c @@ -85,7 +85,7 @@ void eigrp_send_reply(struct eigrp_neighbor *nbr, struct eigrp_prefix_entry *pe) * End of filtering */ - ep = eigrp_packet_new(ei->ifp->mtu, nbr); + ep = eigrp_packet_new(EIGRP_PACKET_MTU(ei->ifp->mtu), nbr); /* Prepare EIGRP INIT UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_REPLY, eigrp, ep->s, 0, diff --git a/eigrpd/eigrp_siaquery.c b/eigrpd/eigrp_siaquery.c index d398d75724..ff38325465 100644 --- a/eigrpd/eigrp_siaquery.c +++ b/eigrpd/eigrp_siaquery.c @@ -119,7 +119,7 @@ void eigrp_send_siaquery(struct eigrp_neighbor *nbr, struct eigrp_packet *ep; 
uint16_t length = EIGRP_HEADER_LEN; - ep = eigrp_packet_new(nbr->ei->ifp->mtu, nbr); + ep = eigrp_packet_new(EIGRP_PACKET_MTU(nbr->ei->ifp->mtu), nbr); /* Prepare EIGRP INIT UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_SIAQUERY, nbr->ei->eigrp, ep->s, 0, diff --git a/eigrpd/eigrp_siareply.c b/eigrpd/eigrp_siareply.c index 3b7a82b665..d3dd123f90 100644 --- a/eigrpd/eigrp_siareply.c +++ b/eigrpd/eigrp_siareply.c @@ -118,7 +118,7 @@ void eigrp_send_siareply(struct eigrp_neighbor *nbr, struct eigrp_packet *ep; uint16_t length = EIGRP_HEADER_LEN; - ep = eigrp_packet_new(nbr->ei->ifp->mtu, nbr); + ep = eigrp_packet_new(EIGRP_PACKET_MTU(nbr->ei->ifp->mtu), nbr); /* Prepare EIGRP INIT UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_SIAREPLY, nbr->ei->eigrp, ep->s, 0, diff --git a/eigrpd/eigrp_topology.c b/eigrpd/eigrp_topology.c index 2d1bc46e6b..becb29a95f 100644 --- a/eigrpd/eigrp_topology.c +++ b/eigrpd/eigrp_topology.c @@ -443,17 +443,24 @@ void eigrp_topology_update_node_flags(struct eigrp_prefix_entry *dest) struct eigrp *eigrp = eigrp_lookup(); for (ALL_LIST_ELEMENTS_RO(dest->entries, node, entry)) { - if (((uint64_t)entry->distance - <= (uint64_t)dest->distance * (uint64_t)eigrp->variance) - && entry->distance != EIGRP_MAX_METRIC) // is successor - { - entry->flags |= EIGRP_NEXTHOP_ENTRY_SUCCESSOR_FLAG; - entry->flags &= ~EIGRP_NEXTHOP_ENTRY_FSUCCESSOR_FLAG; - } else if (entry->reported_distance - < dest->fdistance) // is feasible successor - { - entry->flags |= EIGRP_NEXTHOP_ENTRY_FSUCCESSOR_FLAG; - entry->flags &= ~EIGRP_NEXTHOP_ENTRY_SUCCESSOR_FLAG; + if (entry->reported_distance < dest->fdistance) { + // is feasible successor, can be successor + if (((uint64_t)entry->distance + <= (uint64_t)dest->distance + * (uint64_t)eigrp->variance) + && entry->distance != EIGRP_MAX_METRIC) { + // is successor + entry->flags |= + EIGRP_NEXTHOP_ENTRY_SUCCESSOR_FLAG; + entry->flags &= + ~EIGRP_NEXTHOP_ENTRY_FSUCCESSOR_FLAG; + } else { + // is feasible successor only 
+ entry->flags |= + EIGRP_NEXTHOP_ENTRY_FSUCCESSOR_FLAG; + entry->flags &= + ~EIGRP_NEXTHOP_ENTRY_SUCCESSOR_FLAG; + } } else { entry->flags &= ~EIGRP_NEXTHOP_ENTRY_FSUCCESSOR_FLAG; entry->flags &= ~EIGRP_NEXTHOP_ENTRY_SUCCESSOR_FLAG; diff --git a/eigrpd/eigrp_update.c b/eigrpd/eigrp_update.c index bd80ea366f..a3080136b5 100644 --- a/eigrpd/eigrp_update.c +++ b/eigrpd/eigrp_update.c @@ -420,7 +420,7 @@ void eigrp_update_send_init(struct eigrp_neighbor *nbr) struct eigrp_packet *ep; uint16_t length = EIGRP_HEADER_LEN; - ep = eigrp_packet_new(nbr->ei->ifp->mtu, nbr); + ep = eigrp_packet_new(EIGRP_PACKET_MTU(nbr->ei->ifp->mtu), nbr); /* Prepare EIGRP INIT UPDATE header */ if (IS_DEBUG_EIGRP_PACKET(0, RECV)) @@ -533,10 +533,10 @@ void eigrp_update_send_EOT(struct eigrp_neighbor *nbr) struct eigrp *eigrp = ei->eigrp; struct prefix *dest_addr; uint32_t seq_no = eigrp->sequence_number; - uint16_t mtu = ei->ifp->mtu; + uint16_t eigrp_mtu = EIGRP_PACKET_MTU(ei->ifp->mtu); struct route_node *rn; - ep = eigrp_packet_new(mtu, nbr); + ep = eigrp_packet_new(eigrp_mtu, nbr); /* Prepare EIGRP EOT UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_UPDATE, eigrp, ep->s, EIGRP_EOT_FLAG, @@ -557,13 +557,13 @@ void eigrp_update_send_EOT(struct eigrp_neighbor *nbr) if (eigrp_nbr_split_horizon_check(te, ei)) continue; - if ((length + EIGRP_TLV_MAX_IPV4_BYTE) > mtu) { + if ((length + EIGRP_TLV_MAX_IPV4_BYTE) > eigrp_mtu) { eigrp_update_place_on_nbr_queue(nbr, ep, seq_no, length); seq_no++; length = EIGRP_HEADER_LEN; - ep = eigrp_packet_new(mtu, nbr); + ep = eigrp_packet_new(eigrp_mtu, nbr); eigrp_packet_header_init( EIGRP_OPC_UPDATE, nbr->ei->eigrp, ep->s, EIGRP_EOT_FLAG, seq_no, @@ -604,13 +604,14 @@ void eigrp_update_send(struct eigrp_interface *ei) struct eigrp *eigrp = ei->eigrp; struct prefix *dest_addr; uint32_t seq_no = eigrp->sequence_number; + uint16_t eigrp_mtu = EIGRP_PACKET_MTU(ei->ifp->mtu); if (ei->nbrs->count == 0) return; uint16_t length = EIGRP_HEADER_LEN; - ep = 
eigrp_packet_new(ei->ifp->mtu, NULL); + ep = eigrp_packet_new(eigrp_mtu, NULL); /* Prepare EIGRP INIT UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_UPDATE, eigrp, ep->s, 0, seq_no, 0); @@ -633,8 +634,7 @@ void eigrp_update_send(struct eigrp_interface *ei) if (eigrp_nbr_split_horizon_check(ne, ei)) continue; - if ((length + EIGRP_TLV_MAX_IPV4_BYTE) - > (uint16_t)ei->ifp->mtu) { + if ((length + EIGRP_TLV_MAX_IPV4_BYTE) > eigrp_mtu) { if ((ei->params.auth_type == EIGRP_AUTH_TYPE_MD5) && (ei->params.auth_keychain != NULL)) { eigrp_make_md5_digest(ei, ep->s, @@ -651,7 +651,7 @@ void eigrp_update_send(struct eigrp_interface *ei) eigrp_update_send_to_all_nbrs(ei, ep); length = EIGRP_HEADER_LEN; - ep = eigrp_packet_new(ei->ifp->mtu, NULL); + ep = eigrp_packet_new(eigrp_mtu, NULL); eigrp_packet_header_init(EIGRP_OPC_UPDATE, eigrp, ep->s, 0, seq_no, 0); if ((ei->params.auth_type == EIGRP_AUTH_TYPE_MD5) @@ -790,7 +790,7 @@ static void eigrp_update_send_GR_part(struct eigrp_neighbor *nbr) } } - ep = eigrp_packet_new(ei->ifp->mtu, nbr); + ep = eigrp_packet_new(EIGRP_PACKET_MTU(ei->ifp->mtu), nbr); /* Prepare EIGRP Graceful restart UPDATE header */ eigrp_packet_header_init(EIGRP_OPC_UPDATE, eigrp, ep->s, flags, diff --git a/isisd/isis_redist.c b/isisd/isis_redist.c index 9c61512df4..e903dc8c7f 100644 --- a/isisd/isis_redist.c +++ b/isisd/isis_redist.c @@ -184,8 +184,7 @@ static void isis_redist_update_ext_reach(struct isis_area *area, int level, route_map_result_t map_ret; memcpy(&area_info, info, sizeof(area_info)); - if (redist->metric != 0xffffffff) - area_info.metric = redist->metric; + area_info.metric = redist->metric; if (redist->map_name) { map_ret = @@ -540,7 +539,7 @@ DEFUN (isis_redistribute, int afi; int type; int level; - unsigned long metric; + unsigned long metric = 0; const char *routemap = NULL; family = str2family(argv[idx_afi]->text); @@ -567,9 +566,6 @@ DEFUN (isis_redistribute, return CMD_WARNING_CONFIG_FAILED; } - metric = 0xffffffff; - routemap = 
NULL; - if (argc > idx_metric_rmap + 1) { if (argv[idx_metric_rmap + 1]->arg[0] == '\0') return CMD_WARNING_CONFIG_FAILED; @@ -651,7 +647,7 @@ DEFUN (isis_default_originate, int family; int originate_type = DEFAULT_ORIGINATE; int level; - unsigned long metric = 0xffffffff; + unsigned long metric = 0; const char *routemap = NULL; family = str2family(argv[idx_afi]->text); @@ -748,7 +744,7 @@ int isis_redist_config_write(struct vty *vty, struct isis_area *area, continue; vty_out(vty, " redistribute %s %s level-%d", family_str, zebra_route_string(type), level); - if (redist->metric != 0xffffffff) + if (redist->metric) vty_out(vty, " metric %u", redist->metric); if (redist->map_name) vty_out(vty, " route-map %s", redist->map_name); @@ -766,7 +762,7 @@ int isis_redist_config_write(struct vty *vty, struct isis_area *area, family_str, level); if (redist->redist == DEFAULT_ORIGINATE_ALWAYS) vty_out(vty, " always"); - if (redist->metric != 0xffffffff) + if (redist->metric) vty_out(vty, " metric %u", redist->metric); if (redist->map_name) vty_out(vty, " route-map %s", redist->map_name); diff --git a/lib/prefix.c b/lib/prefix.c index b38dd94589..05af190e9d 100644 --- a/lib/prefix.c +++ b/lib/prefix.c @@ -1206,54 +1206,104 @@ int str2prefix(const char *str, struct prefix *p) return 0; } -static const char *prefixevpn2str(const struct prefix *p, char *str, int size) +static const char *prefixevpn_ead2str(const struct prefix_evpn *p, char *str, + int size) +{ + snprintf(str, size, "Unsupported EVPN prefix"); + return str; +} + +static const char *prefixevpn_macip2str(const struct prefix_evpn *p, char *str, + int size) { uint8_t family; char buf[PREFIX2STR_BUFFER]; char buf2[ETHER_ADDR_STRLEN]; - if (p->u.prefix_evpn.route_type == 2) { - if (IS_EVPN_PREFIX_IPADDR_NONE((struct prefix_evpn *)p)) - snprintf(str, size, "[%d]:[%s]/%d", - p->u.prefix_evpn.route_type, - prefix_mac2str(&p->u.prefix_evpn.mac, buf2, - sizeof(buf2)), - p->prefixlen); - else { - family = 
IS_EVPN_PREFIX_IPADDR_V4( - (struct prefix_evpn *)p) - ? AF_INET - : AF_INET6; - snprintf(str, size, "[%d]:[%s]:[%s]/%d", - p->u.prefix_evpn.route_type, - prefix_mac2str(&p->u.prefix_evpn.mac, buf2, - sizeof(buf2)), - inet_ntop(family, &p->u.prefix_evpn.ip.ip.addr, - buf, PREFIX2STR_BUFFER), - p->prefixlen); - } - } else if (p->u.prefix_evpn.route_type == 3) { - family = IS_EVPN_PREFIX_IPADDR_V4((struct prefix_evpn *)p) - ? AF_INET - : AF_INET6; - snprintf(str, size, "[%d]:[%s]/%d", p->u.prefix_evpn.route_type, - inet_ntop(family, &p->u.prefix_evpn.ip.ip.addr, buf, - PREFIX2STR_BUFFER), + if (is_evpn_prefix_ipaddr_none(p)) + snprintf(str, size, "[%d]:[%s]/%d", + p->prefix.route_type, + prefix_mac2str(&p->prefix.macip_addr.mac, + buf2, sizeof(buf2)), p->prefixlen); - } else if (p->u.prefix_evpn.route_type == 5) { - family = IS_EVPN_PREFIX_IPADDR_V4((struct prefix_evpn *)p) + else { + family = is_evpn_prefix_ipaddr_v4(p) ? AF_INET : AF_INET6; - snprintf(str, size, "[%d]:[%u][%s/%d]/%d", - p->u.prefix_evpn.route_type, p->u.prefix_evpn.eth_tag, - inet_ntop(family, &p->u.prefix_evpn.ip.ip.addr, buf, - PREFIX2STR_BUFFER), - p->u.prefix_evpn.ip_prefix_length, p->prefixlen); - } else { - sprintf(str, "Unsupported EVPN route type %d", - p->u.prefix_evpn.route_type); + snprintf(str, size, "[%d]:[%s]:[%s]/%d", + p->prefix.route_type, + prefix_mac2str(&p->prefix.macip_addr.mac, + buf2, sizeof(buf2)), + inet_ntop(family, + &p->prefix.macip_addr.ip.ip.addr, + buf, PREFIX2STR_BUFFER), + p->prefixlen); } + return str; +} + +static const char *prefixevpn_imet2str(const struct prefix_evpn *p, char *str, + int size) +{ + uint8_t family; + char buf[PREFIX2STR_BUFFER]; + + family = is_evpn_prefix_ipaddr_v4(p) + ? 
AF_INET + : AF_INET6; + snprintf(str, size, "[%d]:[%s]/%d", p->prefix.route_type, + inet_ntop(family, + &p->prefix.imet_addr.ip.ip.addr, buf, + PREFIX2STR_BUFFER), + p->prefixlen); + return str; +} +static const char *prefixevpn_es2str(const struct prefix_evpn *p, char *str, + int size) +{ + snprintf(str, size, "Unsupported EVPN prefix"); + return str; +} + +static const char *prefixevpn_prefix2str(const struct prefix_evpn *p, char *str, + int size) +{ + uint8_t family; + char buf[PREFIX2STR_BUFFER]; + + family = is_evpn_prefix_ipaddr_v4(p) + ? AF_INET + : AF_INET6; + snprintf(str, size, "[%d]:[%u][%s/%d]/%d", + p->prefix.route_type, + p->prefix.prefix_addr.eth_tag, + inet_ntop(family, + &p->prefix.prefix_addr.ip.ip.addr, buf, + PREFIX2STR_BUFFER), + p->prefix.prefix_addr.ip_prefix_length, + p->prefixlen); + return str; +} + +static const char *prefixevpn2str(const struct prefix_evpn *p, char *str, + int size) +{ + switch (p->prefix.route_type) { + case 1: + return prefixevpn_ead2str(p, str, size); + case 2: + return prefixevpn_macip2str(p, str, size); + case 3: + return prefixevpn_imet2str(p, str, size); + case 4: + return prefixevpn_es2str(p, str, size); + case 5: + return prefixevpn_prefix2str(p, str, size); + default: + snprintf(str, size, "Unsupported EVPN prefix"); + break; + } return str; } @@ -1277,7 +1327,7 @@ const char *prefix2str(union prefixconstptr pu, char *str, int size) break; case AF_EVPN: - prefixevpn2str(p, str, size); + prefixevpn2str((const struct prefix_evpn *)p, str, size); break; case AF_FLOWSPEC: diff --git a/lib/prefix.h b/lib/prefix.h index f01c85b811..ab3c05ae74 100644 --- a/lib/prefix.h +++ b/lib/prefix.h @@ -56,26 +56,56 @@ struct ethaddr { #define PREFIX_LEN_ROUTE_TYPE_5_IPV4 (18*8) #define PREFIX_LEN_ROUTE_TYPE_5_IPV6 (30*8) -/* EVPN address (RFC 7432) */ -struct evpn_addr { - uint8_t route_type; +typedef struct esi_t_ { + uint8_t val[10]; +} esi_t; + +struct evpn_ead_addr { + esi_t esi; + uint32_t eth_tag; +}; + +struct 
evpn_macip_addr { + uint32_t eth_tag; uint8_t ip_prefix_length; struct ethaddr mac; + struct ipaddr ip; +}; + +struct evpn_imet_addr { uint32_t eth_tag; + uint8_t ip_prefix_length; struct ipaddr ip; -#if 0 - union - { - uint8_t addr; - struct in_addr v4_addr; - struct in6_addr v6_addr; - } ip; -#endif }; -#define IS_EVPN_PREFIX_IPADDR_NONE(evp) IS_IPADDR_NONE(&(evp)->prefix.ip) -#define IS_EVPN_PREFIX_IPADDR_V4(evp) IS_IPADDR_V4(&(evp)->prefix.ip) -#define IS_EVPN_PREFIX_IPADDR_V6(evp) IS_IPADDR_V6(&(evp)->prefix.ip) +struct evpn_es_addr { + esi_t esi; + uint8_t ip_prefix_length; + struct ipaddr ip; +}; + +struct evpn_prefix_addr { + uint32_t eth_tag; + uint8_t ip_prefix_length; + struct ipaddr ip; +}; + +/* EVPN address (RFC 7432) */ +struct evpn_addr { + uint8_t route_type; + union { + struct evpn_ead_addr _ead_addr; + struct evpn_macip_addr _macip_addr; + struct evpn_imet_addr _imet_addr; + struct evpn_es_addr _es_addr; + struct evpn_prefix_addr _prefix_addr; + } u; +#define ead_addr u._ead_addr +#define macip_addr u._macip_addr +#define imet_addr u._imet_addr +#define es_addr u._es_addr +#define prefix_addr u._prefix_addr +}; /* * A struct prefix contains an address family, a prefix length, and an @@ -177,6 +207,39 @@ struct prefix_evpn { struct evpn_addr prefix __attribute__((aligned(8))); }; +static inline int is_evpn_prefix_ipaddr_none(const struct prefix_evpn *evp) +{ + if (evp->prefix.route_type == 2) + return IS_IPADDR_NONE(&(evp)->prefix.macip_addr.ip); + if (evp->prefix.route_type == 3) + return IS_IPADDR_NONE(&(evp)->prefix.imet_addr.ip); + if (evp->prefix.route_type == 5) + return IS_IPADDR_NONE(&(evp)->prefix.prefix_addr.ip); + return 0; +} + +static inline int is_evpn_prefix_ipaddr_v4(const struct prefix_evpn *evp) +{ + if (evp->prefix.route_type == 2) + return IS_IPADDR_V4(&(evp)->prefix.macip_addr.ip); + if (evp->prefix.route_type == 3) + return IS_IPADDR_V4(&(evp)->prefix.imet_addr.ip); + if (evp->prefix.route_type == 5) + return 
IS_IPADDR_V4(&(evp)->prefix.prefix_addr.ip); + return 0; +} + +static inline int is_evpn_prefix_ipaddr_v6(const struct prefix_evpn *evp) +{ + if (evp->prefix.route_type == 2) + return IS_IPADDR_V6(&(evp)->prefix.macip_addr.ip); + if (evp->prefix.route_type == 3) + return IS_IPADDR_V6(&(evp)->prefix.imet_addr.ip); + if (evp->prefix.route_type == 5) + return IS_IPADDR_V6(&(evp)->prefix.prefix_addr.ip); + return 0; +} + /* Prefix for a generic pointer */ struct prefix_ptr { uint8_t family; diff --git a/lib/zclient.c b/lib/zclient.c index dc27cbef70..cb39099fc2 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -1374,6 +1374,26 @@ stream_failure: return false; } +bool zapi_iptable_notify_decode(struct stream *s, + uint32_t *unique, + enum zapi_iptable_notify_owner *note) +{ + uint32_t uni; + + STREAM_GET(note, s, sizeof(*note)); + + STREAM_GETL(s, uni); + + if (zclient_debug) + zlog_debug("%s: %u", __PRETTY_FUNCTION__, uni); + *unique = uni; + + return true; + +stream_failure: + return false; +} + struct nexthop *nexthop_from_zapi_nexthop(struct zapi_nexthop *znh) { struct nexthop *n = nexthop_new(); @@ -2765,6 +2785,22 @@ static int zclient_read(struct thread *thread) (*zclient->label_chunk)(command, zclient, length, vrf_id); break; + case ZEBRA_IPSET_NOTIFY_OWNER: + if (zclient->ipset_notify_owner) + (*zclient->ipset_notify_owner)(command, zclient, length, + vrf_id); + break; + case ZEBRA_IPSET_ENTRY_NOTIFY_OWNER: + if (zclient->ipset_entry_notify_owner) + (*zclient->ipset_entry_notify_owner)(command, + zclient, length, + vrf_id); + break; + case ZEBRA_IPTABLE_NOTIFY_OWNER: + if (zclient->iptable_notify_owner) + (*zclient->iptable_notify_owner)(command, + zclient, length, + vrf_id); default: break; } diff --git a/lib/zclient.h b/lib/zclient.h index 71f5b38384..8d26b7fe59 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -258,6 +258,10 @@ struct zclient { struct zclient *zclient, uint16_t length, vrf_id_t vrf_id); + int (*iptable_notify_owner)(int command, + struct 
zclient *zclient, + uint16_t length, + vrf_id_t vrf_id); }; /* Zebra API message flag. */ @@ -680,6 +684,9 @@ bool zapi_ipset_entry_notify_decode(struct stream *s, uint32_t *unique, char *ipset_name, enum zapi_ipset_entry_notify_owner *note); +bool zapi_iptable_notify_decode(struct stream *s, + uint32_t *unique, + enum zapi_iptable_notify_owner *note); extern struct nexthop *nexthop_from_zapi_nexthop(struct zapi_nexthop *znh); extern bool zapi_nexthop_update_decode(struct stream *s, diff --git a/ospf6d/ospf6_intra.c b/ospf6d/ospf6_intra.c index de4ee2e1ac..b234b10d51 100644 --- a/ospf6d/ospf6_intra.c +++ b/ospf6d/ospf6_intra.c @@ -1314,17 +1314,60 @@ int ospf6_intra_prefix_lsa_originate_transit(struct thread *thread) return 0; } +static void ospf6_intra_prefix_update_route_origin(struct ospf6_route *oa_route) +{ + struct ospf6_path *h_path; + struct ospf6_route *g_route, *nroute; + + /* Update Global ospf6 route path */ + g_route = ospf6_route_lookup(&oa_route->prefix, + ospf6->route_table); + + for (ospf6_route_lock(g_route); g_route && + ospf6_route_is_prefix(&oa_route->prefix, g_route); + g_route = nroute) { + nroute = ospf6_route_next(g_route); + if (g_route->type != oa_route->type) + continue; + if (g_route->path.area_id != oa_route->path.area_id) + continue; + if (g_route->path.type != OSPF6_PATH_TYPE_INTRA) + continue; + if (g_route->path.cost != oa_route->path.cost) + continue; + + if (ospf6_route_is_same_origin(g_route, oa_route)) { + h_path = (struct ospf6_path *)listgetdata( + listhead(g_route->paths)); + g_route->path.origin.type = h_path->origin.type; + g_route->path.origin.id = h_path->origin.id; + g_route->path.origin.adv_router = + h_path->origin.adv_router; + break; + } + } + + h_path = (struct ospf6_path *)listgetdata( + listhead(oa_route->paths)); + oa_route->path.origin.type = h_path->origin.type; + oa_route->path.origin.id = h_path->origin.id; + oa_route->path.origin.adv_router = h_path->origin.adv_router; +} + void 
ospf6_intra_prefix_route_ecmp_path(struct ospf6_area *oa, struct ospf6_route *old, struct ospf6_route *route) { - struct ospf6_route *old_route; + struct ospf6_route *old_route, *ls_entry; struct ospf6_path *ecmp_path, *o_path = NULL; struct listnode *anode, *anext; struct listnode *nnode, *rnode, *rnext; struct ospf6_nexthop *nh, *rnh; char buf[PREFIX2STR_BUFFER]; bool route_found = false; + struct interface *ifp; + struct ospf6_lsa *lsa; + struct ospf6_intra_prefix_lsa *intra_prefix_lsa; /* check for old entry match with new route origin, * delete old entry. @@ -1361,7 +1404,7 @@ void ospf6_intra_prefix_route_ecmp_path(struct ospf6_area *oa, o_path->cost, route->path.cost); } - /* Remove selected current rout path's nh from + /* Remove selected current path's nh from * effective nh list. */ for (ALL_LIST_ELEMENTS_RO(o_path->nh_list, nnode, nh)) { @@ -1385,22 +1428,6 @@ void ospf6_intra_prefix_route_ecmp_path(struct ospf6_area *oa, * Update FIB with effective NHs. */ if (listcount(old_route->paths)) { - if (old_route->path.origin.id == - route->path.origin.id && - old_route->path.origin.adv_router == - route->path.origin.adv_router) { - struct ospf6_path *h_path; - - h_path = (struct ospf6_path *) - listgetdata(listhead(old_route->paths)); - old_route->path.origin.type = - h_path->origin.type; - old_route->path.origin.id = - h_path->origin.id; - old_route->path.origin.adv_router = - h_path->origin.adv_router; - } - if (route_updated) { for (ALL_LIST_ELEMENTS(old_route->paths, anode, anext, o_path)) { @@ -1415,6 +1442,14 @@ void ospf6_intra_prefix_route_ecmp_path(struct ospf6_area *oa, if (oa->route_table->hook_add) (*oa->route_table->hook_add) (old_route); + + if (old_route->path.origin.id == + route->path.origin.id && + old_route->path.origin.adv_router == + route->path.origin.adv_router) { + ospf6_intra_prefix_update_route_origin( + old_route); + } break; } } else { @@ -1426,8 +1461,12 @@ void ospf6_intra_prefix_route_ecmp_path(struct ospf6_area *oa, 
old_route->path.cost, route->path.cost); } - ospf6_route_remove(old_route, + if (oa->route_table->hook_remove) + ospf6_route_remove(old_route, oa->route_table); + else + SET_FLAG(old_route->flag, + OSPF6_ROUTE_REMOVE); break; } } @@ -1467,72 +1506,101 @@ void ospf6_intra_prefix_route_ecmp_path(struct ospf6_area *oa, /* Add a nh_list to new ecmp path */ ospf6_copy_nexthops(ecmp_path->nh_list, route->nh_list); - /* Merge nexthop to existing route's nh_list */ - ospf6_route_merge_nexthops(old_route, route); /* Add the new path to route's path list */ listnode_add_sort(old_route->paths, ecmp_path); - UNSET_FLAG(old_route->flag, OSPF6_ROUTE_REMOVE); - SET_FLAG(old_route->flag, OSPF6_ROUTE_CHANGE); - /* Update RIB/FIB */ - if (oa->route_table->hook_add) - (*oa->route_table->hook_add) - (old_route); if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { prefix2str(&route->prefix, buf, sizeof(buf)); - zlog_debug("%s: route %s %p another path added with nh %u, effective paths %u nh %u", + zlog_debug( + "%s: route %s %p another path added with nh %u, effective paths %u nh %u", __PRETTY_FUNCTION__, buf, (void *)old_route, listcount(ecmp_path->nh_list), old_route->paths ? 
- listcount(old_route->paths) - : 0, + listcount(old_route->paths) : 0, listcount(old_route->nh_list)); - } - } else { - for (ALL_LIST_ELEMENTS_RO(o_path->nh_list, - nnode, nh)) { - for (ALL_LIST_ELEMENTS( - old_route->nh_list, - rnode, rnext, rnh)) { - if (!ospf6_nexthop_is_same(rnh, - nh)) - continue; - listnode_delete( - old_route->nh_list, - rnh); - ospf6_nexthop_delete(rnh); - } } + } else { list_delete_all_node(o_path->nh_list); ospf6_copy_nexthops(o_path->nh_list, route->nh_list); - /* Merge nexthop to existing route's nh_list */ - ospf6_route_merge_nexthops(old_route, - route); + } - if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { - prefix2str(&route->prefix, - buf, sizeof(buf)); - zlog_debug("%s: existing route %s %p with effective paths %u nh count %u", - __PRETTY_FUNCTION__, buf, - (void *)old_route, - listcount(old_route->paths), - old_route->nh_list ? - listcount(old_route->nh_list) - : 0); + list_delete_all_node(old_route->nh_list); + + for (ALL_LIST_ELEMENTS_RO(old_route->paths, anode, + o_path)) { + ls_entry = ospf6_route_lookup( + &o_path->ls_prefix, + oa->spf_table); + if (ls_entry == NULL) { + if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) + zlog_debug("%s: ls_prfix %s ls_entry not found.", + __PRETTY_FUNCTION__, + buf); + continue; } + lsa = ospf6_lsdb_lookup(o_path->origin.type, + o_path->origin.id, + o_path->origin.adv_router, + oa->lsdb); + if (lsa == NULL) { + if (IS_OSPF6_DEBUG_EXAMIN( + INTRA_PREFIX)) { + struct prefix adv_prefix; - UNSET_FLAG(old_route->flag, OSPF6_ROUTE_REMOVE); - SET_FLAG(old_route->flag, OSPF6_ROUTE_CHANGE); - /* Update ospf6 route table and RIB/FIB */ - if (oa->route_table->hook_add) - (*oa->route_table->hook_add) - (old_route); + ospf6_linkstate_prefix( + o_path->origin.adv_router, + o_path->origin.id, &adv_prefix); + prefix2str(&adv_prefix, buf, + sizeof(buf)); + zlog_debug("%s: adv_router %s lsa not found", + __PRETTY_FUNCTION__, + buf); + } + continue; + } + intra_prefix_lsa = + (struct ospf6_intra_prefix_lsa *) + 
OSPF6_LSA_HEADER_END(lsa->header); + + if (intra_prefix_lsa->ref_adv_router + == oa->ospf6->router_id) { + ifp = if_lookup_prefix( + &old_route->prefix, + VRF_DEFAULT); + if (ifp) + ospf6_route_add_nexthop( + old_route, + ifp->ifindex, + NULL); + } else { + ospf6_route_merge_nexthops(old_route, + ls_entry); + } } + + if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { + prefix2str(&route->prefix, buf, sizeof(buf)); + zlog_debug("%s: route %s %p with final effective paths %u nh%u", + __PRETTY_FUNCTION__, buf, + (void *)old_route, + old_route->paths ? + listcount(old_route->paths) : 0, + listcount(old_route->nh_list)); + } + + /* used in intra_route_calculation() to add to + * global ospf6 route table. + */ + UNSET_FLAG(old_route->flag, OSPF6_ROUTE_REMOVE); + SET_FLAG(old_route->flag, OSPF6_ROUTE_ADD); + /* Update ospf6 route table and RIB/FIB */ + if (oa->route_table->hook_add) + (*oa->route_table->hook_add)(old_route); /* Delete the new route its info added to existing * route. */ @@ -1642,7 +1710,8 @@ void ospf6_intra_prefix_lsa_add(struct ospf6_lsa *lsa) route->path.metric_type = 1; route->path.cost = ls_entry->path.cost + ntohs(op->prefix_metric); - + memcpy(&route->path.ls_prefix, &ls_prefix, + sizeof(struct prefix)); if (direct_connect) { ifp = if_lookup_prefix(&route->prefix, VRF_DEFAULT); if (ifp) @@ -1660,20 +1729,21 @@ void ospf6_intra_prefix_lsa_add(struct ospf6_lsa *lsa) if (old && (ospf6_route_cmp(route, old) == 0)) { if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { prefix2str(&route->prefix, buf, sizeof(buf)); - zlog_debug(" Update route: %s old cost %u new cost %u nh count %u paths %u", - buf, + zlog_debug("%s Update route: %s old cost %u new cost %u paths %u nh %u", + __PRETTY_FUNCTION__, buf, old->path.cost, route->path.cost, - listcount(route->nh_list), - listcount(route->paths)); + listcount(route->paths), + listcount(route->nh_list)); } ospf6_intra_prefix_route_ecmp_path(oa, old, route); } else { if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { 
prefix2str(&route->prefix, buf, sizeof(buf)); - zlog_debug(" route %s add with cost %u nh %u paths %u", - buf, route->path.cost, - listcount(route->nh_list), - listcount(route->paths)); + zlog_debug("%s route %s add with cost %u paths %u nh %u", + __PRETTY_FUNCTION__, buf, + route->path.cost, + listcount(route->paths), + listcount(route->nh_list)); } ospf6_route_add(route, oa->route_table); } @@ -1684,12 +1754,102 @@ void ospf6_intra_prefix_lsa_add(struct ospf6_lsa *lsa) zlog_debug("Trailing garbage ignored"); } +static void ospf6_intra_prefix_lsa_remove_update_route(struct ospf6_lsa *lsa, + struct ospf6_area *oa, + struct ospf6_route *route) +{ + struct listnode *anode, *anext; + struct listnode *nnode, *rnode, *rnext; + struct ospf6_nexthop *nh, *rnh; + struct ospf6_path *o_path; + bool nh_updated = false; + char buf[PREFIX2STR_BUFFER]; + + /* Iterate all paths of route to find maching + * with LSA remove info. + * If route->path is same, replace + * from paths list. + */ + for (ALL_LIST_ELEMENTS(route->paths, anode, anext, o_path)) { + if ((o_path->origin.type != lsa->header->type) || + (o_path->origin.adv_router != lsa->header->adv_router) || + (o_path->origin.id != lsa->header->id)) + continue; + + if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { + prefix2str(&route->prefix, buf, sizeof(buf)); + zlog_debug( + "%s: route %s path found with cost %u nh %u to remove.", + __PRETTY_FUNCTION__, buf, o_path->cost, + listcount(o_path->nh_list)); + } + + /* Remove found path's nh_list from + * the route's nh_list. 
+ */ + for (ALL_LIST_ELEMENTS_RO(o_path->nh_list, nnode, nh)) { + for (ALL_LIST_ELEMENTS(route->nh_list, rnode, + rnext, rnh)) { + if (!ospf6_nexthop_is_same(rnh, nh)) + continue; + listnode_delete(route->nh_list, rnh); + ospf6_nexthop_delete(rnh); + } + } + /* Delete the path from route's + * path list + */ + listnode_delete(route->paths, o_path); + ospf6_path_free(o_path); + nh_updated = true; + break; + } + + if (nh_updated) { + /* Iterate all paths and merge nexthop, + * unlesss any of the nexthop similar to + * ones deleted as part of path deletion. + */ + for (ALL_LIST_ELEMENTS(route->paths, anode, anext, o_path)) + ospf6_merge_nexthops(route->nh_list, o_path->nh_list); + + + if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { + prefix2str(&route->prefix, buf, sizeof(buf)); + zlog_debug("%s: route %s update paths %u nh %u", + __PRETTY_FUNCTION__, buf, + route->paths ? listcount(route->paths) : 0, + route->nh_list ? listcount(route->nh_list) + : 0); + } + + /* Update Global Route table and + * RIB/FIB with effective + * nh_list + */ + if (oa->route_table->hook_add) + (*oa->route_table->hook_add)(route); + + /* route's primary path is similar + * to LSA, replace route's primary + * path with route's paths list + * head. 
+ */ + if ((route->path.origin.id == lsa->header->id) && + (route->path.origin.adv_router == + lsa->header->adv_router)) { + ospf6_intra_prefix_update_route_origin(route); + } + } + +} + void ospf6_intra_prefix_lsa_remove(struct ospf6_lsa *lsa) { struct ospf6_area *oa; struct ospf6_intra_prefix_lsa *intra_prefix_lsa; struct prefix prefix; - struct ospf6_route *route, *nroute, *route_to_del; + struct ospf6_route *route, *nroute; int prefix_num; struct ospf6_prefix *op; char *start, *current, *end; @@ -1717,22 +1877,6 @@ void ospf6_intra_prefix_lsa_remove(struct ospf6_lsa *lsa) break; prefix_num--; - route_to_del = ospf6_route_create(); - - memset(&route_to_del->prefix, 0, sizeof(struct prefix)); - route_to_del->prefix.family = AF_INET6; - route_to_del->prefix.prefixlen = op->prefix_length; - ospf6_prefix_in6_addr(&route_to_del->prefix.u.prefix6, op); - - route_to_del->type = OSPF6_DEST_TYPE_NETWORK; - route_to_del->path.origin.type = lsa->header->type; - route_to_del->path.origin.id = lsa->header->id; - route_to_del->path.origin.adv_router = lsa->header->adv_router; - route_to_del->path.prefix_options = op->prefix_options; - route_to_del->path.area_id = oa->area_id; - route_to_del->path.type = OSPF6_PATH_TYPE_INTRA; - route_to_del->path.metric_type = 1; - memset(&prefix, 0, sizeof(struct prefix)); prefix.family = AF_INET6; prefix.prefixlen = op->prefix_length; @@ -1757,134 +1901,8 @@ void ospf6_intra_prefix_lsa_remove(struct ospf6_lsa *lsa) * after removal of one of the path. */ if (listcount(route->paths) > 1) { - struct listnode *anode, *anext; - struct listnode *nnode, *rnode, *rnext; - struct ospf6_nexthop *nh, *rnh; - struct ospf6_path *o_path; - bool nh_updated = false; - - /* Iterate all paths of route to find maching - * with LSA remove info. - * If route->path is same, replace - * from paths list. 
- */ - for (ALL_LIST_ELEMENTS(route->paths, anode, - anext, o_path)) { - if ((o_path->origin.type != - lsa->header->type) || - (o_path->origin.adv_router != - lsa->header->adv_router) || - (o_path->origin.id != - lsa->header->id)) - continue; - - if (IS_OSPF6_DEBUG_EXAMIN - (INTRA_PREFIX)) { - prefix2str(&prefix, buf, - sizeof(buf)); - zlog_debug( - "%s: route %s path found with cost %u nh %u to remove.", - __PRETTY_FUNCTION__, - buf, o_path->cost, - listcount( - o_path->nh_list)); - } - /* Remove old route from global - * ospf6 route table. - * nh_update section will add - * back with effective nh. - */ - if (oa->route_table->hook_remove) - (*oa->route_table->hook_remove) - (route); - /* Remove found path's nh_list from - * the route's nh_list. - */ - for (ALL_LIST_ELEMENTS_RO( - o_path->nh_list, - nnode, nh)) { - for (ALL_LIST_ELEMENTS( - route->nh_list, - rnode, rnext, rnh)) { - if ( - !ospf6_nexthop_is_same( - rnh, nh)) - continue; - listnode_delete( - route->nh_list, - rnh); - ospf6_nexthop_delete( - rnh); - } - } - /* Delete the path from route's - * path list - */ - listnode_delete(route->paths, o_path); - ospf6_path_free(o_path); - nh_updated = true; - break; - } - - if (nh_updated) { - - /* Iterate all paths and merge nexthop, - * unlesss any of the nexthop similar to - * ones deleted as part of path - * deletion. - */ - for (ALL_LIST_ELEMENTS(route->paths, - anode, anext, o_path)) { - ospf6_merge_nexthops( - route->nh_list, - o_path->nh_list); - } - - if (IS_OSPF6_DEBUG_EXAMIN( - INTRA_PREFIX)) { - prefix2str(&route->prefix, buf, - sizeof(buf)); - assert(route->nh_list); - zlog_debug("%s: route %s update paths %u nh %u" - , __PRETTY_FUNCTION__, - buf, - listcount(route->paths), - listcount( - route->nh_list)); - } - - /* route's primary path is similar - * to LSA, replace route's primary - * path with route's paths list - * head. 
- */ - if ((route->path.origin.id == - lsa->header->id) && - (route->path.origin.adv_router - == lsa->header->adv_router)) { - struct ospf6_path *h_path; - - h_path = (struct ospf6_path *) - listgetdata(listhead( - route->paths)); - route->path.origin.type = - h_path->origin.type; - route->path.origin.id = - h_path->origin.id; - route->path.origin.adv_router = - h_path->origin.adv_router; - } - - /* Update Global Route table and - * RIB/FIB with effective - * nh_list - */ - if (oa->route_table->hook_add) - (*oa->route_table->hook_add) - (route); - } - continue; - + ospf6_intra_prefix_lsa_remove_update_route( + lsa, oa, route); } else { if (route->path.origin.type != lsa->header->type @@ -1896,9 +1914,11 @@ void ospf6_intra_prefix_lsa_remove(struct ospf6_lsa *lsa) if (IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) { prefix2str(&route->prefix, buf, sizeof(buf)); - zlog_debug("route remove %s with path %u cost %u nh %u", - buf, route->path.type, + zlog_debug("%s: route remove %s with path type %u cost %u paths %u nh %u", + __PRETTY_FUNCTION__, buf, + route->path.type, route->path.cost, + listcount(route->paths), listcount(route->nh_list)); } ospf6_route_remove(route, oa->route_table); @@ -1906,8 +1926,6 @@ void ospf6_intra_prefix_lsa_remove(struct ospf6_lsa *lsa) } if (route) ospf6_route_unlock(route); - - ospf6_route_delete(route_to_del); } if (current != end && IS_OSPF6_DEBUG_EXAMIN(INTRA_PREFIX)) diff --git a/ospf6d/ospf6_route.c b/ospf6d/ospf6_route.c index 8be00d9b41..39272b3701 100644 --- a/ospf6d/ospf6_route.c +++ b/ospf6d/ospf6_route.c @@ -611,9 +611,10 @@ struct ospf6_route *ospf6_route_add(struct ospf6_route *route, prefix2str(&route->prefix, buf, sizeof(buf)); if (IS_OSPF6_DEBUG_ROUTE(MEMORY)) - zlog_debug("%s %p: route add %p: %s", + zlog_debug("%s %p: route add %p: %s paths %u nh %u", ospf6_route_table_name(table), (void *)table, - (void *)route, buf); + (void *)route, buf, listcount(route->paths), + listcount(route->nh_list)); else if (IS_OSPF6_DEBUG_ROUTE(TABLE)) 
zlog_debug("%s: route add: %s", ospf6_route_table_name(table), buf); @@ -664,11 +665,13 @@ struct ospf6_route *ospf6_route_add(struct ospf6_route *route, if (IS_OSPF6_DEBUG_ROUTE(MEMORY)) zlog_debug( - "%s %p: route add %p cost %u nh %u: update of %p old cost %u nh %u", + "%s %p: route add %p cost %u paths %u nh %u: update of %p cost %u paths %u nh %u", ospf6_route_table_name(table), (void *)table, (void *)route, route->path.cost, + listcount(route->paths), listcount(route->nh_list), (void *)old, - old->path.cost, listcount(old->nh_list)); + old->path.cost, listcount(old->paths), + listcount(old->nh_list)); else if (IS_OSPF6_DEBUG_ROUTE(TABLE)) zlog_debug("%s: route add: update", ospf6_route_table_name(table)); diff --git a/ospf6d/ospf6_route.h b/ospf6d/ospf6_route.h index a69e9a920f..02002533e6 100644 --- a/ospf6d/ospf6_route.h +++ b/ospf6d/ospf6_route.h @@ -91,6 +91,9 @@ struct ospf6_path { /* Cost */ uint8_t metric_type; uint32_t cost; + + struct prefix ls_prefix; + union { uint32_t cost_e2; uint32_t cost_config; diff --git a/ospf6d/ospf6d.c b/ospf6d/ospf6d.c index 8d6d5b4a26..db61fe087b 100644 --- a/ospf6d/ospf6d.c +++ b/ospf6d/ospf6d.c @@ -97,7 +97,7 @@ DEFUN_NOSH (show_debugging_ospf6, DEBUG_STR OSPF6_STR) { - vty_out(vty, "OSPF6 debugging status:"); + vty_out(vty, "OSPF6 debugging status:\n"); config_write_ospf6_debug(vty); diff --git a/pbrd/pbr_nht.c b/pbrd/pbr_nht.c index 1ccf3ebffa..5be96e86d0 100644 --- a/pbrd/pbr_nht.c +++ b/pbrd/pbr_nht.c @@ -470,6 +470,18 @@ void pbr_nht_change_group(const char *name) pbr_nht_install_nexthop_group(pnhgc, nhgc->nhg); } +/* + * Since we are writing into the name field which is PBR_MAP_NAMELEN + * size, we are expecting this to field to be at max 100 bytes. + * Newer compilers understand that the %s portion may be up to + * 100 bytes( because of the size of the string. The %u portion + * is expected to be 10 bytes. So in `theory` there are situations + * where we might truncate. 
The reality this is never going to + * happen( who is going to create a nexthop group name that is + * over say 30 characters? ). As such we are expecting the + * calling function to subtract 10 from the size_t l before + * we pass it in to get around this new gcc fun. + */ char *pbr_nht_nexthop_make_name(char *name, size_t l, uint32_t seqno, char *buffer) { @@ -485,7 +497,7 @@ void pbr_nht_add_individual_nexthop(struct pbr_map_sequence *pbrms) struct pbr_nexthop_cache lookup; memset(&find, 0, sizeof(find)); - pbr_nht_nexthop_make_name(pbrms->parent->name, PBR_MAP_NAMELEN, + pbr_nht_nexthop_make_name(pbrms->parent->name, PBR_MAP_NAMELEN - 10, pbrms->seqno, find.name); if (!pbrms->internal_nhg_name) pbrms->internal_nhg_name = XSTRDUP(MTYPE_TMP, find.name); diff --git a/pbrd/pbr_vty.c b/pbrd/pbr_vty.c index 03f2104835..ba5c49ad5c 100644 --- a/pbrd/pbr_vty.c +++ b/pbrd/pbr_vty.c @@ -269,7 +269,7 @@ DEFPY(pbr_map_nexthop, pbr_map_nexthop_cmd, if (pbrms->nhg) nh = nexthop_exists(pbrms->nhg, &nhop); else { - char buf[100]; + char buf[PBR_MAP_NAMELEN]; if (no) { vty_out(vty, "No nexthops to delete"); @@ -280,7 +280,7 @@ DEFPY(pbr_map_nexthop, pbr_map_nexthop_cmd, pbrms->internal_nhg_name = XSTRDUP(MTYPE_TMP, pbr_nht_nexthop_make_name(pbrms->parent->name, - PBR_MAP_NAMELEN, + PBR_MAP_NAMELEN - 10, pbrms->seqno, buf)); nh = NULL; diff --git a/pbrd/pbr_zebra.c b/pbrd/pbr_zebra.c index bc7dd20832..cdacfad4b4 100644 --- a/pbrd/pbr_zebra.c +++ b/pbrd/pbr_zebra.c @@ -479,6 +479,7 @@ static void pbr_encode_pbr_map_sequence(struct stream *s, stream_putw(s, 0); /* src port */ pbr_encode_pbr_map_sequence_prefix(s, pbrms->dst, family); stream_putw(s, 0); /* dst port */ + stream_putl(s, 0); /* fwmark */ if (pbrms->nhgrp_name) stream_putl(s, pbr_nht_get_table(pbrms->nhgrp_name)); else if (pbrms->nhg) diff --git a/pimd/pim_iface.c b/pimd/pim_iface.c index ff7238ae97..5996a3ac96 100644 --- a/pimd/pim_iface.c +++ b/pimd/pim_iface.c @@ -1547,27 +1547,20 @@ int 
pim_if_connected_to_source(struct interface *ifp, struct in_addr src) return 0; } -int pim_if_is_loopback(struct pim_instance *pim, struct interface *ifp) +bool pim_if_is_loopback(struct interface *ifp) { - if (if_is_loopback(ifp)) - return 1; - - if (strcmp(ifp->name, pim->vrf->name) == 0) - return 1; + if (if_is_loopback(ifp) || if_is_vrf(ifp)) + return true; - return 0; + return false; } -int pim_if_is_vrf_device(struct interface *ifp) +bool pim_if_is_vrf_device(struct interface *ifp) { - struct vrf *vrf; + if (if_is_vrf(ifp)) + return true; - RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { - if (strncmp(ifp->name, vrf->name, strlen(ifp->name)) == 0) - return 1; - } - - return 0; + return false; } int pim_if_ifchannel_count(struct pim_interface *pim_ifp) diff --git a/pimd/pim_iface.h b/pimd/pim_iface.h index 5ecd07d227..cf025cbd4a 100644 --- a/pimd/pim_iface.h +++ b/pimd/pim_iface.h @@ -207,9 +207,9 @@ void pim_if_create_pimreg(struct pim_instance *pim); int pim_if_connected_to_source(struct interface *ifp, struct in_addr src); int pim_update_source_set(struct interface *ifp, struct in_addr source); -int pim_if_is_loopback(struct pim_instance *pim, struct interface *ifp); +bool pim_if_is_loopback(struct interface *ifp); -int pim_if_is_vrf_device(struct interface *ifp); +bool pim_if_is_vrf_device(struct interface *ifp); int pim_if_ifchannel_count(struct pim_interface *pim_ifp); #endif /* PIM_IFACE_H */ diff --git a/pimd/pim_msdp.c b/pimd/pim_msdp.c index 53a3382987..829c917b0f 100644 --- a/pimd/pim_msdp.c +++ b/pimd/pim_msdp.c @@ -228,6 +228,8 @@ static void pim_msdp_sa_upstream_update(struct pim_msdp_sa *sa, /* release all mem associated with a sa */ static void pim_msdp_sa_free(struct pim_msdp_sa *sa) { + pim_msdp_sa_state_timer_setup(sa, false); + XFREE(MTYPE_PIM_MSDP_SA, sa); } @@ -1170,6 +1172,13 @@ enum pim_msdp_err pim_msdp_peer_add(struct pim_instance *pim, /* release all mem associated with a peer */ static void pim_msdp_peer_free(struct pim_msdp_peer 
*mp) { + /* + * Let's make sure we are not running when we delete + * the underlying data structure + */ + pim_msdp_peer_cr_timer_setup(mp, false); + pim_msdp_peer_ka_timer_setup(mp, false); + if (mp->ibuf) { stream_free(mp->ibuf); } @@ -1611,6 +1620,8 @@ void pim_msdp_init(struct pim_instance *pim, struct thread_master *master) /* counterpart to MSDP init; XXX: unused currently */ void pim_msdp_exit(struct pim_instance *pim) { + pim_msdp_sa_adv_timer_setup(pim, false); + /* XXX: stop listener and delete all peer sessions */ if (pim->msdp.peer_hash) { diff --git a/pimd/pim_pim.c b/pimd/pim_pim.c index ffe5d52a15..de09b070f4 100644 --- a/pimd/pim_pim.c +++ b/pimd/pim_pim.c @@ -653,7 +653,7 @@ static int pim_hello_send(struct interface *ifp, uint16_t holdtime) { struct pim_interface *pim_ifp = ifp->info; - if (pim_if_is_loopback(pim_ifp->pim, ifp)) + if (pim_if_is_loopback(ifp)) return 0; if (hello_send(ifp, holdtime)) { @@ -755,7 +755,7 @@ void pim_hello_restart_triggered(struct interface *ifp) /* * No need to ever start loopback or vrf device hello's */ - if (pim_if_is_loopback(pim_ifp->pim, ifp)) + if (pim_if_is_loopback(ifp)) return; /* diff --git a/redhat/frr.service b/redhat/frr.service index cc6ec429a3..3ae0aabfe2 100644 --- a/redhat/frr.service +++ b/redhat/frr.service @@ -9,7 +9,7 @@ Type=forking NotifyAccess=all StartLimitInterval=3m StartLimitBurst=3 -TimeoutSec=1m +TimeoutSec=2m WatchdogSec=60s RestartSec=5 Restart=on-abnormal diff --git a/tools/frr.service b/tools/frr.service index 8800bf6b0f..5f44274ec3 100644 --- a/tools/frr.service +++ b/tools/frr.service @@ -10,7 +10,7 @@ Type=forking NotifyAccess=all StartLimitInterval=3m StartLimitBurst=3 -TimeoutSec=1m +TimeoutSec=2m WatchdogSec=60s RestartSec=5 Restart=on-abnormal diff --git a/vtysh/Makefile.am b/vtysh/Makefile.am index 52641de72c..d82f9fd1b8 100644 --- a/vtysh/Makefile.am +++ b/vtysh/Makefile.am @@ -54,6 +54,7 @@ vtysh_scan += $(top_srcdir)/bgpd/bgp_nexthop.c vtysh_scan += 
$(top_srcdir)/bgpd/bgp_route.c vtysh_scan += $(top_srcdir)/bgpd/bgp_routemap.c vtysh_scan += $(top_srcdir)/bgpd/bgp_vty.c +vtysh_scan += $(top_srcdir)/bgpd/bgp_flowspec_vty.c endif if RPKI diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 16aece9747..2ae9ac5082 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -1215,6 +1215,10 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn, json_object_int_add(json_route, "metric", re->metric); } + json_object_int_add(json_route, "internalStatus", + re->status); + json_object_int_add(json_route, "internalFlags", + re->flags); if (uptime < ONE_DAY_SECOND) sprintf(buf, "%02d:%02d:%02d", tm->tm_hour, tm->tm_min, tm->tm_sec); @@ -1231,6 +1235,9 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn, for (ALL_NEXTHOPS(re->ng, nexthop)) { json_nexthop = json_object_new_object(); + json_object_int_add(json_nexthop, "flags", + nexthop->flags); + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE)) json_object_boolean_true_add(json_nexthop, "duplicate"); |
