diff options
| -rw-r--r-- | bgpd/bgp_bfd.c | 59 | ||||
| -rw-r--r-- | bgpd/bgp_evpn_mh.c | 10 | ||||
| -rw-r--r-- | bgpd/bgp_nexthop.c | 28 | ||||
| -rw-r--r-- | bgpd/bgp_pbr.c | 29 | ||||
| -rw-r--r-- | bgpd/bgp_zebra.c | 11 | ||||
| -rw-r--r-- | bgpd/bgpd.c | 23 | ||||
| -rw-r--r-- | fpm/fpm_pb.h | 1 | ||||
| -rw-r--r-- | lib/frr_pthread.c | 8 | ||||
| -rw-r--r-- | lib/frrcu.c | 15 | ||||
| -rw-r--r-- | lib/nexthop.c | 87 | ||||
| -rw-r--r-- | lib/nexthop.h | 104 | ||||
| -rw-r--r-- | lib/sigevent.h | 27 | ||||
| -rw-r--r-- | lib/zclient.c | 22 | ||||
| -rw-r--r-- | nhrpd/netlink_arp.c | 2 | ||||
| -rw-r--r-- | pbrd/pbr_nht.c | 8 | ||||
| -rw-r--r-- | pbrd/pbr_vty.c | 2 | ||||
| -rw-r--r-- | pimd/pim_ifchannel.c | 12 | ||||
| -rw-r--r-- | pimd/pim_upstream.c | 2 | ||||
| -rw-r--r-- | tests/topotests/v6_nexthop_group_recursive_resolution/r1/frr.conf | 4 | ||||
| -rw-r--r-- | tests/topotests/v6_nexthop_group_recursive_resolution/test_v6_nexthop_group_recursive_resolution.py | 189 | ||||
| -rw-r--r-- | zebra/zebra_nhg.c | 6 | ||||
| -rw-r--r-- | zebra/zebra_routemap.c | 7 |
22 files changed, 467 insertions, 189 deletions
diff --git a/bgpd/bgp_bfd.c b/bgpd/bgp_bfd.c index 14ff5f2e11..add93d3a5f 100644 --- a/bgpd/bgp_bfd.c +++ b/bgpd/bgp_bfd.c @@ -26,6 +26,7 @@ #include "bgpd/bgp_debug.h" #include "bgpd/bgp_vty.h" #include "bgpd/bgp_packet.h" +#include "bgpd/bgp_network.h" DEFINE_MTYPE_STATIC(BGPD, BFD_CONFIG, "BFD configuration data"); @@ -53,14 +54,23 @@ static void bfd_session_status_update(struct bfd_session_params *bsp, peer->host); return; } - peer->last_reset = PEER_DOWN_BFD_DOWN; - /* rfc9384 */ - if (BGP_IS_VALID_STATE_FOR_NOTIF(peer->connection->status)) - bgp_notify_send(peer->connection, BGP_NOTIFY_CEASE, - BGP_NOTIFY_CEASE_BFD_DOWN); - - BGP_EVENT_ADD(peer->connection, BGP_Stop); + /* Once the BFD session is UP, and later BGP session is UP, + * BFD notices that peer->su_local changed, and BFD session goes down. + * We should trigger BGP session reset if BFD session is UP + * only when BGP session is UP already. + * Otherwise, we end up resetting BGP session when BFD session is UP, + * when the source address is changed, e.g. 0.0.0.0 -> 10.0.0.1. + */ + if (bss->last_event > peer->uptime) { + peer->last_reset = PEER_DOWN_BFD_DOWN; + /* rfc9384 */ + if (BGP_IS_VALID_STATE_FOR_NOTIF(peer->connection->status)) + bgp_notify_send(peer->connection, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_BFD_DOWN); + + BGP_EVENT_ADD(peer->connection, BGP_Stop); + } } if (bss->state == BSS_UP && bss->previous_state != BSS_UP && @@ -104,6 +114,10 @@ void bgp_peer_config_apply(struct peer *p, struct peer_group *pg) */ gconfig = pg->conf; + if (CHECK_FLAG(gconfig->flags, PEER_FLAG_UPDATE_SOURCE) || + CHECK_FLAG(p->flags_override, PEER_FLAG_UPDATE_SOURCE)) + bgp_peer_bfd_update_source(p); + /* * If using default control plane independent configuration, * then prefer group's (e.g. it means it wasn't manually configured). @@ -141,24 +155,45 @@ void bgp_peer_config_apply(struct peer *p, struct peer_group *pg) void bgp_peer_bfd_update_source(struct peer *p) { - struct bfd_session_params *session = p->bfd_config->session; - const union sockunion *source; + struct bfd_session_params *session; + const union sockunion *source = NULL; bool changed = false; int family; union { struct in_addr v4; struct in6_addr v6; } src, dst; + struct interface *ifp; + union sockunion addr; + + if (!p->bfd_config) + return; + session = p->bfd_config->session; /* Nothing to do for groups. */ if (CHECK_FLAG(p->sflags, PEER_STATUS_GROUP)) return; /* Figure out the correct source to use. */ - if (CHECK_FLAG(p->flags, PEER_FLAG_UPDATE_SOURCE) && p->update_source) - source = p->update_source; - else + if (CHECK_FLAG(p->flags, PEER_FLAG_UPDATE_SOURCE)) { + if (p->update_source) { + source = p->update_source; + } else if (p->update_if) { + ifp = if_lookup_by_name(p->update_if, p->bgp->vrf_id); + if (ifp) { + sockunion_init(&addr); + if (bgp_update_address(ifp, &p->connection->su, &addr)) { + if (BGP_DEBUG(bfd, BFD_LIB)) + zlog_debug("%s: can't find the source address for interface %s", + __func__, p->update_if); + } + + source = &addr; + } + } + } else { source = p->su_local; + } /* Update peer's source/destination addresses. */ bfd_sess_addresses(session, &family, &src.v6, &dst.v6); diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c index 8acf4ea676..57f638240d 100644 --- a/bgpd/bgp_evpn_mh.c +++ b/bgpd/bgp_evpn_mh.c @@ -1198,10 +1198,9 @@ int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, struct prefix_rd prd; esi_t esi; uint32_t eth_tag; - mpls_label_t label; + mpls_label_t label[BGP_MAX_LABELS] = {}; struct in_addr vtep_ip; struct prefix_evpn p; - uint8_t num_labels = 0; if (psize != BGP_EVPN_TYPE1_PSIZE) { flog_err(EC_BGP_EVPN_ROUTE_INVALID, @@ -1225,8 +1224,7 @@ int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, eth_tag = ntohl(eth_tag); pfx += EVPN_ETH_TAG_BYTES; - memcpy(&label, pfx, BGP_LABEL_BYTES); - num_labels++; + memcpy(&label[0], pfx, BGP_LABEL_BYTES); /* EAD route prefix doesn't include the nexthop in the global * table @@ -1236,10 +1234,10 @@ int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, /* Process the route. */ if (attr) { bgp_update(peer, (struct prefix *)&p, addpath_id, attr, afi, safi, ZEBRA_ROUTE_BGP, - BGP_ROUTE_NORMAL, &prd, &label, num_labels, 0, NULL); + BGP_ROUTE_NORMAL, &prd, &label[0], 1, 0, NULL); } else { bgp_withdraw(peer, (struct prefix *)&p, addpath_id, afi, safi, ZEBRA_ROUTE_BGP, - BGP_ROUTE_NORMAL, &prd, &label, num_labels); + BGP_ROUTE_NORMAL, &prd, &label[0], 1); } return 0; } diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index ba6d707109..68a1b85806 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -32,6 +32,7 @@ #include "bgpd/bgp_vty.h" #include "bgpd/bgp_rd.h" #include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_bfd.h" DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Addr Intf String"); @@ -409,17 +410,6 @@ void bgp_connected_add(struct bgp *bgp, struct connected *ifc) bgp_dest_set_bgp_connected_ref_info(dest, bc); } - for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) { - if (peer->conf_if && - (strcmp(peer->conf_if, ifc->ifp->name) == 0) && - !peer_established(peer->connection) && - !CHECK_FLAG(peer->flags, PEER_FLAG_IFPEER_V6ONLY)) { - connection = peer->connection; - if (peer_active(peer)) - BGP_EVENT_ADD(connection, BGP_Stop); - BGP_EVENT_ADD(connection, BGP_Start); - } - } } else if (addr->family == AF_INET6) { apply_mask_ipv6((struct prefix_ipv6 *)&p); @@ -443,6 +433,22 @@ void bgp_connected_add(struct bgp *bgp, struct connected *ifc) bgp_dest_set_bgp_connected_ref_info(dest, bc); } } + + /* + * Iterate over all the peers and attempt to set the bfd session + * data and if it's a bgp unnumbered get her flowing if necessary + */ + for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) { + bgp_peer_bfd_update_source(peer); + if (peer->conf_if && (strcmp(peer->conf_if, ifc->ifp->name) == 0) && + !peer_established(peer->connection) && + !CHECK_FLAG(peer->flags, PEER_FLAG_IFPEER_V6ONLY)) { + connection = peer->connection; + if (peer_active(peer)) + BGP_EVENT_ADD(connection, BGP_Stop); + BGP_EVENT_ADD(connection, BGP_Start); + } + } } void bgp_connected_delete(struct bgp *bgp, struct connected *ifc) diff --git a/bgpd/bgp_pbr.c b/bgpd/bgp_pbr.c index 2d61c0f00a..b85a8e2254 100644 --- a/bgpd/bgp_pbr.c +++ b/bgpd/bgp_pbr.c @@ -2624,7 +2624,6 @@ static void bgp_pbr_policyroute_add_to_zebra(struct bgp *bgp, static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, struct bgp_pbr_entry_main *api, bool add) { - struct nexthop nh; int i = 0; int continue_loop = 1; float rate = 0; @@ -2639,7 +2638,6 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, struct bgp_pbr_val_mask bpvm; memset(&range, 0, sizeof(range)); - memset(&nh, 0, sizeof(nh)); memset(&bpf, 0, sizeof(bpf)); memset(&bpof, 0, sizeof(bpof)); if (CHECK_FLAG(api->match_bitmask, PREFIX_SRC_PRESENT) || @@ -2652,8 +2650,6 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, dst = &api->dst_prefix; if (api->type == BGP_PBR_IPRULE) bpf.type = api->type; - memset(&nh, 0, sizeof(nh)); - nh.vrf_id = VRF_UNKNOWN; if (api->match_protocol_num) { proto = (uint8_t)api->protocol[0].value; if (api->afi == AF_INET6 && proto == IPPROTO_ICMPV6) @@ -2778,8 +2774,10 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, case ACTION_TRAFFICRATE: /* drop packet */ if (api->actions[i].u.r.rate == 0) { - nh.vrf_id = api->vrf_id; - nh.type = NEXTHOP_TYPE_BLACKHOLE; + struct nexthop nh = { + .vrf_id = api->vrf_id, + .type = NEXTHOP_TYPE_BLACKHOLE, + }; bgp_pbr_policyroute_add_to_zebra( bgp, path, &bpf, &bpof, &nh, &rate); } else { @@ -2802,18 +2800,15 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, /* terminate action: run other filters */ break; - case ACTION_REDIRECT_IP: - nh.vrf_id = api->vrf_id; + case ACTION_REDIRECT_IP: { + struct nexthop nh = { .vrf_id = api->vrf_id }; + if (api->afi == AFI_IP) { nh.type = NEXTHOP_TYPE_IPV4; - nh.gate.ipv4.s_addr = - api->actions[i].u.zr. - redirect_ip_v4.s_addr; + nh.gate.ipv4 = api->actions[i].u.zr.redirect_ip_v4; } else { nh.type = NEXTHOP_TYPE_IPV6; - memcpy(&nh.gate.ipv6, - &api->actions[i].u.zr.redirect_ip_v6, - sizeof(struct in6_addr)); + nh.gate.ipv6 = api->actions[i].u.zr.redirect_ip_v6; } bgp_pbr_policyroute_add_to_zebra(bgp, path, &bpf, &bpof, &nh, &rate); @@ -2822,7 +2817,10 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, */ continue_loop = 0; break; - case ACTION_REDIRECT: + } + case ACTION_REDIRECT: { + struct nexthop nh = {}; + if (api->afi == AFI_IP) nh.type = NEXTHOP_TYPE_IPV4; else @@ -2832,6 +2830,7 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, &nh, &rate); continue_loop = 0; break; + } case ACTION_MARKING: if (BGP_DEBUG(pbr, PBR)) { bgp_pbr_print_policy_route(api); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 07ab822b03..17596937d3 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -744,6 +744,7 @@ bool bgp_zebra_nexthop_set(union sockunion *local, union sockunion *remote, int ret = 0; struct interface *ifp = NULL; bool v6_ll_avail = true; + bool shared_network_original = peer->shared_network; memset(nexthop, 0, sizeof(struct bgp_nexthop)); @@ -908,6 +909,9 @@ bool bgp_zebra_nexthop_set(union sockunion *local, union sockunion *remote, peer->shared_network = 0; } + if (shared_network_original != peer->shared_network) + bgp_peer_bfd_update_source(peer); + /* KAME stack specific treatment. */ #ifdef KAME if (IN6_IS_ADDR_LINKLOCAL(&nexthop->v6_global) @@ -2314,6 +2318,13 @@ void bgp_zebra_instance_register(struct bgp *bgp) bgp_zebra_advertise_all_vni(bgp, 1); bgp_nht_register_nexthops(bgp); + + /* + * Request SRv6 locator information from Zebra, if SRv6 is enabled + * and a locator is configured for this BGP instance. + */ + if (bgp->srv6_enabled && bgp->srv6_locator_name[0] != '\0' && !bgp->srv6_locator) + bgp_zebra_srv6_manager_get_locator(bgp->srv6_locator_name); } /* Deregister this instance with Zebra. Invoked upon the instance diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index e97bdb3d51..0601400afd 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -3444,17 +3444,6 @@ static struct bgp *bgp_create(as_t *as, const char *name, } bgp = XCALLOC(MTYPE_BGP, sizeof(struct bgp)); - bgp->as = *as; - if (as_pretty) - bgp->as_pretty = XSTRDUP(MTYPE_BGP_NAME, as_pretty); - else - bgp->as_pretty = XSTRDUP(MTYPE_BGP_NAME, asn_asn2asplain(*as)); - - if (asnotation != ASNOTATION_UNDEFINED) { - bgp->asnotation = asnotation; - SET_FLAG(bgp->config, BGP_CONFIG_ASNOTATION); - } else - asn_str2asn_notation(bgp->as_pretty, NULL, &bgp->asnotation); if (BGP_DEBUG(zebra, ZEBRA)) { if (inst_type == BGP_INSTANCE_TYPE_DEFAULT) @@ -3498,6 +3487,18 @@ static struct bgp *bgp_create(as_t *as, const char *name, bgp->peer = list_new(); peer_init: + bgp->as = *as; + if (as_pretty) + bgp->as_pretty = XSTRDUP(MTYPE_BGP_NAME, as_pretty); + else + bgp->as_pretty = XSTRDUP(MTYPE_BGP_NAME, asn_asn2asplain(*as)); + + if (asnotation != ASNOTATION_UNDEFINED) { + bgp->asnotation = asnotation; + SET_FLAG(bgp->config, BGP_CONFIG_ASNOTATION); + } else + asn_str2asn_notation(bgp->as_pretty, NULL, &bgp->asnotation); + bgp->peer->cmp = (int (*)(void *, void *))peer_cmp; bgp->peerhash = hash_create(peer_hash_key_make, peer_hash_same, "BGP Peer Hash"); diff --git a/fpm/fpm_pb.h b/fpm/fpm_pb.h index 23d7e43993..8847365a37 100644 --- a/fpm/fpm_pb.h +++ b/fpm/fpm_pb.h @@ -111,6 +111,7 @@ static inline int fpm__nexthop__get(const Fpm__Nexthop *nh, nexthop->vrf_id = VRF_DEFAULT; nexthop->type = NEXTHOP_TYPE_IPV4; + memset(&nexthop->gate, 0, sizeof(nexthop->gate)); nexthop->gate.ipv4 = ipv4; if (ifindex) { nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX; diff --git a/lib/frr_pthread.c b/lib/frr_pthread.c index 3a4bc712fc..0b4d7c77ae 100644 --- a/lib/frr_pthread.c +++ b/lib/frr_pthread.c @@ -20,6 +20,7 @@ #include "zlog.h" #include "libfrr.h" #include "libfrr_trace.h" +#include "sigevent.h" DEFINE_MTYPE_STATIC(LIB, FRR_PTHREAD, "FRR POSIX Thread"); DEFINE_MTYPE_STATIC(LIB, PTHREAD_PRIM, "POSIX sync primitives"); @@ -185,10 +186,9 @@ int frr_pthread_run(struct frr_pthread *fpt, const pthread_attr_t *attr) assert(frr_is_after_fork || !"trying to start thread before fork()"); - /* Ensure we never handle signals on a background thread by blocking - * everything here (new thread inherits signal mask) - */ - sigfillset(&blocksigs); + sigemptyset(&blocksigs); + frr_sigset_add_mainonly(&blocksigs); + /* new thread inherits mask */ pthread_sigmask(SIG_BLOCK, &blocksigs, &oldsigs); frrtrace(1, frr_libfrr, frr_pthread_run, fpt->name); diff --git a/lib/frrcu.c b/lib/frrcu.c index b85c525c58..1e7ed99eff 100644 --- a/lib/frrcu.c +++ b/lib/frrcu.c @@ -42,6 +42,7 @@ #include "frrcu.h" #include "seqlock.h" #include "atomlist.h" +#include "sigevent.h" DEFINE_MTYPE_STATIC(LIB, RCU_THREAD, "RCU thread"); DEFINE_MTYPE_STATIC(LIB, RCU_NEXT, "RCU sequence barrier"); @@ -346,7 +347,19 @@ static void rcu_start(void) */ sigset_t oldsigs, blocksigs; - sigfillset(&blocksigs); + /* technically, the RCU thread is very poorly suited to run even just a + * crashlog handler, since zlog_sigsafe() could deadlock on transiently + * invalid (due to RCU) logging data structures + * + * but given that when we try to write a crashlog, we're already in + * b0rked territory anyway - give the crashlog handler a chance. + * + * (also cf. the SIGALRM usage in writing crashlogs to avoid hung + * processes on any kind of deadlock in crash handlers) + */ + sigemptyset(&blocksigs); + frr_sigset_add_mainonly(&blocksigs); + /* new thread inherits mask */ pthread_sigmask(SIG_BLOCK, &blocksigs, &oldsigs); rcu_active = true; diff --git a/lib/nexthop.c b/lib/nexthop.c index 98b05295b9..90931676e5 100644 --- a/lib/nexthop.c +++ b/lib/nexthop.c @@ -746,68 +746,30 @@ unsigned int nexthop_level(const struct nexthop *nexthop) return rv; } -/* Only hash word-sized things, let cmp do the rest. */ -uint32_t nexthop_hash_quick(const struct nexthop *nexthop) +uint32_t nexthop_hash(const struct nexthop *nexthop) { uint32_t key = 0x45afe398; - int i; - key = jhash_3words(nexthop->type, nexthop->vrf_id, - nexthop->nh_label_type, key); - - if (nexthop->nh_label) { - int labels = nexthop->nh_label->num_labels; + /* type, vrf, ifindex, ip addresses - see nexthop.h */ + key = _nexthop_hash_bytes(nexthop, key); - i = 0; + key = jhash_1word(nexthop->flags & NEXTHOP_FLAGS_HASHED, key); - while (labels >= 3) { - key = jhash_3words(nexthop->nh_label->label[i], - nexthop->nh_label->label[i + 1], - nexthop->nh_label->label[i + 2], - key); - labels -= 3; - i += 3; - } - - if (labels >= 2) { - key = jhash_2words(nexthop->nh_label->label[i], - nexthop->nh_label->label[i + 1], - key); - labels -= 2; - i += 2; - } + if (nexthop->nh_label) { + const struct mpls_label_stack *ls = nexthop->nh_label; - if (labels >= 1) - key = jhash_1word(nexthop->nh_label->label[i], key); + /* num_labels itself isn't useful to hash, if the number of + * labels is different, the hash value will change just due to + * that already. + */ + key = jhash(ls->label, sizeof(ls->label[0]) * ls->num_labels, key); } - key = jhash_2words(nexthop->ifindex, - CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK), - key); - /* Include backup nexthops, if present */ if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { int backups = nexthop->backup_num; - i = 0; - - while (backups >= 3) { - key = jhash_3words(nexthop->backup_idx[i], - nexthop->backup_idx[i + 1], - nexthop->backup_idx[i + 2], key); - backups -= 3; - i += 3; - } - - while (backups >= 2) { - key = jhash_2words(nexthop->backup_idx[i], - nexthop->backup_idx[i + 1], key); - backups -= 2; - i += 2; - } - - if (backups >= 1) - key = jhash_1word(nexthop->backup_idx[i], key); + key = jhash(nexthop->backup_idx, sizeof(nexthop->backup_idx[0]) * backups, key); } if (nexthop->nh_srv6) { @@ -842,31 +804,6 @@ uint32_t nexthop_hash_quick(const struct nexthop *nexthop) return key; } - -#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */ - -/* For a more granular hash */ -uint32_t nexthop_hash(const struct nexthop *nexthop) -{ - uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {}; - /* Get all the quick stuff */ - uint32_t key = nexthop_hash_quick(nexthop); - - assert(((sizeof(nexthop->gate) + sizeof(nexthop->src) - + sizeof(nexthop->rmap_src)) - / 3) - == (GATE_SIZE * sizeof(uint32_t))); - - memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE); - memcpy(gate_src_rmap_raw + GATE_SIZE, &nexthop->src, GATE_SIZE); - memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src, - GATE_SIZE); - - key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key); - - return key; -} - void nexthop_copy_no_recurse(struct nexthop *copy, const struct nexthop *nexthop, struct nexthop *rparent) diff --git a/lib/nexthop.h b/lib/nexthop.h index 02ea4d96f2..12e6424f27 100644 --- a/lib/nexthop.h +++ b/lib/nexthop.h @@ -8,6 +8,7 @@ #ifndef _LIB_NEXTHOP_H #define _LIB_NEXTHOP_H +#include "jhash.h" #include "prefix.h" #include "mpls.h" #include "vxlan.h" @@ -56,15 +57,48 @@ struct nexthop { struct nexthop *next; struct nexthop *prev; - /* - * What vrf is this nexthop associated with? + + /* begin of hashed data - all fields from here onwards are given to + * jhash() as one consecutive chunk. DO NOT create "padding holes". + * DO NOT insert pointers that need to be deep-hashed. + * + * static_assert() below needs to be updated when fields are added */ + char _hash_begin[0]; + + /* see above */ + enum nexthop_types_t type; + + /* What vrf is this nexthop associated with? */ vrf_id_t vrf_id; /* Interface index. */ ifindex_t ifindex; - enum nexthop_types_t type; + /* Type of label(s), if any */ + enum lsp_types_t nh_label_type; + + /* padding: keep 16 byte alignment here */ + + /* Nexthop address + * make sure all 16 bytes for IPv6 are zeroed when putting in an IPv4 + * address since the entire thing is hashed as-is + */ + union { + union g_addr gate; + enum blackhole_type bh_type; + }; + union g_addr src; + union g_addr rmap_src; /* Src is set via routemap */ + + /* end of hashed data - remaining fields in this struct are not + * directly fed into jhash(). Most of them are actually part of the + * hash but have special rules or handling attached. + */ + char _hash_end[0]; + + /* Weight of the nexthop ( for unequal cost ECMP ) */ + uint8_t weight; uint16_t flags; #define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */ @@ -82,18 +116,15 @@ struct nexthop { #define NEXTHOP_FLAG_EVPN (1 << 8) /* nexthop is EVPN */ #define NEXTHOP_FLAG_LINKDOWN (1 << 9) /* is not removed on link down */ + /* which flags are part of nexthop_hash(). Should probably be split + * off into a separate field... + */ +#define NEXTHOP_FLAGS_HASHED NEXTHOP_FLAG_ONLINK + #define NEXTHOP_IS_ACTIVE(flags) \ (CHECK_FLAG(flags, NEXTHOP_FLAG_ACTIVE) \ && !CHECK_FLAG(flags, NEXTHOP_FLAG_DUPLICATE)) - /* Nexthop address */ - union { - union g_addr gate; - enum blackhole_type bh_type; - }; - union g_addr src; - union g_addr rmap_src; /* Src is set via routemap */ - /* Nexthops obtained by recursive resolution. * * If the nexthop struct needs to be resolved recursively, @@ -104,15 +135,9 @@ struct nexthop { /* Recursive parent */ struct nexthop *rparent; - /* Type of label(s), if any */ - enum lsp_types_t nh_label_type; - /* Label(s) associated with this nexthop. */ struct mpls_label_stack *nh_label; - /* Weight of the nexthop ( for unequal cost ECMP ) */ - uint8_t weight; - /* Count and index of corresponding backup nexthop(s) in a backup list; * only meaningful if the HAS_BACKUP flag is set. */ @@ -138,6 +163,29 @@ struct nexthop { struct nexthop_srv6 *nh_srv6; }; +/* all hashed fields (including padding, if it is necessary to add) need to + * be listed in the static_assert below + */ + +#define S(field) sizeof(((struct nexthop *)NULL)->field) + +static_assert( + offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin) == + S(type) + S(vrf_id) + S(ifindex) + S(nh_label_type) + S(gate) + S(src) + S(rmap_src), + "struct nexthop contains padding, this can break things. insert _pad fields at appropriate places"); + +#undef S + +/* this is here to show exactly what is meant by the comments above about + * the hashing + */ +static inline uint32_t _nexthop_hash_bytes(const struct nexthop *nh, uint32_t seed) +{ + return jhash(&nh->_hash_begin, + offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin), + seed); +} + /* Utility to append one nexthop to another. */ #define NEXTHOP_APPEND(to, new) \ do { \ @@ -181,27 +229,11 @@ struct nexthop *nexthop_from_blackhole(enum blackhole_type bh_type, /* * Hash a nexthop. Suitable for use with hash tables. * - * This function uses the following values when computing the hash: - * - vrf_id - * - ifindex - * - type - * - gate - * - * nexthop - * The nexthop to hash - * - * Returns: - * 32-bit hash of nexthop + * Please double check the code on what is included in the hash, there was + * documentation here but it got outdated and the only thing worse than no + * doc is incorrect doc. */ uint32_t nexthop_hash(const struct nexthop *nexthop); -/* - * Hash a nexthop only on word-sized attributes: - * - vrf_id - * - ifindex - * - type - * - (some) flags - */ -uint32_t nexthop_hash_quick(const struct nexthop *nexthop); extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2); extern bool nexthop_same_no_labels(const struct nexthop *nh1, diff --git a/lib/sigevent.h b/lib/sigevent.h index 0b07f594c1..2c51ba3767 100644 --- a/lib/sigevent.h +++ b/lib/sigevent.h @@ -45,6 +45,33 @@ bool frr_sigevent_check(sigset_t *setp); /* check whether there are signals to handle, process any found */ extern int frr_sigevent_process(void); +/* Ensure we don't handle "application-type" signals on a secondary thread by + * blocking these signals when creating threads + * + * NB: SIGSEGV, SIGABRT, etc. must be allowed on all threads or we get no + * crashlogs. Since signals vary a little bit between platforms, below is a + * list of known things to go to the main thread. Any unknown signals should + * stay thread-local. + */ +static inline void frr_sigset_add_mainonly(sigset_t *blocksigs) +{ + /* signals we actively handle */ + sigaddset(blocksigs, SIGHUP); + sigaddset(blocksigs, SIGINT); + sigaddset(blocksigs, SIGTERM); + sigaddset(blocksigs, SIGUSR1); + + /* signals we don't actively use but that semantically belong */ + sigaddset(blocksigs, SIGUSR2); + sigaddset(blocksigs, SIGQUIT); + sigaddset(blocksigs, SIGCHLD); + sigaddset(blocksigs, SIGPIPE); + sigaddset(blocksigs, SIGTSTP); + sigaddset(blocksigs, SIGTTIN); + sigaddset(blocksigs, SIGTTOU); + sigaddset(blocksigs, SIGWINCH); +} + #ifdef __cplusplus } #endif diff --git a/lib/zclient.c b/lib/zclient.c index 0e832f0d8f..b5a8244eaf 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -2180,7 +2180,27 @@ struct nexthop *nexthop_from_zapi_nexthop(const struct zapi_nexthop *znh) n->type = znh->type; n->vrf_id = znh->vrf_id; n->ifindex = znh->ifindex; - n->gate = znh->gate; + + /* only copy values that have meaning - make sure "spare bytes" are + * left zeroed for hashing (look at _nexthop_hash_bytes) + */ + switch (znh->type) { + case NEXTHOP_TYPE_BLACKHOLE: + n->bh_type = znh->bh_type; + break; + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + n->gate.ipv4 = znh->gate.ipv4; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + n->gate.ipv6 = znh->gate.ipv6; + break; + case NEXTHOP_TYPE_IFINDEX: + /* nothing, ifindex is always copied */ + break; + } + n->srte_color = znh->srte_color; n->weight = znh->weight; diff --git a/nhrpd/netlink_arp.c b/nhrpd/netlink_arp.c index be909c862a..564e04deba 100644 --- a/nhrpd/netlink_arp.c +++ b/nhrpd/netlink_arp.c @@ -184,7 +184,7 @@ int nhrp_neighbor_operation(ZAPI_CALLBACK_ARGS) : (cmd == ZEBRA_NEIGH_ADDED) ? "new-neigh" : "del-neigh", &addr, ifp->name, &lladdr, ndm_state, c->used, c->cur.type); - if (cmd == ZEBRA_NEIGH_GET) { + if (cmd == ZEBRA_NEIGH_GET && ndm_state != ZEBRA_NEIGH_STATE_INCOMPLETE) { if (c->cur.type >= NHRP_CACHE_CACHED) { nhrp_cache_set_used(c, 1); debugf(NHRP_DEBUG_KERNEL, diff --git a/pbrd/pbr_nht.c b/pbrd/pbr_nht.c index ff252f8505..d5cee5f3e4 100644 --- a/pbrd/pbr_nht.c +++ b/pbrd/pbr_nht.c @@ -493,7 +493,7 @@ void pbr_nht_change_group(const char *name) } for (ALL_NEXTHOPS(nhgc->nhg, nhop)) { - struct pbr_nexthop_cache lookup; + struct pbr_nexthop_cache lookup = {}; struct pbr_nexthop_cache *pnhc; lookup.nexthop = *nhop; @@ -565,7 +565,7 @@ void pbr_nht_add_individual_nexthop(struct pbr_map_sequence *pbrms, struct pbr_nexthop_group_cache *pnhgc; struct pbr_nexthop_group_cache find; struct pbr_nexthop_cache *pnhc; - struct pbr_nexthop_cache lookup; + struct pbr_nexthop_cache lookup = {}; struct nexthop *nh; char buf[PBR_NHC_NAMELEN]; @@ -624,7 +624,7 @@ static void pbr_nht_release_individual_nexthop(struct pbr_map_sequence *pbrms) struct pbr_nexthop_group_cache *pnhgc; struct pbr_nexthop_group_cache find; struct pbr_nexthop_cache *pnhc; - struct pbr_nexthop_cache lup; + struct pbr_nexthop_cache lup = {}; struct nexthop *nh; enum nexthop_types_t nh_type = 0; @@ -690,7 +690,7 @@ struct pbr_nexthop_group_cache *pbr_nht_add_group(const char *name) DEBUGD(&pbr_dbg_nht, "%s: Retrieved NHGC @ %p", __func__, pnhgc); for (ALL_NEXTHOPS(nhgc->nhg, nhop)) { - struct pbr_nexthop_cache lookupc; + struct pbr_nexthop_cache lookupc = {}; struct pbr_nexthop_cache *pnhc; lookupc.nexthop = *nhop; diff --git a/pbrd/pbr_vty.c b/pbrd/pbr_vty.c index 08fe56c7bb..aa98913571 100644 --- a/pbrd/pbr_vty.c +++ b/pbrd/pbr_vty.c @@ -1488,7 +1488,7 @@ pbrms_nexthop_group_write_individual_nexthop( { struct pbr_nexthop_group_cache find; struct pbr_nexthop_group_cache *pnhgc; - struct pbr_nexthop_cache lookup; + struct pbr_nexthop_cache lookup = {}; struct pbr_nexthop_cache *pnhc; memset(&find, 0, sizeof(find)); diff --git a/pimd/pim_ifchannel.c b/pimd/pim_ifchannel.c index 8f9e41039a..3ef2ccd83e 100644 --- a/pimd/pim_ifchannel.c +++ b/pimd/pim_ifchannel.c @@ -233,10 +233,16 @@ void pim_ifchannel_delete_all(struct interface *ifp) void delete_on_noinfo(struct pim_ifchannel *ch) { - if (ch->local_ifmembership == PIM_IFMEMBERSHIP_NOINFO - && ch->ifjoin_state == PIM_IFJOIN_NOINFO - && ch->t_ifjoin_expiry_timer == NULL) + struct pim_upstream *up = ch->upstream; + /* + * (S,G) with no active traffic, KAT expires, PPT expries, + * channel state is NoInfo + */ + if (ch->local_ifmembership == PIM_IFMEMBERSHIP_NOINFO && + ch->ifjoin_state == PIM_IFJOIN_NOINFO && + (ch->t_ifjoin_expiry_timer == NULL || (up && !pim_upstream_is_kat_running(up)))) { pim_ifchannel_delete(ch); + } } void pim_ifchannel_ifjoin_switch(const char *caller, struct pim_ifchannel *ch, diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index 7417f31137..ddd9fe1ff0 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -304,7 +304,7 @@ static void on_join_timer(struct event *t) } /* - * In the case of a HFR we will not ahve anyone to send this to. + * In the case of a FHR we will not ahve anyone to send this to. */ if (PIM_UPSTREAM_FLAG_TEST_FHR(up->flags)) return; diff --git a/tests/topotests/v6_nexthop_group_recursive_resolution/r1/frr.conf b/tests/topotests/v6_nexthop_group_recursive_resolution/r1/frr.conf new file mode 100644 index 0000000000..f4da11af06 --- /dev/null +++ b/tests/topotests/v6_nexthop_group_recursive_resolution/r1/frr.conf @@ -0,0 +1,4 @@ +int r1-eth0 + ipv6 address fc00::1/64 + +ipv6 route 1::1/128 fc00::2 diff --git a/tests/topotests/v6_nexthop_group_recursive_resolution/test_v6_nexthop_group_recursive_resolution.py b/tests/topotests/v6_nexthop_group_recursive_resolution/test_v6_nexthop_group_recursive_resolution.py new file mode 100644 index 0000000000..587a951c85 --- /dev/null +++ b/tests/topotests/v6_nexthop_group_recursive_resolution/test_v6_nexthop_group_recursive_resolution.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: ISC + +# +# Copyright (c) 2024 by Nvidia Corporation +# Donald Sharp +# + +""" +Check that the v6 nexthop recursive resolution works when it changes +""" + +import os +import sys +import json +import pytest +import functools + +CWD = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(CWD, "../")) + +# pylint: disable=C0413 +from lib import topotest +from lib.topogen import Topogen, TopoRouter, get_topogen +from lib.common_config import step + +pytestmark = [pytest.mark.staticd] + + +def build_topo(tgen): + + tgen.add_router("r1") + + switch = tgen.add_switch("s1") + switch.add_link(tgen.gears["r1"]) + + +def setup_module(mod): + tgen = Topogen(build_topo, mod.__name__) + tgen.start_topology() + + router_list = tgen.routers() + + for rname, router in router_list.items(): + router.load_frr_config(os.path.join(CWD, "{}/frr.conf".format(rname)), + [(TopoRouter.RD_MGMTD, None), + (TopoRouter.RD_ZEBRA, None), + (TopoRouter.RD_STATIC, None), + (TopoRouter.RD_SHARP, None)]) + + tgen.start_router() + + +def teardown_module(mod): + tgen = get_topogen() + tgen.stop_topology() + + +def test_recursive_v6_nexthop_generation(): + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + step("Testing v6 nexthop resolution") + + #assert False + router = tgen.gears["r1"] + + def _v6_converge_1_1_initial(): + output = json.loads( + router.vtysh_cmd("show ipv6 route 1::1 json")) + + expected = { + "1::1/128":[ + { + "prefix":"1::1/128", + "prefixLen":128, + "protocol":"static", + "vrfName":"default", + "selected":True, + "destSelected":True, + "distance":1, + "metric":0, + "installed":True, + "table":254, + "nexthops":[ + { + "fib":True, + "ip":"fc00::2", + "afi":"ipv6", + "interfaceName":"r1-eth0", + "active":True, + "weight":1 + } + ] + } + ] + } + + return topotest.json_cmp(output, expected) + + test_func = functools.partial(_v6_converge_1_1_initial) + _, result = topotest.run_and_expect(test_func, None, count=60, wait=0.5) + assert result is None, "Failed to install v6 1::1 route" + + router.vtysh_cmd("sharp install routes 2::2 nexthop 1::1 1") + router.vtysh_cmd("conf\nipv6 route 1::1/128 fc00::3\nno ipv6 route 1::1/128 fc00::2") + + def _v6_converge_1_1_post(): + output = json.loads( + router.vtysh_cmd("show ipv6 route 1::1 json")) + + expected = { + "1::1/128":[ + { + "prefix":"1::1/128", + "prefixLen":128, + "protocol":"static", + "vrfName":"default", + "selected":True, + "destSelected":True, + "distance":1, + "metric":0, + "installed":True, + "table":254, + "nexthops":[ + { + "fib":True, + "ip":"fc00::3", + "afi":"ipv6", + "interfaceName":"r1-eth0", + "active":True, + "weight":1 + } + ] + } + ] + } + + return topotest.json_cmp(output, expected) + + test_func = functools.partial(_v6_converge_1_1_post) + _, result = topotest.run_and_expect(test_func, None, count=60, wait=0.5) + assert result is None, "Failed to change v6 1::1 route" + + router.vtysh_cmd("sharp install routes 2::2 nexthop 1::1 1") + + def _v6_change_2_2_post(): + output = json.loads( + router.vtysh_cmd("show ipv6 route 2::2 json")) + + expected = { + "2::2/128":[ + { + "prefix":"2::2/128", + "prefixLen":128, + "protocol":"sharp", + "vrfName":"default", + "selected":True, + "destSelected":True, + "distance":150, + "metric":0, + "installed":True, + "table":254, + "nexthops":[ + { + "fib":True, + "ip":"fc00::3", + "afi":"ipv6", + "interfaceName":"r1-eth0", + "active":True, + "weight":1 + } + ] + } + ] + } + + return topotest.json_cmp(output, expected) + + test_func = functools.partial(_v6_change_2_2_post) + _, result = topotest.run_and_expect(test_func, None, count=60, wait=0.5) + assert result is None, "Failed to see sharpd route correctly" + + +if __name__ == "__main__": + args = ["-s"] + sys.argv[1:] + sys.exit(pytest.main(args)) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index d1237ec7f8..252b0ce7c7 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -572,8 +572,7 @@ bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) /* Nexthops should be in-order, so we simply compare them in-place */ for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop; nexthop1 && nexthop2; - nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { - + nexthop1 = nexthop_next(nexthop1), nexthop2 = nexthop_next(nexthop2)) { if (!nhg_compare_nexthops(nexthop1, nexthop2)) return false; } @@ -608,8 +607,7 @@ bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop, nexthop2 = nhe2->backup_info->nhe->nhg.nexthop; nexthop1 && nexthop2; - nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { - + nexthop1 = nexthop_next(nexthop1), nexthop2 = nexthop_next(nexthop2)) { if (!nhg_compare_nexthops(nexthop1, nexthop2)) return false; } diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c index 46afbcecfa..3acd53496d 100644 --- a/zebra/zebra_routemap.c +++ b/zebra/zebra_routemap.c @@ -964,10 +964,11 @@ route_set_src(void *rule, const struct prefix *prefix, void *object) /* set src compilation. */ static void *route_set_src_compile(const char *arg) { - union g_addr src, *psrc; + union g_addr src = {}, *psrc; - if ((inet_pton(AF_INET6, arg, &src.ipv6) == 1) - || (inet_pton(AF_INET, arg, &src.ipv4) == 1)) { + /* IPv4 first, to ensure no garbage in the 12 unused bytes */ + if ((inet_pton(AF_INET, arg, &src.ipv4) == 1) || + (inet_pton(AF_INET6, arg, &src.ipv6) == 1)) { psrc = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(union g_addr)); *psrc = src; return psrc; |
