diff options
Diffstat (limited to 'zebra')
37 files changed, 2018 insertions, 336 deletions
diff --git a/zebra/connected.c b/zebra/connected.c index 70ea2e3805..c885c533e6 100644 --- a/zebra/connected.c +++ b/zebra/connected.c @@ -402,10 +402,10 @@ void connected_down(struct interface *ifp, struct connected *ifc) * head. */ rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, - 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false, true); + 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); rib_delete(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, - 0, 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false, true); + 0, 0, &p, NULL, &nh, 0, zvrf->table_id, 0, 0, false); /* Schedule LSP forwarding entries for processing, if appropriate. */ if (zvrf->vrf->vrf_id == VRF_DEFAULT) { diff --git a/zebra/debug_nl.c b/zebra/debug_nl.c new file mode 100644 index 0000000000..842579f89e --- /dev/null +++ b/zebra/debug_nl.c @@ -0,0 +1,1246 @@ +/* + * Copyright (c) 2018 Rafael Zalamena + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <zebra.h> + +#if defined(HAVE_NETLINK) && defined(NETLINK_DEBUG) + +#include <sys/socket.h> + +#include <linux/netlink.h> +#include <linux/nexthop.h> +#include <linux/rtnetlink.h> +#include <net/if_arp.h> + +#include <stdio.h> +#include <stdint.h> + +#include "zebra/rt_netlink.h" + +const char *nlmsg_type2str(uint16_t type) +{ + switch (type) { + /* Generic */ + case NLMSG_NOOP: + return "NOOP"; + case NLMSG_ERROR: + return "ERROR"; + case NLMSG_DONE: + return "DONE"; + case NLMSG_OVERRUN: + return "OVERRUN"; + + /* RTM */ + case RTM_NEWLINK: + return "NEWLINK"; + case RTM_DELLINK: + return "DELLINK"; + case RTM_GETLINK: + return "GETLINK"; + case RTM_SETLINK: + return "SETLINK"; + + case RTM_NEWADDR: + return "NEWADDR"; + case RTM_DELADDR: + return "DELADDR"; + case RTM_GETADDR: + return "GETADDR"; + + case RTM_NEWROUTE: + return "NEWROUTE"; + case RTM_DELROUTE: + return "DELROUTE"; + case RTM_GETROUTE: + return "GETROUTE"; + + case RTM_NEWNEIGH: + return "NEWNEIGH"; + case RTM_DELNEIGH: + return "DELNEIGH"; + case RTM_GETNEIGH: + return "GETNEIGH"; + + case RTM_NEWRULE: + return "NEWRULE"; + case RTM_DELRULE: + return "DELRULE"; + case RTM_GETRULE: + return "GETRULE"; + + case RTM_NEWNEXTHOP: + return "NEWNEXTHOP"; + case RTM_DELNEXTHOP: + return "DELNEXTHOP"; + case RTM_GETNEXTHOP: + return "GETNEXTHOP"; + + default: + return "UNKNOWN"; + } +} + +const char *af_type2str(int type) +{ + switch (type) { + case AF_UNSPEC: + return "AF_UNSPEC"; + case AF_UNIX: + return "AF_UNIX"; + case AF_INET: + return "AF_INET"; + case AF_INET6: + return "AF_INET6"; + case AF_BRIDGE: + return "AF_BRIDGE"; + case AF_NETLINK: + return "AF_NETLINK"; +#ifdef AF_MPLS + case AF_MPLS: + return "AF_MPLS"; +#endif /* AF_MPLS */ + case AF_BLUETOOTH: + return "AF_BLUETOOTH"; + case AF_VSOCK: + return "AF_VSOCK"; + case AF_KEY: + return "AF_KEY"; + case AF_PACKET: + return "AF_PACKET"; + default: + return "UNKNOWN"; + } +} + +const char *ifi_type2str(int type) +{ + switch 
(type) { + case ARPHRD_ETHER: + return "ETHER"; + case ARPHRD_EETHER: + return "EETHER"; + case ARPHRD_NETROM: + return "NETROM"; + case ARPHRD_AX25: + return "AX25"; + case ARPHRD_PRONET: + return "PRONET"; + case ARPHRD_CHAOS: + return "CHAOS"; + case ARPHRD_IEEE802: + return "IEEE802"; + case ARPHRD_ARCNET: + return "ARCNET"; + case ARPHRD_APPLETLK: + return "APPLETLK"; + case ARPHRD_DLCI: + return "DLCI"; + case ARPHRD_ATM: + return "ATM"; + case ARPHRD_METRICOM: + return "METRICOM"; + case ARPHRD_IEEE1394: + return "IEEE1394"; + case ARPHRD_EUI64: + return "EUI64"; + case ARPHRD_INFINIBAND: + return "INFINIBAND"; + case ARPHRD_SLIP: + return "SLIP"; + case ARPHRD_CSLIP: + return "CSLIP"; + case ARPHRD_SLIP6: + return "SLIP6"; + case ARPHRD_CSLIP6: + return "CSLIP6"; + case ARPHRD_RSRVD: + return "RSRVD"; + case ARPHRD_ADAPT: + return "ADAPT"; + case ARPHRD_ROSE: + return "ROSE"; + case ARPHRD_X25: + return "X25"; + case ARPHRD_PPP: + return "PPP"; + case ARPHRD_HDLC: + return "HDLC"; + case ARPHRD_LAPB: + return "LAPB"; + case ARPHRD_DDCMP: + return "DDCMP"; + case ARPHRD_RAWHDLC: + return "RAWHDLC"; + case ARPHRD_TUNNEL: + return "TUNNEL"; + case ARPHRD_TUNNEL6: + return "TUNNEL6"; + case ARPHRD_FRAD: + return "FRAD"; + case ARPHRD_SKIP: + return "SKIP"; + case ARPHRD_LOOPBACK: + return "LOOPBACK"; + case ARPHRD_LOCALTLK: + return "LOCALTLK"; + case ARPHRD_FDDI: + return "FDDI"; + case ARPHRD_BIF: + return "BIF"; + case ARPHRD_SIT: + return "SIT"; + case ARPHRD_IPDDP: + return "IPDDP"; + case ARPHRD_IPGRE: + return "IPGRE"; + case ARPHRD_PIMREG: + return "PIMREG"; + case ARPHRD_HIPPI: + return "HIPPI"; + case ARPHRD_ASH: + return "ASH"; + case ARPHRD_ECONET: + return "ECONET"; + case ARPHRD_IRDA: + return "IRDA"; + case ARPHRD_FCPP: + return "FCPP"; + case ARPHRD_FCAL: + return "FCAL"; + case ARPHRD_FCPL: + return "FCPL"; + case ARPHRD_FCFABRIC: + return "FCFABRIC"; + case ARPHRD_IEEE802_TR: + return "IEEE802_TR"; + case ARPHRD_IEEE80211: + return 
"IEEE80211"; + case ARPHRD_IEEE80211_PRISM: + return "IEEE80211_PRISM"; + case ARPHRD_IEEE80211_RADIOTAP: + return "IEEE80211_RADIOTAP"; + case ARPHRD_IEEE802154: + return "IEEE802154"; +#ifdef ARPHRD_VSOCKMON + case ARPHRD_VSOCKMON: + return "VSOCKMON"; +#endif /* ARPHRD_VSOCKMON */ + case ARPHRD_VOID: + return "VOID"; + case ARPHRD_NONE: + return "NONE"; + default: + return "UNKNOWN"; + } +} + +const char *rta_type2str(int type) +{ + switch (type) { + case IFLA_UNSPEC: + return "UNSPEC"; + case IFLA_ADDRESS: + return "ADDRESS"; + case IFLA_BROADCAST: + return "BROADCAST"; + case IFLA_IFNAME: + return "IFNAME"; + case IFLA_MTU: + return "MTU"; + case IFLA_LINK: + return "LINK"; + case IFLA_QDISC: + return "QDISC"; + case IFLA_STATS: + return "STATS"; + case IFLA_COST: + return "COST"; + case IFLA_PRIORITY: + return "PRIORITY"; + case IFLA_MASTER: + return "MASTER"; + case IFLA_WIRELESS: + return "WIRELESS"; + case IFLA_PROTINFO: + return "PROTINFO"; + case IFLA_TXQLEN: + return "TXQLEN"; + case IFLA_MAP: + return "MAP"; + case IFLA_WEIGHT: + return "WEIGHT"; + case IFLA_OPERSTATE: + return "OPERSTATE"; + case IFLA_LINKMODE: + return "LINKMODE"; + case IFLA_LINKINFO: + return "LINKINFO"; + case IFLA_NET_NS_PID: + return "NET_NS_PID"; + case IFLA_IFALIAS: + return "IFALIAS"; + case IFLA_NUM_VF: + return "NUM_VF"; + case IFLA_VFINFO_LIST: + return "VFINFO_LIST"; + case IFLA_STATS64: + return "STATS64"; + case IFLA_VF_PORTS: + return "VF_PORTS"; + case IFLA_PORT_SELF: + return "PORT_SELF"; + case IFLA_AF_SPEC: + return "AF_SPEC"; + case IFLA_GROUP: + return "GROUP"; + case IFLA_NET_NS_FD: + return "NET_NS_FD"; + case IFLA_EXT_MASK: + return "EXT_MASK"; + case IFLA_PROMISCUITY: + return "PROMISCUITY"; + case IFLA_NUM_TX_QUEUES: + return "NUM_TX_QUEUES"; + case IFLA_NUM_RX_QUEUES: + return "NUM_RX_QUEUES"; + case IFLA_CARRIER: + return "CARRIER"; + case IFLA_PHYS_PORT_ID: + return "PHYS_PORT_ID"; + case IFLA_CARRIER_CHANGES: + return "CARRIER_CHANGES"; + case 
IFLA_PHYS_SWITCH_ID: + return "PHYS_SWITCH_ID"; + case IFLA_LINK_NETNSID: + return "LINK_NETNSID"; + case IFLA_PHYS_PORT_NAME: + return "PHYS_PORT_NAME"; + case IFLA_PROTO_DOWN: + return "PROTO_DOWN"; +#ifdef IFLA_GSO_MAX_SEGS + case IFLA_GSO_MAX_SEGS: + return "GSO_MAX_SEGS"; +#endif /* IFLA_GSO_MAX_SEGS */ +#ifdef IFLA_GSO_MAX_SIZE + case IFLA_GSO_MAX_SIZE: + return "GSO_MAX_SIZE"; +#endif /* IFLA_GSO_MAX_SIZE */ +#ifdef IFLA_PAD + case IFLA_PAD: + return "PAD"; +#endif /* IFLA_PAD */ +#ifdef IFLA_XDP + case IFLA_XDP: + return "XDP"; +#endif /* IFLA_XDP */ +#ifdef IFLA_EVENT + case IFLA_EVENT: + return "EVENT"; +#endif /* IFLA_EVENT */ + default: + return "UNKNOWN"; + } +} + +const char *rtm_type2str(int type) +{ + switch (type) { + case RTN_UNSPEC: + return "UNSPEC"; + case RTN_UNICAST: + return "UNICAST"; + case RTN_LOCAL: + return "LOCAL"; + case RTN_BROADCAST: + return "BROADCAST"; + case RTN_ANYCAST: + return "ANYCAST"; + case RTN_MULTICAST: + return "MULTICAST"; + case RTN_BLACKHOLE: + return "BLACKHOLE"; + case RTN_UNREACHABLE: + return "UNREACHABLE"; + case RTN_PROHIBIT: + return "PROHIBIT"; + case RTN_THROW: + return "THROW"; + case RTN_NAT: + return "NAT"; + case RTN_XRESOLVE: + return "XRESOLVE"; + default: + return "UNKNOWN"; + } +} + +const char *rtm_protocol2str(int type) +{ + switch (type) { + case RTPROT_UNSPEC: + return "UNSPEC"; + case RTPROT_REDIRECT: + return "REDIRECT"; + case RTPROT_KERNEL: + return "KERNEL"; + case RTPROT_BOOT: + return "BOOT"; + case RTPROT_STATIC: + return "STATIC"; + case RTPROT_GATED: + return "GATED"; + case RTPROT_RA: + return "RA"; + case RTPROT_MRT: + return "MRT"; + case RTPROT_ZEBRA: + return "ZEBRA"; + case RTPROT_BIRD: + return "BIRD"; + case RTPROT_DNROUTED: + return "DNROUTED"; + case RTPROT_XORP: + return "XORP"; + case RTPROT_NTK: + return "NTK"; + case RTPROT_DHCP: + return "DHCP"; + case RTPROT_MROUTED: + return "MROUTED"; + case RTPROT_BABEL: + return "BABEL"; + default: + return "UNKNOWN"; + } +} + 
+const char *rtm_scope2str(int type) +{ + switch (type) { + case RT_SCOPE_UNIVERSE: + return "UNIVERSE"; + case RT_SCOPE_SITE: + return "SITE"; + case RT_SCOPE_LINK: + return "LINK"; + case RT_SCOPE_HOST: + return "HOST"; + case RT_SCOPE_NOWHERE: + return "NOWHERE"; + default: + return "UNKNOWN"; + } +} + +const char *rtm_rta2str(int type) +{ + switch (type) { + case RTA_UNSPEC: + return "UNSPEC"; + case RTA_DST: + return "DST"; + case RTA_SRC: + return "SRC"; + case RTA_IIF: + return "IIF"; + case RTA_OIF: + return "OIF"; + case RTA_GATEWAY: + return "GATEWAY"; + case RTA_PRIORITY: + return "PRIORITY"; + case RTA_PREF: + return "PREF"; + case RTA_PREFSRC: + return "PREFSRC"; + case RTA_MARK: + return "MARK"; + case RTA_METRICS: + return "METRICS"; + case RTA_MULTIPATH: + return "MULTIPATH"; + case RTA_PROTOINFO: + return "PROTOINFO"; + case RTA_FLOW: + return "FLOW"; + case RTA_CACHEINFO: + return "CACHEINFO"; + case RTA_TABLE: + return "TABLE"; + case RTA_MFC_STATS: + return "MFC_STATS"; + case RTA_NH_ID: + return "NH_ID"; + default: + return "UNKNOWN"; + } +} + +const char *neigh_rta2str(int type) +{ + switch (type) { + case NDA_UNSPEC: + return "UNSPEC"; + case NDA_DST: + return "DST"; + case NDA_LLADDR: + return "LLADDR"; + case NDA_CACHEINFO: + return "CACHEINFO"; + case NDA_PROBES: + return "PROBES"; + case NDA_VLAN: + return "VLAN"; + case NDA_PORT: + return "PORT"; + case NDA_VNI: + return "VNI"; + case NDA_IFINDEX: + return "IFINDEX"; + case NDA_MASTER: + return "MASTER"; + case NDA_LINK_NETNSID: + return "LINK_NETNSID"; + default: + return "UNKNOWN"; + } +} + +const char *ifa_rta2str(int type) +{ + switch (type) { + case IFA_UNSPEC: + return "UNSPEC"; + case IFA_ADDRESS: + return "ADDRESS"; + case IFA_LOCAL: + return "LOCAL"; + case IFA_LABEL: + return "LABEL"; + case IFA_BROADCAST: + return "BROADCAST"; + case IFA_ANYCAST: + return "ANYCAST"; + case IFA_CACHEINFO: + return "CACHEINFO"; + case IFA_MULTICAST: + return "MULTICAST"; + case IFA_FLAGS: + 
return "FLAGS"; + default: + return "UNKNOWN"; + } +} + +const char *nhm_rta2str(int type) +{ + switch (type) { + case NHA_UNSPEC: + return "UNSPEC"; + case NHA_ID: + return "ID"; + case NHA_GROUP: + return "GROUP"; + case NHA_GROUP_TYPE: + return "GROUP_TYPE"; + case NHA_BLACKHOLE: + return "BLACKHOLE"; + case NHA_OIF: + return "OIF"; + case NHA_GATEWAY: + return "GATEWAY"; + case NHA_ENCAP_TYPE: + return "ENCAP_TYPE"; + case NHA_ENCAP: + return "ENCAP"; + case NHA_GROUPS: + return "GROUPS"; + case NHA_MASTER: + return "MASTER"; + default: + return "UNKNOWN"; + } +} + +static inline void flag_write(int flags, int flag, const char *flagstr, + char *buf, size_t buflen) +{ + if (CHECK_FLAG(flags, flag) == 0) + return; + + if (buf[0]) + strlcat(buf, ",", buflen); + + strlcat(buf, flagstr, buflen); +} + +const char *nlmsg_flags2str(uint16_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + /* Specific flags. */ + flag_write(flags, NLM_F_REQUEST, "REQUEST", buf, buflen); + flag_write(flags, NLM_F_MULTI, "MULTI", buf, buflen); + flag_write(flags, NLM_F_ACK, "ACK", buf, buflen); + flag_write(flags, NLM_F_ECHO, "ECHO", buf, buflen); + flag_write(flags, NLM_F_DUMP, "DUMP", buf, buflen); + + /* Netlink family type dependent. 
*/ + flag_write(flags, 0x0100, "(ROOT|REPLACE|CAPPED)", buf, buflen); + flag_write(flags, 0x0200, "(MATCH|EXCLUDE|ACK_TLVS)", buf, buflen); + flag_write(flags, 0x0400, "(ATOMIC|CREATE)", buf, buflen); + flag_write(flags, 0x0800, "(DUMP|APPEND)", buf, buflen); + + return (bufp); +} + +const char *if_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, IFF_UP, "UP", buf, buflen); + flag_write(flags, IFF_BROADCAST, "BROADCAST", buf, buflen); + flag_write(flags, IFF_DEBUG, "DEBUG", buf, buflen); + flag_write(flags, IFF_LOOPBACK, "LOOPBACK", buf, buflen); + flag_write(flags, IFF_POINTOPOINT, "POINTOPOINT", buf, buflen); + flag_write(flags, IFF_NOTRAILERS, "NOTRAILERS", buf, buflen); + flag_write(flags, IFF_RUNNING, "RUNNING", buf, buflen); + flag_write(flags, IFF_NOARP, "NOARP", buf, buflen); + flag_write(flags, IFF_PROMISC, "PROMISC", buf, buflen); + flag_write(flags, IFF_ALLMULTI, "ALLMULTI", buf, buflen); + flag_write(flags, IFF_MASTER, "MASTER", buf, buflen); + flag_write(flags, IFF_SLAVE, "SLAVE", buf, buflen); + flag_write(flags, IFF_MULTICAST, "MULTICAST", buf, buflen); + flag_write(flags, IFF_PORTSEL, "PORTSEL", buf, buflen); + flag_write(flags, IFF_AUTOMEDIA, "AUTOMEDIA", buf, buflen); + flag_write(flags, IFF_DYNAMIC, "DYNAMIC", buf, buflen); + + return (bufp); +} + +const char *rtm_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, RTM_F_NOTIFY, "NOTIFY", buf, buflen); + flag_write(flags, RTM_F_CLONED, "CLONED", buf, buflen); + flag_write(flags, RTM_F_EQUALIZE, "EQUALIZE", buf, buflen); + + return (bufp); +} + +const char *neigh_state2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, NUD_INCOMPLETE, "INCOMPLETE", buf, buflen); + flag_write(flags, NUD_REACHABLE, "REACHABLE", buf, buflen); + flag_write(flags, NUD_STALE, "STALE", buf, buflen); + flag_write(flags, NUD_DELAY, 
"DELAY", buf, buflen); + flag_write(flags, NUD_PROBE, "PROBE", buf, buflen); + flag_write(flags, NUD_FAILED, "FAILED", buf, buflen); + flag_write(flags, NUD_NOARP, "NOARP", buf, buflen); + flag_write(flags, NUD_PERMANENT, "PERMANENT", buf, buflen); + + return (bufp); +} + +const char *neigh_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, NTF_USE, "USE", buf, buflen); + flag_write(flags, NTF_SELF, "SELF", buf, buflen); + flag_write(flags, NTF_MASTER, "MASTER", buf, buflen); + flag_write(flags, NTF_PROXY, "PROXY", buf, buflen); + flag_write(flags, NTF_EXT_LEARNED, "EXT_LEARNED", buf, buflen); +#ifdef NTF_OFFLOADED + flag_write(flags, NTF_OFFLOADED, "OFFLOADED", buf, buflen); +#endif /* NTF_OFFLOADED */ + flag_write(flags, NTF_ROUTER, "ROUTER", buf, buflen); + + return (bufp); +} + +const char *ifa_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, IFA_F_SECONDARY, "SECONDARY", buf, buflen); + flag_write(flags, IFA_F_NODAD, "NODAD", buf, buflen); + flag_write(flags, IFA_F_OPTIMISTIC, "OPTIMISTIC", buf, buflen); + flag_write(flags, IFA_F_DADFAILED, "DADFAILED", buf, buflen); + flag_write(flags, IFA_F_HOMEADDRESS, "HOMEADDRESS", buf, buflen); + flag_write(flags, IFA_F_DEPRECATED, "DEPRECATED", buf, buflen); + flag_write(flags, IFA_F_TENTATIVE, "TENTATIVE", buf, buflen); + flag_write(flags, IFA_F_PERMANENT, "PERMANENT", buf, buflen); + flag_write(flags, IFA_F_MANAGETEMPADDR, "MANAGETEMPADDR", buf, buflen); + flag_write(flags, IFA_F_NOPREFIXROUTE, "NOPREFIXROUTE", buf, buflen); + flag_write(flags, IFA_F_MCAUTOJOIN, "MCAUTOJOIN", buf, buflen); + flag_write(flags, IFA_F_STABLE_PRIVACY, "STABLE_PRIVACY", buf, buflen); + + return (bufp); +} + +const char *nh_flags2str(uint32_t flags, char *buf, size_t buflen) +{ + const char *bufp = buf; + + *buf = 0; + flag_write(flags, RTNH_F_DEAD, "DEAD", buf, buflen); + flag_write(flags, RTNH_F_PERVASIVE, 
"PERVASIVE", buf, buflen);
+	flag_write(flags, RTNH_F_ONLINK, "ONLINK", buf, buflen);
+	flag_write(flags, RTNH_F_OFFLOAD, "OFFLOAD", buf, buflen);
+	flag_write(flags, RTNH_F_LINKDOWN, "LINKDOWN", buf, buflen);
+	flag_write(flags, RTNH_F_UNRESOLVED, "UNRESOLVED", buf, buflen);
+
+	return (bufp);
+}
+
+/*
+ * Netlink abstractions.
+ */
+
+/*
+ * Name an attribute nested inside IFLA_LINKINFO.
+ *
+ * These attributes are numbered in their own IFLA_INFO_* space, so
+ * decoding them with rta_type2str() (IFLA_* space) prints the wrong
+ * name.
+ */
+static const char *linkinfo_type2str(int type)
+{
+	switch (type) {
+	case IFLA_INFO_UNSPEC:
+		return "INFO_UNSPEC";
+	case IFLA_INFO_KIND:
+		return "INFO_KIND";
+	case IFLA_INFO_DATA:
+		return "INFO_DATA";
+	case IFLA_INFO_XSTATS:
+		return "INFO_XSTATS";
+	case IFLA_INFO_SLAVE_KIND:
+		return "INFO_SLAVE_KIND";
+	case IFLA_INFO_SLAVE_DATA:
+		return "INFO_SLAVE_DATA";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/*
+ * Dump the attributes nested inside an IFLA_LINKINFO attribute.
+ *
+ * rta: first nested attribute.
+ * msglen: number of bytes that may be read starting at `rta`.
+ */
+static void nllink_linkinfo_dump(struct rtattr *rta, size_t msglen)
+{
+	size_t plen;
+	char dbuf[128];
+
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" linkinfo [len=%d (payload=%zu) type=(%d) %s]",
+		   rta->rta_len, plen, rta->rta_type,
+		   linkinfo_type2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case IFLA_INFO_KIND:
+	case IFLA_INFO_SLAVE_KIND:
+		if (plen == 0) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/*
+		 * Bound the copy by the payload length: nothing
+		 * guarantees the string is NUL terminated.
+		 */
+		snprintf(dbuf, sizeof(dbuf), "%.*s", (int)plen,
+			 (char *)RTA_DATA(rta));
+		zlog_debug(" %s", dbuf);
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+/*
+ * Dump the attributes that follow an ifinfomsg header.
+ *
+ * ifi: link message header.
+ * msglen: number of attribute bytes that follow the header.
+ */
+static void nllink_dump(struct ifinfomsg *ifi, size_t msglen)
+{
+	uint8_t *datap;
+	struct rtattr *rta;
+	size_t plen, it;
+	uint32_t u32v;
+	char bytestr[16];
+	char dbuf[128];
+
+	/* Get the first attribute and go from there. */
+	rta = IFLA_RTA(ifi);
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, rta_type2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case IFLA_IFNAME:
+	case IFLA_IFALIAS:
+		if (plen == 0) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/* Payload strings are not guaranteed to be terminated. */
+		snprintf(dbuf, sizeof(dbuf), "%.*s", (int)plen,
+			 (char *)RTA_DATA(rta));
+		zlog_debug(" %s", dbuf);
+		break;
+
+	case IFLA_MTU:
+	case IFLA_TXQLEN:
+	case IFLA_NUM_TX_QUEUES:
+	case IFLA_NUM_RX_QUEUES:
+	case IFLA_GROUP:
+	case IFLA_PROMISCUITY:
+#ifdef IFLA_GSO_MAX_SEGS
+	case IFLA_GSO_MAX_SEGS:
+#endif /* IFLA_GSO_MAX_SEGS */
+#ifdef IFLA_GSO_MAX_SIZE
+	case IFLA_GSO_MAX_SIZE:
+#endif /* IFLA_GSO_MAX_SIZE */
+	case IFLA_CARRIER_CHANGES:
+	case IFLA_MASTER:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+
+	case IFLA_ADDRESS:
+		/* Render the hardware address as colon separated hex. */
+		datap = RTA_DATA(rta);
+		dbuf[0] = 0;
+		for (it = 0; it < plen; it++) {
+			snprintf(bytestr, sizeof(bytestr), "%02X:", *datap);
+			strlcat(dbuf, bytestr, sizeof(dbuf));
+			datap++;
+		}
+		/* Remove trailing ':'. */
+		if (dbuf[0])
+			dbuf[strlen(dbuf) - 1] = 0;
+
+		zlog_debug(" %s", dbuf[0] ? dbuf : "<empty>");
+		break;
+
+	case IFLA_LINKINFO:
+		/*
+		 * The nested attributes live inside this attribute's
+		 * payload, so that is the only length they may use.
+		 */
+		nllink_linkinfo_dump(RTA_DATA(rta), plen);
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+static void nlroute_dump(struct rtmsg *rtm, size_t msglen)
+{
+	struct rtattr *rta;
+	size_t plen;
+	uint32_t u32v;
+
+	/* Get the first attribute and go from there. */
+	rta = RTM_RTA(rtm);
+next_rta:
+	/* Check the header for valid length and for outbound access.
*/
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, rtm_rta2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case RTA_IIF:
+	case RTA_OIF:
+	case RTA_PRIORITY:
+	case RTA_TABLE:
+	case RTA_NH_ID:
+		/* Do not read a 32 bit value out of a shorter payload. */
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+
+	case RTA_GATEWAY:
+	case RTA_DST:
+	case RTA_SRC:
+	case RTA_PREFSRC:
+		/* The payload size tells IPv4 and IPv6 apart. */
+		switch (plen) {
+		case sizeof(struct in_addr):
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case sizeof(struct in6_addr):
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+/*
+ * Dump the attributes that follow an ndmsg header.
+ *
+ * ndm: neighbor message header.
+ * msglen: number of attribute bytes that follow the header.
+ */
+static void nlneigh_dump(struct ndmsg *ndm, size_t msglen)
+{
+	struct rtattr *rta;
+	uint8_t *datap;
+	size_t plen, it;
+	uint16_t vid;
+	char bytestr[16];
+	char dbuf[128];
+
+#ifndef NDA_RTA
+#define NDA_RTA(ndm) \
+	/* struct ndmsg *ndm; */ \
+	((struct rtattr *)(((uint8_t *)(ndm)) \
+			   + NLMSG_ALIGN(sizeof(struct ndmsg))))
+#endif /* NDA_RTA */
+
+	/* Get the first attribute and go from there. */
+	rta = NDA_RTA(ndm);
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, neigh_rta2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case NDA_LLADDR:
+		/* Render the link layer address as colon separated hex. */
+		datap = RTA_DATA(rta);
+		dbuf[0] = 0;
+		for (it = 0; it < plen; it++) {
+			snprintf(bytestr, sizeof(bytestr), "%02X:", *datap);
+			strlcat(dbuf, bytestr, sizeof(dbuf));
+			datap++;
+		}
+		/* Remove trailing ':'. */
+		if (dbuf[0])
+			dbuf[strlen(dbuf) - 1] = 0;
+
+		zlog_debug(" %s", dbuf[0] ? dbuf : "<empty>");
+		break;
+
+	case NDA_DST:
+		/* The payload size tells IPv4 and IPv6 apart. */
+		switch (plen) {
+		case sizeof(struct in_addr):
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case sizeof(struct in6_addr):
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case NDA_VLAN:
+		/* Do not read a 16 bit value out of a shorter payload. */
+		if (plen < sizeof(uint16_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		vid = *(uint16_t *)RTA_DATA(rta);
+		zlog_debug(" %d", vid);
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+/*
+ * Dump the attributes that follow an ifaddrmsg header.
+ *
+ * ifa: address message header.
+ * msglen: number of attribute bytes that follow the header.
+ */
+static void nlifa_dump(struct ifaddrmsg *ifa, size_t msglen)
+{
+	struct rtattr *rta;
+	size_t plen;
+	uint32_t u32v;
+
+	/* Get the first attribute and go from there. */
+	rta = IFA_RTA(ifa);
+next_rta:
+	/* Check the header for valid length and for outbound access. */
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, ifa_rta2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case IFA_UNSPEC:
+		/* Do not read a 32 bit value out of a shorter payload. */
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+
+	case IFA_LABEL:
+		if (plen == 0) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		/*
+		 * Bound the print by the payload length: nothing
+		 * guarantees the label is NUL terminated.
+		 */
+		zlog_debug(" %.*s", (int)plen, (const char *)RTA_DATA(rta));
+		break;
+
+	case IFA_ADDRESS:
+	case IFA_LOCAL:
+	case IFA_BROADCAST:
+		/* The payload size tells IPv4 and IPv6 apart. */
+		switch (plen) {
+		case sizeof(struct in_addr):
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case sizeof(struct in6_addr):
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+static void nlnh_dump(struct nhmsg *nhm, size_t msglen)
+{
+	struct rtattr *rta;
+	int ifindex;
+	size_t plen;
+	uint16_t u16v;
+	uint32_t u32v;
+	unsigned long count, i;
+	struct nexthop_grp *nhgrp;
+
+	rta = RTM_NHA(nhm);
+next_rta:
+	/* Check the header for valid length and for outbound access.
*/
+	if (RTA_OK(rta, msglen) == 0)
+		return;
+
+	plen = RTA_PAYLOAD(rta);
+	zlog_debug(" rta [len=%d (payload=%zu) type=(%d) %s]", rta->rta_len,
+		   plen, rta->rta_type, nhm_rta2str(rta->rta_type));
+	switch (rta->rta_type) {
+	case NHA_ID:
+		if (plen < sizeof(uint32_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u32v = *(uint32_t *)RTA_DATA(rta);
+		zlog_debug(" %u", u32v);
+		break;
+	case NHA_GROUP:
+		nhgrp = (struct nexthop_grp *)RTA_DATA(rta);
+		count = (plen / sizeof(*nhgrp));
+		if (count == 0 || (count * sizeof(*nhgrp)) != plen) {
+			/*
+			 * Skip only this attribute so the ones that
+			 * follow are still dumped, like every other
+			 * bad length case here.
+			 */
+			zlog_debug(" invalid nexthop group received");
+			break;
+		}
+
+		/* The group id is unsigned 32 bit: print it as such. */
+		for (i = 0; i < count; i++)
+			zlog_debug(" id %u weight %d", nhgrp[i].id,
+				   nhgrp[i].weight);
+		break;
+	case NHA_ENCAP_TYPE:
+	case NHA_GROUP_TYPE:
+		if (plen < sizeof(uint16_t)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		u16v = *(uint16_t *)RTA_DATA(rta);
+		zlog_debug(" %d", u16v);
+		break;
+	case NHA_BLACKHOLE:
+		/* NOTHING */
+		break;
+	case NHA_OIF:
+		if (plen < sizeof(int)) {
+			zlog_debug(" invalid length");
+			break;
+		}
+
+		ifindex = *(int *)RTA_DATA(rta);
+		zlog_debug(" %d", ifindex);
+		break;
+	case NHA_GATEWAY:
+		/* The address family comes from the message header. */
+		switch (nhm->nh_family) {
+		case AF_INET:
+			if (plen < sizeof(struct in_addr)) {
+				zlog_debug(" invalid length");
+				break;
+			}
+
+			zlog_debug(" %pI4",
+				   (struct in_addr *)RTA_DATA(rta));
+			break;
+		case AF_INET6:
+			if (plen < sizeof(struct in6_addr)) {
+				zlog_debug(" invalid length");
+				break;
+			}
+
+			zlog_debug(" %pI6",
+				   (struct in6_addr *)RTA_DATA(rta));
+			break;
+
+		default:
+			zlog_debug(" invalid family %d", nhm->nh_family);
+			break;
+		}
+		break;
+	case NHA_ENCAP:
+		/* TODO: handle MPLS labels. */
+		zlog_debug(" unparsed MPLS labels");
+		break;
+	case NHA_GROUPS:
+		/* TODO: handle this message. */
+		zlog_debug(" unparsed GROUPS message");
+		break;
+
+	default:
+		/* NOTHING: unhandled. */
+		break;
+	}
+
+	/* Get next pointer and start iteration again. */
+	rta = RTA_NEXT(rta, msglen);
+	goto next_rta;
+}
+
+/*
+ * Tell whether a netlink message is long enough to carry a family
+ * header of `hdrlen` bytes after its nlmsghdr. Logs and returns false
+ * when it is not.
+ */
+static bool nl_payload_ok(const struct nlmsghdr *nlmsg, size_t hdrlen)
+{
+	if (nlmsg->nlmsg_len >= NLMSG_LENGTH(hdrlen))
+		return true;
+
+	zlog_debug(" invalid length");
+	return false;
+}
+
+/*
+ * Log a human readable dump of every netlink message in a buffer.
+ *
+ * msg: start of the buffer, expected to hold a struct nlmsghdr.
+ * msglen: number of valid bytes in the buffer.
+ *
+ * Dumping stops at the first header that does not fit in the remaining
+ * bytes and at NLMSG_DONE.
+ */
+void nl_dump(void *msg, size_t msglen)
+{
+	struct nlmsghdr *nlmsg = msg;
+	struct nlmsgerr *nlmsgerr;
+	struct rtgenmsg *rtgen;
+	struct ifaddrmsg *ifa;
+	struct ndmsg *ndm;
+	struct rtmsg *rtm;
+	struct nhmsg *nhm;
+	struct ifinfomsg *ifi;
+	char fbuf[128];
+	char ibuf[128];
+
+next_header:
+	/* Never read a header that does not fit in what is left. */
+	if (NLMSG_OK(nlmsg, msglen) == 0)
+		return;
+
+	zlog_debug(
+		"nlmsghdr [len=%u type=(%d) %s flags=(0x%04x) {%s} seq=%u pid=%u]",
+		nlmsg->nlmsg_len, nlmsg->nlmsg_type,
+		nlmsg_type2str(nlmsg->nlmsg_type), nlmsg->nlmsg_flags,
+		nlmsg_flags2str(nlmsg->nlmsg_flags, fbuf, sizeof(fbuf)),
+		nlmsg->nlmsg_seq, nlmsg->nlmsg_pid);
+
+	/*
+	 * Every case below checks the message length before touching the
+	 * family header, which also keeps the unsigned attribute length
+	 * subtraction from wrapping around.
+	 */
+	switch (nlmsg->nlmsg_type) {
+	/* Generic. */
+	case NLMSG_NOOP:
+		break;
+	case NLMSG_ERROR:
+		if (!nl_payload_ok(nlmsg, sizeof(*nlmsgerr)))
+			break;
+
+		nlmsgerr = NLMSG_DATA(nlmsg);
+		zlog_debug(" nlmsgerr [error=(%d) %s]", nlmsgerr->error,
+			   strerror(-nlmsgerr->error));
+		break;
+	case NLMSG_DONE:
+		return;
+	case NLMSG_OVERRUN:
+		break;
+
+	/* RTM. */
+	case RTM_NEWLINK:
+	case RTM_DELLINK:
+	case RTM_SETLINK:
+		if (!nl_payload_ok(nlmsg, sizeof(*ifi)))
+			break;
+
+		ifi = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" ifinfomsg [family=%d type=(%d) %s "
+			"index=%d flags=0x%04x {%s}]",
+			ifi->ifi_family, ifi->ifi_type,
+			ifi_type2str(ifi->ifi_type), ifi->ifi_index,
+			ifi->ifi_flags,
+			if_flags2str(ifi->ifi_flags, ibuf, sizeof(ibuf)));
+		nllink_dump(ifi, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)));
+		break;
+	case RTM_GETLINK:
+		if (!nl_payload_ok(nlmsg, sizeof(*rtgen)))
+			break;
+
+		rtgen = NLMSG_DATA(nlmsg);
+		zlog_debug(" rtgen [family=(%d) %s]", rtgen->rtgen_family,
+			   af_type2str(rtgen->rtgen_family));
+		break;
+
+	case RTM_NEWROUTE:
+	case RTM_DELROUTE:
+	case RTM_GETROUTE:
+		if (!nl_payload_ok(nlmsg, sizeof(*rtm)))
+			break;
+
+		rtm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" rtmsg [family=(%d) %s dstlen=%d srclen=%d tos=%d "
+			"table=%d protocol=(%d) %s scope=(%d) %s "
+			"type=(%d) %s flags=0x%04x {%s}]",
+			rtm->rtm_family, af_type2str(rtm->rtm_family),
+			rtm->rtm_dst_len, rtm->rtm_src_len, rtm->rtm_tos,
+			rtm->rtm_table, rtm->rtm_protocol,
+			rtm_protocol2str(rtm->rtm_protocol), rtm->rtm_scope,
+			rtm_scope2str(rtm->rtm_scope), rtm->rtm_type,
+			rtm_type2str(rtm->rtm_type), rtm->rtm_flags,
+			rtm_flags2str(rtm->rtm_flags, fbuf, sizeof(fbuf)));
+		nlroute_dump(rtm,
+			     nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*rtm)));
+		break;
+
+	case RTM_NEWNEIGH:
+	case RTM_DELNEIGH:
+		if (!nl_payload_ok(nlmsg, sizeof(*ndm)))
+			break;
+
+		ndm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" ndm [family=%d (%s) ifindex=%d state=0x%04x {%s} "
+			"flags=0x%04x {%s} type=%d (%s)]",
+			ndm->ndm_family, af_type2str(ndm->ndm_family),
+			ndm->ndm_ifindex, ndm->ndm_state,
+			neigh_state2str(ndm->ndm_state, ibuf, sizeof(ibuf)),
+			ndm->ndm_flags,
+			neigh_flags2str(ndm->ndm_flags, fbuf, sizeof(fbuf)),
+			ndm->ndm_type, rtm_type2str(ndm->ndm_type));
+		nlneigh_dump(ndm,
+			     nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ndm)));
+		break;
+
+	case RTM_NEWADDR:
+	case RTM_DELADDR:
+		if (!nl_payload_ok(nlmsg, sizeof(*ifa)))
+			break;
+
+		ifa = NLMSG_DATA(nlmsg);
+		/*
+		 * ifa_flags carries IFA_F_* address flags, so decode it
+		 * with ifa_flags2str() and not with the IFF_* interface
+		 * flag decoder.
+		 */
+		zlog_debug(
+			" ifa [family=(%d) %s prefixlen=%d "
+			"flags=0x%04x {%s} scope=%d index=%u]",
+			ifa->ifa_family, af_type2str(ifa->ifa_family),
+			ifa->ifa_prefixlen, ifa->ifa_flags,
+			ifa_flags2str(ifa->ifa_flags, fbuf, sizeof(fbuf)),
+			ifa->ifa_scope, ifa->ifa_index);
+		nlifa_dump(ifa, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
+		break;
+
+	case RTM_NEWNEXTHOP:
+	case RTM_DELNEXTHOP:
+	case RTM_GETNEXTHOP:
+		if (!nl_payload_ok(nlmsg, sizeof(*nhm)))
+			break;
+
+		nhm = NLMSG_DATA(nlmsg);
+		zlog_debug(
+			" nhm [family=(%d) %s scope=(%d) %s "
+			"protocol=(%d) %s flags=0x%08x {%s}]",
+			nhm->nh_family, af_type2str(nhm->nh_family),
+			nhm->nh_scope, rtm_scope2str(nhm->nh_scope),
+			nhm->nh_protocol, rtm_protocol2str(nhm->nh_protocol),
+			nhm->nh_flags,
+			nh_flags2str(nhm->nh_flags, fbuf, sizeof(fbuf)));
+		nlnh_dump(nhm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*nhm)));
+		break;
+
+	default:
+		break;
+	}
+
+	/*
+	 * NLMSG_NEXT() subtracts the aligned message length from
+	 * `msglen`. Stop here when the alignment padding does not fit
+	 * in what is left, so the unsigned counter cannot wrap around.
+	 */
+	if (NLMSG_ALIGN(nlmsg->nlmsg_len) > msglen)
+		return;
+
+	/*
+	 * Try to get the next header. There should only be more
+	 * messages if this header was flagged as MULTI, otherwise just
+	 * end it here.
+ */ + nlmsg = NLMSG_NEXT(nlmsg, msglen); + if (NLMSG_OK(nlmsg, msglen) == 0) + return; + + goto next_header; +} + +#endif /* NETLINK_DEBUG */ diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index bd9966c801..51ce59c477 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -75,9 +75,6 @@ struct fpm_nl_ctx { int socket; bool disabled; bool connecting; - bool nhg_complete; - bool rib_complete; - bool rmac_complete; bool use_nhg; struct sockaddr_storage addr; @@ -377,7 +374,6 @@ static int fpm_write_config(struct vty *vty) struct sockaddr_in *sin; struct sockaddr_in6 *sin6; int written = 0; - char addrstr[INET6_ADDRSTRLEN]; if (gfnc->disabled) return written; @@ -386,8 +382,7 @@ static int fpm_write_config(struct vty *vty) case AF_INET: written = 1; sin = (struct sockaddr_in *)&gfnc->addr; - inet_ntop(AF_INET, &sin->sin_addr, addrstr, sizeof(addrstr)); - vty_out(vty, "fpm address %s", addrstr); + vty_out(vty, "fpm address %pI4", &sin->sin_addr); if (sin->sin_port != htons(SOUTHBOUND_DEFAULT_PORT)) vty_out(vty, " port %d", ntohs(sin->sin_port)); @@ -396,8 +391,7 @@ static int fpm_write_config(struct vty *vty) case AF_INET6: written = 1; sin6 = (struct sockaddr_in6 *)&gfnc->addr; - inet_ntop(AF_INET, &sin6->sin6_addr, addrstr, sizeof(addrstr)); - vty_out(vty, "fpm address %s", addrstr); + vty_out(vty, "fpm address %pI6", &sin6->sin6_addr); if (sin6->sin6_port != htons(SOUTHBOUND_DEFAULT_PORT)) vty_out(vty, " port %d", ntohs(sin6->sin6_port)); @@ -542,6 +536,13 @@ static int fpm_write(struct thread *t) fnc->connecting = false; + /* + * Starting with LSPs walk all FPM objects, marking them + * as unsent and then replaying them. + */ + thread_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); + /* Permit receiving messages now. 
*/ thread_add_read(fnc->fthread->master, fpm_read, fnc, fnc->socket, &fnc->t_read); @@ -664,9 +665,12 @@ static int fpm_connect(struct thread *t) /* * Starting with LSPs walk all FPM objects, marking them * as unsent and then replaying them. + * + * If we are not connected, then delay the objects reset/send. */ - thread_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, - &fnc->t_lspreset); + if (!fnc->connecting) + thread_add_timer(zrouter.master, fpm_lsp_reset, fnc, 0, + &fnc->t_lspreset); return 0; } @@ -908,12 +912,8 @@ static int fpm_lsp_send(struct thread *t) WALK_FINISH(fnc, FNE_LSP_FINISHED); /* Now move onto routes */ - if (fnc->use_nhg) - thread_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0, - &fnc->t_nhgreset); - else - thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0, - &fnc->t_ribreset); + thread_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0, + &fnc->t_nhgreset); } else { /* Didn't finish - reschedule LSP walk */ thread_add_timer(zrouter.master, fpm_lsp_send, fnc, 0, @@ -966,7 +966,8 @@ static int fpm_nhg_send(struct thread *t) fna.complete = true; /* Send next hops. */ - hash_walk(zrouter.nhgs_id, fpm_nhg_send_cb, &fna); + if (fnc->use_nhg) + hash_walk(zrouter.nhgs_id, fpm_nhg_send_cb, &fna); /* `free()` allocated memory. */ dplane_ctx_fini(&fna.ctx); @@ -1124,7 +1125,6 @@ static int fpm_nhg_reset(struct thread *t) { struct fpm_nl_ctx *fnc = THREAD_ARG(t); - fnc->nhg_complete = false; hash_iterate(zrouter.nhgs_id, fpm_nhg_reset_cb, NULL); /* Schedule next step: send next hop groups. 
*/ @@ -1167,8 +1167,6 @@ static int fpm_rib_reset(struct thread *t) struct route_table *rt; rib_tables_iter_t rt_iter; - fnc->rib_complete = false; - rt_iter.state = RIB_TABLES_ITER_S_INIT; while ((rt = rib_tables_iter_next(&rt_iter))) { for (rn = route_top(rt); rn; rn = srcdest_route_next(rn)) { @@ -1208,7 +1206,6 @@ static int fpm_rmac_reset(struct thread *t) { struct fpm_nl_ctx *fnc = THREAD_ARG(t); - fnc->rmac_complete = false; hash_iterate(zrouter.l3vni_table, fpm_unset_l3vni_table, NULL); /* Schedule next event: send RMAC entries. */ @@ -1222,24 +1219,27 @@ static int fpm_process_queue(struct thread *t) { struct fpm_nl_ctx *fnc = THREAD_ARG(t); struct zebra_dplane_ctx *ctx; - - frr_mutex_lock_autounlock(&fnc->ctxqueue_mutex); + bool no_bufs = false; + uint64_t processed_contexts = 0; while (true) { /* No space available yet. */ - if (STREAM_WRITEABLE(fnc->obuf) < NL_PKT_BUF_SIZE) + if (STREAM_WRITEABLE(fnc->obuf) < NL_PKT_BUF_SIZE) { + no_bufs = true; break; + } /* Dequeue next item or quit processing. */ - ctx = dplane_ctx_dequeue(&fnc->ctxqueue); + frr_with_mutex (&fnc->ctxqueue_mutex) { + ctx = dplane_ctx_dequeue(&fnc->ctxqueue); + } if (ctx == NULL) break; fpm_nl_enqueue(fnc, ctx); /* Account the processed entries. */ - atomic_fetch_add_explicit(&fnc->counters.dplane_contexts, 1, - memory_order_relaxed); + processed_contexts++; atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1, memory_order_relaxed); @@ -1247,13 +1247,24 @@ static int fpm_process_queue(struct thread *t) dplane_provider_enqueue_out_ctx(fnc->prov, ctx); } - /* Check for more items in the queue. 
*/ - if (atomic_load_explicit(&fnc->counters.ctxqueue_len, - memory_order_relaxed) - > 0) + /* Update count of processed contexts */ + atomic_fetch_add_explicit(&fnc->counters.dplane_contexts, + processed_contexts, memory_order_relaxed); + + /* Re-schedule if we ran out of buffer space */ + if (no_bufs) thread_add_timer(fnc->fthread->master, fpm_process_queue, fnc, 0, &fnc->t_dequeue); + /* + * Let the dataplane thread know if there are items in the + * output queue to be processed. Otherwise they may sit + * until the dataplane thread gets scheduled for new, + * unrelated work. + */ + if (dplane_provider_out_ctx_queue_len(fnc->prov) > 0) + dplane_provider_work_ready(); + return 0; } @@ -1303,20 +1314,14 @@ static int fpm_process_event(struct thread *t) if (IS_ZEBRA_DEBUG_FPM) zlog_debug("%s: next hop groups walk finished", __func__); - - fnc->nhg_complete = true; break; case FNE_RIB_FINISHED: if (IS_ZEBRA_DEBUG_FPM) zlog_debug("%s: RIB walk finished", __func__); - - fnc->rib_complete = true; break; case FNE_RMAC_FINISHED: if (IS_ZEBRA_DEBUG_FPM) zlog_debug("%s: RMAC walk finished", __func__); - - fnc->rmac_complete = true; break; case FNE_LSP_FINISHED: if (IS_ZEBRA_DEBUG_FPM) @@ -1412,7 +1417,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) struct zebra_dplane_ctx *ctx; struct fpm_nl_ctx *fnc; int counter, limit; - uint64_t cur_queue, peak_queue; + uint64_t cur_queue, peak_queue = 0, stored_peak_queue; fnc = dplane_provider_get_data(prov); limit = dplane_provider_get_work_limit(prov); @@ -1426,22 +1431,22 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) * anyway. */ if (fnc->socket != -1 && fnc->connecting == false) { - frr_mutex_lock_autounlock(&fnc->ctxqueue_mutex); - dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx); - - /* Account the number of contexts. */ + /* + * Update the number of queued contexts *before* + * enqueueing, to ensure counter consistency. 
+ */ atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len, 1, memory_order_relaxed); + + frr_with_mutex (&fnc->ctxqueue_mutex) { + dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx); + } + cur_queue = atomic_load_explicit( &fnc->counters.ctxqueue_len, memory_order_relaxed); - peak_queue = atomic_load_explicit( - &fnc->counters.ctxqueue_len_peak, - memory_order_relaxed); if (peak_queue < cur_queue) - atomic_store_explicit( - &fnc->counters.ctxqueue_len_peak, - peak_queue, memory_order_relaxed); + peak_queue = cur_queue; continue; } @@ -1449,12 +1454,23 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov) dplane_provider_enqueue_out_ctx(prov, ctx); } + /* Update peak queue length, if we just observed a new peak */ + stored_peak_queue = atomic_load_explicit( + &fnc->counters.ctxqueue_len_peak, memory_order_relaxed); + if (stored_peak_queue < peak_queue) + atomic_store_explicit(&fnc->counters.ctxqueue_len_peak, + peak_queue, memory_order_relaxed); + if (atomic_load_explicit(&fnc->counters.ctxqueue_len, memory_order_relaxed) > 0) thread_add_timer(fnc->fthread->master, fpm_process_queue, fnc, 0, &fnc->t_dequeue); + /* Ensure dataplane thread is rescheduled if we hit the work limit */ + if (counter >= limit) + dplane_provider_work_ready(); + return 0; } diff --git a/zebra/if_netlink.c b/zebra/if_netlink.c index a68873882d..e4dd745f42 100644 --- a/zebra/if_netlink.c +++ b/zebra/if_netlink.c @@ -691,7 +691,7 @@ static int netlink_bridge_interface(struct nlmsghdr *h, int len, ns_id_t ns_id, return 0; } -/* If the interface is and es bond member then it must follow EVPN's +/* If the interface is an es bond member then it must follow EVPN's * protodown setting */ static void netlink_proc_dplane_if_protodown(struct zebra_if *zif, diff --git a/zebra/interface.c b/zebra/interface.c index ddad9c9e56..4072eb1568 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -1428,6 +1428,14 @@ const char *zebra_protodown_rc_str(enum protodown_reasons protodown_rc, return pd_buf; 
} +static inline bool if_is_protodown_applicable(struct interface *ifp) +{ + if (IS_ZEBRA_IF_BOND(ifp)) + return false; + + return true; +} + /* Interface's information print out to vty interface. */ static void if_dump_vty(struct vty *vty, struct interface *ifp) { @@ -1592,14 +1600,13 @@ static void if_dump_vty(struct vty *vty, struct interface *ifp) } zebra_evpn_if_es_print(vty, zebra_if); - vty_out(vty, " protodown: %s", - (zebra_if->flags & ZIF_FLAG_PROTODOWN) ? "on" : "off"); + vty_out(vty, " protodown: %s %s\n", + (zebra_if->flags & ZIF_FLAG_PROTODOWN) ? "on" : "off", + if_is_protodown_applicable(ifp) ? "" : "(n/a)"); if (zebra_if->protodown_rc) - vty_out(vty, " rc: %s\n", + vty_out(vty, " protodown reasons: %s\n", zebra_protodown_rc_str(zebra_if->protodown_rc, pd_buf, sizeof(pd_buf))); - else - vty_out(vty, "\n"); if (zebra_if->link_ifindex != IFINDEX_INTERNAL) { if (zebra_if->link) diff --git a/zebra/interface.h b/zebra/interface.h index ab1a245e5e..8dcb477f10 100644 --- a/zebra/interface.h +++ b/zebra/interface.h @@ -279,8 +279,12 @@ struct irdp_interface; /* Ethernet segment info used for setting up EVPN multihoming */ struct zebra_evpn_es; struct zebra_es_if_info { + /* type-3 esi config */ struct ethaddr sysmac; uint32_t lid; /* local-id; has to be unique per-ES-sysmac */ + + esi_t esi; + uint16_t df_pref; struct zebra_evpn_es *es; /* local ES */ }; diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 76da00c619..5d64f57b3e 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -712,7 +712,11 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf, if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) { zlog_debug("%s: >> netlink message dump [sent]", __func__); +#ifdef NETLINK_DEBUG + nl_dump(buf, buflen); +#else zlog_hexdump(buf, buflen); +#endif /* NETLINK_DEBUG */ } if (status == -1) { @@ -770,7 +774,11 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg, if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) { 
zlog_debug("%s: << netlink message dump [recv]", __func__); +#ifdef NETLINK_DEBUG + nl_dump(buf, status); +#else zlog_hexdump(buf, status); +#endif /* NETLINK_DEBUG */ } return status; diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 9d74aeca28..adbdf54c1f 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -1107,7 +1107,7 @@ void rtm_read(struct rt_msghdr *rtm) if (rtm->rtm_type == RTM_CHANGE) rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p, NULL, NULL, 0, RT_TABLE_MAIN, 0, - 0, true, false); + 0, true); if (rtm->rtm_type == RTM_GET || rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0, @@ -1116,7 +1116,7 @@ void rtm_read(struct rt_msghdr *rtm) else rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0, - 0, true, false); + 0, true); } /* Interface function for the kernel routing table updates. Support diff --git a/zebra/label_manager.c b/zebra/label_manager.c index d312a661f3..2634a333ee 100644 --- a/zebra/label_manager.c +++ b/zebra/label_manager.c @@ -175,11 +175,9 @@ void label_manager_init(void) } /* alloc and fill a label chunk */ -struct label_manager_chunk *create_label_chunk(uint8_t proto, - unsigned short instance, - uint32_t session_id, - uint8_t keep, uint32_t start, - uint32_t end) +struct label_manager_chunk * +create_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t start, uint32_t end) { /* alloc chunk, fill it and return it */ struct label_manager_chunk *lmc = @@ -302,15 +300,13 @@ assign_specific_label_chunk(uint8_t proto, unsigned short instance, * @param base Desired starting label of the chunk; if MPLS_LABEL_BASE_ANY it does not apply * @return Pointer to the assigned label chunk, or NULL if the request could not be satisfied */ -struct label_manager_chunk *assign_label_chunk(uint8_t proto, - unsigned short 
instance, - uint32_t session_id, - uint8_t keep, uint32_t size, - uint32_t base) +struct label_manager_chunk * +assign_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t size, uint32_t base) { struct label_manager_chunk *lmc; struct listnode *node; - uint32_t prev_end = 0; + uint32_t prev_end = MPLS_LABEL_UNRESERVED_MIN; /* handle chunks request with a specific base label */ if (base != MPLS_LABEL_BASE_ANY) @@ -332,8 +328,7 @@ struct label_manager_chunk *assign_label_chunk(uint8_t proto, } /* check if we hadve a "hole" behind us that we can squeeze into */ - if ((lmc->start > prev_end) - && (lmc->start - prev_end >= size)) { + if ((lmc->start > prev_end) && (lmc->start - prev_end > size)) { lmc = create_label_chunk(proto, instance, session_id, keep, prev_end + 1, prev_end + size); @@ -390,6 +385,7 @@ static int label_manager_release_label_chunk(struct zserv *client, * * @param proto Daemon protocol of client, to identify the owner * @param instance Instance, to identify the owner + * @param session_id Zclient session ID, to identify the zclient session * @param start First label of the chunk * @param end Last label of the chunk * @return 0 on success, -1 otherwise diff --git a/zebra/label_manager.h b/zebra/label_manager.h index 82154982c2..8636c79219 100644 --- a/zebra/label_manager.h +++ b/zebra/label_manager.h @@ -95,11 +95,9 @@ int lm_get_chunk_response(struct label_manager_chunk *lmc, struct zserv *client, vrf_id_t vrf_id); /* convenience function to allocate an lmc to be consumed by the above API */ -struct label_manager_chunk *create_label_chunk(uint8_t proto, - unsigned short instance, - uint32_t session_id, - uint8_t keep, uint32_t start, - uint32_t end); +struct label_manager_chunk * +create_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t start, uint32_t end); void delete_label_chunk(void *val); /* register/unregister callbacks for hooks */ @@ -115,11 +113,9 @@ 
struct label_manager { }; void label_manager_init(void); -struct label_manager_chunk *assign_label_chunk(uint8_t proto, - unsigned short instance, - uint32_t session_id, - uint8_t keep, uint32_t size, - uint32_t base); +struct label_manager_chunk * +assign_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, + uint8_t keep, uint32_t size, uint32_t base); int release_label_chunk(uint8_t proto, unsigned short instance, uint32_t session_id, uint32_t start, uint32_t end); int lm_client_disconnect_cb(struct zserv *client); diff --git a/zebra/redistribute.c b/zebra/redistribute.c index 1f075cfb4b..370dbaa240 100644 --- a/zebra/redistribute.c +++ b/zebra/redistribute.c @@ -715,7 +715,7 @@ int zebra_del_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn, rib_delete(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_TABLE, re->table, re->flags, &p, NULL, re->nhe->nhg.nexthop, re->nhe_id, zvrf->table_id, re->metric, re->distance, - false, false); + false); return 0; } diff --git a/zebra/rib.h b/zebra/rib.h index 3bce62bfa8..d653425f0d 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -84,6 +84,11 @@ struct rnh { PREDECL_LIST(re_list) +struct opaque { + uint16_t length; + uint8_t data[]; +}; + struct route_entry { /* Link list. */ struct re_list_item next; @@ -157,6 +162,8 @@ struct route_entry { /* Distance. 
*/ uint8_t distance; + + struct opaque *opaque; }; #define RIB_SYSTEM_ROUTE(R) RSYSTEM_ROUTE((R)->type) @@ -336,8 +343,8 @@ int route_entry_update_nhe(struct route_entry *re, struct nhg_hash_entry *new_nhghe); /* NHG replace has happend, we have to update route_entry pointers to new one */ -void rib_handle_nhg_replace(struct nhg_hash_entry *old, - struct nhg_hash_entry *new); +void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry); #define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re) extern void _route_entry_dump(const char *func, union prefixconstptr pp, @@ -386,7 +393,7 @@ extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint8_t distance, - bool fromkernel, bool connected_down); + bool fromkernel); extern struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, union g_addr *addr, diff --git a/zebra/router-id.c b/zebra/router-id.c index 7af60a389b..ac21978ee8 100644 --- a/zebra/router-id.c +++ b/zebra/router-id.c @@ -522,7 +522,8 @@ DEFUN (show_ip_router_id, inet_ntop(AF_INET6, &zvrf->rid6_user_assigned.u.prefix6, addr_name, sizeof(addr_name)); } else { - if (zvrf->rid_user_assigned.u.prefix4.s_addr == 0) + if (zvrf->rid_user_assigned.u.prefix4.s_addr + == INADDR_ANY) return CMD_SUCCESS; inet_ntop(AF_INET, &zvrf->rid_user_assigned.u.prefix4, addr_name, sizeof(addr_name)); diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index f59fbae3af..547700d0c5 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -266,6 +266,10 @@ static inline int zebra2proto(int proto) case ZEBRA_ROUTE_NHG: proto = RTPROT_ZEBRA; break; + case ZEBRA_ROUTE_CONNECT: + case ZEBRA_ROUTE_KERNEL: + proto = RTPROT_KERNEL; + break; default: /* * When a user adds a new protocol this will show up @@ -869,7 +873,7 @@ static int 
netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, if (nhe_id) { rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p, &src_p, NULL, nhe_id, table, metric, - distance, true, false); + distance, true); } else { if (!tb[RTA_MULTIPATH]) { struct nexthop nh; @@ -879,13 +883,13 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id, gate, afi, vrf_id); rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p, &src_p, &nh, 0, table, - metric, distance, true, false); + metric, distance, true); } else { /* XXX: need to compare the entire list of * nexthops here for NLM_F_APPEND stupidity */ rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p, &src_p, NULL, 0, table, - metric, distance, true, false); + metric, distance, true); } } } @@ -3300,6 +3304,8 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) bool is_ext; bool is_router; bool local_inactive; + uint32_t ext_flags = 0; + bool dp_static = false; ndm = NLMSG_DATA(h); @@ -3391,9 +3397,15 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED); is_router = !!(ndm->ndm_flags & NTF_ROUTER); + if (tb[NDA_EXT_FLAGS]) { + ext_flags = *(uint32_t *)RTA_DATA(tb[NDA_EXT_FLAGS]); + if (ext_flags & NTF_E_MH_PEER_SYNC) + dp_static = true; + } + if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug( - "Rx %s family %s IF %s(%u) vrf %s(%u) IP %s MAC %s state 0x%x flags 0x%x", + "Rx %s family %s IF %s(%u) vrf %s(%u) IP %s MAC %s state 0x%x flags 0x%x ext_flags 0x%x", nl_msg_type_to_str(h->nlmsg_type), nl_family_to_str(ndm->ndm_family), ifp->name, ndm->ndm_ifindex, VRF_LOGNAME(vrf), ifp->vrf_id, @@ -3401,7 +3413,7 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) mac_present ? 
prefix_mac2str(&mac, buf, sizeof(buf)) : "", - ndm->ndm_state, ndm->ndm_flags); + ndm->ndm_state, ndm->ndm_flags, ext_flags); /* If the neighbor state is valid for use, process as an add or * update @@ -3410,15 +3422,19 @@ static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id) * in re-adding the neighbor if it is a valid "remote" neighbor. */ if (ndm->ndm_state & NUD_VALID) { - local_inactive = !(ndm->ndm_state & NUD_LOCAL_ACTIVE); + if (zebra_evpn_mh_do_adv_reachable_neigh_only()) + local_inactive = + !(ndm->ndm_state & NUD_LOCAL_ACTIVE); + else + /* If EVPN-MH is not enabled we treat STALE + * neighbors as locally-active and advertise + * them + */ + local_inactive = false; - /* XXX - populate dp-static based on the sync flags - * in the kernel - */ return zebra_vxlan_handle_kernel_neigh_update( - ifp, link_if, &ip, &mac, ndm->ndm_state, - is_ext, is_router, local_inactive, - false /* dp_static */); + ifp, link_if, &ip, &mac, ndm->ndm_state, is_ext, + is_router, local_inactive, dp_static); } return zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip); @@ -3689,12 +3705,12 @@ static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx, char buf2[ETHER_ADDR_STRLEN]; zlog_debug( - "Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x", + "Tx %s family %s IF %s(%u) Neigh %s MAC %s flags 0x%x state 0x%x %sext_flags 0x%x", nl_msg_type_to_str(cmd), nl_family_to_str(family), dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx), ipaddr2str(ip, buf, sizeof(buf)), mac ? prefix_mac2str(mac, buf2, sizeof(buf2)) : "null", - flags, state); + flags, state, ext ? 
"ext " : "", ext_flags); } return netlink_neigh_update_msg_encode( diff --git a/zebra/rt_netlink.h b/zebra/rt_netlink.h index 9ffb50983d..4e41ff984b 100644 --- a/zebra/rt_netlink.h +++ b/zebra/rt_netlink.h @@ -121,6 +121,29 @@ netlink_put_lsp_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); extern enum netlink_msg_status netlink_put_pw_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); +#ifdef NETLINK_DEBUG +const char *nlmsg_type2str(uint16_t type); +const char *af_type2str(int type); +const char *ifi_type2str(int type); +const char *rta_type2str(int type); +const char *rtm_type2str(int type); +const char *rtm_protocol2str(int type); +const char *rtm_scope2str(int type); +const char *rtm_rta2str(int type); +const char *neigh_rta2str(int type); +const char *ifa_rta2str(int type); +const char *nhm_rta2str(int type); +const char *nlmsg_flags2str(uint16_t flags, char *buf, size_t buflen); +const char *if_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *rtm_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *neigh_state2str(uint32_t flags, char *buf, size_t buflen); +const char *neigh_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *ifa_flags2str(uint32_t flags, char *buf, size_t buflen); +const char *nh_flags2str(uint32_t flags, char *buf, size_t buflen); + +void nl_dump(void *msg, size_t msglen); +#endif /* NETLINK_DEBUG */ + #ifdef __cplusplus } #endif diff --git a/zebra/rule_netlink.c b/zebra/rule_netlink.c index a63504992e..08a675ef3a 100644 --- a/zebra/rule_netlink.c +++ b/zebra/rule_netlink.c @@ -79,7 +79,15 @@ netlink_rule_msg_encode(int cmd, const struct zebra_dplane_ctx *ctx, if (buflen < sizeof(*req)) return 0; memset(req, 0, sizeof(*req)); - family = PREFIX_FAMILY(src_ip); + + /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */ + if (PREFIX_FAMILY(src_ip)) + family = PREFIX_FAMILY(src_ip); + else if (PREFIX_FAMILY(dst_ip)) + family = PREFIX_FAMILY(dst_ip); + else + family 
= AF_INET; + bytelen = (family == AF_INET ? 4 : 16); req->n.nlmsg_type = cmd; diff --git a/zebra/sample_plugin.c b/zebra/sample_plugin.c index c96a86cc73..464205f2f3 100644 --- a/zebra/sample_plugin.c +++ b/zebra/sample_plugin.c @@ -92,7 +92,6 @@ static int sample_process(struct zebra_dplane_provider *prov) static int init_sample_plugin(struct thread_master *tm) { int ret; - struct zebra_dplane_provider *prov = NULL; /* Note that we don't use or store the thread_master 'tm'. We * don't use the zebra main pthread: our plugin code will run in diff --git a/zebra/subdir.am b/zebra/subdir.am index 4533d6bafc..f842a8c0f3 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -40,6 +40,11 @@ if LINUX module_LTLIBRARIES += zebra/zebra_cumulus_mlag.la endif +# Dataplane sample plugin +if DEV_BUILD +module_LTLIBRARIES += zebra/dplane_sample_plugin.la +endif + man8 += $(MANBUILD)/frr-zebra.8 ## endif ZEBRA endif @@ -206,6 +211,12 @@ zebra/zebra_fpm_dt.lo: fpm/fpm.pb-c.h qpb/qpb.pb-c.h endif endif +# Sample dataplane plugin +if DEV_BUILD +zebra_dplane_sample_plugin_la_SOURCES = zebra/sample_plugin.c +zebra_dplane_sample_plugin_la_LDFLAGS = -module -shared -avoid-version -export-dynamic +endif + nodist_zebra_zebra_SOURCES = \ yang/frr-zebra.yang.c \ # end @@ -222,3 +233,9 @@ zebra_dplane_fpm_nl_la_LIBADD = vtysh_scan += $(top_srcdir)/zebra/dplane_fpm_nl.c endif + +if NETLINK_DEBUG +zebra_zebra_SOURCES += \ + zebra/debug_nl.c \ + # end +endif diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index f7c123231e..90c6a24e7b 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -995,7 +995,6 @@ int zsend_pw_update(struct zserv *client, struct zebra_pw *pw) int zsend_assign_label_chunk_response(struct zserv *client, vrf_id_t vrf_id, struct label_manager_chunk *lmc) { - int ret; struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); zclient_create_header(s, ZEBRA_GET_LABEL_CHUNK, vrf_id); @@ -1015,16 +1014,13 @@ int zsend_assign_label_chunk_response(struct zserv *client, vrf_id_t vrf_id, 
/* Write packet size. */ stream_putw_at(s, 0, stream_get_endp(s)); - ret = writen(client->sock, s->data, stream_get_endp(s)); - stream_free(s); - return ret; + return zserv_send_message(client, s); } /* Send response to a label manager connect request to client */ int zsend_label_manager_connect_response(struct zserv *client, vrf_id_t vrf_id, unsigned short result) { - int ret; struct stream *s = stream_new(ZEBRA_MAX_PACKET_SIZ); zclient_create_header(s, ZEBRA_LABEL_MANAGER_CONNECT, vrf_id); @@ -1041,10 +1037,7 @@ int zsend_label_manager_connect_response(struct zserv *client, vrf_id_t vrf_id, /* Write packet size. */ stream_putw_at(s, 0, stream_get_endp(s)); - ret = writen(client->sock, s->data, stream_get_endp(s)); - stream_free(s); - - return ret; + return zserv_send_message(client, s); } /* Send response to a get table chunk request to client */ @@ -1980,6 +1973,13 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) if (CHECK_FLAG(api.message, ZAPI_MESSAGE_MTU)) re->mtu = api.mtu; + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_OPAQUE)) { + re->opaque = XMALLOC(MTYPE_OPAQUE, + sizeof(struct opaque) + api.opaque.length); + re->opaque->length = api.opaque.length; + memcpy(re->opaque->data, api.opaque.data, re->opaque->length); + } + afi = family2afi(api.prefix.family); if (afi != AFI_IP6 && CHECK_FLAG(api.message, ZAPI_MESSAGE_SRCPFX)) { flog_warn(EC_ZEBRA_RX_SRCDEST_WRONG_AFI, @@ -2080,7 +2080,7 @@ static void zread_route_del(ZAPI_HANDLER_ARGS) rib_delete(afi, api.safi, zvrf_id(zvrf), api.type, api.instance, api.flags, &api.prefix, src_p, NULL, 0, table_id, api.metric, - api.distance, false, false); + api.distance, false); /* Stats */ switch (api.prefix.family) { @@ -2522,6 +2522,22 @@ int zsend_sr_policy_notify_status(uint32_t color, struct ipaddr *endpoint, return zserv_send_message(client, s); } +/* Send client close notify to client */ +int zsend_client_close_notify(struct zserv *client, struct zserv *closed_client) +{ + struct stream *s = 
stream_new(ZEBRA_MAX_PACKET_SIZ); + + zclient_create_header(s, ZEBRA_CLIENT_CLOSE_NOTIFY, VRF_DEFAULT); + + stream_putc(s, closed_client->proto); + stream_putw(s, closed_client->instance); + stream_putl(s, closed_client->session_id); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + /* Send response to a table manager connect request to client */ static void zread_table_manager_connect(struct zserv *client, struct stream *msg, vrf_id_t vrf_id) diff --git a/zebra/zapi_msg.h b/zebra/zapi_msg.h index efc52059b6..9822d72022 100644 --- a/zebra/zapi_msg.h +++ b/zebra/zapi_msg.h @@ -105,6 +105,9 @@ extern int zsend_sr_policy_notify_status(uint32_t color, struct ipaddr *endpoint, char *name, int status); +extern int zsend_client_close_notify(struct zserv *client, + struct zserv *closed_client); + #ifdef __cplusplus } #endif diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 41ff73d77e..db2b9e002e 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -2014,16 +2014,6 @@ int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) { UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); - /* Check for available encapsulations. */ - if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) - continue; - - zl3vni = zl3vni_from_vrf(nexthop->vrf_id); - if (zl3vni && is_l3vni_oper_up(zl3vni)) { - nexthop->nh_encap_type = NET_VXLAN; - nexthop->nh_encap.vni = zl3vni->vni; - } - /* Optionally capture extra interface info while we're in the * main zebra pthread - a plugin has to ask for this info. */ @@ -2044,6 +2034,16 @@ int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, if_extra, link); } } + + /* Check for available evpn encapsulations. 
*/ + if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) + continue; + + zl3vni = zl3vni_from_vrf(nexthop->vrf_id); + if (zl3vni && is_l3vni_oper_up(zl3vni)) { + nexthop->nh_encap_type = NET_VXLAN; + nexthop->nh_encap.vni = zl3vni->vni; + } } /* Don't need some info when capturing a system notification */ @@ -3903,6 +3903,12 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, return ret; } +uint32_t dplane_provider_out_ctx_queue_len(struct zebra_dplane_provider *prov) +{ + return atomic_load_explicit(&(prov->dp_out_counter), + memory_order_relaxed); +} + /* * Enqueue and maintain associated counter */ @@ -4547,6 +4553,7 @@ static int dplane_thread_loop(struct thread *event) struct zebra_dplane_ctx *ctx, *tctx; int limit, counter, error_counter; uint64_t curr, high; + bool reschedule = false; /* Capture work limit per cycle */ limit = zdplane_info.dg_updates_per_cycle; @@ -4683,6 +4690,9 @@ static int dplane_thread_loop(struct thread *event) dplane_provider_unlock(prov); + if (counter >= limit) + reschedule = true; + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug("dplane dequeues %d completed work from provider %s", counter, dplane_provider_get_name(prov)); @@ -4693,6 +4703,13 @@ static int dplane_thread_loop(struct thread *event) DPLANE_UNLOCK(); } + /* + * We hit the work limit while processing at least one provider's + * output queue - ensure we come back and finish it. + */ + if (reschedule) + dplane_provider_work_ready(); + /* After all providers have been serviced, enqueue any completed * work and any errors back to zebra so it can process the results. 
*/ diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 3b4f049068..595d3fe562 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -763,6 +763,9 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, struct dplane_ctx_q *listp); +/* Current completed work queue length */ +uint32_t dplane_provider_out_ctx_queue_len(struct zebra_dplane_provider *prov); + /* Enqueue completed work, maintain associated counter and locking */ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, struct zebra_dplane_ctx *ctx); diff --git a/zebra/zebra_evpn.c b/zebra/zebra_evpn.c index 67df841b21..b232c664bc 100644 --- a/zebra/zebra_evpn.c +++ b/zebra/zebra_evpn.c @@ -1046,6 +1046,9 @@ int zebra_evpn_del(zebra_evpn_t *zevpn) hash_free(zevpn->mac_table); zevpn->mac_table = NULL; + /* Remove references to the zevpn in the MH databases */ + if (zevpn->vxlan_if) + zebra_evpn_vxl_evpn_set(zevpn->vxlan_if->info, zevpn, false); zebra_evpn_es_evi_cleanup(zevpn); /* Free the EVPN hash entry and allocated memory. 
*/ @@ -1333,7 +1336,8 @@ zebra_evpn_process_sync_macip_add(zebra_evpn_t *zevpn, struct ethaddr *macaddr, if (ipa_len) { n = zebra_evpn_neigh_lookup(zevpn, ipaddr); if (n - && !zebra_evpn_neigh_is_bgp_seq_ok(zevpn, n, macaddr, seq)) + && !zebra_evpn_neigh_is_bgp_seq_ok(zevpn, n, macaddr, seq, + true)) return; } diff --git a/zebra/zebra_evpn_mac.c b/zebra/zebra_evpn_mac.c index 44394b95aa..376721f83a 100644 --- a/zebra/zebra_evpn_mac.c +++ b/zebra/zebra_evpn_mac.c @@ -1395,16 +1395,21 @@ void zebra_evpn_sync_mac_del(zebra_mac_t *mac) static inline bool zebra_evpn_mac_is_bgp_seq_ok(zebra_evpn_t *zevpn, zebra_mac_t *mac, uint32_t seq, uint16_t ipa_len, - struct ipaddr *ipaddr) + struct ipaddr *ipaddr, + bool sync) { char macbuf[ETHER_ADDR_STRLEN]; char ipbuf[INET6_ADDRSTRLEN]; uint32_t tmp_seq; + const char *n_type; - if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) + if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) { tmp_seq = mac->loc_seq; - else + n_type = "local"; + } else { tmp_seq = mac->rem_seq; + n_type = "remote"; + } if (seq < tmp_seq) { /* if the mac was never advertised to bgp we must accept @@ -1413,10 +1418,11 @@ static inline bool zebra_evpn_mac_is_bgp_seq_ok(zebra_evpn_t *zevpn, */ if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL) && !zebra_evpn_mac_is_ready_for_bgp(mac->flags)) { - if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC || IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - "sync-macip accept vni %u mac %s%s%s lower seq %u f 0x%x", - zevpn->vni, + "%s-macip accept vni %u %s-mac %s%s%s lower seq %u f 0x%x", + sync ? "sync" : "rem", zevpn->vni, + n_type, prefix_mac2str(&mac->macaddr, macbuf, sizeof(macbuf)), ipa_len ? 
" IP " : "", @@ -1427,10 +1433,10 @@ static inline bool zebra_evpn_mac_is_bgp_seq_ok(zebra_evpn_t *zevpn, return true; } - if (IS_ZEBRA_DEBUG_EVPN_MH_MAC) + if (IS_ZEBRA_DEBUG_EVPN_MH_MAC || IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - "sync-macip ignore vni %u mac %s%s%s as existing has higher seq %u f 0x%x", - zevpn->vni, + "%s-macip ignore vni %u %s-mac %s%s%s as existing has higher seq %u f 0x%x", + sync ? "sync" : "rem", zevpn->vni, n_type, prefix_mac2str(&mac->macaddr, macbuf, sizeof(macbuf)), ipa_len ? " IP " : "", @@ -1518,7 +1524,7 @@ zebra_evpn_proc_sync_mac_update(zebra_evpn_t *zevpn, struct ethaddr *macaddr, return NULL; } if (!zebra_evpn_mac_is_bgp_seq_ok(zevpn, mac, seq, ipa_len, - ipaddr)) { + ipaddr, true)) { ctx->ignore_macip = true; return NULL; } @@ -1768,7 +1774,6 @@ int process_mac_remote_macip_add(zebra_evpn_t *zevpn, struct zebra_vrf *zvrf, { char buf[ETHER_ADDR_STRLEN]; char buf1[INET6_ADDRSTRLEN]; - uint32_t tmp_seq; bool sticky; bool remote_gw; int update_mac = 0; @@ -1828,8 +1833,6 @@ int process_mac_remote_macip_add(zebra_evpn_t *zevpn, struct zebra_vrf *zvrf, if (ipa_len) SET_FLAG(mac->flags, ZEBRA_MAC_AUTO); } else { - zebra_evpn_es_mac_ref(mac, esi); - /* When host moves but changes its (MAC,IP) * binding, BGP may install a MACIP entry that * corresponds to "older" location of the host @@ -1838,26 +1841,11 @@ int process_mac_remote_macip_add(zebra_evpn_t *zevpn, struct zebra_vrf *zvrf, * the sequence number and ignore this update * if appropriate. */ - if (CHECK_FLAG(mac->flags, ZEBRA_MAC_LOCAL)) - tmp_seq = mac->loc_seq; - else - tmp_seq = mac->rem_seq; - - if (seq < tmp_seq) { - if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug( - "Ignore remote MACIP ADD VNI %u MAC %s%s%s as existing MAC has higher seq %u flags 0x%x", - zevpn->vni, - prefix_mac2str(macaddr, buf, - sizeof(buf)), - ipa_len ? " IP " : "", - ipa_len ? 
ipaddr2str( - ipaddr, buf1, - sizeof(buf1)) - : "", - tmp_seq, mac->flags); + if (!zebra_evpn_mac_is_bgp_seq_ok( + zevpn, mac, seq, ipa_len, ipaddr, false)) return -1; - } + + zebra_evpn_es_mac_ref(mac, esi); } /* Check MAC's curent state is local (this is the case diff --git a/zebra/zebra_evpn_mh.c b/zebra/zebra_evpn_mh.c index 53412a434e..7e712bf1ee 100644 --- a/zebra/zebra_evpn_mh.c +++ b/zebra/zebra_evpn_mh.c @@ -63,13 +63,14 @@ static void zebra_evpn_es_get_one_base_evpn(void); static int zebra_evpn_es_evi_send_to_client(struct zebra_evpn_es *es, zebra_evpn_t *zevpn, bool add); static void zebra_evpn_local_es_del(struct zebra_evpn_es **esp); -static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, - struct ethaddr *sysmac); +static int zebra_evpn_local_es_update(struct zebra_if *zif, esi_t *esi); static bool zebra_evpn_es_br_port_dplane_update(struct zebra_evpn_es *es, const char *caller); static void zebra_evpn_mh_uplink_cfg_update(struct zebra_if *zif, bool set); -static void zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es); +static void zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es, + bool resync_dplane); static void zebra_evpn_mh_clear_protodown_es(struct zebra_evpn_es *es); +static void zebra_evpn_mh_startup_delay_timer_start(const char *rc); esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; @@ -416,15 +417,12 @@ void zebra_evpn_es_evi_show_vni(struct vty *vty, bool uj, vni_t vni, int detail) vty_out(vty, "Type: L local, R remote\n"); vty_out(vty, "%-8s %-30s %-4s\n", "VNI", "ESI", "Type"); } + zebra_evpn_es_evi_show_one_evpn(zevpn, vty, json_array, detail); } else { if (!uj) vty_out(vty, "VNI %d doesn't exist\n", vni); - - return; } - zebra_evpn_es_evi_show_one_evpn(zevpn, vty, json_array, detail); - if (uj) { vty_out(vty, "%s\n", json_object_to_json_string_ext( @@ -935,7 +933,7 @@ void zebra_evpn_if_init(struct zebra_if *zif) /* if an es_id and sysmac are already present against the interface * activate it */ - 
zebra_evpn_local_es_update(zif, zif->es_info.lid, &zif->es_info.sysmac); + zebra_evpn_local_es_update(zif, &zif->es_info.esi); } /* handle deletion of an access port by removing it from all associated @@ -1469,16 +1467,16 @@ static bool zebra_evpn_es_br_port_dplane_update(struct zebra_evpn_es *es, /* returns TRUE if dplane entry was updated */ static bool zebra_evpn_es_df_change(struct zebra_evpn_es *es, bool new_non_df, - const char *caller) + const char *caller, const char *reason) { bool old_non_df; old_non_df = !!(es->flags & ZEBRA_EVPNES_NON_DF); if (IS_ZEBRA_DEBUG_EVPN_MH_ES) - zlog_debug("df-change(%s) es %s old %s new %s", caller, - es->esi_str, old_non_df ? "non-df" : "df", - new_non_df ? "non-df" : "df"); + zlog_debug("df-change es %s %s to %s; %s: %s", es->esi_str, + old_non_df ? "non-df" : "df", + new_non_df ? "non-df" : "df", caller, reason); if (old_non_df == new_non_df) return false; @@ -1506,7 +1504,8 @@ static bool zebra_evpn_es_run_df_election(struct zebra_evpn_es *es, */ if (!(es->flags & ZEBRA_EVPNES_LOCAL) || !zmh_info->es_originator_ip.s_addr) - return zebra_evpn_es_df_change(es, new_non_df, caller); + return zebra_evpn_es_df_change(es, new_non_df, caller, + "not-ready"); /* if oper-state is down DF filtering must be on. 
when the link comes * up again dataplane should block BUM till FRR has had the chance @@ -1514,7 +1513,18 @@ static bool zebra_evpn_es_run_df_election(struct zebra_evpn_es *es, */ if (!(es->flags & ZEBRA_EVPNES_OPER_UP)) { new_non_df = true; - return zebra_evpn_es_df_change(es, new_non_df, caller); + return zebra_evpn_es_df_change(es, new_non_df, caller, + "oper-down"); + } + + /* ES was just created; we need to wait for the peers to rx the + * our Type-4 routes and for the switch to import the peers' Type-4 + * routes + */ + if (es->df_delay_timer) { + new_non_df = true; + return zebra_evpn_es_df_change(es, new_non_df, caller, + "df-delay"); } for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { @@ -1546,7 +1556,7 @@ static bool zebra_evpn_es_run_df_election(struct zebra_evpn_es *es, } } - return zebra_evpn_es_df_change(es, new_non_df, caller); + return zebra_evpn_es_df_change(es, new_non_df, caller, "elected"); } static void zebra_evpn_es_vtep_add(struct zebra_evpn_es *es, @@ -1636,6 +1646,9 @@ static struct zebra_evpn_es *zebra_evpn_es_new(esi_t *esi) { struct zebra_evpn_es *es; + if (!memcmp(esi, zero_esi, sizeof(esi_t))) + return NULL; + es = XCALLOC(MTYPE_ZES, sizeof(struct zebra_evpn_es)); /* fill in ESI */ @@ -1851,6 +1864,8 @@ static void zebra_evpn_es_setup_evis(struct zebra_evpn_es *es) uint16_t vid; struct zebra_evpn_access_bd *acc_bd; + if (!bf_is_inited(zif->vlan_bitmap)) + return; bf_for_each_set_bit(zif->vlan_bitmap, vid, IF_VLAN_BITMAP_MAX) { acc_bd = zebra_evpn_acc_vl_find(vid); @@ -1928,6 +1943,37 @@ static void zebra_evpn_mh_dup_addr_detect_off(void) } } +/* On config of first local-ES turn off advertisement of STALE/DELAY/PROBE + * neighbors + */ +static void zebra_evpn_mh_advertise_reach_neigh_only(void) +{ + if (zmh_info->flags & ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY) + return; + + zmh_info->flags |= ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY; + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("evpn-mh: only REACHABLE neigh advertised"); + + 
/* XXX - if STALE/DELAY/PROBE neighs were previously advertised we + * need to withdraw them + */ +} + +static int zebra_evpn_es_df_delay_exp_cb(struct thread *t) +{ + struct zebra_evpn_es *es; + + es = THREAD_ARG(t); + + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("es %s df-delay expired", es->esi_str); + + zebra_evpn_es_run_df_election(es, __func__); + + return 0; +} + static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, struct zebra_if *zif) { @@ -1939,6 +1985,7 @@ static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, es->nhg_id, zif->ifp->name); zebra_evpn_mh_dup_addr_detect_off(); + zebra_evpn_mh_advertise_reach_neigh_only(); es->flags |= ZEBRA_EVPNES_LOCAL; listnode_init(&es->local_es_listnode, es); @@ -1967,6 +2014,12 @@ static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, zebra_evpn_es_re_eval_send_to_client(es, false /* es_evi_re_reval */); + /* Start the DF delay timer on the local ES */ + if (!es->df_delay_timer) + thread_add_timer(zrouter.master, zebra_evpn_es_df_delay_exp_cb, + es, ZEBRA_EVPN_MH_DF_DELAY_TIME, + &es->df_delay_timer); + /* See if the local VTEP can function as DF on the ES */ if (!zebra_evpn_es_run_df_election(es, __func__)) { /* check if the dplane entry needs to be re-programmed as a @@ -1989,7 +2042,7 @@ static void zebra_evpn_es_local_info_set(struct zebra_evpn_es *es, false /* force_clear_static */); /* inherit EVPN protodown flags on the access port */ - zebra_evpn_mh_update_protodown_es(es); + zebra_evpn_mh_update_protodown_es(es, true /*resync_dplane*/); } static void zebra_evpn_es_local_info_clear(struct zebra_evpn_es **esp) @@ -2003,6 +2056,8 @@ static void zebra_evpn_es_local_info_clear(struct zebra_evpn_es **esp) es->flags &= ~(ZEBRA_EVPNES_LOCAL | ZEBRA_EVPNES_READY_FOR_BGP); + THREAD_OFF(es->df_delay_timer); + /* remove the DF filter */ dplane_updated = zebra_evpn_es_run_df_election(es, __func__); @@ -2090,17 +2145,50 @@ static void zebra_evpn_es_remote_info_re_eval(struct 
zebra_evpn_es **esp) /* A new local es is created when a local-es-id and sysmac is configured * against an interface. */ -static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, - struct ethaddr *sysmac) +static int zebra_evpn_local_es_update(struct zebra_if *zif, esi_t *esi) { struct zebra_evpn_es *old_es = zif->es_info.es; struct zebra_evpn_es *es; + + memcpy(&zif->es_info.esi, esi, sizeof(*esi)); + if (old_es && !memcmp(&old_es->esi, esi, sizeof(*esi))) + /* dup - nothing to be done */ + return 0; + + /* release the old_es against the zif */ + if (old_es) + zebra_evpn_local_es_del(&old_es); + + es = zebra_evpn_es_find(esi); + if (es) { + /* if it exists against another interface flag an error */ + if (es->zif && es->zif != zif) { + memset(&zif->es_info.esi, 0, sizeof(*esi)); + return -1; + } + } else { + /* create new es */ + es = zebra_evpn_es_new(esi); + } + + if (es) + zebra_evpn_es_local_info_set(es, zif); + + return 0; +} + +static int zebra_evpn_type3_esi_update(struct zebra_if *zif, uint32_t lid, + struct ethaddr *sysmac) +{ + struct zebra_evpn_es *old_es = zif->es_info.es; esi_t esi; int offset = 0; int field_bytes = 0; /* Complete config of the ES-ID bootstraps the ES */ if (!lid || is_zero_mac(sysmac)) { + /* clear old esi */ + memset(&zif->es_info.esi, 0, sizeof(zif->es_info.esi)); /* if in ES is attached to zif delete it */ if (old_es) zebra_evpn_local_es_del(&old_es); @@ -2122,27 +2210,7 @@ static int zebra_evpn_local_es_update(struct zebra_if *zif, uint32_t lid, esi.val[offset++] = (uint8_t)(lid >> 8); esi.val[offset++] = (uint8_t)lid; - if (old_es && !memcmp(&old_es->esi, &esi, sizeof(esi_t))) - /* dup - nothing to be done */ - return 0; - - /* release the old_es against the zif */ - if (old_es) - zebra_evpn_local_es_del(&old_es); - - es = zebra_evpn_es_find(&esi); - if (es) { - /* if it exists against another interface flag an error */ - if (es->zif && es->zif != zif) - return -1; - } else { - /* create new es */ - es = 
zebra_evpn_es_new(&esi); - } - - zebra_evpn_es_local_info_set(es, zif); - - return 0; + return zebra_evpn_local_es_update(zif, &esi); } static int zebra_evpn_remote_es_del(esi_t *esi, struct in_addr vtep_ip) @@ -2349,7 +2417,7 @@ static int zebra_evpn_es_sys_mac_update(struct zebra_if *zif, { int rv; - rv = zebra_evpn_local_es_update(zif, zif->es_info.lid, sysmac); + rv = zebra_evpn_type3_esi_update(zif, zif->es_info.lid, sysmac); if (!rv) memcpy(&zif->es_info.sysmac, sysmac, sizeof(struct ethaddr)); @@ -2361,13 +2429,29 @@ static int zebra_evpn_es_lid_update(struct zebra_if *zif, uint32_t lid) { int rv; - rv = zebra_evpn_local_es_update(zif, lid, &zif->es_info.sysmac); + rv = zebra_evpn_type3_esi_update(zif, lid, &zif->es_info.sysmac); if (!rv) zif->es_info.lid = lid; return rv; } +/* type-0 esi has changed */ +static int zebra_evpn_es_type0_esi_update(struct zebra_if *zif, esi_t *esi) +{ + int rv; + + rv = zebra_evpn_local_es_update(zif, esi); + + /* clear the old es_lid, es_sysmac - type-0 is being set so old + * type-3 params need to be flushed + */ + memset(&zif->es_info.sysmac, 0, sizeof(struct ethaddr)); + zif->es_info.lid = 0; + + return rv; +} + void zebra_evpn_es_cleanup(void) { struct zebra_evpn_es *es; @@ -2425,10 +2509,10 @@ void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif) char buf[ETHER_ADDR_STRLEN]; char mh_buf[80]; bool vty_print = false; + char esi_buf[ESI_STR_LEN]; mh_buf[0] = '\0'; - snprintf(mh_buf + strlen(mh_buf), sizeof(mh_buf) - strlen(mh_buf), - " EVPN-MH:"); + strlcat(mh_buf, " EVPN-MH:", sizeof(mh_buf)); if (zif->es_info.lid || !is_zero_mac(&zif->es_info.sysmac)) { vty_print = true; snprintf( @@ -2436,17 +2520,21 @@ void zebra_evpn_if_es_print(struct vty *vty, struct zebra_if *zif) sizeof(mh_buf) - strlen(mh_buf), " ES id %u ES sysmac %s", zif->es_info.lid, prefix_mac2str(&zif->es_info.sysmac, buf, sizeof(buf))); + } else if (memcmp(&zif->es_info.esi, zero_esi, sizeof(*zero_esi))) { + vty_print = true; + snprintf(mh_buf 
+ strnlen(mh_buf, sizeof(mh_buf)), + sizeof(mh_buf) - strnlen(mh_buf, sizeof(mh_buf)), + " ES id %s", + esi_to_str(&zif->es_info.esi, esi_buf, + sizeof(esi_buf))); } if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK) { vty_print = true; if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK_OPER_UP) - snprintf(mh_buf + strlen(mh_buf), - sizeof(mh_buf) - strlen(mh_buf), " uplink-up"); + strlcat(mh_buf, " uplink (up)", sizeof(mh_buf)); else - snprintf(mh_buf + strlen(mh_buf), - sizeof(mh_buf) - strlen(mh_buf), - " uplink-down"); + strlcat(mh_buf, " uplink (down)", sizeof(mh_buf)); } if (vty_print) @@ -2659,6 +2747,7 @@ static void zebra_evpn_es_show_entry_detail(struct vty *vty, char alg_buf[EVPN_DF_ALG_STR_LEN]; struct zebra_evpn_es_vtep *es_vtep; struct listnode *node; + char thread_buf[THREAD_TIMER_STRLEN]; if (json) { json_object *json_vteps; @@ -2695,6 +2784,12 @@ static void zebra_evpn_es_show_entry_detail(struct vty *vty, listcount(es->es_evi_list)); json_object_int_add(json, "macCount", listcount(es->mac_list)); json_object_int_add(json, "dfPreference", es->df_pref); + if (es->df_delay_timer) + json_object_string_add( + json, "dfDelayTimer", + thread_timer_to_hhmmss(thread_buf, + sizeof(thread_buf), + es->df_delay_timer)); json_object_int_add(json, "nexthopGroup", es->nhg_id); if (listcount(es->es_vtep_list)) { json_vteps = json_object_new_array(); @@ -2729,9 +2824,16 @@ static void zebra_evpn_es_show_entry_detail(struct vty *vty, "yes" : "no"); vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list)); vty_out(vty, " MAC Count: %d\n", listcount(es->mac_list)); - vty_out(vty, " DF: status: %s preference: %u\n", - (es->flags & ZEBRA_EVPNES_NON_DF) ? "non-df" : "df", - es->df_pref); + if (es->flags & ZEBRA_EVPNES_LOCAL) + vty_out(vty, " DF status: %s \n", + (es->flags & ZEBRA_EVPNES_NON_DF) ? 
"non-df" + : "df"); + if (es->df_delay_timer) + vty_out(vty, " DF delay: %s\n", + thread_timer_to_hhmmss(thread_buf, + sizeof(thread_buf), + es->df_delay_timer)); + vty_out(vty, " DF preference: %u\n", es->df_pref); vty_out(vty, " Nexthop group: %u\n", es->nhg_id); vty_out(vty, " VTEPs:\n"); for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { @@ -2833,14 +2935,25 @@ int zebra_evpn_mh_if_write(struct vty *vty, struct interface *ifp) { struct zebra_if *zif = ifp->info; char buf[ETHER_ADDR_STRLEN]; + bool type_3_esi = false; + char esi_buf[ESI_STR_LEN]; - if (zif->es_info.lid) + if (zif->es_info.lid) { vty_out(vty, " evpn mh es-id %u\n", zif->es_info.lid); + type_3_esi = true; + } - if (!is_zero_mac(&zif->es_info.sysmac)) + if (!is_zero_mac(&zif->es_info.sysmac)) { vty_out(vty, " evpn mh es-sys-mac %s\n", prefix_mac2str(&zif->es_info.sysmac, buf, sizeof(buf))); + type_3_esi = true; + } + + if (!type_3_esi + && memcmp(&zif->es_info.esi, zero_esi, sizeof(*zero_esi))) + vty_out(vty, " evpn mh es-id %s\n", + esi_to_str(&zif->es_info.esi, esi_buf, sizeof(esi_buf))); if (zif->es_info.df_pref) vty_out(vty, " evpn mh es-df-pref %u\n", zif->es_info.df_pref); @@ -2929,22 +3042,28 @@ DEFPY(zebra_evpn_es_sys_mac, /* CLI for setting up local-ID part of ESI on an access port */ DEFPY(zebra_evpn_es_id, zebra_evpn_es_id_cmd, - "[no$no] evpn mh es-id [(1-16777215)$es_lid]", + "[no$no] evpn mh es-id [(1-16777215)$es_lid | NAME$esi_str]", NO_STR "EVPN\n" EVPN_MH_VTY_STR - "Ethernet segment local identifier\n" - "ID\n" + "Ethernet segment identifier\n" + "local discriminator\n" + "10-byte ID - 00:AA:BB:CC:DD:EE:FF:GG:HH:II\n" ) { VTY_DECLVAR_CONTEXT(interface, ifp); struct zebra_if *zif; - int ret; + int ret = 0; + esi_t esi; zif = ifp->info; if (no) { - ret = zebra_evpn_es_lid_update(zif, 0); + if (zif->es_info.lid) + ret = zebra_evpn_es_lid_update(zif, 0); + else if (memcmp(&zif->es_info.esi, zero_esi, sizeof(*zero_esi))) + ret = zebra_evpn_es_type0_esi_update(zif, zero_esi); 
+ if (ret == -1) { vty_out(vty, "%%Failed to clear ES local id\n"); return CMD_WARNING; @@ -2956,14 +3075,23 @@ DEFPY(zebra_evpn_es_id, return CMD_WARNING; } - if (!es_lid) { - vty_out(vty, "%%Specify local ES ID\n"); - return CMD_WARNING; + if (esi_str) { + if (!str_to_esi(esi_str, &esi)) { + vty_out(vty, "%% Malformed ESI\n"); + return CMD_WARNING; + } + ret = zebra_evpn_es_type0_esi_update(zif, &esi); + } else { + if (!es_lid) { + vty_out(vty, "%%Specify local ES ID\n"); + return CMD_WARNING; + } + ret = zebra_evpn_es_lid_update(zif, es_lid); } - ret = zebra_evpn_es_lid_update(zif, es_lid); + if (ret == -1) { vty_out(vty, - "%%ESI already exists on a different interface\n"); + "%%ESI already exists on a different interface\n"); return CMD_WARNING; } } @@ -3029,7 +3157,7 @@ void zebra_evpn_mh_print(struct vty *vty) vty_out(vty, " uplink-cfg-cnt: %u, uplink-active-cnt: %u\n", zmh_info->uplink_cfg_cnt, zmh_info->uplink_oper_up_cnt); if (zmh_info->protodown_rc) - vty_out(vty, " protodown: %s\n", + vty_out(vty, " protodown reasons: %s\n", zebra_protodown_rc_str(zmh_info->protodown_rc, pd_buf, sizeof(pd_buf))); } @@ -3174,16 +3302,14 @@ void zebra_evpn_mh_update_protodown_bond_mbr(struct zebra_if *zif, bool clear, protodown_rc = bond_zif->protodown_rc; } - if (zif->protodown_rc == protodown_rc) - return; - old_protodown = !!(zif->flags & ZIF_FLAG_PROTODOWN); old_protodown_rc = zif->protodown_rc; zif->protodown_rc &= ~ZEBRA_PROTODOWN_EVPN_ALL; zif->protodown_rc |= (protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL); new_protodown = !!zif->protodown_rc; - if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + if (IS_ZEBRA_DEBUG_EVPN_MH_ES + && (zif->protodown_rc != old_protodown_rc)) zlog_debug( "%s bond mbr %s protodown_rc changed; old 0x%x new 0x%x", caller, zif->ifp->name, old_protodown_rc, @@ -3220,14 +3346,20 @@ static void zebra_evpn_mh_update_protodown_bond(struct zebra_if *bond_zif) } /* The global EVPN MH protodown rc is applied to all local ESs */ -static void 
zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es) +static void zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es, + bool resync_dplane) { struct zebra_if *zif; enum protodown_reasons old_protodown_rc; zif = es->zif; - if ((zif->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL) - == (zmh_info->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL)) + /* if the reason code is the same bail unless it is a new + * ES bond in that case we would need to ensure that the + * dplane is really in sync with zebra + */ + if (!resync_dplane + && (zif->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL) + == (zmh_info->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL)) return; old_protodown_rc = zif->protodown_rc; @@ -3235,7 +3367,8 @@ static void zebra_evpn_mh_update_protodown_es(struct zebra_evpn_es *es) zif->protodown_rc |= (zmh_info->protodown_rc & ZEBRA_PROTODOWN_EVPN_ALL); - if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + if (IS_ZEBRA_DEBUG_EVPN_MH_ES + && (old_protodown_rc != zif->protodown_rc)) zlog_debug( "es %s ifp %s protodown_rc changed; old 0x%x new 0x%x", es->esi_str, zif->ifp->name, old_protodown_rc, @@ -3273,7 +3406,7 @@ static void zebra_evpn_mh_update_protodown_es_all(void) struct zebra_evpn_es *es; for (ALL_LIST_ELEMENTS_RO(zmh_info->local_es_list, node, es)) - zebra_evpn_mh_update_protodown_es(es); + zebra_evpn_mh_update_protodown_es(es, false /*resync_dplane*/); } static void zebra_evpn_mh_update_protodown(enum protodown_reasons protodown_rc, @@ -3381,6 +3514,13 @@ void zebra_evpn_mh_uplink_oper_update(struct zebra_if *zif) if (old_protodown == new_protodown) return; + /* if protodown_rc XXX_UPLINK_DOWN is about to be cleared + * fire up the start-up delay timer to allow the EVPN network + * to converge (Type-2 routes need to be advertised and processed) + */ + if (!new_protodown && (zmh_info->uplink_oper_up_cnt == 1)) + zebra_evpn_mh_startup_delay_timer_start("uplink-up"); + zebra_evpn_mh_update_protodown(ZEBRA_PROTODOWN_EVPN_UPLINK_DOWN, new_protodown); } @@ -3396,26 +3536,19 @@ static int 
zebra_evpn_mh_startup_delay_exp_cb(struct thread *t) return 0; } -static void zebra_evpn_mh_startup_delay_timer_start(bool init) +static void zebra_evpn_mh_startup_delay_timer_start(const char *rc) { - /* 1. This timer can be started during init. - * 2. It can also be restarted if it is alreay running and the - * admin wants to increase or decrease its value - */ - if (!init && !zmh_info->startup_delay_timer) - return; - if (zmh_info->startup_delay_timer) { if (IS_ZEBRA_DEBUG_EVPN_MH_ES) zlog_debug("startup-delay timer cancelled"); - thread_cancel(&zmh_info->startup_delay_timer); - zmh_info->startup_delay_timer = NULL; + THREAD_OFF(zmh_info->startup_delay_timer); } if (zmh_info->startup_delay_time) { if (IS_ZEBRA_DEBUG_EVPN_MH_ES) - zlog_debug("startup-delay timer started for %d sec", - zmh_info->startup_delay_time); + zlog_debug( + "startup-delay timer started for %d sec on %s", + zmh_info->startup_delay_time, rc); thread_add_timer(zrouter.master, zebra_evpn_mh_startup_delay_exp_cb, NULL, zmh_info->startup_delay_time, @@ -3476,7 +3609,12 @@ int zebra_evpn_mh_startup_delay_update(struct vty *vty, uint32_t duration, duration = ZEBRA_EVPN_MH_STARTUP_DELAY_DEF; zmh_info->startup_delay_time = duration; - zebra_evpn_mh_startup_delay_timer_start(false /* init */); + + /* if startup_delay_timer is running allow it to be adjusted + * up or down + */ + if (zmh_info->startup_delay_timer) + zebra_evpn_mh_startup_delay_timer_start("config"); return 0; } @@ -3526,7 +3664,7 @@ void zebra_evpn_mh_init(void) zebra_evpn_acc_vl_cmp, "access VLAN hash table"); zmh_info->startup_delay_time = ZEBRA_EVPN_MH_STARTUP_DELAY_DEF; - zebra_evpn_mh_startup_delay_timer_start(true /*init*/); + zebra_evpn_mh_startup_delay_timer_start("init"); } void zebra_evpn_mh_terminate(void) diff --git a/zebra/zebra_evpn_mh.h b/zebra/zebra_evpn_mh.h index dc2c299cf2..81ae740d49 100644 --- a/zebra/zebra_evpn_mh.h +++ b/zebra/zebra_evpn_mh.h @@ -88,6 +88,13 @@ struct zebra_evpn_es { * advertised via the ESR */ 
uint16_t df_pref; + + /* When a new ES is configured it is held in a non-DF state + * for 3 seconds. This allows the peer Type-4 routes to be + * imported before running the DF election. + */ +#define ZEBRA_EVPN_MH_DF_DELAY_TIME 3 /* seconds */ + struct thread *df_delay_timer; }; RB_HEAD(zebra_es_rb_head, zebra_evpn_es); RB_PROTOTYPE(zebra_es_rb_head, zebra_evpn_es, rb_node, zebra_es_rb_cmp); @@ -188,6 +195,11 @@ struct zebra_evpn_mh_info { * first local ES, DAD is turned off */ #define ZEBRA_EVPN_MH_DUP_ADDR_DETECT_OFF (1 << 1) +/* If EVPN MH is enabled we only advertise REACHABLE neigh entries as Type-2 + * routes. As there is no global config knob for enabling EVPN MH we turn + * this flag when the first local ES is detected. + */ +#define ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY (1 << 2) /* RB tree of Ethernet segments (used for EVPN-MH) */ struct zebra_es_rb_head es_rb_tree; @@ -268,6 +280,12 @@ static inline bool zebra_evpn_mh_do_dup_addr_detect(void) return !(zmh_info->flags & ZEBRA_EVPN_MH_DUP_ADDR_DETECT_OFF); } +static inline bool zebra_evpn_mh_do_adv_reachable_neigh_only(void) +{ + return !!(zmh_info->flags & ZEBRA_EVPN_MH_ADV_REACHABLE_NEIGH_ONLY); +} + + /*****************************************************************************/ extern esi_t *zero_esi; extern void zebra_evpn_mh_init(void); diff --git a/zebra/zebra_evpn_neigh.c b/zebra/zebra_evpn_neigh.c index 6d72bc570e..1f45b72e3a 100644 --- a/zebra/zebra_evpn_neigh.c +++ b/zebra/zebra_evpn_neigh.c @@ -529,16 +529,21 @@ static void zebra_evpn_local_neigh_deref_mac(zebra_neigh_t *n, } bool zebra_evpn_neigh_is_bgp_seq_ok(zebra_evpn_t *zevpn, zebra_neigh_t *n, - struct ethaddr *macaddr, uint32_t seq) + struct ethaddr *macaddr, uint32_t seq, + bool sync) { char macbuf[ETHER_ADDR_STRLEN]; char ipbuf[INET6_ADDRSTRLEN]; uint32_t tmp_seq; + const char *n_type; - if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) + if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { tmp_seq = n->loc_seq; - else + n_type = "local"; + } 
else { tmp_seq = n->rem_seq; + n_type = "remote"; + } if (seq < tmp_seq) { /* if the neigh was never advertised to bgp we must accept @@ -547,10 +552,12 @@ bool zebra_evpn_neigh_is_bgp_seq_ok(zebra_evpn_t *zevpn, zebra_neigh_t *n, */ if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL) && !zebra_evpn_neigh_is_ready_for_bgp(n)) { - if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH + || IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - "sync-macip accept vni %u mac %s IP %s lower seq %u f 0x%x", - zevpn->vni, + "%s-macip accept vni %u %s mac %s IP %s lower seq %u f 0x%x", + sync ? "sync" : "remote", zevpn->vni, + n_type, prefix_mac2str(macaddr, macbuf, sizeof(macbuf)), ipaddr2str(&n->ip, ipbuf, @@ -559,10 +566,10 @@ bool zebra_evpn_neigh_is_bgp_seq_ok(zebra_evpn_t *zevpn, zebra_neigh_t *n, return true; } - if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) + if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH || IS_ZEBRA_DEBUG_VXLAN) zlog_debug( - "sync-macip ignore vni %u mac %s IP %s as existing has higher seq %u f 0x%x", - zevpn->vni, + "%s-macip ignore vni %u %s mac %s IP %s as existing has higher seq %u f 0x%x", + sync ? "sync" : "remote", zevpn->vni, n_type, prefix_mac2str(macaddr, macbuf, sizeof(macbuf)), ipaddr2str(&n->ip, ipbuf, sizeof(ipbuf)), tmp_seq, n->flags); @@ -1453,6 +1460,9 @@ int zebra_evpn_local_neigh_update(zebra_evpn_t *zevpn, struct interface *ifp, new_bgp_ready = zebra_evpn_neigh_is_ready_for_bgp(n); + if (dp_static != new_static) + inform_dataplane = true; + /* Neigh is in freeze state and freeze action * is enabled, do not send update to client. 
*/ @@ -1467,6 +1477,12 @@ int zebra_evpn_local_neigh_update(zebra_evpn_t *zevpn, struct interface *ifp, old_bgp_ready, new_bgp_ready, false, false, "flag-update"); + if (inform_dataplane) + zebra_evpn_sync_neigh_dp_install( + n, false /* set_inactive */, + false /* force_clear_static */, + __func__); + /* if the neigh can no longer be advertised * remove it from bgp */ @@ -1578,15 +1594,11 @@ int zebra_evpn_local_neigh_update(zebra_evpn_t *zevpn, struct interface *ifp, else UNSET_FLAG(n->flags, ZEBRA_NEIGH_ROUTER_FLAG); - /* if the dataplane thinks that this is a sync entry but - * zebra doesn't we need to re-concile the diff - * by re-installing the dataplane entry - */ - if (dp_static) { - new_static = zebra_evpn_neigh_is_static(n); - if (!new_static) - inform_dataplane = true; - } + /* if zebra and dataplane don't agree this is a sync entry + * re-install in the dataplane */ + new_static = zebra_evpn_neigh_is_static(n); + if (dp_static != new_static) + inform_dataplane = true; /* Check old and/or new MAC detected as duplicate mark * the neigh as duplicate @@ -2128,7 +2140,6 @@ void process_neigh_remote_macip_add(zebra_evpn_t *zevpn, struct zebra_vrf *zvrf, { zebra_neigh_t *n; int update_neigh = 0; - uint32_t tmp_seq; char buf[ETHER_ADDR_STRLEN]; char buf1[INET6_ADDRSTRLEN]; zebra_mac_t *old_mac = NULL; @@ -2165,8 +2176,6 @@ void process_neigh_remote_macip_add(zebra_evpn_t *zevpn, struct zebra_vrf *zvrf, } } else { - const char *n_type; - /* When host moves but changes its (MAC,IP) * binding, BGP may install a MACIP entry that * corresponds to "older" location of the host @@ -2175,27 +2184,10 @@ void process_neigh_remote_macip_add(zebra_evpn_t *zevpn, struct zebra_vrf *zvrf, * the sequence number and ignore this update * if appropriate. 
*/ - if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { - tmp_seq = n->loc_seq; - n_type = "local"; - } else { - tmp_seq = n->rem_seq; - n_type = "remote"; - } - if (seq < tmp_seq) { - if (IS_ZEBRA_DEBUG_VXLAN) - zlog_debug( - "Ignore remote MACIP ADD VNI %u MAC %s%s%s as existing %s Neigh has higher seq %u", - zevpn->vni, - prefix_mac2str(&mac->macaddr, - buf, - sizeof(buf)), - " IP ", - ipaddr2str(ipaddr, buf1, - sizeof(buf1)), - n_type, tmp_seq); + + if (!zebra_evpn_neigh_is_bgp_seq_ok( + zevpn, n, &mac->macaddr, seq, false)) return; - } if (CHECK_FLAG(n->flags, ZEBRA_NEIGH_LOCAL)) { old_static = zebra_evpn_neigh_is_static(n); if (IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) diff --git a/zebra/zebra_evpn_neigh.h b/zebra/zebra_evpn_neigh.h index 50efdc0e0d..eac17a09b4 100644 --- a/zebra/zebra_evpn_neigh.h +++ b/zebra/zebra_evpn_neigh.h @@ -237,7 +237,8 @@ int zebra_evpn_neigh_send_del_to_client(vni_t vni, struct ipaddr *ip, struct ethaddr *macaddr, uint32_t flags, int state, bool force); bool zebra_evpn_neigh_is_bgp_seq_ok(zebra_evpn_t *zevpn, zebra_neigh_t *n, - struct ethaddr *macaddr, uint32_t seq); + struct ethaddr *macaddr, uint32_t seq, + bool sync); int zebra_evpn_neigh_del(zebra_evpn_t *zevpn, zebra_neigh_t *n); void zebra_evpn_sync_neigh_del(zebra_neigh_t *n); zebra_neigh_t * diff --git a/zebra/zebra_memory.c b/zebra/zebra_memory.c index da8121774e..17b52a2bcb 100644 --- a/zebra/zebra_memory.c +++ b/zebra/zebra_memory.c @@ -30,3 +30,4 @@ DEFINE_MTYPE(ZEBRA, RE, "Route Entry") DEFINE_MTYPE(ZEBRA, RIB_DEST, "RIB destination") DEFINE_MTYPE(ZEBRA, ZVLAN, "VLAN") DEFINE_MTYPE(ZEBRA, ZVLAN_BITMAP, "VLAN bitmap") +DEFINE_MTYPE(ZEBRA, OPAQUE, "Opaque Data") diff --git a/zebra/zebra_memory.h b/zebra/zebra_memory.h index e15f972493..71901b765f 100644 --- a/zebra/zebra_memory.h +++ b/zebra/zebra_memory.h @@ -32,6 +32,7 @@ DECLARE_MGROUP(ZEBRA) DECLARE_MTYPE(ZEBRA_NS) DECLARE_MTYPE(RE) DECLARE_MTYPE(RIB_DEST) +DECLARE_MTYPE(OPAQUE) #ifdef __cplusplus } diff --git a/zebra/zebra_rib.c 
b/zebra/zebra_rib.c index eb6587f82f..07cf0604fa 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -250,8 +250,8 @@ done: return ret; } -void rib_handle_nhg_replace(struct nhg_hash_entry *old, - struct nhg_hash_entry *new) +void rib_handle_nhg_replace(struct nhg_hash_entry *old_entry, + struct nhg_hash_entry *new_entry) { struct zebra_router_table *zrt; struct route_node *rn; @@ -259,15 +259,15 @@ void rib_handle_nhg_replace(struct nhg_hash_entry *old, if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p", - __func__, new->id, new, old); + __func__, new_entry->id, new_entry, old_entry); /* We have to do them ALL */ RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) { for (rn = route_top(zrt->table); rn; rn = srcdest_route_next(rn)) { RNODE_FOREACH_RE_SAFE (rn, re, next) { - if (re->nhe && re->nhe == old) - route_entry_update_nhe(re, new); + if (re->nhe && re->nhe == old_entry) + route_entry_update_nhe(re, new_entry); } } } @@ -2665,6 +2665,8 @@ void rib_unlink(struct route_node *rn, struct route_entry *re) nexthops_free(re->fib_ng.nexthop); + XFREE(MTYPE_OPAQUE, re->opaque); + XFREE(MTYPE_RE, re); } @@ -2748,7 +2750,7 @@ static void _route_entry_dump_nh(const struct route_entry *re, if (nexthop->weight) snprintf(wgt_str, sizeof(wgt_str), "wgt %d,", nexthop->weight); - zlog_debug("%s: %s %s[%u] vrf %s(%u) %s%s with flags %s%s%s%s%s", + zlog_debug("%s: %s %s[%u] vrf %s(%u) %s%s with flags %s%s%s%s%s%s%s%s", straddr, (nexthop->rparent ? " NH" : "NH"), nhname, nexthop->ifindex, vrf ? vrf->name : "Unknown", nexthop->vrf_id, @@ -2767,7 +2769,13 @@ static void _route_entry_dump_nh(const struct route_entry *re, : ""), (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_DUPLICATE) ? "DUPLICATE " - : "")); + : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RNH_FILTERED) + ? "FILTERED " : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP) + ? 
"BACKUP " : ""), + (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE) + ? "SRTE " : "")); } @@ -3109,7 +3117,7 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, uint32_t flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh, uint32_t nhe_id, uint32_t table_id, uint32_t metric, - uint8_t distance, bool fromkernel, bool connected_down) + uint8_t distance, bool fromkernel) { struct route_table *table; struct route_node *rn; @@ -3315,19 +3323,6 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, rib_delnode(rn, same); } - /* - * This is to force an immediate re-eval of this particular - * node via nexthop tracking. Why? Because there are scenarios - * where the interface is flapping and the normal queuing methodology - * will cause down/up events to very very rarely be combined into - * a non-event from nexthop tracking perspective. Leading - * to some fun timing situations with upper level routing protocol - * trying to and failing to install routes during this blip. Especially - * when zebra is under load. 
- */ - if (connected_down) - zebra_rib_evaluate_rn_nexthops(rn, - zebra_router_get_next_sequence()); route_unlock_node(rn); return; } diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c index 7b0a1e3d9c..bbc8b6f19d 100644 --- a/zebra/zebra_routemap.c +++ b/zebra/zebra_routemap.c @@ -1668,20 +1668,13 @@ void zebra_routemap_finish(void) route_map_finish(); } -void zebra_route_map_write_delay_timer(struct vty *vty) -{ - if (vty && (zebra_rmap_update_timer != ZEBRA_RMAP_DEFAULT_UPDATE_TIMER)) - vty_out(vty, "zebra route-map delay-timer %d\n", - zebra_rmap_update_timer); - return; -} - route_map_result_t zebra_route_map_check(int family, int rib_type, uint8_t instance, const struct prefix *p, struct nexthop *nexthop, struct zebra_vrf *zvrf, route_tag_t tag) { struct route_map *rmap = NULL; + char *rm_name; route_map_result_t ret = RMAP_PERMITMATCH; struct nh_rmap_obj nh_obj; @@ -1692,10 +1685,20 @@ zebra_route_map_check(int family, int rib_type, uint8_t instance, nh_obj.metric = 0; nh_obj.tag = tag; - if (rib_type >= 0 && rib_type < ZEBRA_ROUTE_MAX) + if (rib_type >= 0 && rib_type < ZEBRA_ROUTE_MAX) { + rm_name = PROTO_RM_NAME(zvrf, family, rib_type); rmap = PROTO_RM_MAP(zvrf, family, rib_type); - if (!rmap && PROTO_RM_NAME(zvrf, family, ZEBRA_ROUTE_MAX)) + + if (rm_name && !rmap) + return RMAP_DENYMATCH; + } + if (!rmap) { + rm_name = PROTO_RM_NAME(zvrf, family, ZEBRA_ROUTE_MAX); rmap = PROTO_RM_MAP(zvrf, family, ZEBRA_ROUTE_MAX); + + if (rm_name && !rmap) + return RMAP_DENYMATCH; + } if (rmap) { ret = route_map_apply(rmap, p, &nh_obj); } @@ -1859,7 +1862,8 @@ void zebra_routemap_config_write_protocol(struct vty *vty, vty_out(vty, "%sipv6 nht %s route-map %s\n", space, "any", NHT_RM_NAME(zvrf, AFI_IP6, ZEBRA_ROUTE_MAX)); - if (zebra_rmap_update_timer != ZEBRA_RMAP_DEFAULT_UPDATE_TIMER) + if (zvrf_id(zvrf) == VRF_DEFAULT + && zebra_rmap_update_timer != ZEBRA_RMAP_DEFAULT_UPDATE_TIMER) vty_out(vty, "zebra route-map delay-timer %d\n", zebra_rmap_update_timer); } 
diff --git a/zebra/zebra_routemap.h b/zebra/zebra_routemap.h index 56e805ea03..251e07af72 100644 --- a/zebra/zebra_routemap.h +++ b/zebra/zebra_routemap.h @@ -36,8 +36,6 @@ extern void zebra_add_import_table_route_map(afi_t afi, const char *rmap_name, uint32_t table); extern void zebra_del_import_table_route_map(afi_t afi, uint32_t table); -extern void zebra_route_map_write_delay_timer(struct vty *); - extern route_map_result_t zebra_import_table_route_map_check(int family, int rib_type, uint8_t instance, const struct prefix *p, diff --git a/zebra/zebra_vrf.c b/zebra/zebra_vrf.c index b7cbf5262a..be4fb29aae 100644 --- a/zebra/zebra_vrf.c +++ b/zebra/zebra_vrf.c @@ -107,6 +107,8 @@ static int zebra_vrf_new(struct vrf *vrf) zvrf = zebra_vrf_alloc(); vrf->info = zvrf; zvrf->vrf = vrf; + if (!vrf_is_backend_netns()) + zvrf->zns = zebra_ns_lookup(NS_DEFAULT); otable_init(&zvrf->other_tables); diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 072ed169b6..f18d8fbb6d 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -419,6 +419,33 @@ static void show_nexthop_detail_helper(struct vty *vty, } } +static void zebra_show_ip_route_opaque(struct vty *vty, struct route_entry *re, + struct json_object *json) +{ + if (!re->opaque) + return; + + switch (re->type) { + case ZEBRA_ROUTE_SHARP: + if (json) + json_object_string_add(json, "opaque", + (char *)re->opaque->data); + else + vty_out(vty, " Opaque Data: %s", + (char *)re->opaque->data); + break; + case ZEBRA_ROUTE_BGP: + if (json) + json_object_string_add(json, "asPath", + (char *)re->opaque->data); + else + vty_out(vty, " AS-Path: %s", + (char *)re->opaque->data); + default: + break; + } +} + /* New RIB. Detailed information for IPv4 route. 
*/ static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, int mcast, bool use_fib, bool show_ng) @@ -495,6 +522,8 @@ static void vty_show_ip_route_detail(struct vty *vty, struct route_node *rn, if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) show_nh_backup_helper(vty, re, nexthop); } + zebra_show_ip_route_opaque(vty, re, NULL); + vty_out(vty, "\n"); } } @@ -927,6 +956,7 @@ static void vty_show_ip_route(struct vty *vty, struct route_node *rn, json_object_object_add(json_route, "backupNexthops", json_nexthops); } + zebra_show_ip_route_opaque(NULL, re, json_route); json_object_array_add(json, json_route); return; @@ -1274,11 +1304,11 @@ DEFPY (show_ip_nht, VRF_GET_ID(vrf_id, vrf_name, false); memset(&prefix, 0, sizeof(prefix)); - if (addr) + if (addr) { p = sockunion2hostprefix(addr, &prefix); - - if (!p) - return CMD_WARNING; + if (!p) + return CMD_WARNING; + } zebra_print_rnh_table(vrf_id, afi, vty, rtype, p); return CMD_SUCCESS; @@ -1301,7 +1331,7 @@ DEFUN (ip_nht_default_route, zvrf->zebra_rnh_ip_default_route = 1; - zebra_evaluate_rnh(zvrf, AFI_IP, 1, RNH_NEXTHOP_TYPE, NULL); + zebra_evaluate_rnh(zvrf, AFI_IP, 0, RNH_NEXTHOP_TYPE, NULL); return CMD_SUCCESS; } @@ -1623,7 +1653,7 @@ DEFUN (no_ip_nht_default_route, return CMD_SUCCESS; zvrf->zebra_rnh_ip_default_route = 0; - zebra_evaluate_rnh(zvrf, AFI_IP, 1, RNH_NEXTHOP_TYPE, NULL); + zebra_evaluate_rnh(zvrf, AFI_IP, 0, RNH_NEXTHOP_TYPE, NULL); return CMD_SUCCESS; } @@ -1643,7 +1673,7 @@ DEFUN (ipv6_nht_default_route, return CMD_SUCCESS; zvrf->zebra_rnh_ipv6_default_route = 1; - zebra_evaluate_rnh(zvrf, AFI_IP6, 1, RNH_NEXTHOP_TYPE, NULL); + zebra_evaluate_rnh(zvrf, AFI_IP6, 0, RNH_NEXTHOP_TYPE, NULL); return CMD_SUCCESS; } @@ -1665,7 +1695,7 @@ DEFUN (no_ipv6_nht_default_route, return CMD_SUCCESS; zvrf->zebra_rnh_ipv6_default_route = 0; - zebra_evaluate_rnh(zvrf, AFI_IP6, 1, RNH_NEXTHOP_TYPE, NULL); + zebra_evaluate_rnh(zvrf, AFI_IP6, 0, RNH_NEXTHOP_TYPE, NULL); return CMD_SUCCESS; } 
diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index 09cb1cffc1..697a6eecf1 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -116,7 +116,7 @@ static void zebra_vxlan_sg_deref(struct in_addr local_vtep_ip, struct in_addr mcast_grp); static void zebra_vxlan_sg_ref(struct in_addr local_vtep_ip, struct in_addr mcast_grp); -static void zebra_vxlan_sg_cleanup(struct hash_bucket *bucket, void *arg); +static void zebra_vxlan_cleanup_sg_table(struct zebra_vrf *zvrf); bool zebra_evpn_do_dup_addr_detect(struct zebra_vrf *zvrf) { @@ -1992,7 +1992,10 @@ static void zevpn_add_to_l3vni_list(struct hash_bucket *bucket, void *ctxt) } /* - * handle transition of vni from l2 to l3 and vice versa + * Handle transition of vni from l2 to l3 and vice versa. + * This function handles only the L2VNI add/delete part of + * the above transition. + * L3VNI add/delete is handled by the calling functions. */ static int zebra_vxlan_handle_vni_transition(struct zebra_vrf *zvrf, vni_t vni, int add) @@ -2033,11 +2036,71 @@ static int zebra_vxlan_handle_vni_transition(struct zebra_vrf *zvrf, vni_t vni, return -1; } } else { - /* TODO_MITESH: This needs to be thought through. We don't have - * enough information at this point to reprogram the vni as - * l2-vni. One way is to store the required info in l3-vni and - * used it solely for this purpose - */ + struct zebra_ns *zns; + struct route_node *rn; + struct interface *ifp; + struct zebra_if *zif; + struct zebra_l2info_vxlan *vxl; + struct interface *vlan_if; + bool found = false; + + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_debug("Adding L2-VNI %u - transition from L3-VNI", + vni); + + /* Find VxLAN interface for this VNI. 
*/ + zns = zebra_ns_lookup(NS_DEFAULT); + for (rn = route_top(zns->if_table); rn; rn = route_next(rn)) { + ifp = (struct interface *)rn->info; + if (!ifp) + continue; + zif = ifp->info; + if (!zif || zif->zif_type != ZEBRA_IF_VXLAN) + continue; + + vxl = &zif->l2info.vxl; + if (vxl->vni == vni) { + found = true; + break; + } + } + + if (!found) { + if (IS_ZEBRA_DEBUG_VXLAN) + zlog_err( + "Adding L2-VNI - Failed to find VxLAN interface for VNI %u", + vni); + return -1; + } + + /* Create VNI hash entry for L2VNI */ + zevpn = zebra_evpn_lookup(vni); + if (zevpn) + return 0; + + zevpn = zebra_evpn_add(vni); + if (!zevpn) { + flog_err(EC_ZEBRA_VNI_ADD_FAILED, + "Adding L2-VNI - Failed to add VNI hash, VNI %u", + vni); + + return -1; + } + + /* Find bridge interface for the VNI */ + vlan_if = zvni_map_to_svi(vxl->access_vlan, + zif->brslave_info.br_if); + if (vlan_if) + zevpn->vrf_id = vlan_if->vrf_id; + + zevpn->vxlan_if = ifp; + zevpn->local_vtep_ip = vxl->vtep_ip; + + /* Inform BGP if the VNI is up and mapped to a bridge. */ + if (if_is_operative(ifp) && zif->brslave_info.br_if) { + zebra_evpn_send_add_to_client(zevpn); + zebra_evpn_read_mac_neigh(zevpn, ifp); + } } return 0; @@ -3678,13 +3741,13 @@ int zebra_vxlan_handle_kernel_neigh_update(struct interface *ifp, if (IS_ZEBRA_DEBUG_VXLAN || IS_ZEBRA_DEBUG_EVPN_MH_NEIGH) zlog_debug( - "Add/Update neighbor %s MAC %s intf %s(%u) state 0x%x %s%s%s-> L2-VNI %u", + "Add/Update neighbor %s MAC %s intf %s(%u) state 0x%x %s%s%s%s-> L2-VNI %u", ipaddr2str(ip, buf2, sizeof(buf2)), prefix_mac2str(macaddr, buf, sizeof(buf)), ifp->name, ifp->ifindex, state, is_ext ? "ext-learned " : "", is_router ? "router " : "", local_inactive ? "local_inactive " : "", - zevpn->vni); + dp_static ? "peer_sync " : "", zevpn->vni); /* Is this about a local neighbor or a remote one? 
*/ if (!is_ext) @@ -5201,6 +5264,7 @@ int zebra_vxlan_process_vrf_vni_cmd(struct zebra_vrf *zvrf, vni_t vni, if (add) { + /* Remove L2VNI if present */ zebra_vxlan_handle_vni_transition(zvrf, vni, add); /* check if the vni is already present under zvrf */ @@ -5295,6 +5359,7 @@ int zebra_vxlan_process_vrf_vni_cmd(struct zebra_vrf *zvrf, vni_t vni, zvrf->l3vni = 0; zl3vni_del(zl3vni); + /* Add L2VNI for this VNI */ zebra_vxlan_handle_vni_transition(zvrf, vni, add); } return 0; @@ -5784,7 +5849,7 @@ void zebra_vxlan_cleanup_tables(struct zebra_vrf *zvrf) if (!zvrf) return; hash_iterate(zvrf->evpn_table, zebra_evpn_vxlan_cleanup_all, zvrf); - hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL); + zebra_vxlan_cleanup_sg_table(zvrf); if (zvrf == evpn_zvrf) zebra_evpn_es_cleanup(); @@ -5797,6 +5862,11 @@ void zebra_vxlan_close_tables(struct zebra_vrf *zvrf) return; hash_iterate(zvrf->evpn_table, zebra_evpn_vxlan_cleanup_all, zvrf); hash_free(zvrf->evpn_table); + if (zvrf->vxlan_sg_table) { + zebra_vxlan_cleanup_sg_table(zvrf); + hash_free(zvrf->vxlan_sg_table); + zvrf->vxlan_sg_table = NULL; + } } /* init the l3vni table */ @@ -6045,6 +6115,30 @@ static void zebra_vxlan_sg_ref(struct in_addr local_vtep_ip, zebra_vxlan_sg_do_ref(zvrf, local_vtep_ip, mcast_grp); } +static void zebra_vxlan_xg_pre_cleanup(struct hash_bucket *backet, void *arg) +{ + zebra_vxlan_sg_t *vxlan_sg = (zebra_vxlan_sg_t *)backet->data; + + /* increment the ref count against (*,G) to prevent them from being + * deleted + */ + if (vxlan_sg->sg.src.s_addr == INADDR_ANY) + ++vxlan_sg->ref_cnt; +} + +static void zebra_vxlan_xg_post_cleanup(struct hash_bucket *backet, void *arg) +{ + zebra_vxlan_sg_t *vxlan_sg = (zebra_vxlan_sg_t *)backet->data; + + /* decrement the dummy ref count against (*,G) to delete them */ + if (vxlan_sg->sg.src.s_addr == INADDR_ANY) { + if (vxlan_sg->ref_cnt) + --vxlan_sg->ref_cnt; + if (!vxlan_sg->ref_cnt) + zebra_vxlan_sg_del(vxlan_sg); + } +} + static void 
zebra_vxlan_sg_cleanup(struct hash_bucket *backet, void *arg) { zebra_vxlan_sg_t *vxlan_sg = (zebra_vxlan_sg_t *)backet->data; @@ -6052,6 +6146,19 @@ static void zebra_vxlan_sg_cleanup(struct hash_bucket *backet, void *arg) zebra_vxlan_sg_del(vxlan_sg); } +static void zebra_vxlan_cleanup_sg_table(struct zebra_vrf *zvrf) +{ + /* increment the ref count against (*,G) to prevent them from being + * deleted + */ + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_xg_pre_cleanup, NULL); + + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_sg_cleanup, NULL); + + /* decrement the dummy ref count against the XG entries */ + hash_iterate(zvrf->vxlan_sg_table, zebra_vxlan_xg_post_cleanup, NULL); +} + static void zebra_vxlan_sg_replay_send(struct hash_bucket *backet, void *arg) { zebra_vxlan_sg_t *vxlan_sg = (zebra_vxlan_sg_t *)backet->data; diff --git a/zebra/zserv.c b/zebra/zserv.c index 4b5791530d..484d94fac8 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -1033,6 +1033,9 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client) } else vty_out(vty, "Not registered for Nexthop Updates\n"); + vty_out(vty, "Client will %sbe notified about it's routes status\n", + client->notify_owner ? "" : "Not "); + last_read_time = (time_t)atomic_load_explicit(&client->last_read_time, memory_order_relaxed); last_write_time = (time_t)atomic_load_explicit(&client->last_write_time, @@ -1300,6 +1303,21 @@ DEFUN (show_zebra_client_summary, return CMD_SUCCESS; } +static int zserv_client_close_cb(struct zserv *closed_client) +{ + struct listnode *node, *nnode; + struct zserv *client = NULL; + + for (ALL_LIST_ELEMENTS(zrouter.client_list, node, nnode, client)) { + if (client->proto == closed_client->proto) + continue; + + zsend_client_close_notify(client, closed_client); + } + + return 0; +} + void zserv_init(void) { /* Client list init. 
*/ @@ -1312,4 +1330,6 @@ void zserv_init(void) install_element(ENABLE_NODE, &show_zebra_client_cmd); install_element(ENABLE_NODE, &show_zebra_client_summary_cmd); + + hook_register(zserv_client_close, zserv_client_close_cb); } |
