]> git.puffer.fish Git - mirror/frr.git/commitdiff
zebra: Bring up 514 BGP neighbor sessions
authorSoumya Roy <souroy@souroy-mlt.client.nvidia.com>
Sat, 15 Feb 2025 02:13:37 +0000 (18:13 -0800)
committerSoumya Roy <souroy@nvidia.com>
Wed, 5 Mar 2025 06:15:56 +0000 (06:15 +0000)
Issue:
When 514 interfaces/neighbors are configured, the socket returns the error
"Cannot allocate memory" when IPv6 RA messages are sent back to back
over it. This prevents an interface from learning its peer's
link-local address. The socket error occurs when we 1) try to join the
ICMPv6 all-routers multicast group back to back for all interfaces, or
2) send RAs back to back for all interfaces.

Fix:
1) For the ICMPv6 join case, we check whether the interface has already joined
the all-routers group and, if not, try to join. On failure, we retry the join
after a random delay between 1 ms and ICMPV6_JOIN_TIMER_EXP_MS (100 ms).
2) For the RA case, batch the sending of RA messages using a timer wheel.

Testing:
Monitor the BGP sessions by running the "sh bgp summary" command.

Before fix:
r1# sh bgp summary

IPv4 Unicast Summary:
BGP router identifier 192.168.1.1, local AS number 1001 VRF default vrf-id 0
BGP table version 0
RIB entries 0, using 0 bytes of memory
Peers 515, using 12 MiB of memory

Neighbor        V         AS   MsgRcvd   MsgSent   TblVer  InQ OutQ  Up/Down State/PfxRcd   PfxSnt Desc
r1-eth0         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth1         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth2         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth3         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth4         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth5         4       1002        89        90        0    0    0 00:07:10            0        0 N/A

…..<snip>...
r1-eth252       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth253       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth254       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth255       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth256       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth257       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth258       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth259       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth260       4          0         0         0        0    0    0    never         Idle        0 N/A
……..<snip>…..
r1-eth511       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth512       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth513       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth514       4          0         0         0        0    0    0    never         Idle        0 N/A
After fix:
r1# show bgp summary

IPv4 Unicast Summary:
BGP router identifier 192.168.1.1, local AS number 1001 VRF default vrf-id 0
BGP table version 0
RIB entries 0, using 0 bytes of memory
Peers 515, using 12 MiB of memory

Neighbor        V         AS   MsgRcvd   MsgSent   TblVer  InQ OutQ  Up/Down State/PfxRcd   PfxSnt Desc
r1-eth0         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth1         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth2         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth3         4       1002        64        67        0    0    0 00:05:09            0        0 N/A
r1-eth4         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth5         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth6         4       1002        67        70        0    0    0 00:05:22            0        0 N/A
r1-eth7         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth8         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
....
r1-eth499       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth500       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth501       4       1002        19        22        0    0    0 00:01:21            0        0 N/A
r1-eth502       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth503       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth504       4       1002        20        23        0    0    0 00:01:30            0        0 N/A
r1-eth505       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth506       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth507       4       1002        22        25        0    0    0 00:01:39            0        0 N/A
r1-eth508       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth509       4       1002        17        20        0    0    0 00:01:13            0        0 N/A
r1-eth510       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth511       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth512       4       1002        19        22        0    0    0 00:01:22            0        0 N/A
r1-eth513       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth514       4       1002        43        43        0    0    0 00:03:22            0        0 N/A

Signed-off-by: Soumya Roy <souroy@nvidia.com>
tests/topotests/high_ecmp/test_high_ecmp.py
zebra/interface.h
zebra/rtadv.c
zebra/rtadv.h
zebra/zebra_router.c
zebra/zebra_router.h

index d28a1ee0690e74c9ef20df67bc69a3ed43970169..daaf5d4d277bf7bb48762d125ccfaabb3b0cb4ce 100644 (file)
@@ -43,7 +43,6 @@ from lib.topolog import logger
 
 
 def build_topo(tgen):
-
     tgen.add_router("r1")
     tgen.add_router("r2")
 
index 2c7a079bf474cf05060be84ed87a8526d6eccd25..995dffdc3297b91ea0b63cfa0f3e20496c5c6b1e 100644 (file)
@@ -99,6 +99,9 @@ struct zebra_if {
        /* back pointer to the interface */
        struct interface *ifp;
 
+       /* Event timer used to retry a failed ICMPv6 all-routers group join */
+       struct event *icmpv6_join_timer;
+
        enum zebra_if_flags flags;
 
        /* Shutdown configuration. */
index 8f6713517d35472418cdb6fb7dc749f22d50f2e1..014021dba6214ba61ffd7b150fc771a9dfe98935 100644 (file)
@@ -21,6 +21,8 @@
 #include "vrf.h"
 #include "ns.h"
 #include "lib_errors.h"
+#include "wheel.h"
+#include "network.h"
 
 #include "zebra/interface.h"
 #include "zebra/rtadv.h"
@@ -36,6 +38,19 @@ extern struct zebra_privs_t zserv_privs;
 static uint32_t interfaces_configured_for_ra_from_bgp;
 #define RTADV_ADATA_SIZE 1024
 
+#define PROC_IGMP6 "/proc/net/igmp6"
+
+/* An IPv6 address written as a hex string without separators is 32 chars:
+ * e.g. 2001:db8:85a3::8a2e:370:7334
+ * becomes 20010db885a3000000008a2e03707334,
+ * which is 32 chars long.
+ */
+#define MAX_V6ADDR_LEN 32
+
+#define MAX_INTERFACE_NAME_LEN 25
+
+#define MAX_CHARS_PER_LINE 1024
+
 #if defined(HAVE_RTADV)
 
 #include "zebra/rtadv_clippy.c"
@@ -58,6 +73,12 @@ DEFINE_MTYPE_STATIC(ZEBRA, ADV_IF, "Advertised Interface");
 #define ALLNODE   "ff02::1"
 #define ALLROUTER "ff02::2"
 
+static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in);
+
+#ifdef __linux__
+static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr);
+#endif
+
 /* adv list node */
 struct adv_if {
        char name[IFNAMSIZ];
@@ -462,6 +483,60 @@ no_more_opts:
                zif->ra_sent++;
 }
 
+static void start_icmpv6_join_timer(struct event *thread)
+{
+       struct interface *ifp = EVENT_ARG(thread);
+       struct zebra_if *zif = ifp->info;
+       struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(ifp);
+
+       if (if_join_all_router(zvrf->rtadv.sock, ifp)) {
+               /*Wait random amount of time between 1 ms to ICMPV6_JOIN_TIMER_EXP_MS ms*/
+               int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1;
+               event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, ifp, random_ms,
+                                    &zif->icmpv6_join_timer);
+       }
+
+       if (IS_ZEBRA_DEBUG_EVENT)
+               zlog_debug("Processing ICMPv6 join on interface %s(%s:%u)", ifp->name,
+                          ifp->vrf->name, ifp->ifindex);
+}
+
+void process_rtadv(void *arg)
+{
+       struct interface *ifp = arg;
+       struct zebra_if *zif = ifp->info;
+       struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(ifp);
+
+       if (zif->rtadv.inFastRexmit && zif->rtadv.UseFastRexmit) {
+               if (--zif->rtadv.NumFastReXmitsRemain <= 0)
+                       zif->rtadv.inFastRexmit = 0;
+
+               if (IS_ZEBRA_DEBUG_SEND)
+                       zlog_debug("Doing fast RA Rexmit on interface %s(%s:%u)", ifp->name,
+                                  ifp->vrf->name, ifp->ifindex);
+
+               rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE);
+       } else {
+               zif->rtadv.AdvIntervalTimer -= RTADV_TIMER_WHEEL_PERIOD_MS;
+               /* Wait at least AdvIntervalTimer before sending the next RA.
+                * AdvIntervalTimer can go negative when it is not a multiple
+                * of the ra_wheel expiry interval. Say the ra_wheel expiry
+                * time is 10 ms and AdvIntervalTimer == 1005 ms. Letting
+                * AdvIntervalTimer go negative and checking for <= 0 guarantees
+                * that we have waited at least AdvIntervalTimer, so the RA can
+                * be sent now.
+                */
+               if (zif->rtadv.AdvIntervalTimer <= 0) {
+                       zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval;
+                       if (IS_ZEBRA_DEBUG_SEND)
+                               zlog_debug("Doing regular RA Rexmit on interface %s(%s:%u)",
+                                          ifp->name, ifp->vrf->name, ifp->ifindex);
+
+                       rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE);
+               }
+       }
+}
+
 static void rtadv_timer(struct event *thread)
 {
        struct zebra_vrf *zvrf = EVENT_ARG(thread);
@@ -1261,7 +1336,13 @@ static void rtadv_start_interface_events(struct zebra_vrf *zvrf,
        if (adv_if != NULL)
                return; /* Already added */
 
-       if_join_all_router(zvrf->rtadv.sock, zif->ifp);
+       if (if_join_all_router(zvrf->rtadv.sock, zif->ifp)) {
+               /*Failed to join on 1st attempt, wait random amount of time between 1 ms 
+                to ICMPV6_JOIN_TIMER_EXP_MS ms*/
+               int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1;
+               event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, zif->ifp, random_ms,
+                                    &zif->icmpv6_join_timer);
+       }
 
        if (adv_if_list_count(&zvrf->rtadv.adv_if) == 1)
                rtadv_event(zvrf, RTADV_START, 0);
@@ -1281,6 +1362,8 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp,
        if (status == RA_SUPPRESS) {
                /* RA is currently enabled */
                if (zif->rtadv.AdvSendAdvertisements) {
+                       /* Try to delete from the ra wheel */
+                       wheel_remove_item(zrouter.ra_wheel, ifp);
                        rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS);
                        zif->rtadv.AdvSendAdvertisements = 0;
                        zif->rtadv.AdvIntervalTimer = 0;
@@ -1311,6 +1394,7 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp,
                                        RTADV_NUM_FAST_REXMITS;
                        }
 
+                       wheel_add_item(zrouter.ra_wheel, ifp);
                        rtadv_start_interface_events(zvrf, zif);
                }
        }
@@ -1438,6 +1522,12 @@ void rtadv_stop_ra(struct interface *ifp)
        zif = ifp->info;
        zvrf = rtadv_interface_get_zvrf(ifp);
 
+       /*Try to delete from ra wheels */
+       wheel_remove_item(zrouter.ra_wheel, ifp);
+
+       /*Turn off event for ICMPv6 join*/
+       EVENT_OFF(zif->icmpv6_join_timer);
+
        if (zif->rtadv.AdvSendAdvertisements)
                rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS);
 }
@@ -1730,8 +1820,7 @@ static void rtadv_event(struct zebra_vrf *zvrf, enum rtadv_event event, int val)
        case RTADV_START:
                event_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock,
                               &rtadv->ra_read);
-               event_add_event(zrouter.master, rtadv_timer, zvrf, 0,
-                               &rtadv->ra_timer);
+
                break;
        case RTADV_STOP:
                EVENT_OFF(rtadv->ra_timer);
@@ -1862,24 +1951,114 @@ void rtadv_cmd_init(void)
        install_element(VIEW_NODE, &show_ipv6_nd_ra_if_cmd);
 }
 
+#ifdef __linux__
+static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr)
+{
+       size_t str_len = strlen(hex_str);
+
+       if (str_len != MAX_V6ADDR_LEN) {
+               flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid V6 addr hex len %zu", str_len);
+               return false;
+       }
+
+       for (int i = 0; i < 16; i++) {
+               char byte_str[3] = { hex_str[i * 2], hex_str[i * 2 + 1], '\0' };
+               addr->s6_addr[i] = (uint8_t)strtol(byte_str, NULL, 16);
+       }
+
+       return true;
+}
+#endif
+
+/* Checks if an interface is part of a multicast group, no null check for input strings */
+static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in)
+{
+#ifdef __linux__
+       char line[MAX_CHARS_PER_LINE];
+       char ifname_found[MAX_INTERFACE_NAME_LEN];
+       char mcast_addr_found_hex_str[MAX_V6ADDR_LEN + 5];
+       struct in6_addr mcast_addr_in_bin;
+       struct in6_addr mcast_addr_found_bin;
+       int if_index = -1;
+       int ifname_in_len = 0;
+       int ifname_found_len = 0;
+
+       FILE *fp = fopen(PROC_IGMP6, "r");
+
+       if (!fp) {
+               flog_err_sys(EC_LIB_SYSTEM_CALL, "Failed to open %s", PROC_IGMP6);
+               return false;
+       }
+
+       /* Convert input IPv6 address to binary */
+       if (inet_pton(AF_INET6, mcast_addr_in, &mcast_addr_in_bin) != 1) {
+               flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid IPv6 address format %s", mcast_addr_in);
+               fclose(fp);
+               return false;
+       }
+
+       /* Scan each line of the file for a matching interface name and group address */
+       while (fgets(line, sizeof(line), fp)) {
+               sscanf(line, "%d %s %s", &if_index, ifname_found, mcast_addr_found_hex_str);
+
+               ifname_in_len = strlen(ifname_in);
+               ifname_found_len = strlen(ifname_found);
+               if (ifname_in_len != ifname_found_len)
+                       continue;
+
+               /* Locate 'x' if "0x" is present or not, if present go past that */
+               const char *clean_mcast_addr_hex_str = strchr(mcast_addr_found_hex_str, 'x');
+               if (clean_mcast_addr_hex_str) {
+                       clean_mcast_addr_hex_str++;
+               } else {
+                       clean_mcast_addr_hex_str = mcast_addr_found_hex_str;
+               }
+
+               if (!v6_addr_hex_str_to_in6_addr(clean_mcast_addr_hex_str, &mcast_addr_found_bin))
+                       continue;
+
+               if ((!strncmp(ifname_in, ifname_found, ifname_in_len)) &&
+                   (!IPV6_ADDR_CMP(&mcast_addr_in_bin, &mcast_addr_found_bin))) {
+                       fclose(fp);
+                       /* Already joined */
+                       return true;
+               }
+       }
+
+       fclose(fp);
+
+#endif
+
+       /* Not joined */
+       return false;
+}
+
 static int if_join_all_router(int sock, struct interface *ifp)
 {
        int ret;
 
        struct ipv6_mreq mreq;
 
+       if (is_interface_in_group(ifp->name, ALLROUTER))
+               /* Interface is already part of the group, so return success */
+               return 0;
+
        memset(&mreq, 0, sizeof(mreq));
        inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr);
        mreq.ipv6mr_interface = ifp->ifindex;
 
        ret = setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, (char *)&mreq,
                         sizeof(mreq));
-       if (ret < 0)
+
+       if (ret < 0) {
                flog_err_sys(EC_LIB_SOCKET,
                             "%s(%u): Failed to join group, socket %u error %s",
                             ifp->name, ifp->ifindex, sock,
                             safe_strerror(errno));
 
+               return ret;
+       }
+
        if (IS_ZEBRA_DEBUG_EVENT)
                zlog_debug(
                        "%s(%s:%u): Join All-Routers multicast group, socket %u",
index 0983ea578fe160b4c30da947bd254faeb3763655..73d737ce418def77c53ff65997ef6ac063dc0cc7 100644 (file)
@@ -460,6 +460,7 @@ extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS);
 extern uint32_t rtadv_get_interfaces_configured_from_bgp(void);
 extern bool rtadv_compiled_in(void);
 extern void rtadv_init(void);
+extern void process_rtadv(void *arg);
 
 #ifdef __cplusplus
 }
index ae2910af410a863d6f5a9162c40ec616166256d7..15b7e317c9e9714d8b72871e246386089a5aa32c 100644 (file)
@@ -17,6 +17,7 @@
 #include "zebra/zebra_tc.h"
 #include "debug.h"
 #include "zebra_script.h"
+#include "wheel.h"
 
 DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info");
 DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_RT_TABLE, "Zebra VRF table");
@@ -220,10 +221,22 @@ uint32_t zebra_router_get_next_sequence(void)
                                           memory_order_relaxed);
 }
 
+static inline unsigned int interface_hash_key(const void *arg)
+{
+       const struct interface *ifp = arg;
+
+       return ifp->ifindex;
+}
+
 void zebra_router_terminate(void)
 {
        struct zebra_router_table *zrt, *tmp;
 
+       if (zrouter.ra_wheel) {
+               wheel_delete(zrouter.ra_wheel);
+               zrouter.ra_wheel = NULL;
+       }
+
        EVENT_OFF(zrouter.t_rib_sweep);
 
        RB_FOREACH_SAFE (zrt, zebra_router_table_head, &zrouter.tables, tmp)
@@ -278,6 +291,11 @@ void zebra_router_init(bool asic_offload, bool notify_on_ack,
 
        zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER;
 
+       /*Init V6 RA batching stuffs*/
+       zrouter.ra_wheel = wheel_init(zrouter.master, RTADV_TIMER_WHEEL_PERIOD_MS,
+                                     RTADV_TIMER_WHEEL_SLOTS_NO, interface_hash_key, process_rtadv,
+                                     NULL);
+
        zebra_vxlan_init();
        zebra_mlag_init();
        zebra_neigh_init();
index 28c4cf0790d73892f115cd9986509ac07af7b390..d357994ec26b5e3bf9ef00c2fa27f37aa9c95eec 100644 (file)
@@ -112,12 +112,19 @@ struct zebra_mlag_info {
        struct event *t_write;
 };
 
+#define RTADV_TIMER_WHEEL_PERIOD_MS 1000
+#define RTADV_TIMER_WHEEL_SLOTS_NO  100
+#define ICMPV6_JOIN_TIMER_EXP_MS    100
+
 struct zebra_router {
        atomic_bool in_shutdown;
 
        /* Thread master */
        struct event_loop *master;
 
+       /* Wheel to process V6 RA update */
+       struct timer_wheel *ra_wheel;
+
        /* Lists of clients who have connected to us */
        struct list *client_list;