git.puffer.fish Git - matthieu/frr.git/commitdiff
pim: DF election for tunnel termination mroutes in an anycast-VTEP setup
authorAnuradha Karuppiah <anuradhak@cumulusnetworks.com>
Thu, 6 Feb 2020 17:30:36 +0000 (09:30 -0800)
committerAnuradha Karuppiah <anuradhak@cumulusnetworks.com>
Fri, 14 Feb 2020 17:18:30 +0000 (09:18 -0800)
1. Upstream entries associated with tunnel termination mroutes are
synced to the MLAG peer via the local MLAG daemon.
2. These entries are installed in the peer switch (via an upstream
ref flag).
3. DF (Designated Forwarder) election is run per-upstream entry by both
the MLAG switches -
a. The switch with the lowest RPF cost is the DF winner
b. If both switches have the same RPF cost the MLAG role is
used as a tie breaker with the MLAG primary becoming the DF
winner.
4. The DF winner terminates the multicast traffic by adding the tunnel
termination device to the OIL. The non-DF suppresses the termination
device from the OIL.

Note: Before the PIM-MLAG interface was available hidden config was
used to test the EVPN-PIM functionality with MLAG. I have removed the
code to persist that config to avoid confusion. The hidden commands are
still available.

Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
pimd/pim_cmd.c
pimd/pim_instance.h
pimd/pim_mlag.c
pimd/pim_mlag.h
pimd/pim_rpf.c
pimd/pim_upstream.c
pimd/pim_upstream.h
pimd/pim_vty.c
pimd/pim_vxlan.c
pimd/pim_vxlan.h
pimd/pim_zebra.c

index 6508fb4453f71f4abea06b5173f3aa70385c14ab..1fa674a6f78dbf428efdb14baadd879ba0f69f69 100644 (file)
@@ -5294,7 +5294,7 @@ static void pim_cmd_show_ip_multicast_helper(struct pim_instance *pim,
        pim = vrf->info;
 
        vty_out(vty, "Router MLAG Role: %s\n",
-               mlag_role2str(router->role, mlag_role, sizeof(mlag_role)));
+               mlag_role2str(router->mlag_role, mlag_role, sizeof(mlag_role)));
        vty_out(vty, "Mroute socket descriptor:");
 
        vty_out(vty, " %d(%s)\n", pim->mroute_socket, vrf->name);
@@ -10259,7 +10259,7 @@ DEFUN_HIDDEN (no_ip_pim_mlag,
 
        addr.s_addr = 0;
        pim_vxlan_mlag_update(true/*mlag_enable*/,
-               false/*peer_state*/, PIM_VXLAN_MLAG_ROLE_SECONDARY,
+               false/*peer_state*/, MLAG_ROLE_NONE,
                NULL/*peerlink*/, &addr);
 
        return CMD_SUCCESS;
@@ -10299,9 +10299,9 @@ DEFUN_HIDDEN (ip_pim_mlag,
 
        idx += 2;
        if (!strcmp(argv[idx]->arg, "primary")) {
-               role = PIM_VXLAN_MLAG_ROLE_PRIMARY;
+               role = MLAG_ROLE_PRIMARY;
        } else if (!strcmp(argv[idx]->arg, "secondary")) {
-               role = PIM_VXLAN_MLAG_ROLE_SECONDARY;
+               role = MLAG_ROLE_SECONDARY;
        } else {
                vty_out(vty, "unknown MLAG role %s\n", argv[idx]->arg);
                return CMD_WARNING;
index da0c75decb6c8b07b602207b7a42c5b782a6e76b..7b1fd2e172fb64faf204ea7a293435dadd7e92b8 100644 (file)
@@ -48,6 +48,46 @@ enum pim_spt_switchover {
        PIM_SPT_INFINITY,
 };
 
+/* stats for updates rxed from the MLAG component during the life of a
+ * session
+ */
+struct pim_mlag_msg_stats {
+       uint32_t mroute_add_rx;
+       uint32_t mroute_add_tx;
+       uint32_t mroute_del_rx;
+       uint32_t mroute_del_tx;
+       uint32_t mlag_status_updates;
+       uint32_t pim_status_updates;
+       uint32_t vxlan_updates;
+       uint32_t peer_zebra_status_updates;
+};
+
+struct pim_mlag_stats {
+       /* message stats are reset when the connection to mlagd flaps */
+       struct pim_mlag_msg_stats msg;
+       uint32_t mlagd_session_downs;
+       uint32_t peer_session_downs;
+       uint32_t peer_zebra_downs;
+};
+
+enum pim_mlag_flags {
+       PIM_MLAGF_NONE = 0,
+       /* connection to the local MLAG daemon is up */
+       PIM_MLAGF_LOCAL_CONN_UP = (1 << 0),
+       /* connection to the MLAG daemon on the peer switch is up. note
+        * that there is no direct connection between FRR and the peer MLAG
+        * daemon. this is just a peer-session status provided by the local
+        * MLAG daemon.
+        */
+       PIM_MLAGF_PEER_CONN_UP = (1 << 1),
+       /* status update rxed from the local daemon */
+       PIM_MLAGF_STATUS_RXED = (1 << 2),
+       /* initial dump of data done post peerlink flap */
+       PIM_MLAGF_PEER_REPLAY_DONE = (1 << 3),
+       /* zebra is up on the peer */
+       PIM_MLAGF_PEER_ZEBRA_UP = (1 << 4)
+};
+
 struct pim_router {
        struct thread_master *master;
 
@@ -65,7 +105,7 @@ struct pim_router {
         */
        vrf_id_t vrf_id;
 
-       enum mlag_role role;
+       enum mlag_role mlag_role;
        uint32_t pim_mlag_intf_cnt;
        /* if true we have registered with MLAG */
        bool mlag_process_register;
@@ -77,6 +117,12 @@ struct pim_router {
        struct stream_fifo *mlag_fifo;
        struct stream *mlag_stream;
        struct thread *zpthread_mlag_write;
+       struct in_addr anycast_vtep_ip;
+       struct in_addr local_vtep_ip;
+       struct pim_mlag_stats mlag_stats;
+       enum pim_mlag_flags mlag_flags;
+       char peerlink_rif[INTERFACE_NAMSIZ];
+       struct interface *peerlink_rif_p;
 };
 
 /* Per VRF PIM DB */
index f60c18204b6d5179c1886445eb6c6d5869634b3d..1c2f7c563d1ccd3b81c328c4a5c15dd941eba5f6 100644 (file)
 
 #include "pimd.h"
 #include "pim_mlag.h"
+#include "pim_upstream.h"
+#include "pim_vxlan.h"
 
 extern struct zclient *zclient;
 
+#define PIM_MLAG_METADATA_LEN 4
+
+/******************************* pim upstream sync **************************/
+/* Update DF role for the upstream entry and return true on role change */
+bool pim_mlag_up_df_role_update(struct pim_instance *pim,
+               struct pim_upstream *up, bool is_df, const char *reason)
+{
+       struct channel_oil *c_oil = up->channel_oil;
+       bool old_is_df = !PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags);
+       struct pim_interface *vxlan_ifp;
+
+       if (is_df == old_is_df) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug(
+                               "%s: Ignoring Role update for %s, since no change",
+                               __func__, up->sg_str);
+               return false;
+       }
+
+       if (PIM_DEBUG_MLAG)
+               zlog_debug("local MLAG mroute %s role changed to %s based on %s",
+                               up->sg_str, is_df ? "df" : "non-df", reason);
+
+       if (is_df)
+               PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(up->flags);
+       else
+               PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags);
+
+
+       /* If the DF role has changed check if ipmr-lo needs to be
+        * muted/un-muted. Active-Active devices and vxlan termination
+        * devices (ipmr-lo) are suppressed on the non-DF.
+        * This may leave the mroute with the empty OIL in which case the
+        * the forwarding entry's sole purpose is to just blackhole the flow
+        * headed to the switch.
+        */
+       if (c_oil) {
+               vxlan_ifp = pim_vxlan_get_term_ifp(pim);
+               if (vxlan_ifp)
+                       pim_channel_update_oif_mute(c_oil, vxlan_ifp);
+       }
+
+       /* If DF role changed on a (*,G) termination mroute update the
+        * associated DF role on the inherited (S,G) entries
+        */
+       if ((up->sg.src.s_addr == INADDR_ANY) &&
+                       PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags))
+               pim_vxlan_inherit_mlag_flags(pim, up, true /* inherit */);
+
+       return true;
+}
+
+/* Run per-upstream entry DF election and return true on role change */
+static bool pim_mlag_up_df_role_elect(struct pim_instance *pim,
+               struct pim_upstream *up)
+{
+       bool is_df;
+       uint32_t peer_cost;
+       uint32_t local_cost;
+       bool rv;
+
+       if (!pim_up_mlag_is_local(up))
+               return false;
+
+       /* We are yet to rx a status update from the local MLAG daemon so
+        * we will assume DF status.
+        */
+       if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED))
+               return pim_mlag_up_df_role_update(pim, up,
+                               true /*is_df*/, "mlagd-down");
+
+       /* If not connected to peer assume DF role on the MLAG primary
+        * switch (and non-DF on the secondary switch).
+        */
+       if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
+               is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
+               return pim_mlag_up_df_role_update(pim, up,
+                               is_df, "peer-down");
+       }
+
+       /* If MLAG peer session is up but zebra is down on the peer
+        * assume DF role.
+        */
+       if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP))
+               return pim_mlag_up_df_role_update(pim, up,
+                               true /*is_df*/, "zebra-down");
+
+       /* If we are connected to peer switch but don't have a mroute
+        * from it we have to assume non-DF role to avoid duplicates.
+        * Note: When the peer connection comes up we wait for initial
+        * replay to complete before moving "strays" i.e. local-mlag-mroutes
+        * without a peer reference to non-df role.
+        */
+       if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
+               return pim_mlag_up_df_role_update(pim, up,
+                               false /*is_df*/, "no-peer-mroute");
+
+       /* switch with the lowest RPF cost wins. if both switches have the same
+        * cost MLAG role is used as a tie breaker (MLAG primary wins).
+        */
+       peer_cost = up->mlag.peer_mrib_metric;
+       local_cost = pim_up_mlag_local_cost(up);
+       if (local_cost == peer_cost) {
+               is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
+               rv = pim_mlag_up_df_role_update(pim, up, is_df, "equal-cost");
+       } else {
+               is_df = (local_cost < peer_cost) ? true : false;
+               rv = pim_mlag_up_df_role_update(pim, up, is_df, "cost");
+       }
+
+       return rv;
+}
+
+/* Handle upstream entry add from the peer MLAG switch -
+ * - if a local entry doesn't exist one is created with reference
+ *   _MLAG_PEER
+ * - if a local entry exists and has a MLAG OIF DF election is run.
+ *   the non-DF switch stop forwarding traffic to MLAG devices.
+ */
+static void pim_mlag_up_peer_add(struct mlag_mroute_add *msg)
+{
+       struct pim_upstream *up;
+       struct pim_instance *pim;
+       int flags = 0;
+       struct prefix_sg sg;
+       struct vrf *vrf;
+       char sg_str[PIM_SG_LEN];
+
+       memset(&sg, 0, sizeof(struct prefix_sg));
+       sg.src.s_addr = htonl(msg->source_ip);
+       sg.grp.s_addr = htonl(msg->group_ip);
+       if (PIM_DEBUG_MLAG)
+               pim_str_sg_set(&sg, sg_str);
+
+       if (PIM_DEBUG_MLAG)
+               zlog_debug("peer MLAG mroute add %s:%s cost %d",
+                       msg->vrf_name, sg_str, msg->cost_to_rp);
+
+       /* XXX - this is not correct. we MUST cache updates to avoid losing
+        * an entry because of race conditions with the peer switch.
+        */
+       vrf = vrf_lookup_by_name(msg->vrf_name);
+       if  (!vrf) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("peer MLAG mroute add failed %s:%s; no vrf",
+                                       msg->vrf_name, sg_str);
+               return;
+       }
+       pim = vrf->info;
+
+       up = pim_upstream_find(pim, &sg);
+       if (up) {
+               /* upstream already exists; create peer reference if it
+                * doesn't already exist.
+                */
+               if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
+                       pim_upstream_ref(up,
+                                       PIM_UPSTREAM_FLAG_MASK_MLAG_PEER,
+                                       __PRETTY_FUNCTION__);
+       } else {
+               PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags);
+               up = pim_upstream_add(pim, &sg, NULL /*iif*/, flags,
+                               __PRETTY_FUNCTION__, NULL /*if_ch*/);
+
+               if (!up) {
+                       if (PIM_DEBUG_MLAG)
+                               zlog_debug("peer MLAG mroute add failed %s:%s",
+                                               vrf->name, sg_str);
+                       return;
+               }
+       }
+       up->mlag.peer_mrib_metric = msg->cost_to_rp;
+       pim_mlag_up_df_role_elect(pim, up);
+}
+
+/* Handle upstream entry del from the peer MLAG switch -
+ * - peer reference is removed. this can result in the upstream
+ *   being deleted altogether.
+ * - if a local entry continues to exist and has a MLAG OIF DF election
+ *   is re-run (at the end of which the local entry will be the DF).
+ */
+static void pim_mlag_up_peer_deref(struct pim_instance *pim,
+               struct pim_upstream *up)
+{
+       if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
+               return;
+
+       PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(up->flags);
+       up = pim_upstream_del(pim, up, __PRETTY_FUNCTION__);
+       if (up)
+               pim_mlag_up_df_role_elect(pim, up);
+}
+static void pim_mlag_up_peer_del(struct mlag_mroute_del *msg)
+{
+       struct pim_upstream *up;
+       struct pim_instance *pim;
+       struct prefix_sg sg;
+       struct vrf *vrf;
+       char sg_str[PIM_SG_LEN];
+
+       memset(&sg, 0, sizeof(struct prefix_sg));
+       sg.src.s_addr = htonl(msg->source_ip);
+       sg.grp.s_addr = htonl(msg->group_ip);
+       if (PIM_DEBUG_MLAG)
+               pim_str_sg_set(&sg, sg_str);
+
+       if (PIM_DEBUG_MLAG)
+               zlog_debug("peer MLAG mroute del %s:%s", msg->vrf_name,
+                               sg_str);
+
+       vrf = vrf_lookup_by_name(msg->vrf_name);
+       if  (!vrf) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("peer MLAG mroute del skipped %s:%s; no vrf",
+                                       msg->vrf_name, sg_str);
+               return;
+       }
+       pim = vrf->info;
+
+       up = pim_upstream_find(pim, &sg);
+       if  (!up) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("peer MLAG mroute del skipped %s:%s; no up",
+                                       vrf->name, sg_str);
+               return;
+       }
+
+       pim_mlag_up_peer_deref(pim, up);
+}
+
+/* When we lose connection to the local MLAG daemon we can drop all peer
+ * references.
+ */
+static void pim_mlag_up_peer_del_all(void)
+{
+       struct list *temp = list_new();
+       struct pim_upstream *up;
+       struct vrf *vrf;
+       struct pim_instance *pim;
+
+       /*
+        * So why these gyrations?
+        * pim->upstream_head has the list of *,G and S,G
+        * that are in the system.  The problem of course
+        * is that it is an ordered list:
+        * (*,G1) -> (S1,G1) -> (S2,G2) -> (S3, G2) -> (*,G2) -> (S1,G2)
+        * And the *,G1 has pointers to S1,G1 and S2,G1
+        * if we delete *,G1 then we have a situation where
+        * S1,G1 and S2,G1 can be deleted as well.  Then a
+        * simple ALL_LIST_ELEMENTS will have the next listnode
+        * pointer become invalid and we crash.
+        * So let's grab the list of MLAG_PEER upstreams
+        * add a refcount put on another list and delete safely
+        */
+       RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
+               pim = vrf->info;
+               frr_each (rb_pim_upstream, &pim->upstream_head, up) {
+                       if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
+                               continue;
+                       listnode_add(temp, up);
+                       /*
+                        * Add a reference since we are adding to this
+                        * list for deletion
+                        */
+                       up->ref_count++;
+               }
+
+               while (temp->count) {
+                       up = listnode_head(temp);
+                       listnode_delete(temp, up);
+
+                       pim_mlag_up_peer_deref(pim, up);
+                       /*
+                        * This is the deletion of the reference added
+                        * above
+                        */
+                       pim_upstream_del(pim, up, __PRETTY_FUNCTION__);
+               }
+       }
+
+       list_delete(&temp);
+}
+
+static int pim_mlag_signal_zpthread(void)
+{
+       /* XXX - This is a temporary stub; the MLAG thread code is planned for
+        * a separate commit
+        */
+    return (0);
+}
+
+/* Send upstream entry to the local MLAG daemon (which will subsequently
+ * send it to the peer MLAG switch).
+ */
+static void pim_mlag_up_local_add_send(struct pim_instance *pim,
+               struct pim_upstream *up)
+{
+       struct stream *s = NULL;
+       struct vrf *vrf = pim->vrf;
+
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
+               return;
+
+       s = stream_new(sizeof(struct mlag_mroute_add) + PIM_MLAG_METADATA_LEN);
+       if (!s)
+               return;
+
+       if (PIM_DEBUG_MLAG)
+               zlog_debug("local MLAG mroute add %s:%s",
+                               vrf->name, up->sg_str);
+
+       ++router->mlag_stats.msg.mroute_add_tx;
+
+       stream_putl(s, MLAG_MROUTE_ADD);
+       stream_put(s, vrf->name, VRF_NAMSIZ);
+       stream_putl(s, ntohl(up->sg.src.s_addr));
+       stream_putl(s, ntohl(up->sg.grp.s_addr));
+
+       stream_putl(s, pim_up_mlag_local_cost(up));
+       /* XXX - who is adding */
+       stream_putl(s, MLAG_OWNER_VXLAN);
+       /* XXX - am_i_DR field should be removed */
+       stream_putc(s, false);
+       stream_putc(s, !(PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)));
+       stream_putl(s, vrf->vrf_id);
+       /* XXX - this field is a No-op for VXLAN*/
+       stream_put(s, NULL, INTERFACE_NAMSIZ);
+
+       stream_fifo_push_safe(router->mlag_fifo, s);
+       pim_mlag_signal_zpthread();
+}
+
+static void pim_mlag_up_local_del_send(struct pim_instance *pim,
+               struct pim_upstream *up)
+{
+       struct stream *s = NULL;
+       struct vrf *vrf = pim->vrf;
+
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
+               return;
+
+       s = stream_new(sizeof(struct mlag_mroute_del) + PIM_MLAG_METADATA_LEN);
+       if (!s)
+               return;
+
+       if (PIM_DEBUG_MLAG)
+               zlog_debug("local MLAG mroute del %s:%s",
+                               vrf->name, up->sg_str);
+
+       ++router->mlag_stats.msg.mroute_del_tx;
+
+       stream_putl(s, MLAG_MROUTE_DEL);
+       stream_put(s, vrf->name, VRF_NAMSIZ);
+       stream_putl(s, ntohl(up->sg.src.s_addr));
+       stream_putl(s, ntohl(up->sg.grp.s_addr));
+       /* XXX - who is adding */
+       stream_putl(s, MLAG_OWNER_VXLAN);
+       stream_putl(s, vrf->vrf_id);
+       /* XXX - this field is a No-op for VXLAN */
+       stream_put(s, NULL, INTERFACE_NAMSIZ);
+
+       /* XXX - is this the most optimal way to do things */
+       stream_fifo_push_safe(router->mlag_fifo, s);
+       pim_mlag_signal_zpthread();
+}
+
+
+/* Called when a local upstream entry is created or if it's cost changes */
+void pim_mlag_up_local_add(struct pim_instance *pim,
+               struct pim_upstream *up)
+{
+       pim_mlag_up_df_role_elect(pim, up);
+       /* XXX - need to add some dup checks here */
+       pim_mlag_up_local_add_send(pim, up);
+}
+
+/* Called when local MLAG reference is removed from an upstream entry */
+void pim_mlag_up_local_del(struct pim_instance *pim,
+               struct pim_upstream *up)
+{
+       pim_mlag_up_df_role_elect(pim, up);
+       pim_mlag_up_local_del_send(pim, up);
+}
+
+/* When connection to local MLAG daemon is established all the local
+ * MLAG upstream entries are replayed to it.
+ */
+static void pim_mlag_up_local_replay(void)
+{
+       struct pim_upstream *up;
+       struct vrf *vrf;
+       struct pim_instance *pim;
+
+       RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
+               pim = vrf->info;
+               frr_each (rb_pim_upstream, &pim->upstream_head, up) {
+                       if (pim_up_mlag_is_local(up))
+                               pim_mlag_up_local_add_send(pim, up);
+               }
+       }
+}
+
+/* on local/peer mlag connection and role changes the DF status needs
+ * to be re-evaluated
+ */
+static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code)
+{
+       struct pim_upstream *up;
+       struct vrf *vrf;
+       struct pim_instance *pim;
+
+       if (PIM_DEBUG_MLAG)
+               zlog_debug("%s re-run DF election because of %s",
+                               __func__, reason_code);
+       RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
+               pim = vrf->info;
+               frr_each (rb_pim_upstream, &pim->upstream_head, up) {
+                       if (!pim_up_mlag_is_local(up))
+                               continue;
+                       /* if role changes re-send to peer */
+                       if (pim_mlag_up_df_role_elect(pim, up) &&
+                                       mlagd_send)
+                               pim_mlag_up_local_add_send(pim, up);
+               }
+       }
+}
+
+/*****************PIM Actions for MLAG state changes**********************/
+
+/* notify the anycast VTEP component about state changes */
+static inline void pim_mlag_vxlan_state_update(void)
+{
+       bool enable = !!(router->mlag_flags & PIM_MLAGF_STATUS_RXED);
+       bool peer_state = !!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP);
+
+       pim_vxlan_mlag_update(enable, peer_state, router->mlag_role,
+                       router->peerlink_rif_p, &router->local_vtep_ip);
+
+}
+
+/**************End of PIM Actions for MLAG State changes******************/
+
 
 /********************API to process PIM MLAG Data ************************/
 
 static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
 {
+       bool role_chg = false;
+       bool state_chg = false;
+       bool notify_vxlan = false;
+       struct interface *peerlink_rif_p;
        char buf[MLAG_ROLE_STRSIZE];
 
        if (PIM_DEBUG_MLAG)
@@ -41,6 +489,84 @@ static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
                           mlag_role2str(msg.my_role, buf, sizeof(buf)),
                           (msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING"
                                                                 : "DOWN"));
+
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("%s: msg ignored mlagd process state down",
+                                       __func__);
+               return;
+       }
+       ++router->mlag_stats.msg.mlag_status_updates;
+
+       /* evaluate the changes first */
+       if (router->mlag_role != msg.my_role) {
+               role_chg = true;
+               notify_vxlan = true;
+               router->mlag_role = msg.my_role;
+       }
+
+       strcpy(router->peerlink_rif, msg.peerlink_rif);
+       /* XXX - handle the case where we may rx the interface name from the
+        * MLAG daemon before we get the interface from zebra.
+        */
+       peerlink_rif_p = if_lookup_by_name(router->peerlink_rif, VRF_DEFAULT);
+       if (router->peerlink_rif_p != peerlink_rif_p) {
+               router->peerlink_rif_p = peerlink_rif_p;
+               notify_vxlan = true;
+       }
+
+       if (msg.peer_state == MLAG_STATE_RUNNING) {
+               if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
+                       state_chg = true;
+                       notify_vxlan = true;
+                       router->mlag_flags |= PIM_MLAGF_PEER_CONN_UP;
+               }
+               router->connected_to_mlag = true;
+       } else {
+               if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) {
+                       ++router->mlag_stats.peer_session_downs;
+                       state_chg = true;
+                       notify_vxlan = true;
+                       router->mlag_flags &= ~PIM_MLAGF_PEER_CONN_UP;
+               }
+               router->connected_to_mlag = false;
+       }
+
+       /* apply the changes */
+       /* when connection to mlagd comes up we hold send mroutes till we have
+        * rxed the status and had a chance to re-evaluate DF state
+        */
+       if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) {
+               router->mlag_flags |= PIM_MLAGF_STATUS_RXED;
+               pim_mlag_vxlan_state_update();
+               /* on session up re-eval DF status */
+               pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up");
+               /* replay all the upstream entries to the local MLAG daemon */
+               pim_mlag_up_local_replay();
+               return;
+       }
+
+       if (notify_vxlan)
+               pim_mlag_vxlan_state_update();
+
+       if (state_chg) {
+               if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP))
+                       /* when a connection goes down the primary takes over
+                        * DF role for all entries
+                        */
+                       pim_mlag_up_local_reeval(true /*mlagd_send*/,
+                                       "peer_down");
+               else
+                       /* XXX - when session comes up we need to wait for
+                        * PEER_REPLAY_DONE before running re-election on
+                        * local-mlag entries that are missing peer reference
+                        */
+                       pim_mlag_up_local_reeval(true /*mlagd_send*/,
+                                       "peer_up");
+       } else if (role_chg) {
+               /* MLAG role changed without a state change */
+               pim_mlag_up_local_reeval(true /*mlagd_send*/, "role_chg");
+       }
 }
 
 static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
@@ -49,37 +575,116 @@ static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
                zlog_debug(
                        "%s: msg dump: peer_frr_state: %s", __func__,
                        (msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN"));
+
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("%s: msg ignored mlagd process state down",
+                                       __func__);
+               return;
+       }
+       ++router->mlag_stats.msg.peer_zebra_status_updates;
+
+       /* evaluate the changes first */
+       if (msg.frr_state == MLAG_FRR_STATE_UP) {
+               if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) {
+                       router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP;
+                       /* XXX - when peer zebra comes up we need to wait
+                        * for some time to let the peer setup MDTs before
+                        * relinquishing DF status
+                        */
+                       pim_mlag_up_local_reeval(true /*mlagd_send*/,
+                                       "zebra_up");
+               }
+       } else {
+               if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) {
+                       ++router->mlag_stats.peer_zebra_downs;
+                       router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP;
+                       /* when a peer zebra goes down we assume DF role */
+                       pim_mlag_up_local_reeval(true /*mlagd_send*/,
+                                       "zebra_down");
+               }
+       }
 }
 
 static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg)
 {
+       char addr_buf1[INET_ADDRSTRLEN];
+       char addr_buf2[INET_ADDRSTRLEN];
+       uint32_t local_ip;
+
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("%s: msg ignored mlagd process state down",
+                                       __func__);
+               return;
+       }
+
+       ++router->mlag_stats.msg.vxlan_updates;
+       router->anycast_vtep_ip.s_addr = htonl(msg->anycast_ip);
+       local_ip = htonl(msg->local_ip);
+       if (router->local_vtep_ip.s_addr != local_ip) {
+               router->local_vtep_ip.s_addr = local_ip;
+               pim_mlag_vxlan_state_update();
+       }
+
+       if (PIM_DEBUG_MLAG) {
+               inet_ntop(AF_INET, &router->local_vtep_ip,
+                               addr_buf1, INET_ADDRSTRLEN);
+               inet_ntop(AF_INET, &router->anycast_vtep_ip,
+                               addr_buf2, INET_ADDRSTRLEN);
+
+               zlog_debug("%s: msg dump: local-ip:%s, anycast-ip:%s",
+                               __func__, addr_buf1, addr_buf2);
+       }
 }
 
 static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg)
 {
        if (PIM_DEBUG_MLAG) {
                zlog_debug(
-                       "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x cost: %u",
-                       __func__, msg.vrf_name, msg.source_ip, msg.group_ip,
-                       msg.cost_to_rp);
+                               "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x cost: %u",
+                               __func__, msg.vrf_name, msg.source_ip,
+                               msg.group_ip, msg.cost_to_rp);
                zlog_debug(
-                       "owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s",
-                       msg.owner_id, msg.am_i_dr, msg.am_i_dual_active,
-                       msg.vrf_id, msg.intf_name);
+                               "owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s",
+                               msg.owner_id, msg.am_i_dr, msg.am_i_dual_active,
+                               msg.vrf_id, msg.intf_name);
+       }
+
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("%s: msg ignored mlagd process state down",
+                                       __func__);
+               return;
        }
+
+       ++router->mlag_stats.msg.mroute_add_rx;
+
+       pim_mlag_up_peer_add(&msg);
 }
 
 static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg)
 {
        if (PIM_DEBUG_MLAG) {
                zlog_debug(
-                       "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x ",
-                       __func__, msg.vrf_name, msg.source_ip, msg.group_ip);
+                               "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x ",
+                               __func__, msg.vrf_name, msg.source_ip,
+                               msg.group_ip);
                zlog_debug("owner_id: %d, vrf_id: 0x%x intf_name: %s",
-                          msg.owner_id, msg.vrf_id, msg.intf_name);
+                               msg.owner_id, msg.vrf_id, msg.intf_name);
        }
-}
 
+       if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
+               if (PIM_DEBUG_MLAG)
+                       zlog_debug("%s: msg ignored mlagd process state down",
+                                       __func__);
+               return;
+       }
+
+       ++router->mlag_stats.msg.mroute_del_rx;
+
+       pim_mlag_up_peer_del(&msg);
+}
 
 int pim_zebra_mlag_handle_msg(struct stream *s, int len)
 {
@@ -179,11 +784,40 @@ int pim_zebra_mlag_process_up(void)
        return 0;
 }
 
+static void pim_mlag_param_reset(void)
+{
+       /* reset the cached params and stats */
+       router->mlag_flags &= ~(PIM_MLAGF_STATUS_RXED |
+                       PIM_MLAGF_LOCAL_CONN_UP |
+                       PIM_MLAGF_PEER_CONN_UP |
+                       PIM_MLAGF_PEER_ZEBRA_UP);
+       router->local_vtep_ip.s_addr = INADDR_ANY;
+       router->anycast_vtep_ip.s_addr = INADDR_ANY;
+       router->mlag_role = MLAG_ROLE_NONE;
+       memset(&router->mlag_stats.msg, 0, sizeof(router->mlag_stats.msg));
+       router->peerlink_rif[0] = '\0';
+}
+
 int pim_zebra_mlag_process_down(void)
 {
        if (PIM_DEBUG_MLAG)
                zlog_debug("%s: Received Process-Down from Mlag", __func__);
 
+       /* Local CLAG is down, reset peer data and forward the traffic if
+        * we are DR
+        */
+       if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)
+               ++router->mlag_stats.peer_session_downs;
+       if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)
+               ++router->mlag_stats.peer_zebra_downs;
+       router->connected_to_mlag = false;
+       pim_mlag_param_reset();
+       /* on mlagd session down re-eval DF status */
+       pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_down");
+       /* flush all peer references */
+       pim_mlag_up_peer_del_all();
+       /* notify the vxlan component */
+       pim_mlag_vxlan_state_update();
        return 0;
 }
 
@@ -339,6 +973,7 @@ void pim_instance_mlag_terminate(struct pim_instance *pim)
 
 void pim_mlag_init(void)
 {
+       pim_mlag_param_reset();
        router->pim_mlag_intf_cnt = 0;
        router->connected_to_mlag = false;
        router->mlag_fifo = stream_fifo_new();
index e86fdae78f1c2cf1dbf3b86d7e55c185ba77ef21..dab29cc9a256fb48a7d58d43ce25fc6b5d7bc1e4 100644 (file)
@@ -37,4 +37,10 @@ extern void pim_mlag_deregister(void);
 extern int pim_zebra_mlag_process_up(void);
 extern int pim_zebra_mlag_process_down(void);
 extern int pim_zebra_mlag_handle_msg(struct stream *msg, int len);
+extern void pim_mlag_up_local_add(struct pim_instance *pim,
+               struct pim_upstream *upstream);
+extern void pim_mlag_up_local_del(struct pim_instance *pim,
+               struct pim_upstream *upstream);
+extern bool pim_mlag_up_df_role_update(struct pim_instance *pim,
+               struct pim_upstream *up, bool is_df, const char *reason);
 #endif
index 24519adb1e3608a9150133564d427db48882cbfc..889e0704c4c63922ccf5cf328fe6a2424f3d4223 100644 (file)
@@ -194,6 +194,32 @@ static int nexthop_mismatch(const struct pim_nexthop *nh1,
               || (nh1->mrib_route_metric != nh2->mrib_route_metric);
 }
 
+static void pim_rpf_cost_change(struct pim_instance *pim,
+               struct pim_upstream *up, uint32_t old_cost)
+{
+       struct pim_rpf *rpf = &up->rpf;
+       uint32_t new_cost;
+
+       new_cost = pim_up_mlag_local_cost(up);
+       if (PIM_DEBUG_MLAG)
+               zlog_debug(
+                       "%s: Cost_to_rp of upstream-%s changed to:%u, from:%u",
+                       __func__, up->sg_str, new_cost, old_cost);
+
+       if (old_cost == new_cost)
+               return;
+
+       /* Cost changed, it might Impact MLAG DF election, update */
+       if (PIM_DEBUG_MLAG)
+               zlog_debug(
+                       "%s: Cost_to_rp of upstream-%s changed to:%u",
+                       __func__, up->sg_str,
+                       rpf->source_nexthop.mrib_route_metric);
+
+       if (pim_up_mlag_is_local(up))
+               pim_mlag_up_local_add(pim, up);
+}
+
 enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,
                struct pim_upstream *up, struct pim_rpf *old,
                const char *caller)
@@ -203,6 +229,7 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,
        struct prefix nht_p;
        struct prefix src, grp;
        bool neigh_needed = true;
+       uint32_t saved_mrib_route_metric;
 
        if (PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags))
                return PIM_RPF_OK;
@@ -215,6 +242,7 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,
 
        saved.source_nexthop = rpf->source_nexthop;
        saved.rpf_addr = rpf->rpf_addr;
+       saved_mrib_route_metric = pim_up_mlag_local_cost(up);
        if (old) {
                old->source_nexthop = saved.source_nexthop;
                old->rpf_addr = saved.rpf_addr;
@@ -236,8 +264,12 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,
                neigh_needed = false;
        pim_find_or_track_nexthop(pim, &nht_p, up, NULL, false, NULL);
        if (!pim_ecmp_nexthop_lookup(pim, &rpf->source_nexthop, &src, &grp,
-                                    neigh_needed))
+                               neigh_needed)) {
+               /* Route is Deleted in Zebra, reset the stored NH data */
+               pim_upstream_rpf_clear(pim, up);
+               pim_rpf_cost_change(pim, up, saved_mrib_route_metric);
                return PIM_RPF_FAILURE;
+       }
 
        rpf->rpf_addr.family = AF_INET;
        rpf->rpf_addr.u.prefix4 = pim_rpf_find_rpf_addr(up);
@@ -290,10 +322,18 @@ enum pim_rpf_result pim_rpf_update(struct pim_instance *pim,
        if (saved.rpf_addr.u.prefix4.s_addr != rpf->rpf_addr.u.prefix4.s_addr
            || saved.source_nexthop
                               .interface != rpf->source_nexthop.interface) {
-
+               pim_rpf_cost_change(pim, up, saved_mrib_route_metric);
                return PIM_RPF_CHANGED;
        }
 
+       if (PIM_DEBUG_MLAG)
+               zlog_debug(
+                       "%s(%s): Cost_to_rp of upstream-%s changed to:%u",
+                       __func__, caller, up->sg_str,
+                       rpf->source_nexthop.mrib_route_metric);
+
+       pim_rpf_cost_change(pim, up, saved_mrib_route_metric);
+
        return PIM_RPF_OK;
 }
 
index c899e403c83a8a5d9cde0951eadbbedcb17e97b2..2d3a44b646662f70ff3d9398c80270a5c0a51018 100644 (file)
@@ -52,6 +52,7 @@
 #include "pim_nht.h"
 #include "pim_ssm.h"
 #include "pim_vxlan.h"
+#include "pim_mlag.h"
 
 static void join_timer_stop(struct pim_upstream *up);
 static void
@@ -883,6 +884,13 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim,
                }
        }
 
+       /* send the entry to the MLAG peer */
+       /* XXX - duplicate send is possible here if pim_rpf_update
+        * successfully resolved the nexthop
+        */
+       if (pim_up_mlag_is_local(up))
+               pim_mlag_up_local_add(pim, up);
+
        if (PIM_DEBUG_PIM_TRACE) {
                zlog_debug(
                        "%s: Created Upstream %s upstream_addr %s ref count %d increment",
@@ -893,6 +901,22 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim,
        return up;
 }
 
+uint32_t pim_up_mlag_local_cost(struct pim_upstream *up)
+{
+       if (!(pim_up_mlag_is_local(up)))
+               return router->infinite_assert_metric.route_metric;
+
+       return up->rpf.source_nexthop.mrib_route_metric;
+}
+
+uint32_t pim_up_mlag_peer_cost(struct pim_upstream *up)
+{
+       if (!(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_PEER))
+               return router->infinite_assert_metric.route_metric;
+
+       return up->mlag.peer_mrib_metric;
+}
+
 struct pim_upstream *pim_upstream_find(struct pim_instance *pim,
                                       struct prefix_sg *sg)
 {
@@ -916,6 +940,15 @@ struct pim_upstream *pim_upstream_find_or_add(struct prefix_sg *sg,
 
 void pim_upstream_ref(struct pim_upstream *up, int flags, const char *name)
 {
+       /* if a local MLAG reference is being created we need to send the mroute
+        * to the peer
+        */
+       if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags) &&
+                       PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(flags)) {
+               PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(up->flags);
+               pim_mlag_up_local_add(up->pim, up);
+       }
+
        /* when we go from non-FHR to FHR we need to re-eval traffic
         * forwarding path
         */
@@ -1950,8 +1983,9 @@ static void pim_upstream_sg_running(void *arg)
                                        "source reference created on kat restart %s[%s]",
                                        up->sg_str, pim->vrf->name);
 
-                       pim_upstream_ref(up, PIM_UPSTREAM_FLAG_MASK_SRC_STREAM,
-                                        __PRETTY_FUNCTION__);
+                       pim_upstream_ref(up,
+                                       PIM_UPSTREAM_FLAG_MASK_SRC_STREAM,
+                                       __PRETTY_FUNCTION__);
                        PIM_UPSTREAM_FLAG_SET_SRC_STREAM(up->flags);
                        pim_upstream_fhr_kat_start(up);
                }
index 1eb2052bb3380ac550f0923c479c6ab41d6029ea..1c1f18008338b204b714ad63c90098c26c486893 100644 (file)
@@ -74,6 +74,8 @@
  * blackholing the traffic pulled down to the LHR.
  */
 #define PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF             (1 << 17)
+/* MLAG mroute rxed from the peer MLAG switch */
+#define PIM_UPSTREAM_FLAG_MASK_MLAG_PEER               (1 << 18)
 /*
  * We are creating a non-joined upstream data structure
  * for this S,G as that we want to have a channel oil
 #define PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN(flags) ((flags) & (PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG | PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM))
 #define PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN)
 #define PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF)
+#define PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)
 #define PIM_UPSTREAM_FLAG_TEST_SRC_NOCACHE(flags) ((flags) &PIM_UPSTREAM_FLAG_MASK_SRC_NOCACHE)
 #define PIM_UPSTREAM_FLAG_TEST_USE_RPT(flags) ((flags) & PIM_UPSTREAM_FLAG_MASK_USE_RPT)
 
 #define PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM)
 #define PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN)
 #define PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF)
+#define PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)
 #define PIM_UPSTREAM_FLAG_SET_USE_RPT(flags) ((flags) |= PIM_UPSTREAM_FLAG_MASK_USE_RPT)
 
 #define PIM_UPSTREAM_FLAG_UNSET_DR_JOIN_DESIRED(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_DR_JOIN_DESIRED)
 #define PIM_UPSTREAM_FLAG_UNSET_SRC_VXLAN_TERM(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM)
 #define PIM_UPSTREAM_FLAG_UNSET_MLAG_VXLAN(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN)
 #define PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_NON_DF)
+#define PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_MLAG_PEER)
 #define PIM_UPSTREAM_FLAG_UNSET_SRC_NOCACHE(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_SRC_NOCACHE)
 #define PIM_UPSTREAM_FLAG_UNSET_USE_RPT(flags) ((flags) &= ~PIM_UPSTREAM_FLAG_MASK_USE_RPT)
 
@@ -169,6 +174,13 @@ enum pim_upstream_sptbit {
        PIM_UPSTREAM_SPTBIT_TRUE
 };
 
+struct pim_up_mlag {
+       /* MRIB.metric(S) from the peer switch. This is used for DF election
+        * and switch with the lowest cost wins.
+        */
+       uint32_t peer_mrib_metric;
+};
+
 PREDECL_RBTREE_UNIQ(rb_pim_upstream);
 /*
   Upstream (S,G) channel in Joined state
@@ -218,6 +230,8 @@ struct pim_upstream {
 
        struct pim_rpf rpf;
 
+       struct pim_up_mlag mlag;
+
        struct thread *t_join_timer;
 
        /*
@@ -249,6 +263,14 @@ static inline bool pim_upstream_is_kat_running(struct pim_upstream *up)
        return (up->t_ka_timer != NULL);
 }
 
+static inline bool pim_up_mlag_is_local(struct pim_upstream *up)
+{
+       /* XXX: extend this to also return true if the channel-oil has
+        * any AA devices
+        */
+       return (up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN);
+}
+
 struct pim_upstream *pim_upstream_find(struct pim_instance *pim,
                                       struct prefix_sg *sg);
 struct pim_upstream *pim_upstream_find_or_add(struct prefix_sg *sg,
@@ -259,7 +281,8 @@ struct pim_upstream *pim_upstream_add(struct pim_instance *pim,
                                      struct interface *ifp, int flags,
                                      const char *name,
                                      struct pim_ifchannel *ch);
-void pim_upstream_ref(struct pim_upstream *up, int flags, const char *name);
+void pim_upstream_ref(struct pim_upstream *up,
+               int flags, const char *name);
 struct pim_upstream *pim_upstream_del(struct pim_instance *pim,
                                      struct pim_upstream *up,
                                      const char *name);
@@ -350,5 +373,7 @@ void pim_upstream_fill_static_iif(struct pim_upstream *up,
                                struct interface *incoming);
 void pim_upstream_update_use_rpt(struct pim_upstream *up,
                bool update_mroute);
+uint32_t pim_up_mlag_local_cost(struct pim_upstream *up);
+uint32_t pim_up_mlag_peer_cost(struct pim_upstream *up);
 void pim_upstream_reeval_use_rpt(struct pim_instance *pim);
 #endif /* PIM_UPSTREAM_H */
index c48ec373f844c526f049a22dc9f9c77b5339bc96..b5a5089ae76c374efcbb367f6bbc05b3f0e93b47 100644 (file)
@@ -242,8 +242,6 @@ int pim_global_config_write_worker(struct pim_instance *pim, struct vty *vty)
                }
        }
 
-       pim_vxlan_config_write(vty, spaces, &writes);
-
        return writes;
 }
 
index 4d8fe779ae17d96f69a49682868043cc2fa40661..f1f5c81c00bce636efc08ca9fe161364d03c0980 100644 (file)
@@ -38,6 +38,7 @@
 #include "pim_nht.h"
 #include "pim_zebra.h"
 #include "pim_vxlan.h"
+#include "pim_mlag.h"
 
 /* pim-vxlan global info */
 struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info;
@@ -594,7 +595,7 @@ static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
                /* clear out all the vxlan related flags */
                up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM |
                        PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN);
-
+               pim_mlag_up_local_del(vxlan_sg->pim, up);
                pim_upstream_del(vxlan_sg->pim, up,
                                __PRETTY_FUNCTION__);
        }
@@ -825,27 +826,6 @@ void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role,
 }
 
 /****************************** misc callbacks *******************************/
-void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes)
-{
-       char addr_buf[INET_ADDRSTRLEN];
-
-       if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
-                       vxlan_mlag.peerlink_rif) {
-
-               inet_ntop(AF_INET, &vxlan_mlag.reg_addr,
-                               addr_buf, sizeof(addr_buf));
-               vty_out(vty,
-                       "%sip pim mlag %s role %s state %s addr %s\n",
-                       spaces,
-                       vxlan_mlag.peerlink_rif->name,
-                       (vxlan_mlag.role == PIM_VXLAN_MLAG_ROLE_PRIMARY) ?
-                               "primary":"secondary",
-                       vxlan_mlag.peer_state ? "up" : "down",
-                       addr_buf);
-               *writes += 1;
-       }
-}
-
 static void pim_vxlan_set_default_iif(struct pim_instance *pim,
                                struct interface *ifp)
 {
index 22ed1f761a2d1deee13d30ec6f4b5becc1b0013b..4495dca6d787de42d7a1c450b64fd133b216cf54 100644 (file)
@@ -70,14 +70,10 @@ enum pim_vxlan_mlag_flags {
        PIM_VXLAN_MLAGF_DO_REG = (1 << 1)
 };
 
-enum pim_vxlan_mlag_role {
-       PIM_VXLAN_MLAG_ROLE_SECONDARY = 0,
-       PIM_VXLAN_MLAG_ROLE_PRIMARY
-};
-
 struct pim_vxlan_mlag {
        enum pim_vxlan_mlag_flags flags;
-       enum pim_vxlan_mlag_role role;
+       /* XXX - remove this variable from here */
+       int role;
        bool peer_state;
        /* routed interface setup on top of MLAG peerlink */
        struct interface *peerlink_rif;
@@ -142,7 +138,6 @@ extern bool pim_vxlan_get_register_src(struct pim_instance *pim,
 extern void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role,
                                struct interface *peerlink_rif,
                                struct in_addr *reg_addr);
-extern void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes);
 extern bool pim_vxlan_do_mlag_reg(void);
 
 #endif /* PIM_VXLAN_H */
index 06507b1f4c60c0144187d609b3c03358ab6d4d73..7eb648ab865a8a8e44e8d1d50ee14bec6ad795b2 100644 (file)
@@ -452,7 +452,7 @@ static void pim_zebra_connected(struct zclient *zclient)
 
 static void pim_zebra_capabilities(struct zclient_capabilities *cap)
 {
-       router->role = cap->role;
+       router->mlag_role = cap->role;
 }
 
 void pim_zebra_init(void)