diff options
| author | Christian Hopps <chopps@labn.net> | 2025-01-17 21:21:33 +0000 | 
|---|---|---|
| committer | Christian Hopps <chopps@labn.net> | 2025-01-18 16:14:29 +0000 | 
| commit | 597d79a89e6628645e3648bdc03db0d7bdc7e0f4 (patch) | |
| tree | e9b6e0195f29d416e9853c304bceb64c75560773 | |
| parent | 1f1d166288410f4f2581ea0340021cd6dc72813e (diff) | |
lib: improve error handling for datastore notifications
Signed-off-by: Christian Hopps <chopps@labn.net>
| -rw-r--r-- | lib/northbound_notif.c | 148 | 
1 files changed, 87 insertions, 61 deletions
diff --git a/lib/northbound_notif.c b/lib/northbound_notif.c index b75c865613..10a81d05fb 100644 --- a/lib/northbound_notif.c +++ b/lib/northbound_notif.c @@ -480,87 +480,96 @@ static struct op_changes_group *op_changes_group_next(void)  /* Query for changes and notify */  /* ---------------------------- */ +static void timer_walk_abort(struct nb_notif_walk_args *args);  static void timer_walk_continue(struct event *event); +static void timer_walk_done(struct nb_notif_walk_args *args); + +static struct op_change *__next_change(struct op_changes_group *group) +{ +	struct op_change *next = RB_NEXT(op_changes, group->cur_change); + +	/* Remove and free current so retry works */ +	RB_REMOVE(op_changes, group->cur_changes, group->cur_change); +	op_change_free(group->cur_change); +	return next; +} + +static struct op_changes_group *__next_group(struct op_changes_group *group) +{ +	__dbg("done with oper-path collection for group"); +	op_changes_group_free(group); +	return op_changes_group_next(); +}  static enum nb_error oper_walk_done(const struct lyd_node *tree, void *arg, enum nb_error ret)  {  	struct nb_notif_walk_args *args = arg;  	struct op_changes_group *group = args->group;  	const char *path = group->cur_change->path; -	const char *op = group->cur_changes == &group->adds ? "add" : "delete";  	/* we don't send batches when yielding as we need completed edit in any patch */  	assert(ret != NB_YIELD); -	nb_notif_walk = NULL; -  	if (ret == NB_ERR_NOT_FOUND) {  		__dbg("Path not found while walking oper tree: %s", path); -		XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args); -		return ret; -	} -	/* Something else went wrong with the walk */ -	if (ret != NB_OK) { +		ret = NB_OK; +	} else if (ret != NB_OK) {  error: -		__log_err("Error notifying for datastore change on path: %s: %s", path, -			  nb_err_name(ret)); -		XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args); -		/* XXX Need to inform mgmtd/front-ends things are out-of-sync */ -		return ret; -	} - -	__dbg("done with oper-path collection for %s path: %s", op, path); - -	/* Do we need this? */ -	while (tree->parent) -		tree = lyd_parent(tree); - -	/* Send the add (replace) notification */ -	if (mgmt_be_send_ds_replace_notification(path, tree)) { -		ret = NB_ERR; -		goto error; +		__log_err("Error notifying for datastore path: %s: %s", path, nb_err_name(ret)); + +		timer_walk_abort(args); +		goto done; +	} else { +		__dbg("Done with oper-path collection for path: %s", path); + +		/* Do we need this? */ +		while (tree->parent) +			tree = lyd_parent(tree); + +		/* Send the add (replace) notification */ +		if (mgmt_be_send_ds_replace_notification(path, tree)) { +			__log_err("Error sending notification message for path: %s", path); +			ret = NB_ERR; +			goto error; +		}  	}  	/* -	 * Advance to next change (either dels or adds or both). +	 * Advance to next change.  	 */ -	group->cur_change = RB_NEXT(op_changes, group->cur_change); +	group->cur_change = __next_change(group);  	if (!group->cur_change) { -		__dbg("done with oper-path collection for group"); -		op_changes_group_free(group); - -		group = op_changes_group_next(); -		args->group = group; -		if (!group) { -			__dbg("done with ALL oper-path collection for notification"); -			XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args); +		args->group = __next_group(group); +		if (!args->group) { +			timer_walk_done(args);  			goto done;  		}  	} +	/* Run next walk after giving other events a shot to run */  	event_add_timer_msec(nb_notif_master, timer_walk_continue, args, 0, &nb_notif_timer);  done:  	/* Done with current walk and scheduled next one if there is more */  	nb_notif_walk = NULL; -	return NB_OK; +	return ret;  } -static LY_ERR nb_notify_delete_changes(struct nb_notif_walk_args *args) +static int nb_notify_delete_changes(struct nb_notif_walk_args *args)  {  	struct op_changes_group *group = args->group; -	LY_ERR err;  	group->cur_change = RB_MIN(op_changes, group->cur_changes);  	while (group->cur_change) { -		err = mgmt_be_send_ds_delete_notification(group->cur_change->path); -		assert(err == LY_SUCCESS); /* XXX */ - -		group->cur_change = RB_NEXT(op_changes, group->cur_change); +		if (mgmt_be_send_ds_delete_notification(group->cur_change->path)) { +			__log_err("Error sending delete notification message for path: %s", +				  group->cur_change->path); +			return 1; +		} +		group->cur_change = __next_change(group);  	} - -	return LY_SUCCESS; +	return 0;  }  static void timer_walk_continue(struct event *event) @@ -568,15 +577,17 @@ static void timer_walk_continue(struct event *event)  	struct nb_notif_walk_args *args = EVENT_ARG(event);  	struct op_changes_group *group = args->group;  	const char *path; -	LY_ERR err; +	int ret;  	/*  	 * Notify about deletes until we have add changes to collect.  	 */  	while (group->cur_changes == &group->dels) { -		err = nb_notify_delete_changes(args); -		assert(err == LY_SUCCESS);  /* XXX */ -		assert(!group->cur_change); /* we send all the deletes in one message */ +		ret = nb_notify_delete_changes(args); +		if (ret) { +			timer_walk_abort(args); +			return; +		}  		/* after deletes advance to adds */  		group->cur_changes = &group->adds; @@ -584,14 +595,9 @@ static void timer_walk_continue(struct event *event)  		if (group->cur_change)  			break; -		__dbg("done with oper-path change group"); -		op_changes_group_free(group); - -		group = op_changes_group_next(); -		args->group = group; -		if (!group) { -			__dbg("done with ALL oper-path changes"); -			XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args); +		args->group = __next_group(group); +		if (!args->group) { +			timer_walk_done(args);  			return;  		}  	} @@ -621,6 +627,22 @@ static void timer_walk_start(struct event *event)  	timer_walk_continue(event);  } +static void timer_walk_abort(struct nb_notif_walk_args *args) +{ +	__dbg("Failed notifying datastore changes, will retry"); + +	__dbg("oper-state notify setting retry timer to fire in: %d msec ", NB_NOTIF_TIMER_MSEC); +	event_add_timer_msec(nb_notif_master, timer_walk_continue, args, NB_NOTIF_TIMER_MSEC, +			     &nb_notif_timer); +} + +static void timer_walk_done(struct nb_notif_walk_args *args) +{ +	__dbg("Finished notifying for all datastore changes"); +	assert(!args->group); +	XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args); +} +  static void nb_notif_set_walk_timer(void)  {  	if (nb_notif_walk) { @@ -659,19 +681,23 @@ void nb_notif_init(struct event_loop *tm)  void nb_notif_terminate(void)  { -	struct nb_notif_walk_args *args; +	struct nb_notif_walk_args *args = nb_notif_timer ? EVENT_ARG(nb_notif_timer) : NULL;  	struct op_changes_group *group; +	__dbg("terminating: timer: %p timer arg: %p walk %p", nb_notif_timer, args, nb_notif_walk); +  	EVENT_OFF(nb_notif_timer);  	if (nb_notif_walk) { -		nb_oper_cancel_walk(nb_notif_walk); -		/* need to free the group that's in the walk */ +		/* Grab walk args from walk if active. */  		args = nb_oper_walk_finish_arg(nb_notif_walk); -		if (args) -			op_changes_group_free(args->group); +		nb_oper_cancel_walk(nb_notif_walk);  		nb_notif_walk = NULL;  	} +	if (args) { +		op_changes_group_free(args->group); +		XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args); +	}  	while ((group = op_changes_group_next()))  		op_changes_group_free(group);  | 
