From 76699ae7e0358c57a1186b88962d7233a7057d76 Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Wed, 27 Jul 2016 23:30:21 +0000 Subject: [PATCH] lib: Improve argv construction Fixed a variety of failure cases and wrote a nice doc string. Reverted add_node doc string to correct explanation of procedure. Signed-off-by: Quentin Young --- lib/command_graph.h | 6 +- lib/command_match.c | 167 ++++++++++++++++++++++++++++++++++++------ lib/grammar_sandbox.c | 17 ++--- 3 files changed, 154 insertions(+), 36 deletions(-) diff --git a/lib/command_graph.h b/lib/command_graph.h index f82bbd052b..a9ffe0f7b6 100644 --- a/lib/command_graph.h +++ b/lib/command_graph.h @@ -5,14 +5,14 @@ enum graph_node_type { - WORD_GN, IPV4_GN, IPV4_PREFIX_GN, IPV6_GN, IPV6_PREFIX_GN, - VARIABLE_GN, + WORD_GN, RANGE_GN, NUMBER_GN, + VARIABLE_GN, SELECTOR_GN, OPTION_GN, NUL_GN, @@ -41,7 +41,7 @@ struct graph_node * Adds a node as a child of another node. * If the new parent has a child that is equal to the prospective child, as * determined by cmp_node, then a pointer to the existing node is returned and - * the prospective child is not added. Otherwise the return value is NULL. + * the prospective child is not added. Otherwise the child node is returned. * * @param[in] parent node * @param[in] child node diff --git a/lib/command_match.c b/lib/command_match.c index dc44f1b824..023faafade 100644 --- a/lib/command_match.c +++ b/lib/command_match.c @@ -8,7 +8,10 @@ static int add_nexthops(struct list *, struct graph_node *); static struct list * -match_build_argv_r (struct graph_node *start, vector vline, unsigned int n); +match_build_argv_r (struct graph_node *, vector, unsigned int); + +static int +score_precedence (struct graph_node *); /* token matcher prototypes */ static enum match_type @@ -160,61 +163,181 @@ match_build_argv (const char *line, struct cmd_element *element) { // call recursive builder on each starting child argv = match_build_argv_r (vector_slot(start->children, i), vline, 0); - // if any of them succeed, return their argv (there should only be one) + // if any of them succeed, return their argv + // since all command DFA's must begin with a word and these are deduplicated, + // no need to check precedence if (argv) break; } return argv; } +/** + * Builds an argument list given a DFA and a matching input line. + * This function should be passed the start node of the DFA, a matching + * input line, and the index of the first input token in the input line. + * + * First the function determines if the node it is passed matches the + * first token of input. If it does not, it returns NULL. If it does + * match, then it saves the input token as the head of an argument list. + * + * The next step is to see if there is further input in the input line. + * If there is not, the current node's children are searched to see if + * any of them are leaves (type END_GN). If this is the case, then the + * bottom of the recursion stack has been reached, and the argument list + * (with one node) is returned. If it is not the case, NULL is returned, + * indicating that there is no match for the input along this path. + * + * If there is further input, then the function recurses on each of the + * current node's children, passing them the input line minus the token + * that was just matched. For each child, the return value of the recursive + * call is inspected. If it is null, then there is no match for the input along + * the subgraph headed by that child. If it is not null, then there is at least + * one input match in that subgraph (more on this in a moment). + * + * If a recursive call on a child returns a non-null value, then it has matched + * the input given it on the subgraph that starts with that child. However, due + * to the flexibility of the grammar, it is sometimes the case that two or more + * child graphs match the same input (two or more of the recursive calls have + * non-NULL return values). This is not a valid state, since only one + * true match is possible. In order to resolve this conflict, the function + * keeps a reference to the child node that most specifically matches the + * input. This is done by assigning each node type a precedence. If a child is + * found to match the remaining input, then the precedence values of the + * current best-matching child and this new match are compared. The node with + * higher precedence is kept, and the other match is discarded. Due to the + * recursive nature of this function, it is only necessary to compare the + * precedence of immediate children, since all subsequent children will already + * have been disambiguated in this way. + * + * In the event that two children are found to match with the same precedence, + * then this command is totally ambiguous (how did you even match it in the first + * place?) and NULL is returned. + * + * The ultimate return value is an ordered linked list of nodes that comprise + * the best match for the command, each with their `arg` fields pointing to the + * matching token string. + * + * @param[out] start the start node. + * @param[in] vline the vectorized input line. + * @param[in] n the index of the first input token. Should be 0 for external + * callers. + */ static struct list * match_build_argv_r (struct graph_node *start, vector vline, unsigned int n) { // if we don't match this node, die - if (match_token(start, vector_slot(vline, n), FILTER_STRICT) == no_match) + if (match_token(start, vector_slot(vline, n), FILTER_STRICT) != exact_match) return NULL; - // some stuffs we need + // arg list for this subgraph struct list *argv = list_new(); + + // pointers for iterating linklist struct graph_node *gn; struct listnode *ln; // append current arg - start->arg = strdup(vector_slot(vline, n)); listnode_add(argv, start); // get all possible nexthops struct list *next = list_new(); add_nexthops(next, start); - // check if one of them is END_GN - for (ALL_LIST_ELEMENTS_RO(next,ln,gn)) - { - if (gn->type == END_GN){ - fprintf(stderr, "Hit END_GN while searching next set of node with text %s\n", start->text); - return argv; + // if we're at the end of input, need END_GN or no match + if (n+1 == vector_active (vline)) { + for (ALL_LIST_ELEMENTS_RO(next,ln,gn)) { + if (gn->type == END_GN) { + list_delete (next); + start->arg = XSTRDUP(MTYPE_CMD_TOKENS, vector_slot(vline, n)); + if (start->type == VARIABLE_GN) + fprintf(stderr, "Setting variable %s->arg with text %s\n", start->text, start->arg); + return argv; + } } + list_free (next); + return NULL; } - // if we have no more input, why even live? - if (n+1 >= vector_active(vline)) return NULL; - // otherwise recurse on all nexthops + struct list *bestmatch = NULL; for (ALL_LIST_ELEMENTS_RO(next,ln,gn)) - { - for (unsigned int i = 0; i < n; i++) fprintf(stderr, "\t"); - fprintf(stderr, "Recursing on node %s for token %s\n", gn->text, (char*) vector_slot(vline, n+1)); - struct list *result = match_build_argv_r (gn, vline, n+1); - if (result != NULL) { - list_add_list (argv, result); - return argv; + { + if (gn->type == END_GN) // skip END_GN since we aren't at end of input + continue; + + // get the result of the next node + for (unsigned int i = 0; i < n; i++) fprintf(stderr, "\t"); + fprintf(stderr, "Recursing on node %s for token %s\n", gn->text, (char*) vector_slot(vline, n+1)); + struct list *result = match_build_argv_r (gn, vline, n+1); + + // compare to our current best match, and save if it's better + if (result) { + if (bestmatch) { + int currprec = score_precedence (listgetdata(listhead(bestmatch))); + int rsltprec = score_precedence (gn); + if (currprec < rsltprec) + list_delete (result); + if (currprec > rsltprec) { + for (unsigned int i = 0; i < n; i++) fprintf(stderr, "\t"); + fprintf(stderr, ">> Overwriting bestmatch with: %s\n", gn->text); + list_delete (bestmatch); + bestmatch = result; + } + if (currprec == rsltprec) { + fprintf(stderr, ">> Ambiguous match. Abort.\n"); + list_delete (bestmatch); + list_delete (result); + list_delete (argv); + return NULL; + } + } + else { + bestmatch = result; + for (unsigned int i = 0; i < n; i++) fprintf(stderr, "\t"); + fprintf(stderr, ">> Setting bestmatch with: %s\n", gn->text); + } + } } + + if (bestmatch) { + list_add_list(argv, bestmatch); + list_delete (bestmatch); + start->arg = XSTRDUP(MTYPE_CMD_TOKENS, vector_slot(vline, n)); + if (start->type == VARIABLE_GN) + fprintf(stderr, "Setting variable %s->arg with text %s\n", start->text, start->arg); + return argv; + } + else { + list_delete (argv); + return NULL; } - return NULL; } /* matching utility functions */ +static int +score_precedence (struct graph_node *node) +{ + switch (node->type) + { + // these should be mutually exclusive + case IPV4_GN: + case IPV4_PREFIX_GN: + case IPV6_GN: + case IPV6_PREFIX_GN: + case RANGE_GN: + case NUMBER_GN: + return 1; + case WORD_GN: + return 2; + case VARIABLE_GN: + return 3; + default: + return 10; + } +} + static enum match_type match_token (struct graph_node *node, char *token, enum filter_type filter) { diff --git a/lib/grammar_sandbox.c b/lib/grammar_sandbox.c index f78ba060a6..e4617969af 100644 --- a/lib/grammar_sandbox.c +++ b/lib/grammar_sandbox.c @@ -82,18 +82,13 @@ DEFUN (grammar_test_match, } struct list *argvv = match_build_argv (command, element); - fprintf(stderr, "num args: %d\n", argvv->count); - - struct listnode *ln; - struct graph_node *gn; - for (ALL_LIST_ELEMENTS_RO(argvv,ln,gn)) { - fprintf(stderr, "node text: %s\n", gn->text); - if (gn->arg) - fprintf(stderr, "node arg: %s\n", gn->arg); - else - fprintf(stderr, "No arg.\n"); + if (!argvv) fprintf(stderr, "Failed to build argv.\n"); + else { + struct listnode *ln; + struct graph_node *gn; + for (ALL_LIST_ELEMENTS_RO(argvv,ln,gn)) + fprintf(stderr, "%s -- %s\n", gn->text, gn->arg); } - return CMD_SUCCESS; } -- 2.39.5