From 41dca83631d9272d193679a0a99a574a4fdd9933 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Wed, 18 Jun 2014 21:54:52 +0200
Subject: [PATCH] add h_permutation

---
 src/SConscript            |   1 +
 src/hammer.h              |  26 +++++
 src/parsers/permutation.c | 200 ++++++++++++++++++++++++++++++++++++++
 src/t_parser.c            |  54 ++++++++++
 4 files changed, 281 insertions(+)
 create mode 100644 src/parsers/permutation.c

diff --git a/src/SConscript b/src/SConscript
index 49d43eb7..38ace12a 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -39,6 +39,7 @@ parsers = ['parsers/%s.c'%s for s in
             'not',
             'nothing',
             'optional',
+            'permutation',
             'sequence',
             'token',
             'unimplemented',
diff --git a/src/hammer.h b/src/hammer.h
index 947456d8..52058b5c 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -437,6 +437,32 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa
  */
 HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p);
 
+/**
+ * Given a null-terminated list of parsers, match a permutation phrase of these
+ * parsers, i.e. match all parsers exactly once in any order.
+ *
+ * If multiple orders would match, the lexically smallest permutation is used;
+ * in other words, at any step the remaining available parsers are tried in
+ * the order in which they appear in the arguments.
+ *
+ * As an exception, 'h_optional' parsers (actually those that return a result
+ * of token type TT_NONE) are detected and the algorithm will try to match them
+ * with a non-empty result. Specifically, a result of TT_NONE is treated as a
+ * non-match as long as any other argument matches.
+ *
+ * Other parsers that succeed on any input (e.g. h_many), that match the same
+ * input as others, or that match input which is a prefix of another match can
+ * lead to unexpected results and should probably not be used as arguments.
+ *
+ * The result is a sequence of the same length as the argument list.
+ * Each parser's result is placed at that parser's index in the arguments.
+ * The permutation itself (the order in which the arguments were matched) is
+ * not returned.
+ *
+ * Result token type: TT_SEQUENCE
+ */
+HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p);
+
 /**
  * Given two parsers, p1 and p2, this parser succeeds in the following
  * cases:
diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c
new file mode 100644
index 00000000..564565af
--- /dev/null
+++ b/src/parsers/permutation.c
@@ -0,0 +1,200 @@
+#include <stdarg.h>
+#include "parser_internal.h"
+
+// environment of a permutation parser: its argument parsers, in call order
+typedef struct {
+  size_t len;
+  HParser **p_array;
+} HSequence;
+
+// main recursion, used by parse_permutation below
+//
+// s     - the argument parsers
+// seq   - result array; the token of parser i is stored at index i
+// k     - number of parsers matched so far
+// set   - set[i] is nonzero while parser i is still available
+// state - parse state; the input is rewound after each unsuccessful attempt
+//
+// returns 1 if the remaining parsers match in some order, 0 otherwise
+static int parse_permutation_tail(const HSequence *s,
+                                  HCountedArray *seq,
+                                  const size_t k, char *set,
+                                  HParseState *state)
+{
+  // shorthands
+  const size_t n = s->len;
+  HParser **ps = s->p_array;
+
+  // trivial base case
+  if(k >= n)
+    return 1;
+
+  HInputStream bak = state->input_stream;
+
+  // try available parsers as first element of the permutation tail
+  HParseResult *match = NULL;
+  size_t i;
+  for(i=0; i<n; i++) {
+    if(set[i]) {
+      match = h_do_parse(ps[i], state);
+
+      // save result; on failure clear the slot so that a token left behind
+      // by an abandoned branch cannot be mistaken for an empty optional below
+      seq->elements[i] = match ? (void *)match->ast : NULL;
+
+      // treat empty optionals (TT_NONE) like failure here
+      if(match && match->ast && match->ast->token_type == TT_NONE)
+        match = NULL;
+
+      if(match) {
+        // remove parser from active set
+        set[i] = 0;
+
+        // parse the rest of the permutation phrase
+        if(parse_permutation_tail(s, seq, k+1, set, state)) {
+          // success
+          return 1;
+        } else {
+          // place parser back in active set and try the next
+          set[i] = 1;
+        }
+      }
+
+      state->input_stream = bak;  // rewind input
+    }
+  }
+
+  // if all available parsers were empty optionals (TT_NONE), still succeed
+  for(i=0; i<n; i++) {
+    if(set[i]) {
+      HParsedToken *tok = seq->elements[i];
+      if(!(tok && tok->token_type == TT_NONE))
+        break;
+    }
+  }
+  if(i==n)      // all were TT_NONE
+    return 1;
+
+  // permutations exhausted
+  return 0;
+}
+
+// parse function of the permutation parser
+//
+// env is the HSequence of argument parsers. on success the result is a
+// TT_SEQUENCE token holding each parser's token at that parser's index.
+// returns NULL if no permutation matches.
+static HParseResult *parse_permutation(void *env, HParseState *state)
+{
+  const HSequence *s = env;
+  const size_t n = s->len;
+
+  // current set of available (not yet matched) parsers
+  char *set = h_arena_malloc(state->arena, sizeof(char) * n);
+  memset(set, 1, sizeof(char) * n);
+
+  // parse result
+  HCountedArray *seq = h_carray_new_sized(state->arena, n);
+
+  if(parse_permutation_tail(s, seq, 0, set, state)) {
+    // success
+    // return the sequence of results
+    seq->used = n;
+    HParsedToken *tok = a_new(HParsedToken, 1);
+    tok->token_type = TT_SEQUENCE;
+    tok->seq = seq;
+    return make_result(state->arena, tok);
+  } else {
+    // no parse
+    // XXX free seq
+    return NULL;
+  }
+}
+
+
+static const HParserVtable permutation_vt = {
+  .parse = parse_permutation,
+  .isValidRegular = h_false,
+  .isValidCF = h_false,
+  .desugar = NULL,
+  .compile_to_rvm = h_not_regular,
+};
+
+HParser* h_permutation(HParser* p, ...) {
+  va_list ap;
+  va_start(ap, p);
+  HParser* ret = h_permutation__mv(&system_allocator, p, ap);
+  va_end(ap);
+  return ret;
+}
+
+HParser* h_permutation__m(HAllocator* mm__, HParser* p, ...) {
+  va_list ap;
+  va_start(ap, p);
+  HParser* ret = h_permutation__mv(mm__, p, ap);
+  va_end(ap);
+  return ret;
+}
+
+HParser* h_permutation__v(HParser* p, va_list ap) {
+  return h_permutation__mv(&system_allocator, p, ap);
+}
+
+// builds a permutation parser from p followed by the NULL-terminated
+// arguments in ap_. a NULL p denotes an empty argument list.
+HParser* h_permutation__mv(HAllocator* mm__, HParser* p, va_list ap_) {
+  va_list ap;
+  size_t len = 0;
+  HSequence *s = h_new(HSequence, 1);
+
+  // count the arguments. if p is NULL it is itself the terminator and ap_
+  // holds nothing that may be read.
+  if(p) {
+    HParser *arg;
+    va_copy(ap, ap_);
+    do {
+      len++;
+      arg = va_arg(ap, HParser *);
+    } while (arg);
+    va_end(ap);
+  }
+  s->p_array = h_new(HParser *, len);
+
+  if(len > 0) {
+    va_copy(ap, ap_);
+    s->p_array[0] = p;
+    for (size_t i = 1; i < len; i++) {
+      s->p_array[i] = va_arg(ap, HParser *);
+    }
+    va_end(ap);
+  }
+
+  s->len = len;
+  return h_new_parser(mm__, &permutation_vt, s);
+}
+
+HParser* h_permutation__a(void *args[]) {
+  return h_permutation__ma(&system_allocator, args);
+}
+
+// builds a permutation parser from a NULL-terminated array of parsers
+HParser* h_permutation__ma(HAllocator* mm__, void *args[]) {
+  HParser **ps = (HParser **)args;
+
+  // count the parsers up to the terminating NULL
+  size_t len = 0;
+  while(ps[len])
+    len++;
+
+  HSequence *s = h_new(HSequence, 1);
+  s->p_array = h_new(HParser *, len);
+
+  for (size_t i = 0; i < len; i++) {
+    s->p_array[i] = ps[i];
+  }
+
+  s->len = len;
+  // construct the parser through h_new_parser, exactly as h_permutation__mv
+  // does, so both entry points yield identically initialized parsers
+  return h_new_parser(mm__, &permutation_vt, s);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index 2b66bffb..191996cc 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -515,6 +515,59 @@ static void test_put_get(gconstpointer backend) {
   g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9);
 }
 
+static void test_permutation(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  const HParser *p = h_permutation(h_ch('a'), h_ch('b'), h_ch('c'), NULL);
+
+  g_check_parse_match(p, be, "abc", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(p, be, "acb", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(p, be, "bac", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(p, be, "bca", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(p, be, "cab", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(p, be, "cba", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_failed(p, be, "a", 1);
+  g_check_parse_failed(p, be, "ab", 2);
+  g_check_parse_failed(p, be, "abb", 3);
+
+  const HParser *po = h_permutation(h_ch('a'), h_ch('b'), h_optional(h_ch('c')), NULL);
+
+  g_check_parse_match(po, be, "abc", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(po, be, "acb", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(po, be, "bac", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(po, be, "bca", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(po, be, "cab", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(po, be, "cba", 3, "(u0x61 u0x62 u0x63)");
+  g_check_parse_match(po, be, "ab", 2, "(u0x61 u0x62 null)");
+  g_check_parse_match(po, be, "ba", 2, "(u0x61 u0x62 null)");
+  g_check_parse_failed(po, be, "a", 1);
+  g_check_parse_failed(po, be, "b", 1);
+  g_check_parse_failed(po, be, "c", 1);
+  g_check_parse_failed(po, be, "ca", 2);
+  g_check_parse_failed(po, be, "cb", 2);
+  g_check_parse_failed(po, be, "cc", 2);
+  g_check_parse_failed(po, be, "ccab", 4);
+  g_check_parse_failed(po, be, "ccc", 3);
+
+  const HParser *po2 = h_permutation(h_optional(h_ch('c')), h_ch('a'), h_ch('b'), NULL);
+
+  g_check_parse_match(po2, be, "abc", 3, "(u0x63 u0x61 u0x62)");
+  g_check_parse_match(po2, be, "acb", 3, "(u0x63 u0x61 u0x62)");
+  g_check_parse_match(po2, be, "bac", 3, "(u0x63 u0x61 u0x62)");
+  g_check_parse_match(po2, be, "bca", 3, "(u0x63 u0x61 u0x62)");
+  g_check_parse_match(po2, be, "cab", 3, "(u0x63 u0x61 u0x62)");
+  g_check_parse_match(po2, be, "cba", 3, "(u0x63 u0x61 u0x62)");
+  g_check_parse_match(po2, be, "ab", 2, "(null u0x61 u0x62)");
+  g_check_parse_match(po2, be, "ba", 2, "(null u0x61 u0x62)");
+  g_check_parse_failed(po2, be, "a", 1);
+  g_check_parse_failed(po2, be, "b", 1);
+  g_check_parse_failed(po2, be, "c", 1);
+  g_check_parse_failed(po2, be, "ca", 2);
+  g_check_parse_failed(po2, be, "cb", 2);
+  g_check_parse_failed(po2, be, "cc", 2);
+  g_check_parse_failed(po2, be, "ccab", 4);
+  g_check_parse_failed(po2, be, "ccc", 3);
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@@ -563,6 +616,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
   g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
   g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
+  g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
-- 
GitLab