diff --git a/src/hammer.h b/src/hammer.h
index ae2103ef7efcd8b321b39f2aa9778f39455f9ef9..25141e081f8502ead70c6ff797157cf3cf1cafa6 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -462,6 +462,18 @@ HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
  */
 HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_sequence, HParser* p);
 
+/**
+ * Given an `h_sequence` and a list of zero-based indices, returns a parser that parses the
+ * sequence but returns it without the results at the listed indices. Indices may be given in
+ * any order; indices past the end of the sequence are ignored. The macro appends the
+ * terminating -1 itself, and a negative integer in the middle of the list silently ends it,
+ * so any indices after it are ignored.
+ *
+ * Result token type: TT_SEQUENCE
+ */
+#define h_drop_from(p, ...) h_drop_from_(p, __VA_ARGS__, -1)
+HAMMER_FN_DECL_VARARGS(HParser*, h_drop_from_, HParser* p);
+
 /**
  * Given an array of parsers, p_array, apply each parser in order. The
  * first parser to succeed is the result; if no parsers succeed, the
diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c
index 5ca034ad50c1b752ff48e202008b21c0d1e75375..786ba62e43683f32ca0cc244bc0695cdb04a76fd 100644
--- a/src/parsers/sequence.c
+++ b/src/parsers/sequence.c
@@ -174,3 +174,83 @@ HParser* h_sequence__ma(HAllocator* mm__, void *args[]) {
   ret->desugared = NULL;
   return ret;
 }
+
+/* Allocates a new HSequence from mm__ holding the same parsers as `s`, in order. */
+static HSequence *drop_from_clone(HAllocator *mm__, const HSequence *s) {
+  HSequence *rewrite = h_new(HSequence, 1);
+  rewrite->p_array = h_new(HParser *, s->len);
+  rewrite->len = s->len;
+  for (size_t i = 0; i < s->len; ++i) {
+    rewrite->p_array[i] = s->p_array[i];
+  }
+  return rewrite;
+}
+
+/* Wraps element `idx` of `rewrite` in h_ignore. Indices outside the sequence and
+ * elements that are already wrapped are left alone, so stray or repeated indices
+ * are harmless. */
+static void drop_from_mark(HSequence *rewrite, const HSequence *s, int idx) {
+  if (idx < 0 || (size_t)idx >= s->len) {
+    return;
+  }
+  if (rewrite->p_array[idx] == s->p_array[idx]) {
+    rewrite->p_array[idx] = h_ignore(s->p_array[idx]);
+  }
+}
+
+HParser* h_drop_from_(HParser* p, ...) {
+  va_list ap;
+  va_start(ap, p);
+  HParser* ret = h_drop_from___mv(&system_allocator, p, ap);
+  va_end(ap);
+  return ret;
+}
+
+HParser* h_drop_from___m(HAllocator* mm__, HParser* p, ...) {
+  va_list ap;
+  va_start(ap, p);
+  HParser* ret = h_drop_from___mv(mm__, p, ap);
+  va_end(ap);
+  return ret;
+}
+
+HParser* h_drop_from___v(HParser* p, va_list ap) {
+  return h_drop_from___mv(&system_allocator, p, ap);
+}
+
+HParser* h_drop_from___mv(HAllocator* mm__, HParser *p, va_list ap) {
+  /* `h_drop_from(h_sequence(a, b, c, d, e, NULL), 0, 4)` is functionally equivalent to
+   * `h_sequence(h_ignore(a), b, c, d, h_ignore(e), NULL)`, so this combinator rewrites
+   * itself into an h_sequence in which the dropped parsers are wrapped in h_ignore.
+   */
+  assert_message(p->vtable == &sequence_vt, "drop_from requires a sequence parser");
+  HSequence *s = (HSequence*)(p->env);
+  HSequence *rewrite = drop_from_clone(mm__, s);
+
+  /* The list ends at the first negative value. `ap` belongs to the caller that ran
+   * va_start, so va_end is deliberately not called here. */
+  for (int arg = va_arg(ap, int); arg >= 0; arg = va_arg(ap, int)) {
+    drop_from_mark(rewrite, s, arg);
+  }
+
+  return h_new_parser(mm__, &sequence_vt, rewrite);
+}
+
+HParser* h_drop_from___a(void *args[]) {
+  return h_drop_from___ma(&system_allocator, args);
+}
+
+HParser* h_drop_from___ma(HAllocator* mm__, void *args[]) {
+  /* args[0] is the sequence parser; args[1] points to an int array of indices
+   * terminated by a negative value. */
+  HParser *p = (HParser*)(args[0]);
+  assert_message(p->vtable == &sequence_vt, "drop_from requires a sequence parser");
+  HSequence *s = (HSequence*)(p->env);
+  HSequence *rewrite = drop_from_clone(mm__, s);
+
+  for (const int *argp = (const int*)(args[1]); argp != NULL && *argp >= 0; ++argp) {
+    drop_from_mark(rewrite, s, *argp);
+  }
+
+  return h_new_parser(mm__, &sequence_vt, rewrite);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index 7e4ff8293990adbb642a5b7b54f8b743466a2654..cb67901ed9227787d5580079112c410df000dd94 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -845,6 +845,19 @@ static void test_seek(gconstpointer backend) {
   g_check_parse_failed(p, be, "abc", 3);
 }
 
+static void test_drop_from(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  HParser *p, *q, *r, *seq;
+
+  seq = h_sequence(h_ch('a'), h_ch('b'), h_ch('c'), h_ch('d'), h_ch('e'), NULL);
+  p = h_drop_from(seq, 0, 4);
+  g_check_parse_match(p, be, "abcde", 5, "(u0x62 u0x63 u0x64)");
+  q = h_drop_from(seq, 1, 2);
+  g_check_parse_match(q, be, "abcde", 5, "(u0x61 u0x64 u0x65)");
+  r = h_drop_from(seq, 0, 1, 3, 4);
+  g_check_parse_match(r, be, "abcde", 5, "(u0x63)");
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@@ -902,6 +915,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip);
   g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek);
   g_test_add_data_func("/core/parser/packrat/tell", GINT_TO_POINTER(PB_PACKRAT), test_tell);
+  g_test_add_data_func("/core/parser/packrat/drop_from", GINT_TO_POINTER(PB_PACKRAT), test_drop_from);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
@@ -948,6 +962,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative);
   g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead);
   g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length);
+  g_test_add_data_func("/core/parser/llk/drop_from", GINT_TO_POINTER(PB_LLk), test_drop_from);
 
   g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
   g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);
@@ -989,6 +1004,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
   g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length);
   g_test_add_data_func("/core/parser/regex/token_position", GINT_TO_POINTER(PB_REGULAR), test_token_position);
+  g_test_add_data_func("/core/parser/regex/drop_from", GINT_TO_POINTER(PB_REGULAR), test_drop_from);
 
   g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
   g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
@@ -1036,6 +1052,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative);
   g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead);
   g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length);
+  g_test_add_data_func("/core/parser/lalr/drop_from", GINT_TO_POINTER(PB_LALR), test_drop_from);
 
   g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
   g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch);
@@ -1081,4 +1098,5 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
   g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length);
   g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position);
+  g_test_add_data_func("/core/parser/glr/drop_from", GINT_TO_POINTER(PB_GLR), test_drop_from);
 }