diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 381771b0ba0ac84b1e1cc480b42e5ae6a16ad502..f95ba3ded9ce0e34b307c667cb1a24f025c71409 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -334,8 +334,44 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr return res; } +// The following implementation of the iterative (chunked) parsing API is a +// dummy that expects all input to be passed in one chunk. This allows API +// conformity until a proper implementation is available. If the parser +// attempts to read past the first chunk (an overrun occurs), the parse fails. +// +// NB: A more functional if only slightly less naive approach would be to +// concatenate chunks and blindly re-run the full parse on every call to +// h_packrat_parse_chunk. +// +// NB: A full implementation will still have to concatenate the chunks to +// support arbitrary backtracking, but should be able to save much, if not all, of +// the HParseState between calls. + +void h_packrat_parse_start(HSuspendedParser *s) +{ + // nothing to do +} + +bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input) +{ + assert(s->backend_state == NULL); + s->backend_state = h_packrat_parse(s->mm__, s->parser, input); + if (input->overrun) // tried to read past the chunk? + s->backend_state = NULL; // fail the parse. + return true; // don't call me again. 
+} + +HParseResult *h_packrat_parse_finish(HSuspendedParser *s) +{ + return s->backend_state; +} + HParserBackendVTable h__packrat_backend_vtable = { .compile = h_packrat_compile, .parse = h_packrat_parse, - .free = h_packrat_free + .free = h_packrat_free, + + .parse_start = h_packrat_parse_start, + .parse_chunk = h_packrat_parse_chunk, + .parse_finish = h_packrat_parse_finish }; diff --git a/src/t_parser.c b/src/t_parser.c index 356c38f1674d6d3f90e3b0da672646455437f7a2..de273c95f634424c1c71f0c60818cd0e8f02b73b 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -507,7 +507,42 @@ static void test_rightrec(gconstpointer backend) { g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))"); } -static void test_iterative(gconstpointer backend) { +static void test_iterative_single(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + p = h_token((uint8_t*)"foobar", 6); + g_check_parse_chunk_match(p, be, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foopar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_sequence(h_ch('f'), h_token((uint8_t*)"ooba", 4), h_ch('r'), NULL); + g_check_parse_chunk_match(p, be, "foobar",6, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foopar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_choice(h_token((uint8_t*)"foobar", 6), + h_token((uint8_t*)"phupar", 6), NULL); + g_check_parse_chunk_match(p, be, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "phupar",6, "<70.68.75.70.61.72>"); + g_check_parse_chunk_failed(p, be, 
"foubar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"uu", 2), NULL), NULL); + g_check_parse_chunk_match(p, be, "foo",3, "(u0x66 <6f.6f>)"); + g_check_parse_chunk_match(p, be, "fuu",3, "(u0x66 <75.75>)"); + g_check_parse_chunk_failed(p, be, "goo",3); + g_check_parse_chunk_failed(p, be, "fou",3); + g_check_parse_chunk_failed(p, be, "fuo",3); +} + +static void test_iterative_multi(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); HParser *p; @@ -933,6 +968,10 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length); //g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position); + g_test_add_data_func("/core/parser/packrat/iterative/single", GINT_TO_POINTER(PB_PACKRAT), test_iterative_single); + //g_test_add_data_func("/core/parser/packrat/iterative/multi", GINT_TO_POINTER(PB_PACKRAT), test_iterative_multi); + //g_test_add_data_func("/core/parser/packrat/iterative/lookahead", GINT_TO_POINTER(PB_PACKRAT), test_iterative_lookahead); + //g_test_add_data_func("/core/parser/packrat/iterative/result_length", GINT_TO_POINTER(PB_PACKRAT), test_iterative_result_length); g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip); g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek); g_test_add_data_func("/core/parser/packrat/tell", GINT_TO_POINTER(PB_PACKRAT), test_tell); @@ -978,7 +1017,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec); g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length); 
//g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position); - g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative); + g_test_add_data_func("/core/parser/llk/iterative/single", GINT_TO_POINTER(PB_LLk), test_iterative_single); + g_test_add_data_func("/core/parser/llk/iterative/multi", GINT_TO_POINTER(PB_LLk), test_iterative_multi); g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead); g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length); g_test_add_data_func("/core/parser/llk/drop_from", GINT_TO_POINTER(PB_LLk), test_drop_from); @@ -1064,7 +1104,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length); g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position); - g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative); + g_test_add_data_func("/core/parser/lalr/iterative/single", GINT_TO_POINTER(PB_LALR), test_iterative_single); + g_test_add_data_func("/core/parser/lalr/iterative/multi", GINT_TO_POINTER(PB_LALR), test_iterative_multi); g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead); g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length); g_test_add_data_func("/core/parser/lalr/drop_from", GINT_TO_POINTER(PB_LALR), test_drop_from); diff --git a/src/test_suite.h b/src/test_suite.h index 56fa42c6494bd205996328009ca71dc4470ae337..dad0621db148431b5953a1291593d22d2a11cd55 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -289,6 +289,97 @@ } \ } while(0) +#define 
g_check_parse_chunk_failed__m(mm__, parser, backend, chunk1, c1_len) do { \ + int skip = h_compile__m(mm__, (HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_failed___m(mm__, parser, chunk1, c1_len); \ + } while(0) + +#define g_check_parse_chunk_failed___m(mm__, parser, chunk1, c1_len) do { \ + HSuspendedParser *s = h_parse_start__m(mm__, (HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (NULL != res) { \ + h_parse_result_free(res); \ + g_test_message("Check failed: shouldn't have succeeded, but did"); \ + g_test_fail(); \ + } \ + } while(0) + +#define g_check_parse_chunk_failed(p, be, c1, c1_len) \ + g_check_parse_chunk_failed__m(&system_allocator, p, be, c1, c1_len) + +#define g_check_parse_chunk_failed_(p, c1, c1_len) \ + g_check_parse_chunk_failed___m(&system_allocator, p, c1, c1_len) + +#define g_check_parse_chunk_ok(parser, backend, chunk1, c1_len) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_ok_(parser, chunk1, c1_len); \ + } while(0) + +#define g_check_parse_chunk_ok_(parser, chunk1, c1_len) do { \ + HSuspendedParser *s = h_parse_start((HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + print_arena_stats(res->arena); \ + h_parse_result_free(res); \ + } \ + } while(0) + +#define g_check_parse_chunk_match(parser, backend, chunk1, c1_len, 
result) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_match_(parser, chunk1, c1_len, result); \ + } while(0) + +#define g_check_parse_chunk_match_(parser, chunk1, c1_len, result) do { \ + HSuspendedParser *s = h_parse_start((HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + char* cres = h_write_result_unamb(res->ast); \ + g_check_string(cres, ==, result); \ + (&system_allocator)->free(&system_allocator, cres); \ + print_arena_stats(res->arena); \ + h_parse_result_free(res); \ + } \ + } while(0) + #define g_check_hashtable_present(table, key) do { \ if(!h_hashtable_present(table, key)) { \ g_test_message("Check failed: key should have been in table, but wasn't"); \