From 92805cb17f26bf72427468260d64fd70e3ddae12 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Fri, 16 Sep 2022 11:29:32 +0200 Subject: [PATCH] dummy support for iterative parsing in packrat A very first step. This implementation still expects all input in a single chunk but allows the use of the iterative API. If the parser attempts to read past the first chunk, the parse fails. Contains some comments for next steps towards full support. Adds tests for the single-chunk case. --- src/backends/packrat.c | 38 +++++++++++++++++- src/t_parser.c | 47 ++++++++++++++++++++-- src/test_suite.h | 91 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 4 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 381771b0..f95ba3de 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -334,8 +334,44 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr return res; } +// The following implementation of the iterative (chunked) parsing API is a +// dummy that expects all input to be passed in one chunk. This allows API +// conformity until a proper implementation is available. If the parser +// attempts to read past the first chunk (an overrun occurs), the parse fails. +// +// NB: A more functional if only slightly less naive approach would be to +// concatenate chunks and blindly re-run the full parse on every call to +// h_packrat_parse_chunk. +// +// NB: A full implementation will still have to concatenate the chunks to +// support arbitrary backtracking, but should be able to save much, if not all, of +// the HParseState between calls. + +void h_packrat_parse_start(HSuspendedParser *s) +{ + // nothing to do +} + +bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input) +{ + assert(s->backend_state == NULL); + s->backend_state = h_packrat_parse(s->mm__, s->parser, input); + if (input->overrun) // tried to read past the chunk? 
+ s->backend_state = NULL; // fail the parse. + return true; // don't call me again. +} + +HParseResult *h_packrat_parse_finish(HSuspendedParser *s) +{ + return s->backend_state; +} + HParserBackendVTable h__packrat_backend_vtable = { .compile = h_packrat_compile, .parse = h_packrat_parse, - .free = h_packrat_free + .free = h_packrat_free, + + .parse_start = h_packrat_parse_start, + .parse_chunk = h_packrat_parse_chunk, + .parse_finish = h_packrat_parse_finish }; diff --git a/src/t_parser.c b/src/t_parser.c index 356c38f1..de273c95 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -507,7 +507,42 @@ static void test_rightrec(gconstpointer backend) { g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))"); } -static void test_iterative(gconstpointer backend) { +static void test_iterative_single(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + p = h_token((uint8_t*)"foobar", 6); + g_check_parse_chunk_match(p, be, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foopar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_sequence(h_ch('f'), h_token((uint8_t*)"ooba", 4), h_ch('r'), NULL); + g_check_parse_chunk_match(p, be, "foobar",6, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foopar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_choice(h_token((uint8_t*)"foobar", 6), + h_token((uint8_t*)"phupar", 6), NULL); + g_check_parse_chunk_match(p, be, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "phupar",6, "<70.68.75.70.61.72>"); + 
g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"uu", 2), NULL), NULL); + g_check_parse_chunk_match(p, be, "foo",3, "(u0x66 <6f.6f>)"); + g_check_parse_chunk_match(p, be, "fuu",3, "(u0x66 <75.75>)"); + g_check_parse_chunk_failed(p, be, "goo",3); + g_check_parse_chunk_failed(p, be, "fou",3); + g_check_parse_chunk_failed(p, be, "fuo",3); +} + +static void test_iterative_multi(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); HParser *p; @@ -933,6 +968,10 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length); //g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position); + g_test_add_data_func("/core/parser/packrat/iterative/single", GINT_TO_POINTER(PB_PACKRAT), test_iterative_single); + //g_test_add_data_func("/core/parser/packrat/iterative/multi", GINT_TO_POINTER(PB_PACKRAT), test_iterative_multi); + //g_test_add_data_func("/core/parser/packrat/iterative/lookahead", GINT_TO_POINTER(PB_PACKRAT), test_iterative_lookahead); + //g_test_add_data_func("/core/parser/packrat/iterative/result_length", GINT_TO_POINTER(PB_PACKRAT), test_iterative_result_length); g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip); g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek); g_test_add_data_func("/core/parser/packrat/tell", GINT_TO_POINTER(PB_PACKRAT), test_tell); @@ -978,7 +1017,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec); g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length); 
//g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position); - g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative); + g_test_add_data_func("/core/parser/llk/iterative/single", GINT_TO_POINTER(PB_LLk), test_iterative_single); + g_test_add_data_func("/core/parser/llk/iterative/multi", GINT_TO_POINTER(PB_LLk), test_iterative_multi); g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead); g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length); g_test_add_data_func("/core/parser/llk/drop_from", GINT_TO_POINTER(PB_LLk), test_drop_from); @@ -1064,7 +1104,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length); g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position); - g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative); + g_test_add_data_func("/core/parser/lalr/iterative/single", GINT_TO_POINTER(PB_LALR), test_iterative_single); + g_test_add_data_func("/core/parser/lalr/iterative/multi", GINT_TO_POINTER(PB_LALR), test_iterative_multi); g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead); g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length); g_test_add_data_func("/core/parser/lalr/drop_from", GINT_TO_POINTER(PB_LALR), test_drop_from); diff --git a/src/test_suite.h b/src/test_suite.h index 56fa42c6..dad0621d 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -289,6 +289,97 @@ } \ } while(0) +#define g_check_parse_chunk_failed__m(mm__, parser, backend, chunk1, c1_len) do { \ 
+ int skip = h_compile__m(mm__, (HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_failed___m(mm__, parser, chunk1, c1_len); \ + } while(0) + +#define g_check_parse_chunk_failed___m(mm__, parser, chunk1, c1_len) do { \ + HSuspendedParser *s = h_parse_start__m(mm__, (HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (NULL != res) { \ + h_parse_result_free(res); \ + g_test_message("Check failed: shouldn't have succeeded, but did"); \ + g_test_fail(); \ + } \ + } while(0) + +#define g_check_parse_chunk_failed(p, be, c1, c1_len) \ + g_check_parse_chunk_failed__m(&system_allocator, p, be, c1, c1_len) + +#define g_check_parse_chunk_failed_(p, c1, c1_len) \ + g_check_parse_chunk_failed___m(&system_allocator, p, c1, c1_len) + +#define g_check_parse_chunk_ok(parser, backend, chunk1, c1_len) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_ok_(parser, chunk1, c1_len); \ + } while(0) + +#define g_check_parse_chunk_ok_(parser, chunk1, c1_len) do { \ + HSuspendedParser *s = h_parse_start((HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + print_arena_stats(res->arena); \ + h_parse_result_free(res); \ + } \ + } while(0) + +#define g_check_parse_chunk_match(parser, backend, chunk1, c1_len, result) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend) 
backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_match_(parser, chunk1, c1_len, result); \ + } while(0) + +#define g_check_parse_chunk_match_(parser, chunk1, c1_len, result) do { \ + HSuspendedParser *s = h_parse_start((HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + char* cres = h_write_result_unamb(res->ast); \ + g_check_string(cres, ==, result); \ + (&system_allocator)->free(&system_allocator, cres); \ + print_arena_stats(res->arena); \ + h_parse_result_free(res); \ + } \ + } while(0) + #define g_check_hashtable_present(table, key) do { \ if(!h_hashtable_present(table, key)) { \ g_test_message("Check failed: key should have been in table, but wasn't"); \ -- GitLab