From 5b5f131c42b35f9fe259c529425fff10b2dc1595 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Wed, 28 Sep 2022 18:48:04 +0200 Subject: [PATCH] properly suspend h_end_p and h_seek before the last chunk Includes a test that exercises both. Also fixes tracking of input position when suspending on the first chunk and adapts h_input_stream_pos and h_input_stream_length to multi-chunk operation. --- src/backends/packrat.c | 16 +++++++++------- src/internal.h | 10 ++++++---- src/parsers/end.c | 10 ++++++++-- src/parsers/seek.c | 4 ++++ src/t_parser.c | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 33db3c42..0f7bf476 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -382,7 +382,7 @@ bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input) memcpy((void *)cat->input, input->input, input->length); s->backend_state = cat; - return false; // come back with more input. + goto suspend; } // we have received additional input - append it to the saved stream @@ -407,12 +407,8 @@ bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input) input->overrun = cat->overrun; // suspend if the parser still needs more input - if (input->overrun && !input->last_chunk) { - input->index = input->length; // consume the entire chunk on suspend - input->margin = 0; - input->bit_offset = 0; - return false; - } + if (input->overrun && !input->last_chunk) + goto suspend; // otherwise the parse is finished... // report final input position @@ -433,6 +429,12 @@ bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input) s->backend_state = res; return true; // don't call me again. + +suspend: + input->index = input->length; // consume the entire chunk on suspend + input->margin = 0; + input->bit_offset = 0; + return false; // come back with more input. } HParseResult *h_packrat_parse_finish(HSuspendedParser *s) diff --git a/src/internal.h b/src/internal.h index 7c3943c6..203e3412 100644 --- a/src/internal.h +++ b/src/internal.h @@ -332,12 +332,14 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p); void h_skip_bits(HInputStream* state, size_t count); void h_seek_bits(HInputStream* state, size_t pos); static inline size_t h_input_stream_pos(HInputStream* state) { - assert(state->index < SIZE_MAX / 8); - return state->index * 8 + state->bit_offset + state->margin; + assert(state->pos <= SIZE_MAX - state->index); + assert(state->pos + state->index < SIZE_MAX / 8); + return (state->pos + state->index) * 8 + state->bit_offset + state->margin; } static inline size_t h_input_stream_length(HInputStream *state) { - assert(state->length <= SIZE_MAX / 8); - return state->length * 8; + assert(state->pos <= SIZE_MAX - state->length); + assert(state->pos + state->length <= SIZE_MAX / 8); + return (state->pos + state->length) * 8; } // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); diff --git a/src/parsers/end.c b/src/parsers/end.c index 35e4186d..754bb7f5 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -1,13 +1,19 @@ #include "parser_internal.h" -static HParseResult* parse_end(void *env, HParseState *state) { - if (state->input_stream.index == state->input_stream.length) { +static HParseResult* parse_end(void *env, HParseState *state) +{ + if (state->input_stream.index < state->input_stream.length) + return NULL; + + assert(state->input_stream.index == state->input_stream.length); + if (state->input_stream.last_chunk) { HParseResult *ret = a_new(HParseResult, 1); ret->ast = NULL; ret->bit_length = 0; ret->arena = state->arena; return ret; } else { + state->input_stream.overrun = true; // need more input return NULL; } } diff --git a/src/parsers/seek.c b/src/parsers/seek.c index d5bc0284..e1459d80 100644 --- a/src/parsers/seek.c +++ b/src/parsers/seek.c @@ -25,6 +25,10 @@ static HParseResult *parse_seek(void *env, HParseState *state) pos = 0; break; case SEEK_END: + if (!stream->last_chunk) { /* the end is not yet known! */ + stream->overrun = true; /* we need more input */ + return NULL; + } pos = h_input_stream_length(stream); break; case SEEK_CUR: diff --git a/src/t_parser.c b/src/t_parser.c index b1988023..dbeaabde 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -632,6 +632,39 @@ static void test_iterative_lookahead(gconstpointer backend) { g_check_parse_chunks_failed_(p, "fo",2, "b",1); } +static void test_iterative_seek(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + const HParser *p; + + // seeking should work across chunk boundaries... + + p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_ch('f'), NULL); + g_check_parse_chunks_match(p, be, "a",1, "bcdef",5, "(u0x61 u0x28 u0x66)"); + g_check_parse_chunks_failed(p, be, "a",1, "bcdex",5); + g_check_parse_chunks_failed(p, be, "a",1, "bc",2); + + p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_end_p(), NULL); + g_check_parse_chunks_match(p, be, "ab",2, "cde",3, "(u0x61 u0x28)"); + g_check_parse_chunks_failed(p, be, "ab",2, "cdex",4); + g_check_parse_chunks_failed(p, be, "ab",2, "c",1); + + p = h_sequence(h_ch('a'), h_seek(0, SEEK_END), h_end_p(), NULL); + g_check_parse_chunks_match(p, be, "abc",3, "de",2, "(u0x61 u0x28)"); + g_check_parse_chunks_match(p, be, "abc",3, "",0, "(u0x61 u0x18)"); + + p = h_sequence(h_ch('a'), h_seek(-16, SEEK_END), h_ch('x'), NULL); + g_check_parse_chunks_match(p, be, "abcd",4, "xy",2, "(u0x61 u0x20 u0x78)"); + g_check_parse_chunks_match(p, be, "abxy",4, "",0, "(u0x61 u0x10 u0x78)"); + g_check_parse_chunks_failed(p, be, "a",1, "bc",2); + g_check_parse_chunks_failed(p, be, "",0, "x",1); + + p = h_sequence(h_ch('a'), h_seek(32, SEEK_CUR), h_ch('f'), NULL); + g_check_parse_chunks_match(p, be, "abcde",5, "f",1, "(u0x61 u0x28 u0x66)"); + g_check_parse_chunks_failed(p, be, "xbcde",5, "f",1); + g_check_parse_chunks_failed(p, be, "abcde",5, "x",1); + g_check_parse_chunks_failed(p, be, "abc",3, "",0); +} + static void test_iterative_result_length(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); HParser *p = h_token((uint8_t*)"foobar", 6); @@ -1001,6 +1034,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/iterative/single", GINT_TO_POINTER(PB_PACKRAT), test_iterative_single); g_test_add_data_func("/core/parser/packrat/iterative/multi", GINT_TO_POINTER(PB_PACKRAT), test_iterative_multi); g_test_add_data_func("/core/parser/packrat/iterative/lookahead", GINT_TO_POINTER(PB_PACKRAT), test_iterative_lookahead); + g_test_add_data_func("/core/parser/packrat/iterative/seek", GINT_TO_POINTER(PB_PACKRAT), test_iterative_seek); g_test_add_data_func("/core/parser/packrat/iterative/result_length", GINT_TO_POINTER(PB_PACKRAT), test_iterative_result_length); g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip); g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek); -- GitLab