From 5b5f131c42b35f9fe259c529425fff10b2dc1595 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Wed, 28 Sep 2022 18:48:04 +0200
Subject: [PATCH] properly suspend h_end_p and h_seek before the last chunk

Includes a test that exercises both.
Also fixes tracking of input position when suspending on the first chunk and
adapts h_input_stream_pos and h_input_stream_length to multi-chunk operation.
---
 src/backends/packrat.c | 16 +++++++++-------
 src/internal.h         | 10 ++++++----
 src/parsers/end.c      | 10 ++++++++--
 src/parsers/seek.c     |  4 ++++
 src/t_parser.c         | 34 ++++++++++++++++++++++++++++++++++
 5 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/src/backends/packrat.c b/src/backends/packrat.c
index 33db3c42..0f7bf476 100644
--- a/src/backends/packrat.c
+++ b/src/backends/packrat.c
@@ -382,7 +382,7 @@ bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input)
     memcpy((void *)cat->input, input->input, input->length);
     s->backend_state = cat;
 
-    return false;			// come back with more input.
+    goto suspend;
   }
 
   // we have received additional input - append it to the saved stream
@@ -407,12 +407,8 @@ bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input)
   input->overrun    = cat->overrun;
 
   // suspend if the parser still needs more input
-  if (input->overrun && !input->last_chunk) {
-    input->index = input->length;	// consume the entire chunk on suspend
-    input->margin = 0;
-    input->bit_offset = 0;
-    return false;
-  }
+  if (input->overrun && !input->last_chunk)
+    goto suspend;
   // otherwise the parse is finished...
 
   // report final input position
@@ -433,6 +429,12 @@ bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input)
   s->backend_state = res;
 
   return true;				// don't call me again.
+
+suspend:
+  input->index = input->length;		// consume the entire chunk on suspend
+  input->margin = 0;
+  input->bit_offset = 0;
+  return false;				// come back with more input.
 }
 
 HParseResult *h_packrat_parse_finish(HSuspendedParser *s)
diff --git a/src/internal.h b/src/internal.h
index 7c3943c6..203e3412 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -332,12 +332,14 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p);
 void h_skip_bits(HInputStream* state, size_t count);
 void h_seek_bits(HInputStream* state, size_t pos);
 static inline size_t h_input_stream_pos(HInputStream* state) {
-  assert(state->index < SIZE_MAX / 8);
-  return state->index * 8 + state->bit_offset + state->margin;
+  assert(state->pos <= SIZE_MAX - state->index);  // pos + index must not wrap around
+  assert(state->pos + state->index < SIZE_MAX / 8);  // so the conversion to bits below cannot overflow
+  return (state->pos + state->index) * 8 + state->bit_offset + state->margin;  // position in bits; NOTE(review): pos is presumably the byte offset of the current chunk within the whole input -- confirm
 }
 static inline size_t h_input_stream_length(HInputStream *state) {
-  assert(state->length <= SIZE_MAX / 8);
-  return state->length * 8;
+  assert(state->pos <= SIZE_MAX - state->length);  // pos + length must not wrap around
+  assert(state->pos + state->length <= SIZE_MAX / 8);  // so the multiplication by 8 below cannot overflow
+  return (state->pos + state->length) * 8;  // length in bits, counted from pos rather than from the start of this chunk
 }
 // need to decide if we want to make this public. 
 HParseResult* h_do_parse(const HParser* parser, HParseState *state);
diff --git a/src/parsers/end.c b/src/parsers/end.c
index 35e4186d..754bb7f5 100644
--- a/src/parsers/end.c
+++ b/src/parsers/end.c
@@ -1,13 +1,19 @@
 #include "parser_internal.h"
 
-static HParseResult* parse_end(void *env, HParseState *state) {
-  if (state->input_stream.index == state->input_stream.length) {
+static HParseResult* parse_end(void *env, HParseState *state)
+{  // matches only when index == length on the last chunk; otherwise returns NULL
+  if (state->input_stream.index < state->input_stream.length)
+    return NULL;  // unread bytes remain in this chunk, so this is not the end
+
+  assert(state->input_stream.index == state->input_stream.length);  // index is not expected to run past length
+  if (state->input_stream.last_chunk) {  // at the boundary of the final chunk: succeed with an empty result
     HParseResult *ret = a_new(HParseResult, 1);
     ret->ast = NULL;
     ret->bit_length = 0;
     ret->arena = state->arena;
     return ret;
   } else {
+    state->input_stream.overrun = true;	// at a chunk boundary, but not on the last chunk: flag that more input is needed
     return NULL;
   }
 }
diff --git a/src/parsers/seek.c b/src/parsers/seek.c
index d5bc0284..e1459d80 100644
--- a/src/parsers/seek.c
+++ b/src/parsers/seek.c
@@ -25,6 +25,10 @@ static HParseResult *parse_seek(void *env, HParseState *state)
     pos = 0;
     break;
   case SEEK_END:
+    if (!stream->last_chunk) {	/* the end is not yet known! */
+      stream->overrun = true;	/* we need more input */
+      return NULL;
+    }
     pos = h_input_stream_length(stream);
     break;
   case SEEK_CUR:
diff --git a/src/t_parser.c b/src/t_parser.c
index b1988023..dbeaabde 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -632,6 +632,39 @@ static void test_iterative_lookahead(gconstpointer backend) {
   g_check_parse_chunks_failed_(p, "fo",2, "b",1);
 }
 
+static void test_iterative_seek(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  const HParser *p;
+
+  // seeking should work across chunk boundaries; as the expected values show, seek offsets are in bits and the seek yields the resulting absolute bit position
+
+  p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_ch('f'), NULL);  // absolute seek to bit 40 (byte 5), then expect 'f'
+  g_check_parse_chunks_match(p, be, "a",1, "bcdef",5, "(u0x61 u0x28 u0x66)");  // byte 5 of "a"+"bcdef" is 'f'
+  g_check_parse_chunks_failed(p, be, "a",1, "bcdex",5);  // byte 5 is 'x', not 'f'
+  g_check_parse_chunks_failed(p, be, "a",1, "bc",2);  // only 3 bytes in total, bit 40 is out of reach
+
+  p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_end_p(), NULL);  // absolute seek to bit 40, then expect end of input
+  g_check_parse_chunks_match(p, be, "ab",2, "cde",3, "(u0x61 u0x28)");  // 5 bytes in total, so bit 40 is the end
+  g_check_parse_chunks_failed(p, be, "ab",2, "cdex",4);  // 6 bytes in total, input continues past bit 40
+  g_check_parse_chunks_failed(p, be, "ab",2, "c",1);  // only 3 bytes in total
+
+  p = h_sequence(h_ch('a'), h_seek(0, SEEK_END), h_end_p(), NULL);  // seek to the very end, then expect end of input
+  g_check_parse_chunks_match(p, be, "abc",3, "de",2, "(u0x61 u0x28)");  // 5 bytes -> bit 40 (0x28)
+  g_check_parse_chunks_match(p, be, "abc",3, "",0, "(u0x61 u0x18)");  // empty second chunk: 3 bytes -> bit 24 (0x18)
+
+  p = h_sequence(h_ch('a'), h_seek(-16, SEEK_END), h_ch('x'), NULL);  // seek to 16 bits (2 bytes) before the end, then expect 'x'
+  g_check_parse_chunks_match(p, be, "abcd",4, "xy",2, "(u0x61 u0x20 u0x78)");  // 48 - 16 = 32 (0x20); byte 4 is 'x'
+  g_check_parse_chunks_match(p, be, "abxy",4, "",0, "(u0x61 u0x10 u0x78)");  // 32 - 16 = 16 (0x10); byte 2 is 'x'
+  g_check_parse_chunks_failed(p, be, "a",1, "bc",2);  // 24 - 16 = 8; byte 1 is 'b', not 'x'
+  g_check_parse_chunks_failed(p, be, "",0, "x",1);  // first byte is 'x', so h_ch('a') already fails
+
+  p = h_sequence(h_ch('a'), h_seek(32, SEEK_CUR), h_ch('f'), NULL);  // relative seek 32 bits (4 bytes) forward, then expect 'f'
+  g_check_parse_chunks_match(p, be, "abcde",5, "f",1, "(u0x61 u0x28 u0x66)");  // 8 + 32 = 40 (0x28); byte 5 is 'f'
+  g_check_parse_chunks_failed(p, be, "xbcde",5, "f",1);  // first byte is 'x', not 'a'
+  g_check_parse_chunks_failed(p, be, "abcde",5, "x",1);  // byte 5 is 'x', not 'f'
+  g_check_parse_chunks_failed(p, be, "abc",3, "",0);  // only 3 bytes in total, bit 40 is out of reach
+}
+
 static void test_iterative_result_length(gconstpointer backend) {
   HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
   HParser *p = h_token((uint8_t*)"foobar", 6);
@@ -1001,6 +1034,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/iterative/single", GINT_TO_POINTER(PB_PACKRAT), test_iterative_single);
   g_test_add_data_func("/core/parser/packrat/iterative/multi", GINT_TO_POINTER(PB_PACKRAT), test_iterative_multi);
   g_test_add_data_func("/core/parser/packrat/iterative/lookahead", GINT_TO_POINTER(PB_PACKRAT), test_iterative_lookahead);
+  g_test_add_data_func("/core/parser/packrat/iterative/seek", GINT_TO_POINTER(PB_PACKRAT), test_iterative_seek);
   g_test_add_data_func("/core/parser/packrat/iterative/result_length", GINT_TO_POINTER(PB_PACKRAT), test_iterative_result_length);
   g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip);
   g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek);
-- 
GitLab