From fbdd2b7613efae11bbbb24cd8f007c739608a9cc Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Mon, 17 Mar 2014 23:46:14 +0100
Subject: [PATCH] pull saved position into HParserCacheValue and fix segfault
 in grow()

---
 src/backends/packrat.c | 81 ++++++++++++++++++++++++++----------------
 src/internal.h         | 10 ++----
 src/t_parser.c         | 15 +++++++-
 3 files changed, 67 insertions(+), 39 deletions(-)

diff --git a/src/backends/packrat.c b/src/backends/packrat.c
index 87f166de..afd35775 100644
--- a/src/backends/packrat.c
+++ b/src/backends/packrat.c
@@ -3,10 +3,22 @@
 #include "../internal.h"
 #include "../parsers/parser_internal.h"
 
-// short-hand for constructing HCachedResult's
-static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
-  HCachedResult *ret = a_new(HCachedResult, 1);
-  ret->result = result;
+// short-hand for creating cache values (regular case)
+static
+HParserCacheValue * cached_result(HParseState *state, HParseResult *result) {
+  HParserCacheValue *ret = a_new(HParserCacheValue, 1);
+  ret->value_type = PC_RIGHT;
+  ret->right = result;
+  ret->input_stream = state->input_stream;
+  return ret;
+}
+
+// short-hand for caching parse results (left recursion case)
+static
+HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) {
+  HParserCacheValue *ret = a_new(HParserCacheValue, 1);
+  ret->value_type = PC_LEFT;
+  ret->left = lr;
   ret->input_stream = state->input_stream;
   return ret;
 }
@@ -52,9 +64,7 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
       // Nothing in the cache, and the key parser is not involved
       HParseResult *tmp = a_new(HParseResult, 1);
       tmp->ast = NULL; tmp->arena = state->arena;
-      HParserCacheValue *ret = a_new(HParserCacheValue, 1);
-      ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
-      return ret;
+      return cached_result(state, tmp);
     }
     if (h_slist_find(head->eval_set, k->parser)) {
       // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. 
@@ -64,7 +74,8 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
       if (!cached)
 	cached = a_new(HParserCacheValue, 1);
       cached->value_type = PC_RIGHT;
-      cached->right = cached_result(state, tmp_res);
+      cached->right = tmp_res;
+      cached->input_stream = state->input_stream;
     }
     return cached;
   }
@@ -83,14 +94,28 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
     some->eval_set = NULL;
     rec_detect->head = some;
   }
-  //assert(state->lr_stack->head != NULL);
-  HSlistNode *head = state->lr_stack->head;
-  HLeftRec *lr;
-  while (head && (lr = head->elem)->rule != p) {
+
+  // XXX is it ok for lr_stack to be empty here?! (we used to have an assert
+  //     saying it was not.)
+  HSlistNode *it;
+  for(it=state->lr_stack->head; it; it=it->next) {
+    HLeftRec *lr = it->elem;
+
+    if(lr->rule == p)
+      break;
+
     lr->head = rec_detect->head;
     h_slist_push(lr->head->involved_set, (void*)lr->rule);
-    head = head->next;
+      // XXX we are assuming that involved_set does not contain p, yet,
+      //     or ignoring that fact. is this correct?
   }
+  //assert(it != NULL);  // we should always find p (XXX unless lr_stack empty?)
+}
+
+// helper: true iff pos1 is less than pos2
+static inline bool pos_lt(HInputStream pos1, HInputStream pos2) {
+  return ((pos1.index < pos2.index) ||
+	  (pos1.index == pos2.index && pos1.bit_offset < pos2.bit_offset));
 }
 
 /* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
@@ -103,25 +128,22 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head)
   HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
   if (!old_cached || PC_LEFT == old_cached->value_type)
     errx(1, "impossible match");
-  HParseResult *old_res = old_cached->right->result;
+  HParseResult *old_res = old_cached->right;
   
   // reset the eval_set of the head of the recursion at each beginning of growth
   head->eval_set = h_slist_copy(head->involved_set);
   HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
 
   if (tmp_res) {
-    if ((old_res->ast->index < tmp_res->ast->index) || 
-	(old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
-      HParserCacheValue *v = a_new(HParserCacheValue, 1);
-      v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
-      h_hashtable_put(state->cache, k, v);
+    if (pos_lt(old_cached->input_stream, state->input_stream)) {
+      h_hashtable_put(state->cache, k, cached_result(state, tmp_res));
       return grow(k, state, head);
     } else {
       // we're done with growing, we can remove data from the recursion head
       h_hashtable_del(state->recursion_heads, k);
       HParserCacheValue *cached = h_hashtable_get(state->cache, k);
       if (cached && PC_RIGHT == cached->value_type) {
-	return cached->right->result;
+	return cached->right;
       } else {
 	errx(1, "impossible match");
       }
@@ -140,9 +162,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab
     }
     else {
       // update cache
-      HParserCacheValue *v = a_new(HParserCacheValue, 1);
-      v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
-      h_hashtable_put(state->cache, k, v);
+      h_hashtable_put(state->cache, k, cached_result(state, growable->seed));
       if (!growable->seed)
 	return NULL;
       else
@@ -165,18 +185,17 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
     base->seed = NULL; base->rule = parser; base->head = NULL;
     h_slist_push(state->lr_stack, base);
     // cache it
-    HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
-    dummy->value_type = PC_LEFT; dummy->left = base;
-    h_hashtable_put(state->cache, key, dummy);
+    HParserCacheValue *cached = cached_lr(state, base);
+    h_hashtable_put(state->cache, key, cached);
     // parse the input
     HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
     // the base variable has passed equality tests with the cache
     h_slist_pop(state->lr_stack);
+    // update the cached value to our new position
+    cached->input_stream = state->input_stream;
     // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
     if (NULL == base->head) {
-      HParserCacheValue *right = a_new(HParserCacheValue, 1);
-      right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
-      h_hashtable_put(state->cache, key, right);
+      h_hashtable_put(state->cache, key, cached_result(state, tmp_res));
       return tmp_res;
     } else {
       base->seed = tmp_res;
@@ -185,12 +204,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
     }
   } else {
     // it exists!
+    state->input_stream = m->input_stream;
     if (PC_LEFT == m->value_type) {
       setupLR(parser, state, m->left);
       return m->left->seed; // BUG: this might not be correct
     } else {
-      state->input_stream = m->right->input_stream;
-      return m->right->result;
+      return m->right;
     }
   }
 }
diff --git a/src/internal.h b/src/internal.h
index 056a5afc..85cd4dbc 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -255,21 +255,17 @@ typedef struct HLeftRec_ {
   HRecursionHead *head;
 } HLeftRec;
 
-/* Result and remaining input, for rerunning from a cached position. */
-typedef struct HCachedResult_ {
-  HParseResult *result;
-  HInputStream input_stream;
-} HCachedResult;
-
 /* Tagged union for values in the cache: either HLeftRec's (Left) or 
  * HParseResult's (Right).
+ * Includes the position (input_stream) to advance to after using this value.
  */
 typedef struct HParserCacheValue_t {
   HParserCacheValueType value_type;
   union {
     HLeftRec *left;
-    HCachedResult *right;
+    HParseResult *right;
   };
+  HInputStream input_stream;
 } HParserCacheValue;
 
 // This file provides the logical inverse of bitreader.c
diff --git a/src/t_parser.c b/src/t_parser.c
index e2eca978..23fc5cb1 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -419,6 +419,18 @@ static void test_leftrec(gconstpointer backend) {
   g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)");
 }
 
+static void test_leftrec_nonempty(gconstpointer backend) {
+  HParser *a_ = h_ch('a');
+
+  HParser *lr_ = h_indirect();
+  h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
+
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
+  g_check_parse_failed(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0);
+}
+
 static void test_rightrec(gconstpointer backend) {
   HParser *a_ = h_ch('a');
 
@@ -485,7 +497,8 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and);
   g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
   g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
-  //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
+  g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
+  g_test_add_data_func("/core/parser/packrat/leftrec_nonempty", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_nonempty);
   g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
-- 
GitLab