From 127600425054788c121fa4be3831cf09d2c636d5 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Fri, 4 Sep 2015 21:05:56 +0200
Subject: [PATCH] handle suspend on lookahead at the very end of the chunk

---
 src/backends/llk.c | 27 ++++++++++++++++-----------
 src/cfgrammar.c    | 11 ++++++++---
 src/cfgrammar.h    |  3 +++
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/backends/llk.c b/src/backends/llk.c
index 9acf67ec..95289975 100644
--- a/src/backends/llk.c
+++ b/src/backends/llk.c
@@ -296,7 +296,7 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
   return s;
 }
 
-// returns partial result or NULL
+// returns the partial result (also when suspending for more input) or NULL (no parse)
 static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
                                        HInputStream* stream)
 {
@@ -316,6 +316,8 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
 
   // when we empty the stack, the parse is complete.
   while(!h_slist_empty(stack)) {
+    tok = NULL;
+
     // pop top of stack for inspection
     x = h_slist_pop(stack);
     assert(x != NULL);
@@ -323,22 +325,24 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
     if(x != MARK && x->type == HCF_CHOICE) {
       // x is a nonterminal; apply the appropriate production and continue
 
-      // push stack frame
-      h_slist_push(stack, seq);   // save current partial value
-      h_slist_push(stack, x);     // save the nonterminal
-      h_slist_push(stack, MARK);  // frame delimiter
-
-      // open a fresh result sequence
-      seq = h_carray_new(arena);
-
       // look up applicable production in parse table
       const HCFSequence *p = h_llk_lookup(table, x, stream);
       if(p == NULL)
         goto no_parse;
+      if(p == H_NEED_INPUT)
+        goto need_input;
 
       // an infinite loop case that shouldn't happen
       assert(!p->items[0] || p->items[0] != x);
 
+      // push stack frame
+      h_slist_push(stack, seq);   // save current partial value
+      h_slist_push(stack, x);     // save the nonterminal
+      h_slist_push(stack, MARK);  // frame delimiter
+
+      // open a fresh result sequence
+      seq = h_carray_new(arena);
+
       // push production's rhs onto the stack (in reverse order)
       HCFChoice **s;
       for(s = p->items; *s; s++);
@@ -433,8 +437,9 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
  need_input:
   if(stream->last_chunk)
     goto no_parse;
-  h_arena_free(arena, tok); // no result, yet
-  h_slist_push(stack, x);   // try this symbol again next time
+  if(tok)
+    h_arena_free(arena, tok);   // no result, yet
+  h_slist_push(stack, x);       // try this symbol again next time
   return seq;
 }
 
diff --git a/src/cfgrammar.c b/src/cfgrammar.c
index a8761b8d..117009a5 100644
--- a/src/cfgrammar.c
+++ b/src/cfgrammar.c
@@ -349,6 +349,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en
   return m->epsilon_branch;
 }
 
+// Returns NULL on no parse, or H_NEED_INPUT if the lookahead overruns a chunk that is not the last.
 void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
 {
   while(m) {
@@ -362,9 +363,13 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead)
     // reading bits from it does not consume them from the real input.
     uint8_t c = h_read_bits(&lookahead, 8, false);
     
-    if (lookahead.overrun) {     // end of input
-      // XXX assumption of byte-wise grammar and input
-      return m->end_branch;
+    if (lookahead.overrun) {        // end of chunk
+      if (lookahead.last_chunk) {   // end of input
+        // XXX assumption of byte-wise grammar and input
+        return m->end_branch;
+      } else {
+        return H_NEED_INPUT;
+      }
     }
 
     // no match yet, descend
diff --git a/src/cfgrammar.h b/src/cfgrammar.h
index 9cefc62e..2294d445 100644
--- a/src/cfgrammar.h
+++ b/src/cfgrammar.h
@@ -56,6 +56,9 @@ bool h_stringmap_empty(const HStringMap *m);
 static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
  { return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
 
+// sentinel (distinct from NULL) returned by h_stringmap_get_lookahead when a non-final chunk runs out
+#define H_NEED_INPUT ((void *)&h_stringmap_get_lookahead)
+
 
 /* Convert 'parser' into CFG representation by desugaring and compiling the set
  * of nonterminals.
-- 
GitLab