From f1d6d0bc5efeb247b60af69200b905a733028d84 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Thu, 3 Sep 2015 16:24:47 +0200
Subject: [PATCH] split h_llk_parse into start/chunk/finish internally

---
 src/backends/llk.c | 117 ++++++++++++++++++++++++++++++++++-----------
 src/internal.h     |   6 +--
 2 files changed, 92 insertions(+), 31 deletions(-)

diff --git a/src/backends/llk.c b/src/backends/llk.c
index afccb745..af755944 100644
--- a/src/backends/llk.c
+++ b/src/backends/llk.c
@@ -259,44 +259,74 @@ void h_llk_free(HParser *parser)
 
 /* LL(k) driver */
 
-HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
+typedef struct {        // LL(k) driver state shared by start/chunk/finish
+  HArena *arena;        // will hold the results
+  HArena *tarena;       // tmp, deleted after parse
+  HSlist *stack;        // symbol stack, split into frames by MARK
+  HCountedArray *seq;   // accumulates current parse result; NULL once failed
+} HLLkState;
+
+// in order to construct the parse tree, we delimit the symbol stack into
+// frames corresponding to production right-hand sides. since only left-most
+// derivations are produced this linearization is unique.
+// the static MARK defined below simply reserves a memory address to use as
+// the frame delimiter.
+// nonterminals, instead of being popped and forgotten, are put back onto the
+// stack below the mark to tell us which validations and semantic actions to
+// execute on their corresponding result.
+// also on the stack below the mark, we store the previously accumulated
+// value for the surrounding production.
+static int dummy;
+static void *MARK = &dummy;   // stack frame delimiter
+
+static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser)
 {
   const HLLkTable *table = parser->backend_data;
   assert(table != NULL);
 
-  HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
-  HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
-  HSlist *stack  = h_slist_new(tarena);
-  HCountedArray *seq = h_carray_new(arena); // accumulates current parse result
-
-  // in order to construct the parse tree, we delimit the symbol stack into
-  // frames corresponding to production right-hand sides. since only left-most
-  // derivations are produced this linearization is unique.
-  // the 'mark' allocated below simply reserves a memory address to use as the
-  // frame delimiter.
-  // nonterminals, instead of being popped and forgotten, are put back onto the
-  // stack below the mark to tell us which validations and semantic actions to
-  // execute on their corresponding result.
-  // also on the stack below the mark, we store the previously accumulated
-  // value for the surrounding production.
-  void *mark = h_arena_malloc(tarena, 1);
+  HLLkState *s = h_new(HLLkState, 1);
+  s->arena  = h_new_arena(mm__, 0);
+  s->tarena = h_new_arena(mm__, 0);
+  s->stack  = h_slist_new(s->tarena);
+  s->seq    = h_carray_new(s->arena);
 
   // initialize with the start symbol on the stack.
-  h_slist_push(stack, table->start);
+  h_slist_push(s->stack, table->start);
+
+  return s;
+}
+
+// returns the result accumulated so far, or NULL if the parse has failed
+static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser,
+                                       HInputStream* stream, bool last_chunk)
+{
+  HParsedToken *tok = NULL;   // will hold result token
+  HCFChoice *x = NULL;        // current symbol (from top of stack)
+
+  const HLLkTable *table = parser->backend_data;
+  assert(table != NULL);
+
+  HArena *arena = s->arena;
+  HArena *tarena = s->tarena;
+  HSlist *stack = s->stack;
+  HCountedArray *seq = s->seq;
+
+  if(!seq)
+    return NULL;  // parse already failed
 
   // when we empty the stack, the parse is complete.
   while(!h_slist_empty(stack)) {
     // pop top of stack for inspection
-    HCFChoice *x = h_slist_pop(stack);
+    x = h_slist_pop(stack);
     assert(x != NULL);
 
-    if(x != mark && x->type == HCF_CHOICE) {
+    if(x != MARK && x->type == HCF_CHOICE) {
       // x is a nonterminal; apply the appropriate production and continue
 
       // push stack frame
       h_slist_push(stack, seq);   // save current partial value
       h_slist_push(stack, x);     // save the nonterminal
-      h_slist_push(stack, mark);  // frame delimiter
+      h_slist_push(stack, MARK);  // frame delimiter
 
       // open a fresh result sequence
       seq = h_carray_new(arena);
@@ -319,11 +349,10 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
     }
 
     // the top of stack is such that there will be a result...
-    HParsedToken *tok;  // will hold result token
     tok = h_arena_malloc(arena, sizeof(HParsedToken));
     tok->index = stream->index;
     tok->bit_offset = stream->bit_offset;
-    if(x == mark) {
+    if(x == MARK) {
       // hit stack frame boundary...
       // wrap the accumulated parse result, this sequence is finished
       tok->token_type = TT_SEQUENCE;
@@ -344,13 +373,15 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
       case HCF_END:
         if(!stream->overrun)
           goto no_parse;
+        if(!last_chunk)
+          goto need_input;
         h_arena_free(arena, tok);
         tok = NULL;
         break;
 
       case HCF_CHAR:
         if(stream->overrun)
-          goto no_parse;
+          goto need_input;
         if(input != x->chr)
           goto no_parse;
         tok->token_type = TT_UINT;
@@ -359,7 +390,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
 
       case HCF_CHARSET:
         if(stream->overrun)
-          goto no_parse;
+          goto need_input;
         if(!charset_isset(x->charset, input))
           goto no_parse;
         tok->token_type = TT_UINT;
@@ -388,16 +419,46 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream*
     h_carray_append(seq, tok);
   }
 
+  // success
   // since we started with a single nonterminal on the stack, seq should
   // contain exactly the parse result.
   assert(seq->used == 1);
-  h_delete_arena(tarena);
-  return make_result(arena, seq->elements[0]);
+  return seq;
 
  no_parse:
-  h_delete_arena(tarena);
   h_delete_arena(arena);
+  s->arena = NULL;
   return NULL;
+
+ need_input:
+  if(last_chunk)
+    goto no_parse;
+  h_arena_free(arena, tok); // no result, yet
+  h_slist_push(stack, x);   // try this symbol again next time
+  return seq;
+}
+
+static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s)
+{
+  HParseResult *res = NULL;
+  // a non-NULL s->seq is wrapped into a result; otherwise NULL is returned
+  if(s->seq) {
+    assert(s->seq->used == 1);
+    res = make_result(s->arena, s->seq->elements[0]);
+  }
+  // the temporary arena and the state itself are released in either case
+  h_delete_arena(s->tarena);
+  h_free(s);
+  return res;
+}
+
+HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
+{
+  HLLkState *s = llk_parse_start_(mm__, parser);
+  // one-shot parse: the whole stream is handed over as a single, final chunk
+  s->seq = llk_parse_chunk_(s, parser, stream, true /* last chunk */);
+  // finish frees s and yields NULL when the chunk call returned NULL
+  return llk_parse_finish_(mm__, s);
+}
 
 
diff --git a/src/internal.h b/src/internal.h
index 8c799765..fa781811 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -225,13 +225,13 @@ typedef struct HParserBackendVTable_ {
   void (*free)(HParser* parser);
 
   void (*parse_start)(HSuspendedParser *s);
-    // parse_start should allocate backend_state.
+    // parse_start should allocate s->backend_state.
   void (*parse_chunk)(HSuspendedParser *s, HInputStream *input);
     // when parse_chunk leaves input.overrun unset, parse is done. else:
-    // parse_chunk MUST consume all input, integrating it into backend_state.
+    // parse_chunk MUST consume all input, integrating it into s->backend_state.
     // calling parse_chunk again after parse is done should have no effect.
   HParseResult *(*parse_finish)(HSuspendedParser *s);
-    // parse_finish must free backend_state.
+    // parse_finish must free s->backend_state.
 } HParserBackendVTable;
 
 
-- 
GitLab