From c32cf709b2e51924f8f1f91693febc1856fedc43 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Fri, 21 Jun 2013 20:11:19 +0200
Subject: [PATCH] eliminate the right stack; work with the HInputStream
 directly

---
 src/backends/glr.c | 15 ++++----
 src/backends/lr.c  | 87 +++++++++++++++++++++++-----------------------
 src/backends/lr.h  | 12 +++----
 3 files changed, 54 insertions(+), 60 deletions(-)

diff --git a/src/backends/glr.c b/src/backends/glr.c
index d460e8af..7a5f8f51 100644
--- a/src/backends/glr.c
+++ b/src/backends/glr.c
@@ -29,15 +29,14 @@ HLREngine *fork_engine(const HLREngine *engine)
   HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine));
   eng2->table = engine->table;
   eng2->state = engine->state;
+  eng2->input = engine->input;
 
-  // shallow-copy the stacks
+  // shallow-copy the stack
   // this works because h_slist_push and h_slist_pop never modify
   // the underlying structure of HSlistNodes, only the head pointer.
   // in fact, this gives us prefix sharing for free.
-  eng2->left = h_arena_malloc(engine->tarena, sizeof(HSlist));
-  eng2->right = h_arena_malloc(engine->tarena, sizeof(HSlist));
-  *eng2->left = *engine->left;
-  *eng2->right = *engine->right;
+  eng2->stack = h_arena_malloc(engine->tarena, sizeof(HSlist));
+  *eng2->stack = *engine->stack;
 
   eng2->arena = engine->arena;
   eng2->tarena = engine->tarena;
@@ -54,7 +53,7 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
   HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
 
   HSlist *engines = h_slist_new(tarena);
-  h_slist_push(engines, h_lrengine_new(arena, tarena, table));
+  h_slist_push(engines, h_lrengine_new(arena, tarena, table, stream));
 
   HParseResult *result = NULL;
   while(result == NULL && !h_slist_empty(engines)) {
@@ -75,7 +74,7 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
         continue;
       }
 
-      const HLRAction *action = h_lrengine_action(engine, stream);
+      const HLRAction *action = h_lrengine_action(engine);
 
       // fork engine on conflicts
       if(action && action->type == HLR_CONFLICT) {
@@ -120,8 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = {
 
 
 // XXX TODO
-// - eliminate right stack by always doing a shift after reduce
-//   (shift should always follow reduce because rightmost)
 // - split tables into
 //   - one mapping input bytes to actions (shift or reduce or conflict)
 //   - one mapping reduced-to lhs nonterminals to shift states
diff --git a/src/backends/lr.c b/src/backends/lr.c
index 2603ff2a..f33aab82 100644
--- a/src/backends/lr.c
+++ b/src/backends/lr.c
@@ -202,65 +202,64 @@ h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol)
   }
 }
 
-HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table)
+HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
+                          const HInputStream *stream)
 {
   HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine));
 
   engine->table = table;
   engine->state = 0;
   engine->run = true;
-  engine->left = h_slist_new(tarena);
-  engine->right = h_slist_new(tarena);
+  engine->stack = h_slist_new(tarena);
+  engine->input = *stream;
   engine->arena = arena;
   engine->tarena = tarena;
 
   return engine;
 }
 
-const HLRAction *h_lrengine_action(HLREngine *engine, HInputStream *stream)
+const HLRAction *h_lrengine_action(const HLREngine *engine)
 {
-  HSlist *right = engine->right;
-  HArena *arena = engine->arena;
   HArena *tarena = engine->tarena;
 
-  // make sure there is input on the right stack
-  if(h_slist_empty(right)) {
-    // XXX use statically-allocated terminal symbols
-    HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice));
-    HParsedToken *v;
-
-    uint8_t c = h_read_bits(stream, 8, false);
+  // XXX use statically-allocated terminal symbols
+  HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice));
 
-    if(stream->overrun) {     // end of input
-      x->type = HCF_END;
-      v = NULL;
-    } else {
-      x->type = HCF_CHAR;
-      x->chr = c;
-      v = h_arena_malloc(arena, sizeof(HParsedToken));
-      v->token_type = TT_UINT;
-      v->uint = c;
-    }
+  HInputStream lookahead = engine->input;
+  uint8_t c = h_read_bits(&lookahead, 8, false);
 
-    h_slist_push(right, v);
-    h_slist_push(right, x);
+  if(lookahead.overrun) {     // end of input
+    x->type = HCF_END;
+  } else {
+    x->type = HCF_CHAR;
+    x->chr = c;
   }
 
-  // peek at input symbol on the right side
-  HCFChoice *symbol = right->head->elem;
+  return h_lr_lookup(engine->table, engine->state, x);
+}
+
+static HParsedToken *consume_input(HLREngine *engine)
+{
+  HParsedToken *v;
 
-  // table lookup
-  const HLRAction *action = h_lr_lookup(engine->table, engine->state, symbol);
+  uint8_t c = h_read_bits(&engine->input, 8, false);
 
-  return action;
+  if(engine->input.overrun) {     // end of input
+    v = NULL;
+  } else {
+    v = h_arena_malloc(engine->arena, sizeof(HParsedToken));
+    v->token_type = TT_UINT;
+    v->uint = c;
+  }
+
+  return v;
 }
 
 // run LR parser for one round; returns false when finished
 static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
 {
   // short-hand names
-  HSlist *left = engine->left;
-  HSlist *right = engine->right;
+  HSlist *stack = engine->stack;
   HArena *arena = engine->arena;
   HArena *tarena = engine->tarena;
 
@@ -278,11 +277,11 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
     value->token_type = TT_SEQUENCE;
     value->seq = h_carray_new_sized(arena, len);
     
-    // pull values off the left stack, rewinding state accordingly
+    // pull values off the stack, rewinding state accordingly
     HParsedToken *v = NULL;
     for(size_t i=0; i<len; i++) {
-      v = h_slist_drop(left);
-      engine->state = (uintptr_t)h_slist_drop(left);
+      v = h_slist_drop(stack);
+      engine->state = (uintptr_t)h_slist_drop(stack);
 
       // collect values in result sequence
       value->seq->elements[len-1-i] = v;
@@ -315,17 +314,17 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
     assert(shift->type == HLR_SHIFT);
 
     // piggy-back the shift right here, never touching the input
-    h_slist_push(left, (void *)(uintptr_t)engine->state);
-    h_slist_push(left, value);
+    h_slist_push(stack, (void *)(uintptr_t)engine->state);
+    h_slist_push(stack, value);
     engine->state = shift->nextstate;
 
     if(symbol == engine->table->start)
       return false;     // reduced to start symbol; accept!
   } else {
     assert(action->type == HLR_SHIFT);
-    h_slist_push(left, (void *)(uintptr_t)engine->state);
-    h_slist_drop(right);                      // symbol (discard)
-    h_slist_push(left, h_slist_drop(right));   // semantic value
+    HParsedToken *value = consume_input(engine);
+    h_slist_push(stack, (void *)(uintptr_t)engine->state);
+    h_slist_push(stack, value);
     engine->state = action->nextstate;
   }
 
@@ -341,9 +340,9 @@ void h_lrengine_step(HLREngine *engine, const HLRAction *action)
 HParseResult *h_lrengine_result(HLREngine *engine)
 {
   // parsing was successful iff after a shift the engine is back in state 0
-  if(engine->state == 0 && !h_slist_empty(engine->left)) {
+  if(engine->state == 0 && !h_slist_empty(engine->stack)) {
     // on top of the stack is the start symbol's semantic value
-    HParsedToken *tok = engine->left->head->elem;
+    HParsedToken *tok = engine->stack->head->elem;
     return make_result(engine->arena, tok);
   } else {
     return NULL;
@@ -358,11 +357,11 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
 
   HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
   HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
-  HLREngine *engine = h_lrengine_new(arena, tarena, table);
+  HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);
 
   // iterate engine to completion
   while(engine->run)
-    h_lrengine_step(engine, h_lrengine_action(engine, stream));
+    h_lrengine_step(engine, h_lrengine_action(engine));
 
   HParseResult *result = h_lrengine_result(engine);
   if(!result)
diff --git a/src/backends/lr.h b/src/backends/lr.h
index 5e2f0329..f76bd33f 100644
--- a/src/backends/lr.h
+++ b/src/backends/lr.h
@@ -70,11 +70,8 @@ typedef struct HLREngine_ {
   size_t state;
   bool run;
 
-  // stack layout:
-  // on the left stack, we put pairs:  (saved state, semantic value)
-  // on the right stack, we put pairs: (symbol, semantic value)
-  HSlist *left;     // left stack; reductions happen here
-  HSlist *right;    // right stack; input appears here
+  HSlist *stack;        // holds pairs: (saved state, semantic value)
+  HInputStream input;
 
   HArena *arena;    // will hold the results
   HArena *tarena;   // tmp, deleted after parse
@@ -108,7 +105,8 @@ HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark);
 HLRState *h_lrstate_new(HArena *arena);
 HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows);
 void h_lrtable_free(HLRTable *table);
-HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table);
+HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
+                          const HInputStream *stream);
 HLRAction *h_reduce_action(HArena *arena, const HLRItem *item);
 HLRAction *h_shift_action(HArena *arena, size_t nextstate);
 HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new);
@@ -128,7 +126,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params);
 void h_lalr_free(HParser *parser);
 
 const HLRAction *h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol);
-const HLRAction *h_lrengine_action(HLREngine *engine, HInputStream *stream);
+const HLRAction *h_lrengine_action(const HLREngine *engine);
 void h_lrengine_step(HLREngine *engine, const HLRAction *action);
 HParseResult *h_lrengine_result(HLREngine *engine);
 HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
-- 
GitLab