diff --git a/src/backends/glr.c b/src/backends/glr.c
index 411171b797df8f913fba764f8c89fecd16914ac9..34a56335881b1abbda27f15011a3d4842c8cd5a3 100644
--- a/src/backends/glr.c
+++ b/src/backends/glr.c
@@ -1,6 +1,9 @@
 #include <assert.h>
 #include "lr.h"
 
+static bool glr_step(HParseResult **result, HLREngine **engines,
+                     HLREngine *engine, const HLRAction *action);
+
 
 /* GLR compilation (LALR w/o failing on conflict) */
 
@@ -22,7 +25,86 @@ void h_glr_free(HParser *parser)
 }
 
 
-/* GLR driver */
+/* Merging engines (when they converge on the same state) */
+
+static HLREngine *lrengine_merge(HLREngine *old, HLREngine *new)
+{
+  // Returns a fresh engine standing in for both 'old' and 'new': it copies
+  // old's fields, starts with an empty stack, and records both as ancestors.
+  assert(old->state == new->state);
+  assert(old->input.input == new->input.input);
+  // The merged engine and its stack are only needed while parsing, so take
+  // them from the temporary arena (where h_lrengine_new puts stacks as well)
+  // rather than from the arena that holds the results.
+  HArena *tarena = old->tarena;
+  HLREngine *ret = h_arena_malloc(tarena, sizeof(HLREngine));
+  *ret = *old;
+  ret->stack = h_slist_new(tarena);
+  ret->merged[0] = old;
+  ret->merged[1] = new;
+  return ret;
+}
+
+static HSlist *demerge_stack(HSlistNode *bottom, HSlist *stack)
+{
+  // Builds a new list on stack's arena: copies of all nodes of 'stack', top
+  // first, followed by the chain starting at 'bottom' (linked, not copied).
+  HArena *arena = stack->arena;
+  HSlist *copy = h_slist_new(arena);
+  HSlistNode **tail = &copy->head;    // slot that receives the next node
+  HSlistNode *src = stack->head;
+  while(src != NULL) {
+    HSlistNode *dup = h_arena_malloc(arena, sizeof(HSlistNode));
+    dup->elem = src->elem;
+    dup->next = NULL;
+    *tail = dup;
+    tail = &dup->next;
+    src = src->next;
+  }
+  *tail = bottom;    // attach the ancestor stack below the copies
+  return copy;
+}
+
+static inline HLREngine *respawn(HLREngine *eng, HSlist *stack)
+{
+  // updating eng in place is fine: it is not used for anything after a merge
+  HSlistNode *bottom = eng->stack->head;
+  eng->stack = demerge_stack(bottom, stack);
+  return eng;
+}
+
+static HLREngine *
+demerge(HParseResult **result, HLREngine **engines,
+        HLREngine *engine, const HLRAction *action, size_t depth)
+{
+  // no-op on engines that are not merged
+  if(!engine->merged[0])
+    return engine;
+  // see whether 'depth' nodes can be walked before the stack runs out
+  HSlistNode *p = engine->stack->head;
+  for(size_t i=0; i<depth; i++) {
+    // if stack hits bottom, respawn ancestors
+    if(p == NULL) {
+      HLREngine *a = respawn(engine->merged[0], engine->stack);
+      HLREngine *b = respawn(engine->merged[1], engine->stack);
+      // (both now carry copies of this engine's i nodes above their own)
+      // continue demerge until final depth reached
+      a = demerge(result, engines, a, action, depth-i);
+      b = demerge(result, engines, b, action, depth-i);
+      // NOTE(review): those walks start at the new top, copies included -- is depth-i right?
+      // step and stow one ancestor...
+      glr_step(result, engines, a, action);
+
+      // ...and return the other
+      return b;
+    }
+    p = p->next;
+  }
+
+  return engine;    // there is enough stack before the merge point
+}
+
+
+/* Forking engines (on conflicts) */
 
 HLREngine *fork_engine(const HLREngine *engine)
 {
@@ -43,14 +125,9 @@ HLREngine *fork_engine(const HLREngine *engine)
   return eng2;
 }
 
-static void stow_engine(HSlist *engines, HLREngine *engine)
-{
-  // XXX switch to one engine per state, and do the merge here
-  h_slist_push(engines, engine);
-}
-
-static const HLRAction *handle_conflict(HSlist *engines, const HLREngine *engine,
-                                        const HSlist *branches)
+static const HLRAction *
+handle_conflict(HParseResult **result, HLREngine **engines,
+                const HLREngine *engine, const HSlist *branches)
 {
   // there should be at least two conflicting actions
   assert(branches->head);
@@ -61,63 +138,46 @@ static const HLRAction *handle_conflict(HSlist *engines, const HLREngine *engine
     HLRAction *act = x->elem; 
     HLREngine *eng = fork_engine(engine);
 
-    // perform one step and add to list
-    h_lrengine_step(eng, act);
-    stow_engine(engines, eng);
+    // perform one step and add to engines
+    glr_step(result, engines, eng, act);
   } 
 
   // return first action for use with original engine
   return branches->head->elem;
 }
 
-static HSlist *demerge_stack(HSlistNode *bottom, HSlistNode *mp, HSlist *stack)
-{
-  HArena *arena = stack->arena;
 
-  HSlist *ret = h_slist_new(arena);
-
-  // copy the stack from the top
-  HSlistNode **y = &ret->head;
-  for(HSlistNode *x=stack->head; x && x!=mp; x=x->next) {
-    HSlistNode *node = h_arena_malloc(arena, sizeof(HSlistNode));
-    node->elem = x->elem;
-    node->next = NULL;
-    *y = node;
-    y = &node->next;
-  }
-  *y = bottom;  // attach the ancestor stack
-
-  return ret;
-}
+/* GLR driver */
 
-static void demerge(HSlist *engines, HLREngine *engine,
-                    const HLRAction *action, size_t depth)
+static bool glr_step(HParseResult **result, HLREngine **engines,
+                     HLREngine *engine, const HLRAction *action)
 {
-  // no-op on engines that are not merged
-  if(!engine->merged)
-    return;
-
-  HSlistNode *p = engine->stack->head;
-  for(size_t i=0; i<depth; i++) {
-    // if stack hits mergepoint, respawn ancestor
-    if(p == engine->mp) {
-      HLREngine *eng = engine->merged;
-      eng->stack = demerge_stack(eng->stack->head, engine->mp, engine->stack);
-      demerge(engines, eng, action, depth-i);
-      
-      // call step and stow on restored ancestor
-      h_lrengine_step(eng, action);
-      stow_engine(engines, eng);
-      break;
+  // handle forks and demerges (~> spawn engines, stepped and stowed as well)
+  if(action) {
+    if(action->type == HLR_CONFLICT) {
+      // fork engine on conflicts; continue below with the first branch
+      action = handle_conflict(result, engines, engine, action->branches);
+    } else if(action->type == HLR_REDUCE) {
+      // demerge/respawn as needed (NOTE(review): skipped for the branch returned above -- intended?)
+      size_t depth = action->production.length;
+      engine = demerge(result, engines, engine, action, depth);
     }
-    p = p->next;
   }
-}
 
-static inline void
-handle_demerge(HSlist *engines, HLREngine *engine, const HLRAction *reduce)
-{
-  demerge(engines, engine, reduce, reduce->production.length);
+  bool run = h_lrengine_step(engine, action);
+  // run is false once the engine is finished; success is told apart below
+  if(run) {
+    // store engine in the array, merge if necessary
+    if(engines[engine->state] == NULL)
+      engines[engine->state] = engine;
+    else
+      engines[engine->state] = lrengine_merge(engines[engine->state], engine);
+  } else if(engine->state == HLR_SUCCESS) {
+    // save the result
+    *result = h_lrengine_result(engine);
+  }
+  // report whether 'engine' (possibly a respawned ancestor) is still running
+  return run;
 }
 
 HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream)
@@ -129,43 +189,42 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
   HArena *arena  = h_new_arena(mm__, 0);    // will hold the results
   HArena *tarena = h_new_arena(mm__, 0);    // tmp, deleted after parse
 
-  HSlist *engines = h_slist_new(tarena);
-  h_slist_push(engines, h_lrengine_new(arena, tarena, table, stream));
+  // allocate engine arrays (can hold one engine per state)
+  // these are swapped each iteration
+  HLREngine **engines = h_arena_malloc(tarena, table->nrows * sizeof(HLREngine *));
+  HLREngine **engback = h_arena_malloc(tarena, table->nrows * sizeof(HLREngine *));
 
-  HParseResult *result = NULL;
-  while(result == NULL && !h_slist_empty(engines)) {
-    for(HSlistNode **x = &engines->head; *x; ) {
-      HLREngine *engine = (*x)->elem;
+  assert(table->nrows > 0);
+  for(size_t i=0; i<table->nrows; i++) {
+    engines[i] = NULL;
+    engback[i] = NULL;
+  }
 
-      // remove engine from list; it may come back in below
-      *x = (*x)->next;    // advance x, removing the current element
+  // create initial engine
+  engines[0] = h_lrengine_new(arena, tarena, table, stream);
+  assert(engines[0]->state == 0);
 
-      // drop those engines that have terminated
-      if(!engine->run) {
-        // check for parse success
-        HParseResult *res = h_lrengine_result(engine);
-        if(res)
-          result = res;
+  HParseResult *result = NULL;
+  size_t engines_left = 1;    // engines picked up in the previous round
+  while(engines_left && result == NULL) {
+    engines_left = 0;
 
+    for(size_t i=0; i<table->nrows; i++) {
+      HLREngine *engine = engines[i];
+      if(engine == NULL)
         continue;
-      }
-
-      const HLRAction *action = h_lrengine_action(engine);
-
-      // handle forks and demerges (~> spawn engines)
-      if(action) {
-        if(action->type == HLR_CONFLICT) {
-          // fork engine on conflicts
-          action = handle_conflict(engines, engine, action->branches);
-        } else if(action->type == HLR_REDUCE) {
-          // demerge/respawn as needed
-          handle_demerge(engines, engine, action);
-        }
-      }
-
-      h_lrengine_step(engine, action);
-      stow_engine(engines, engine);
+      engines[i] = NULL;    // cleared for next iteration
+
+      // Count every engine picked up, not just those whose own step keeps
+      // running: glr_step also stows forks and respawned ancestors in engback.
+      engines_left++;
+      glr_step(&result, engback, engine, h_lrengine_action(engine));
     }
+
+    // swap the arrays; the loop ends after a round that found no engine
+    HLREngine **tmp = engines;
+    engines = engback;
+    engback = tmp;
   }
 
   if(!result)
@@ -184,13 +243,6 @@ HParserBackendVTable h__glr_backend_vtable = {
 
 
 
-// XXX TODO
-// - implement engine merging
-//   - triggered when two enter the same state
-//   - old stacks (/engines?) saved
-//   - new common suffix stack created
-//   - when rewinding (during reduce), watch for empty stack -> demerge
-
 
 // dummy!
 int test_glr(void)
diff --git a/src/backends/lr.c b/src/backends/lr.c
index c481d291fe881249b4c2b2aa17cff945ab289f06..4c89d19d70030a1475c1a1e3cccc92ed58bdef58 100644
--- a/src/backends/lr.c
+++ b/src/backends/lr.c
@@ -206,11 +206,10 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table,
 
   engine->table = table;
   engine->state = 0;
-  engine->run = true;
   engine->stack = h_slist_new(tarena);
   engine->input = *stream;
-  engine->merged = NULL;
-  engine->mp = NULL;
+  engine->merged[0] = NULL;
+  engine->merged[1] = NULL;
   engine->arena = arena;
   engine->tarena = tarena;
 
@@ -267,7 +266,7 @@ static HParsedToken *consume_input(HLREngine *engine)
 }
 
 // run LR parser for one round; returns false when finished
-static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
+bool h_lrengine_step(HLREngine *engine, const HLRAction *action)
 {
   // short-hand names
   HSlist *stack = engine->stack;
@@ -329,8 +328,11 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
     h_slist_push(stack, value);
     engine->state = shift->nextstate;
 
-    if(symbol == engine->table->start)
-      return false;     // reduced to start symbol; accept!
+    // check for success
+    if(engine->state == HLR_SUCCESS) {
+      assert(symbol == engine->table->start);
+      return false;
+    }
   } else {
     assert(action->type == HLR_SHIFT);
     HParsedToken *value = consume_input(engine);
@@ -342,17 +344,12 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action)
   return true;
 }
 
-// run LR parser for one round; sets engine->run
-void h_lrengine_step(HLREngine *engine, const HLRAction *action)
-{
-  engine->run = h_lrengine_step_(engine, action);
-}
-
 HParseResult *h_lrengine_result(HLREngine *engine)
 {
-  // parsing was successful iff after a shift the engine is back in state 0
-  if(engine->state == 0 && !h_slist_empty(engine->stack)) {
+  // parsing was successful iff the engine reaches the end state
+  if(engine->state == HLR_SUCCESS) {
     // on top of the stack is the start symbol's semantic value
+    assert(!h_slist_empty(engine->stack));
     HParsedToken *tok = engine->stack->head->elem;
     return make_result(engine->arena, tok);
   } else {
@@ -371,8 +368,7 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
   HLREngine *engine = h_lrengine_new(arena, tarena, table, stream);
 
   // iterate engine to completion
-  while(engine->run)
-    h_lrengine_step(engine, h_lrengine_action(engine));
+  while(h_lrengine_step(engine, h_lrengine_action(engine)));
 
   HParseResult *result = h_lrengine_result(engine);
   if(!result)
@@ -464,7 +460,10 @@ void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action)
 {
   switch(action->type) {
   case HLR_SHIFT:
-    fprintf(f, "s%lu", action->nextstate);
+    if(action->nextstate == HLR_SUCCESS)
+      fputs("s~", f);
+    else
+      fprintf(f, "s%lu", action->nextstate);
     break;
   case HLR_REDUCE:
     fputs("r(", f);
diff --git a/src/backends/lr.h b/src/backends/lr.h
index ab48633514bf6565924198bc723a42ca96662c15..8f1eadd9059330b23c77e58aedfd680690b07950 100644
--- a/src/backends/lr.h
+++ b/src/backends/lr.h
@@ -69,18 +69,18 @@ typedef struct HLREnhGrammar_ {
 typedef struct HLREngine_ {
   const HLRTable *table;
   size_t state;
-  bool run;
 
   HSlist *stack;        // holds pairs: (saved state, semantic value)
   HInputStream input;
 
-  struct HLREngine_ *merged;    // ancestor merged into this engine at mp
-  HSlistNode *mp;               // mergepoint: stack->head at time of merge
+  struct HLREngine_ *merged[2]; // ancestors merged into this engine (NULL if none)
 
   HArena *arena;        // will hold the results
   HArena *tarena;       // tmp, deleted after parse
 } HLREngine;
 
+#define HLR_SUCCESS (~(size_t)0)    // parser end state (no table row has it)
+
 
 // XXX move to internal.h or something
 // XXX replace other hashtable iterations with this
@@ -131,7 +131,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params);
 void h_lalr_free(HParser *parser);
 
 const HLRAction *h_lrengine_action(const HLREngine *engine);
-void h_lrengine_step(HLREngine *engine, const HLRAction *action);
+bool h_lrengine_step(HLREngine *engine, const HLRAction *action);
 HParseResult *h_lrengine_result(HLREngine *engine);
 HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
 HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream);
diff --git a/src/backends/lr0.c b/src/backends/lr0.c
index 5add53a212b2000e54fb6e8b2d50fae41514c535..1c86484e61300ec40362a9abb47105424ddff2b9 100644
--- a/src/backends/lr0.c
+++ b/src/backends/lr0.c
@@ -190,10 +190,8 @@ HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
   // remember start symbol
   table->start = g->start;
 
-  // add dummy shift entry for the start symbol so h_lrengine_step can always
-  // find a shift.
-  // NB: nextstate=0 is used for the "victory condition" by h_lrengine_result.
-  put_shift(table, 0, g->start, 0);
+  // shift to the accepting end state for the start symbol
+  put_shift(table, 0, g->start, HLR_SUCCESS);
 
   // add shift entries
   for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {