From fd297b636c05c6e46882485131d8f519c6fcb954 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Thu, 13 Jun 2013 14:45:26 +0200
Subject: [PATCH] add LALR conflict resolution (untested)

---
 src/backends/lalr.c | 164 ++++++++++++++++++++++++++++++++++----------
 src/cfgrammar.c     |   5 ++
 src/cfgrammar.h     |   1 +
 3 files changed, 135 insertions(+), 35 deletions(-)

diff --git a/src/backends/lalr.c b/src/backends/lalr.c
index aad47d4d..7e83c538 100644
--- a/src/backends/lalr.c
+++ b/src/backends/lalr.c
@@ -52,6 +52,13 @@ typedef struct HLRTable_ {
   HAllocator *mm__;
 } HLRTable;
 
+typedef struct HLREnhGrammar_ {
+  HCFGrammar *grammar;  // enhanced grammar, built from the transformed start symbol
+  HHashTable *tmap;     // maps transitions (HLRTransition *) to enhanced-grammar symbols
+  HHashTable *smap;     // maps enhanced-grammar symbols (HCFChoice *) to transitions
+  HArena *arena;        // arena the enhanced symbols are allocated from
+} HLREnhGrammar;
+
 
 // compare symbols - terminals by value, others by pointer
 static bool eq_symbol(const void *p, const void *q)
@@ -410,14 +417,10 @@ static HLRAction *reduce_action(HArena *arena, const HLRItem *item)
   return action;
 }
 
-HLRTable *h_lr0_table(HCFGrammar *g)
+HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa)
 {
   HAllocator *mm__ = g->mm__;
 
-  // construct LR(0) DFA
-  HLRDFA *dfa = h_lr0_dfa(g);
-  if(!dfa) return NULL;
-
   HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
   HArena *arena = table->arena;
 
@@ -475,13 +478,13 @@ static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
   return action->nextstate;
 }
 
-static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
+static HCFChoice *transform_symbol(const HLRTable *table, HLREnhGrammar *eg,
                                    size_t x, HCFChoice *B, size_t z);
 
-static HCFChoice *transform_productions(const HLRTable *table, HHashTable *map,
+static HCFChoice *transform_productions(const HLRTable *table, HLREnhGrammar *eg,
                                          size_t x, HCFChoice *xAy)
 {
-  HArena *arena = map->arena;
+  HArena *arena = eg->arena;
 
   HCFSequence **seq = h_arena_malloc(arena, seqsize(xAy->seq)
                                             * sizeof(HCFSequence *));
@@ -494,7 +497,7 @@ static HCFChoice *transform_productions(const HLRTable *table, HHashTable *map,
     HCFChoice **xBz = h_arena_malloc(arena, seqsize(B) * sizeof(HCFChoice *));
     for(; *B; B++, xBz++) {
       size_t z = follow_transition(table, x, *B);
-      *xBz = transform_symbol(table, map, x, *B, z);
+      *xBz = transform_symbol(table, eg, x, *B, z);
       x=z;
     }
     *xBz = NULL;
@@ -518,21 +521,22 @@ static inline HLRTransition *transition(HArena *arena,
   return t;
 }
 
-static HCFChoice *transform_symbol(const HLRTable *table, HHashTable *map,
+static HCFChoice *transform_symbol(const HLRTable *table, HLREnhGrammar *eg,
                                    size_t x, HCFChoice *B, size_t z)
 {
-  HArena *arena = map->arena;
+  HArena *arena = eg->arena;
 
   // look up the transition in map, create symbol if not found
   HLRTransition *x_B_z = transition(arena, x, B, z);
-  HCFChoice *xBz = h_hashtable_get(map, x_B_z);
+  HCFChoice *xBz = h_hashtable_get(eg->tmap, x_B_z);
   if(!xBz) {
-    HCFChoice *xBz = h_arena_malloc(arena, sizeof(HCFChoice));
+    xBz = h_arena_malloc(arena, sizeof(HCFChoice)); // assign the outer xBz; redeclaring it here left it NULL
     *xBz = *B;
-    h_hashtable_put(map, x_B_z, xBz);
+    h_hashtable_put(eg->tmap, x_B_z, xBz);  // transition -> enhanced symbol
+    h_hashtable_put(eg->smap, xBz, x_B_z);  // enhanced symbol -> transition
   }
 
-  return transform_productions(table, map, x, xBz);
+  return transform_productions(table, eg, x, xBz);
 }
 
 static bool eq_transition(const void *p, const void *q)
@@ -547,39 +551,71 @@ static HHashValue hash_transition(const void *p)
   return (h_hash_ptr(t->symbol) + t->from + t->to); // XXX ?
 }
 
-static HHashTable *enhance_grammar(const HCFGrammar *g, const HLRTable *tbl)
+static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRTable *tbl)
 {
+  HAllocator *mm__ = g->mm__;
   HArena *arena = g->arena; // XXX ?
-  HHashTable *map = h_hashtable_new(arena, eq_transition, hash_transition);
+
+  HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar));
+  eg->tmap = h_hashtable_new(arena, eq_transition, hash_transition); // keys: HLRTransition *
+  eg->smap = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr); // keys: HCFChoice *, by identity
+  eg->arena = arena;
 
   // copy the start symbol over
   HCFChoice *start = h_arena_malloc(arena, sizeof(HCFChoice));
   *start = *(g->start);
-  h_hashtable_put(map, g->start, start);
 
-  transform_productions(tbl, map, 0, start);
+  transform_productions(tbl, eg, 0, start);
 
-  return map;
+  eg->grammar = h_cfgrammar_(mm__, start);  // build the grammar rooted at the copied start
+  return eg;
 }
 
 
 
 /* LALR table generation */
 
-bool is_inadequate(HLRTable *table, size_t state)
+static inline bool has_conflicts(HLRTable *table)
 {
-  // XXX
-  return false;
+  return !h_slist_empty(table->inadeq);  // inadeq lists the states with conflicts
 }
 
-bool has_conflicts(HLRTable *table)
+// Enter action into table cell (state, x). If the cell already holds a
+// different action, leave it as is and note state in tbl->inadeq instead.
+// Returns 0 if the entry was stored, -1 on conflict; tbl->forall is not consulted.
+int h_lrtable_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action)
 {
-  return !h_slist_empty(table->inadeq);
+  HLRAction *occupant = h_hashtable_get(tbl->rows[state], x);
+  if(occupant == NULL || occupant == action) {
+    h_hashtable_put(tbl->rows[state], x, action);
+    return 0;
+  }
+
+  // cell is taken by a different action: conflict
+  h_slist_push(tbl->inadeq, (void *)(uintptr_t)state);
+  return -1;
+}
+
+// check whether a sequence of enhanced-grammar symbols (p) matches the given
+// (original-grammar) production rhs and terminates in the given end state.
+bool match_production(HLREnhGrammar *eg, HCFChoice **p,
+                      HCFChoice **rhs, size_t endstate)
+{
+  HLRTransition *t = NULL;  // last transition followed; NULL if none (empty rhs)
+  for(; *p && *rhs; p++, rhs++) {
+    t = h_hashtable_get(eg->smap, *p);  // transition the enhanced symbol stands for
+    assert(t != NULL);
+    if(!eq_symbol(t->symbol, *rhs))
+      return false;
+  }
+  return (*p == *rhs    // both NULL
+          && (!t || t->to == endstate));  // empty rhs: no transition to check, accept
 }
 
 int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
 {
   // generate CFG from parser
+  // construct LR(0) DFA
   // build LR(0) table
   // if necessary, resolve conflicts "by conversion to SLR"
 
@@ -587,21 +623,79 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
   if(g == NULL)     // backend not suitable (language not context-free)
     return -1;
 
-  HLRTable *table = h_lr0_table(g);
-  if(table == NULL) // this should normally not happen
+  HLRDFA *dfa = h_lr0_dfa(g);
+  if(dfa == NULL) {     // this should normally not happen
+    h_cfgrammar_free(g);
     return -1;
+  }
+
+  HLRTable *table = h_lr0_table(g, dfa);
+  if(table == NULL) {   // this should normally not happen
+    h_cfgrammar_free(g);
+    return -1;
+  }
 
   if(has_conflicts(table)) {
-    HHashTable *map = enhance_grammar(g, table);
-    if(map == NULL) // this should normally not happen
+    HArena *arena = table->arena;
+
+    HLREnhGrammar *eg = enhance_grammar(g, table);
+    if(eg == NULL) {    // this should normally not happen
+      h_cfgrammar_free(g);
+      h_lrtable_free(table);
       return -1;
+    }
+
+    // go through the inadequate states; replace inadeq with a new list
+    HSlist *inadeq = table->inadeq;
+    table->inadeq = h_slist_new(arena);
+    
+    for(HSlistNode *x=inadeq->head; x; x=x->next) {
+      size_t state = (uintptr_t)x->elem;
+      
+      // clear old forall entry, it's being replaced by more fine-grained ones
+      table->forall[state] = NULL;
 
-    // XXX resolve conflicts
-    // iterate over dfa's transitions where 'from' state is inadequate
-    //   look up enhanced symbol corr. to the transition
-    //   for each terminal in follow set of enh. symbol:
-    //     put reduce action into table cell (state, terminal)
-    //     conflict if already occupied
+      // go through each reducible item of state
+      H_FOREACH_KEY(dfa->states[state], HLRItem *item)
+        if(item->mark < item->len)
+          continue;
+
+        // action to place in the table cells indicated by lookahead
+        HLRAction *action = reduce_action(arena, item);
+
+        // find all LR(0)-enhanced productions matching item
+        H_FOREACH(eg->smap, HCFChoice *lhs, HLRTransition *t)
+          if(t->symbol != item->lhs)
+            continue;
+          for(HCFSequence **p=lhs->seq; *p; p++) {
+            HCFChoice **rhs = (*p)->items;
+            if(!match_production(eg, rhs, item->rhs, state))
+              continue;
+
+            // the left-hand symbol's follow set is this production's
+            // contribution to the lookahead
+            const HStringMap *fs = h_follow(1, eg->grammar, lhs);
+            assert(fs != NULL);
+
+            // for each lookahead symbol, put action into table cell
+            if(fs->end_branch) {
+              HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
+              terminal->type = HCF_END;
+              h_lrtable_put(table, state, terminal, action);
+            }
+            H_FOREACH(fs->char_branches, void *key, HStringMap *m)
+              if(!m->epsilon_branch)
+                continue;
+
+              HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice));
+              terminal->type = HCF_CHAR; 
+              terminal->chr = key_char((HCharKey)key);
+
+              h_lrtable_put(table, state, terminal, action);
+            H_END_FOREACH  // lookahead character
+        } H_END_FOREACH // enhanced production
+      H_END_FOREACH  // reducible item
+    }
   }
 
   h_cfgrammar_free(g);
@@ -924,7 +1018,7 @@ int test_lalr(void)
     fprintf(stderr, "h_lalr_dfa failed\n");
 
   printf("\n==== L R ( 0 )  T A B L E ====\n");
-  HLRTable *table0 = h_lr0_table(g);
+  HLRTable *table0 = h_lr0_table(g, dfa);
   if(table0)
     h_pprint_lrtable(stdout, g, table0, 0);
   else
diff --git a/src/cfgrammar.c b/src/cfgrammar.c
index bc7b3582..a874236d 100644
--- a/src/cfgrammar.c
+++ b/src/cfgrammar.c
@@ -51,6 +51,11 @@ HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
   if(desugared == NULL)
     return NULL;  // -> backend not suitable for this parser
 
+  return h_cfgrammar_(mm__, desugared);
+}
+
+HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
+{
   HCFGrammar *g = h_cfgrammar_new(mm__);
 
   // recursively traverse the desugared form and collect all HCFChoices that
diff --git a/src/cfgrammar.h b/src/cfgrammar.h
index 62b3320c..c70c68a3 100644
--- a/src/cfgrammar.h
+++ b/src/cfgrammar.h
@@ -59,6 +59,7 @@ static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_
  * A NULL return means we are unable to represent the parser as a CFG.
  */
 HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
+HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *start);
 
 HCFGrammar *h_cfgrammar_new(HAllocator *mm__);
 
-- 
GitLab