From 5a57adabab0744474d4cd46f2cfe88d5c729ffc4 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sat, 15 Jun 2013 20:30:57 +0200
Subject: [PATCH] some refactoring trying to get h_many to work

---
 src/backends/lalr.c | 46 ++++++++++++++++++++++-----------------------
 src/cfgrammar.c     |  6 ++++++
 src/cfgrammar.h     |  1 +
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/backends/lalr.c b/src/backends/lalr.c
index 54f6dc8e..0dd1d070 100644
--- a/src/backends/lalr.c
+++ b/src/backends/lalr.c
@@ -207,16 +207,14 @@ static HLRItem *advance_mark(HArena *arena, const HLRItem *item)
   return ret;
 }
 
-static HHashSet *closure(HCFGrammar *g, const HHashSet *items)
+static void expand_to_closure(HCFGrammar *g, HHashSet *items)
 {
   HAllocator *mm__ = g->mm__;
   HArena *arena = g->arena;
-  HHashSet *ret = h_lrstate_new(arena);
   HSlist *work = h_slist_new(arena);
 
   // initialize work list with items
   H_FOREACH_KEY(items, HLRItem *item)
-      h_hashset_put(ret, item);
       h_slist_push(work, (void *)item);
   H_END_FOREACH
 
@@ -231,22 +229,22 @@ static HHashSet *closure(HCFGrammar *g, const HHashSet *items)
       if(sym->type == HCF_CHOICE) {
         for(HCFSequence **p=sym->seq; *p; p++) {
           HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0);
-          if(!h_hashset_present(ret, it)) {
-            h_hashset_put(ret, it);
+          if(!h_hashset_present(items, it)) {
+            h_hashset_put(items, it);
             h_slist_push(work, it);
           }
         }
       } else {  // HCF_CHARSET
         for(unsigned int i=0; i<256; i++) {
           if(charset_isset(sym->charset, i)) {
-            // XXX allocatethese single-character symbols statically somewhere
+            // XXX allocate these single-character symbols statically somewhere
             HCFChoice **rhs = h_new(HCFChoice *, 2);
             rhs[0] = h_new(HCFChoice, 1);
             rhs[0]->type = HCF_CHAR;
             rhs[0]->chr = i;
             rhs[1] = NULL;
             HLRItem *it = h_lritem_new(arena, sym, rhs, 0);
-            h_hashset_put(ret, it);
+            h_hashset_put(items, it);
             // single-character item needs no further work
           }
         }
@@ -254,17 +252,8 @@ static HHashSet *closure(HCFGrammar *g, const HHashSet *items)
         // this seems as good a place as any to set it
         sym->reshape = h_act_first;
       }
-
-      // if sym derives epsilon, also advance over it
-      if(h_derives_epsilon(g, sym)) {
-        HLRItem *it = advance_mark(arena, item);
-        h_hashset_put(ret, it);
-        h_slist_push(work, it);
-      }
     }
   }
-
-  return ret;
 }
 
 HLRDFA *h_lr0_dfa(HCFGrammar *g)
@@ -287,15 +276,16 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g)
   assert(g->start->type == HCF_CHOICE);
   for(HCFSequence **p=g->start->seq; *p; p++)
     h_hashset_put(start, h_lritem_new(arena, g->start, (*p)->items, 0));
+  expand_to_closure(g, start);
   h_hashtable_put(states, start, 0);
   h_slist_push(work, start);
   h_slist_push(work, 0);
   
   // while work to do (on some state)
-  //   compute closure
   //   determine edge symbols
   //   for each edge symbol:
   //     advance respective items -> destination state (kernel)
+  //     compute closure
   //     if destination is a new state:
   //       add it to state set
   //       add transition to it
@@ -308,8 +298,8 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g)
     // maps edge symbols to neighbor states (item sets) of s
     HHashTable *neighbors = h_hashtable_new(arena, eq_symbol, hash_symbol);
 
-    // iterate over closure and generate neighboring sets
-    H_FOREACH_KEY(closure(g, state), HLRItem *item)
+    // iterate over state (closure) and generate neighboring sets
+    H_FOREACH_KEY(state, HLRItem *item)
       HCFChoice *sym = item->rhs[item->mark]; // symbol after mark
 
       if(sym != NULL) { // mark was not at the end
@@ -325,8 +315,10 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g)
       }
     H_END_FOREACH
 
-    // merge neighbor sets into the set of existing states
+    // merge expanded neighbor sets into the set of existing states
     H_FOREACH(neighbors, HCFChoice *symbol, HLRState *neighbor)
+      expand_to_closure(g, neighbor);
+
       // look up existing state, allocate new if not found
       size_t neighbor_idx;
       if(!h_hashset_present(states, neighbor)) {
@@ -528,8 +520,9 @@ static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa,
 
   HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar));
   eg->tmap = h_hashtable_new(arena, eq_transition, hash_transition);
-  eg->smap = h_hashtable_new(arena, eq_symbol, hash_symbol);
+  eg->smap = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
   eg->corr = h_hashtable_new(arena, eq_symbol, hash_symbol);
+  // XXX must use h_eq/hash_ptr for symbols! so enhanced CHARs are different
   eg->arena = arena;
 
   // establish mapping between transitions and symbols
@@ -663,6 +656,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
             // contribution to the lookahead
             const HStringMap *fs = h_follow(1, eg->grammar, lhs);
             assert(fs != NULL);
+            assert(fs->epsilon_branch == NULL);
+            assert(!h_stringmap_empty(fs));
 
             // for each lookahead symbol, put action into table cell
             if(fs->end_branch) {
@@ -734,6 +729,7 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
   while(1) {
     // make sure there is input on the right stack
     if(h_slist_empty(right)) {
+      // XXX use statically-allocated terminal symbols
       HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice));
       HParsedToken *v;
 
@@ -841,7 +837,7 @@ void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item)
   HCFChoice **x = item->rhs;
   HCFChoice **mark = item->rhs + item->mark;
   if(*x == NULL) {
-    fputs("\"\"", f);
+    fputc('.', f);
   } else {
     while(*x) {
       if(x == mark)
@@ -986,12 +982,14 @@ int test_lalr(void)
         | 'n'               -- also try [0-9] for the charset paths
   */
 
+#if 0
   HParser *n = h_ch('n');
   HParser *E = h_indirect();
   HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
   HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
   h_bind_indirect(E, E_);
-  HParser *p = h_sequence(E, NULL);
+#endif
+  HParser *p = h_choice(h_many(h_ch('x')), h_ch('n'), NULL); //h_sequence(E, NULL);
 
   printf("\n==== G R A M M A R ====\n");
   HCFGrammar *g = h_cfgrammar(&system_allocator, p);
@@ -1024,7 +1022,7 @@ int test_lalr(void)
   h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);
 
   printf("\n==== P A R S E  R E S U L T ====\n");
-  HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
+  HParseResult *res = h_parse(p, (uint8_t *)"xxn-(n-((n)))-n", 13);
   if(res)
     h_pprint(stdout, res->ast, 0, 2);
   else
diff --git a/src/cfgrammar.c b/src/cfgrammar.c
index a874236d..199ef5f1 100644
--- a/src/cfgrammar.c
+++ b/src/cfgrammar.c
@@ -331,6 +331,12 @@ bool h_stringmap_present_epsilon(const HStringMap *m)
   return (m->epsilon_branch != NULL);
 }
 
+bool h_stringmap_empty(const HStringMap *m)
+{
+  return (m->epsilon_branch == NULL
+          && m->end_branch == NULL
+          && h_hashtable_empty(m->char_branches));
+}
 
 const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
 {
diff --git a/src/cfgrammar.h b/src/cfgrammar.h
index c70c68a3..57f6f68b 100644
--- a/src/cfgrammar.h
+++ b/src/cfgrammar.h
@@ -49,6 +49,7 @@ void h_stringmap_replace(HStringMap *m, void *old, void *new);
 void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end);
 bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
 bool h_stringmap_present_epsilon(const HStringMap *m);
+bool h_stringmap_empty(const HStringMap *m);
 
 static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
  { return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
-- 
GitLab