From a72aff9b3924ba37f4cfee4ac2cebc913f8f30e8 Mon Sep 17 00:00:00 2001
From: Mikael Vejdemo-Johansson <michiexile@gmail.com>
Date: Wed, 8 Apr 2015 17:32:12 +0200
Subject: [PATCH] refactored out the grammar handling. handles epsilons better

---
 examples/SConscript |   5 +-
 examples/grammar.c  | 148 ++++++++++++++++++++++++++++++++++++++++++++
 examples/grammar.h  |  46 ++++++++++++++
 examples/ties.c     | 141 +----------------------------------------
 4 files changed, 197 insertions(+), 143 deletions(-)
 create mode 100644 examples/grammar.c
 create mode 100644 examples/grammar.h

diff --git a/examples/SConscript b/examples/SConscript
index 456545c2..06947216 100644
--- a/examples/SConscript
+++ b/examples/SConscript
@@ -7,6 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
 base64 = example.Program('base64', 'base64.c')
 base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
 base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
-singular = example.Program('explore_singular', 'explore_singular.c')
-ties = example.Program('ties', 'ties.c')
-env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular, ties])
\ No newline at end of file
+ties = example.Program('ties', ['ties.c', 'grammar.c'])
+env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
\ No newline at end of file
diff --git a/examples/grammar.c b/examples/grammar.c
new file mode 100644
index 00000000..f722edf0
--- /dev/null
+++ b/examples/grammar.c
@@ -0,0 +1,148 @@
+// Generates a system of equations for generating functions from a grammar.
+//
+// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
+//
+
+// If a desugared parser has user_data set, the generating function systems will try
+// to interpret it as a string:
+//
+// If this string for an h_ch starts with the character 0, then that character
+// will have weight 0 in the generating function.
+//
+// Use the remaining string to set the preferred name of that parser in the
+// generating function.
+//
+
+#include <inttypes.h>
+#include "../src/backends/contextfree.h"
+#include "../src/backends/lr.h"
+#include "grammar.h"
+#include <stdio.h>
+
+// Returns the name of nt: its user_data string (minus a leading '0' weight
+// marker) when that is set and non-empty, otherwise a base-26 name made from
+// nt's number in g->nts. Generated names share one static buffer.
+const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
+  const char *name = nt->user_data;
+  if(name != NULL) {
+    if(*name == '0') {
+      name++;   // step over the marker as char*; void* arithmetic is a GNU extension
+    }
+    if(*name != '\0') {
+      return name;   // an empty name would print as "(t)", so fall through instead
+    }
+  }
+
+  static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits
+  size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);  // nt's number in g
+  // NB the start symbol (number 0) is always "A".
+  int i;
+  for(i=14; i>=0 && (n>0 || i==14); i--) {
+    buf[i] = 'A' + n%26;
+    n = n/26;   // shift one digit
+  }
+  return buf+i+1;
+}
+
+
+
+// Walks the items of seq, printing the non-terminal factors of its monomial to
+// file and tallying the terminal ones for the caller:
+//   - always prints a leading "1" (so an empty sequence has GF 1),
+//   - prints "*Name(t)" for every non-terminal item,
+//   - adds 1 to *length per HCF_CHAR, unless its user_data starts with '0',
+//   - multiplies *count by the number of members of each HCF_CHARSET.
+// Neither *count nor *length is reset here; the caller supplies start values.
+void readsequence(FILE *file, uint32_t *count, uint32_t *length,
+                  const HCFGrammar *g, const HCFSequence *seq) {
+  fprintf(file, "1");
+  for(HCFChoice **x = seq->items; *x; x++) {
+    const char *tag;
+    HCharset cs;
+    unsigned int i, cscount;
+    switch((*x)->type) {
+    case HCF_CHAR:
+      // check this item's own user_data (may be NULL), not the first item's
+      tag = (*x)->user_data;
+      if(tag == NULL || *tag != '0') {
+        (*length)++;
+      }
+      break;
+    case HCF_END:
+      break;
+    case HCF_CHARSET:
+      cs = (*x)->charset;
+      // restart the tally for each charset so earlier ones are not re-counted
+      for(i=0, cscount=0; i<256; i++) {
+        if (charset_isset(cs, i)) {
+          cscount++;
+        }
+      }
+      *count *= cscount;
+      break;
+    default: // HCF_CHOICE, non-terminal symbol
+      fprintf(file, "*%s(t)", nonterminal_name(g, *x));
+      break;
+    }
+  }
+}
+
+// Prints the system of generating-function equations of g to file, one line
+// per non-terminal: "Name(t) = term + term + ...".
+// file receives the output; g->nts is the table of non-terminals that is walked.
+//
+// For each nt in g->nts
+//     For each choice in nt->seq
+//          For all elements in sequence
+//              Accumulate counts
+//              Accumulate string lengths
+//              Emit count*t^length
+//
+// readsequence prints the leading "1" and the non-terminal factors of each
+// term; the scalar count and the power of t are appended here.
+// Prints nothing at all when g has no non-terminals.
+void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
+  if (g->nts->used < 1) {
+    return;
+  }
+
+  // iterate over g->nts, following the chain that starts in every bucket
+  size_t i;
+  HHashTableEntry *hte;
+  for(i=0; i < g->nts->capacity; i++) {
+    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL) {
+        continue;   // entry holds no non-terminal
+      }
+
+      // left-hand side of the equation
+      const HCFChoice *nt = hte->key;
+      fprintf(file, "%s(t) = ", nonterminal_name(g, nt));
+
+      for(HCFSequence **seq = nt->seq; *seq; seq++) {
+        if (seq != nt->seq) {
+          fprintf(file, " + ");
+        }
+
+        uint32_t count=1, length=0;
+        readsequence(file, &count, &length, g, *seq);
+
+        // Scalar factor. 1 is left implicit; 0 (a charset with no members)
+        // is printed so that the term visibly vanishes.
+        if(count != 1) {
+          fprintf(file, "*%" PRIu32, count);
+        }
+
+        // Power of t. t^0 is left implicit and t^1 is written as plain t.
+        // PRIu32 is the matching conversion for uint32_t; %d is not.
+        if(length == 1) {
+          fprintf(file, "*t");
+        } else if(length > 1) {
+          fprintf(file, "*t^%" PRIu32, length);
+        }
+      }
+
+      fprintf(file, "\n");
+    }
+  }
+}
diff --git a/examples/grammar.h b/examples/grammar.h
new file mode 100644
index 00000000..b42eced4
--- /dev/null
+++ b/examples/grammar.h
@@ -0,0 +1,46 @@
+// Generates a system of equations for generating functions from a grammar.
+//
+// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
+//
+
+// Currently makes no attempt at elegance or at caching information; it simply
+// prints the generating functions to a provided FILE*.
+//
+
+
+// If a desugared parser has user_data set, the generating function systems will try
+// to interpret it as a string:
+//
+// If this string for an h_ch starts with the character 0, then that character
+// will have weight 0 in the generating function.
+//
+// Use the remaining string to set the preferred name of that parser in the
+// generating function.
+//
+
+#ifndef HAMMER_GRAMMAR__H
+#define HAMMER_GRAMMAR__H
+
+#include "../src/backends/contextfree.h"
+#include "../src/backends/lr.h"
+
+
+// Filched from cfgrammar.c, this function extracts the name from user_data if it
+// is set; otherwise it assigns a name automatically from its position in some
+// ordering of non-terminals.
+const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt);
+
+// This function prints out the monomial generated by a single HCFSequence.
+// It returns the resulting exponent for t in length and the number of alternatives
+// accumulated in count. The monomial is (mostly) printed out to the provided FILE*;
+// the caller is responsible for adding a scalar and a power of t to the printout.
+void readsequence(FILE *file, uint32_t *count, uint32_t *length,
+		  const HCFGrammar *g, const HCFSequence *seq);
+
+// This function walks through a grammar and generates an equation for each
+// production rule. The results are printed out to the provided FILE*.
+void h_pprint_gfeqns(FILE *file, const HCFGrammar *g);
+
+
+
+#endif
diff --git a/examples/ties.c b/examples/ties.c
index a379b5c7..09f6b707 100644
--- a/examples/ties.c
+++ b/examples/ties.c
@@ -1,6 +1,3 @@
-//
-// Created by Mikael Vejdemo Johansson on 4/7/15.
-//
 // Intention: read in a parser, generate the system of equations for its
 // generating functions
 //
@@ -8,148 +5,12 @@
 #include <inttypes.h>
 #include "../src/backends/contextfree.h"
 #include "../src/backends/lr.h"
+#include "grammar.h"
 #include <stdio.h>
 
 
 HAllocator *mm__;
 
-// If a parser has user_data set, the generating function systems will try
-// to interpret it as a string:
-//
-// If this string for an h_ch starts with the character 0, then that character
-// will have weight 0 in the generating function.
-//
-// Use the remaining string to set the preferred name of that parser in the
-// generating function.
-
-
-
-static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
-  if(nt->user_data != NULL) {
-    if(*(char*)(nt->user_data) != '0') {
-      // user_data is a non-empty string
-      return nt->user_data;
-    } else {
-      return nt->user_data+1;
-    }
-  }
-  
-  static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits
-
-  // find nt's number in g
-  size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);
-
-  // NB the start symbol (number 0) is always "A".
-  int i;
-  for(i=14; i>=0 && (n>0 || i==14); i--) {
-    buf[i] = 'A' + n%26;
-    n = n/26;   // shift one digit
-  }
-
-  return buf+i+1;
-}
-
-
-
-void readsequence(FILE *file, uint32_t *count, uint32_t *length,
-		  const HCFGrammar *g, const HCFSequence *seq) {
-  // tally up numbers of choices, and lengths of emitted strings.
-  // Immediately emit any nonterminals encountered.
-  HCFChoice** x = seq->items;
-  
-  if (*x == NULL) {
-    return;
-  } else {
-    char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0;
-    fprintf(file, "1");
-    HCharset cs;
-    unsigned int i, cscount=0;
-    for(; *x; x++) {
-      switch((*x)->type) {
-      case HCF_CHAR:
-	if(!(has_user_data && *(char*)(*x)->user_data == '0')) {
-	  (*length)++;
-	}
-	break;
-      case HCF_END:
-	break;
-      case HCF_CHARSET:
-	cs = (*x)->charset;
-	for(i=0; i<256; i++) {
-	  if (charset_isset(cs, i)) {
-	    cscount++;
-	  }
-	}
-	*count *= cscount;
-	break;
-      default: // HCF_CHOICE, non-terminal symbol
-	fprintf(file, "*%s(t)", nonterminal_name(g, *x));
-	break;
-      }
-    }
-  }
-}
-
-// For each nt in g->nts
-//     For each choice in nt->key->seq
-//          For all elements in sequence
-//              Accumulate counts 
-//              Accumulate string lengths
-//              Emit count*t^length
-void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
-  if (g->nts->used < 1) {
-    return;
-  }
-
-  // determine maximum string length of symbol names
-  int len;
-  size_t s;
-  for(len=1, s=26; s < g->nts->used; len++, s*=26); 
-
-  // iterate over g->nts
-  size_t i;
-  HHashTableEntry *hte;
-  for(i=0; i < g->nts->capacity; i++) {
-    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
-      if (hte->key == NULL) {
-        continue;
-      }
-
-      const HCFChoice *nt = hte->key;
-      fprintf(file, "%s(t) = ", nonterminal_name(g, nt));
-
-      
-      for(HCFSequence **seq = nt->seq; *seq; seq++) {
-	if (seq != nt->seq) {
-	  fprintf(file, " + ");
-	}
-	uint32_t count=1, length=0;
-	readsequence(file, &count, &length, g, *seq);
-	if(count == 1) {
-	  if(length == 1) {
-	    fprintf(file, "*t");
-	  }
-	  if(length > 1) {
-	    fprintf(file, "*t^%d", length);
-	  }
-	} else if(count > 1) {
-	  if(length == 0) {
-	    fprintf(file, "*%d", count);
-	  }
-	  if(length == 1) {
-	    fprintf(file, "*%d*t", count);
-	  }
-	  if (length > 1) {
-	    fprintf(file, "*%d*t^%d", count, length);
-	  } 
-	}
-      }
-
-      fprintf(file, "\n");
-    }
-  }
-}
-
 HParser* cfExample() {
   HParser *n = h_ch('n');
   HParser *E = h_indirect();
-- 
GitLab