From 107d8c092b9b38417f48af1e66f3df72a5e2fc8b Mon Sep 17 00:00:00 2001
From: Mikael Vejdemo-Johansson <michiexile@gmail.com>
Date: Tue, 7 Apr 2015 22:13:18 +0200
Subject: [PATCH] Generating function skeleton code

---
 examples/SConscript         |   3 +-
 examples/explore_singular.c | 244 ++++++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 examples/explore_singular.c

diff --git a/examples/SConscript b/examples/SConscript
index 0932bdac..07c6e515 100644
--- a/examples/SConscript
+++ b/examples/SConscript
@@ -7,4 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
 base64 = example.Program('base64', 'base64.c')
 base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
 base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
-env.Alias("examples", [dns, base64, base64_sem1, base64_sem2])
\ No newline at end of file
+singular = example.Program('explore_singular', 'explore_singular.c')
+env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular])
\ No newline at end of file
diff --git a/examples/explore_singular.c b/examples/explore_singular.c
new file mode 100644
index 00000000..3c751451
--- /dev/null
+++ b/examples/explore_singular.c
@@ -0,0 +1,244 @@
+//
+// Created by Mikael Vejdemo Johansson on 4/7/15.
+//
+// Intention: read in a parser, generate the system of equations for its
+// generating functions
+//
+
+#include <inttypes.h>
+#include "../src/backends/contextfree.h"
+#include "../src/backends/lr.h"
+#include <stdio.h>
+
+// Print a generating-function expression for one production right-hand side.
+//   file: stream to write to
+//   g:    grammar; g->nts supplies the number of each nonterminal
+//   seq:  production whose NULL-terminated items array is rendered
+// An empty sequence prints "1\n"; otherwise the terms are joined by " + ".
+void h_pprint_gfexpr(FILE *file, const HCFGrammar *g, HCFSequence *seq) {
+  HCFChoice **x = seq->items;
+  if (*x == NULL) { // empty sequence
+    fprintf(file, "1\n");
+    return;
+  }
+  while (*x) {
+    if (x != seq->items) {
+      fprintf(file, " + ");
+    }
+    if ((*x)->type == HCF_CHAR) {
+      // a run of literal characters: count its length, output t^length
+      uint32_t count = 0;
+      for (; *x && (*x)->type == HCF_CHAR; x++) {
+        count++;
+      }
+      fprintf(file, "t^%" PRIu32, count);
+      continue; // x already points past the run
+    }
+    switch ((*x)->type) {
+    case HCF_CHAR: // handled above
+    case HCF_END:  // does not generate any output symbols
+      break;
+    case HCF_CHARSET: {
+      uint32_t count = 0;
+      for (unsigned int i = 0; i < 256; i++) {
+        if (charset_isset((*x)->charset, i)) {
+          count++;
+        }
+      }
+      fprintf(file, "%" PRIu32 "*t", count);
+      break;
+    }
+    default: {
+      // nonterminal: g->nts is keyed by the HCFChoice pointer itself, so the
+      // lookup must use *x; the address of the slot (x) is never a key and
+      // would always yield number 0
+      uint8_t n = (uint8_t)(uintptr_t)h_hashtable_get(g->nts, *x);
+      fprintf(file, "%c(t)", 'A' + n);
+      break;
+    }
+    }
+    x++;
+  }
+}
+
+      
+// Earlier draft of the equation printer; nothing in this file calls it.
+// For every nonterminal stored in g->nts it writes "<letter>(t) = ", then each
+// alternative as "\t<expr>\n" using h_pprint_gfexpr. The letter is 'A' plus
+// the number g->nts holds for the nonterminal, truncated to 8 bits.
+//   file: stream to write to
+//   g:    grammar to print; nothing is written when g->nts is empty
+void h_pprint_gfeqns_NOTUSED(FILE *file, const HCFGrammar *g) {
+  if (g->nts->used < 1) {
+    return;
+  }
+
+  // iterate over g->nts: every slot of the table, then each entry's chain
+  for (size_t i = 0; i < g->nts->capacity; i++) {
+    for (HHashTableEntry *hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL) {
+        continue;
+      }
+      const HCFChoice *lhs = hte->key;        // production's left-hand symbol
+      assert(lhs->type == HCF_CHOICE);
+
+      uint8_t n = (uint8_t)(uintptr_t)h_hashtable_get(g->nts, lhs);
+      fprintf(file, "%c(t) = ", 'A' + n);
+
+      HCFSequence **p = lhs->seq;
+      if (*p == NULL) {
+        return;          // shouldn't happen
+      }
+
+      // every alternative is printed exactly once, by this loop alone
+      // (tab-indented and newline-terminated)
+      for (; *p; p++) {
+        fprintf(file, "\t");
+        h_pprint_gfexpr(file, g, *p);
+        fprintf(file, "\n");
+      }
+    }
+  }
+}
+
+
+// Returns the base-26 letter name ("A".."Z", then "BA", ...) of the number
+// that g->nts stores for nt. The result points into a static buffer that the
+// next call overwrites.
+static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt)
+{
+  static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits
+  size_t num = (uintptr_t)h_hashtable_get(g->nts, nt);
+  int pos = 14;
+
+  // fill digits right to left; always emit at least one (number 0 is "A")
+  do {
+    buf[pos--] = 'A' + num%26;
+    num /= 26;
+  } while (pos >= 0 && num > 0);
+  return buf+pos+1;
+}
+
+
+
+// Emit the nonterminal factors of one production and tally its terminals.
+//   file:   stream that receives "1" followed by "*<name>(t)" per nonterminal
+//   count:  in/out, multiplied by the size of every charset in the sequence
+//   length: in/out, incremented for every terminal position (char or charset)
+//   g, seq: grammar used for naming, and the NULL-terminated production
+// The caller is expected to append the count*t^length factor afterwards.
+void readsequence(FILE *file, uint32_t *count, uint32_t *length,
+                  const HCFGrammar *g, const HCFSequence *seq) {
+  // "1" is the neutral factor; for the empty sequence it is the whole term
+  fprintf(file, "1");
+  for (HCFChoice **x = seq->items; *x; x++) {
+    switch ((*x)->type) {
+    case HCF_CHAR:
+      (*length)++;
+      break;
+    case HCF_END:
+      break;
+    case HCF_CHARSET: {
+      // tally per charset: the member count must start from 0 each time
+      uint32_t cscount = 0;
+      for (unsigned int i = 0; i < 256; i++) {
+        if (charset_isset((*x)->charset, i)) {
+          cscount++;
+        }
+      }
+      *count *= cscount;
+      (*length)++; // a charset matches exactly one input character
+      break;
+    }
+    default: // HCF_CHOICE, non-terminal symbol
+      fprintf(file, "*%s(t)", nonterminal_name(g, *x));
+      break;
+    }
+  }
+}
+
+// Print the system of generating-function equations of g to file.
+//   file: stream to write to
+//   g:    grammar whose nonterminals (g->nts) are printed
+// Nothing is printed when the grammar has no nonterminals.
+//
+// For each nt in g->nts
+//     Emit "<name>(t) = "
+//     For each choice in nt->seq, joined by " + "
+//          readsequence() emits "1" and every nonterminal factor, and
+//              accumulates the charset count and the string length
+//          Emit "*count" when count > 1
+//          Emit "*t" or "*t^length" when length > 0
+//     Emit a newline
+void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
+  if (g->nts->used < 1) {
+    return;
+  }
+
+  // iterate over g->nts: every slot of the table, then each entry's chain
+  for (size_t i = 0; i < g->nts->capacity; i++) {
+    for (HHashTableEntry *hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL) {
+        continue;
+      }
+
+      // one equation per stored nonterminal
+      const HCFChoice *nt = hte->key;
+      fprintf(file, "%s(t) = ", nonterminal_name(g, nt));
+
+      for (HCFSequence **seq = nt->seq; *seq; seq++) {
+        if (seq != nt->seq) {
+          fprintf(file, " + ");
+        }
+
+        // count multiplies up the charset sizes, length adds up the
+        // terminal positions of this alternative
+        uint32_t count = 1, length = 0;
+        readsequence(file, &count, &length, g, *seq);
+
+        // a zero count gets no further factor
+        if (count == 0) {
+          continue;
+        }
+
+        // the values are uint32_t, so print them with PRIu32 rather than %d
+        if (count > 1) {
+          fprintf(file, "*%" PRIu32, count);
+        }
+        if (length == 1) {
+          fprintf(file, "*t");
+        } else if (length > 1) {
+          fprintf(file, "*t^%" PRIu32, length);
+        }
+      }
+
+      fprintf(file, "\n");
+    }
+  }
+}
+
+
+
+
+// Builds the expression grammar  E -> E '-' T | T,  T -> '(' E ')' | 'n'
+// and prints its generating-function equations, then the grammar itself.
+int main(int argc, char **argv)
+{
+  HAllocator *mm__ = &system_allocator;
+  HParser *num = h_ch('n');
+  HParser *expr = h_indirect(); // bound below, after term refers to it
+  HParser *parens = h_sequence(h_ch('('), expr, h_ch(')'), NULL);
+  HParser *term = h_choice(parens, num, NULL);
+  HParser *body = h_choice(h_sequence(expr, h_ch('-'), term, NULL), term, NULL);
+  h_bind_indirect(expr, body);
+
+  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, expr));
+  if (!g) {
+    fprintf(stderr, "h_cfgrammar failed\n");
+    return 1;
+  }
+  printf("\n==== Generating functions ====\n");
+  h_pprint_gfeqns(stdout, g);
+  printf("\n==== Grammar ====\n");
+  h_pprint_grammar(stdout, g, 0);
+}
-- 
GitLab