From a72aff9b3924ba37f4cfee4ac2cebc913f8f30e8 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson <michiexile@gmail.com> Date: Wed, 8 Apr 2015 17:32:12 +0200 Subject: [PATCH] refactored out the grammar handling. handles epsilons better --- examples/SConscript | 5 +- examples/grammar.c | 148 ++++++++++++++++++++++++++++++++++++++++++++ examples/grammar.h | 46 ++++++++++++++ examples/ties.c | 141 +---------------------------------------- 4 files changed, 197 insertions(+), 143 deletions(-) create mode 100644 examples/grammar.c create mode 100644 examples/grammar.h diff --git a/examples/SConscript b/examples/SConscript index 456545c2..06947216 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -7,6 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') -singular = example.Program('explore_singular', 'explore_singular.c') -ties = example.Program('ties', 'ties.c') -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular, ties]) \ No newline at end of file +ties = example.Program('ties', ['ties.c', 'grammar.c']) +env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties]) \ No newline at end of file diff --git a/examples/grammar.c b/examples/grammar.c new file mode 100644 index 00000000..f722edf0 --- /dev/null +++ b/examples/grammar.c @@ -0,0 +1,148 @@ +// Generates a system of equations for generating functions from a grammar. +// +// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org> +// + +// If a desugared parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. 
+// +// Use the remaining string to set the preferred name of that parser in the +// generating function. +// + +#include <inttypes.h> +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include "grammar.h" +#include <stdio.h> + +const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { + if(nt->user_data != NULL) { + if(*(char*)(nt->user_data) != '0') { + // user_data is a non-empty string + return nt->user_data; + } else { + return nt->user_data+1; + } + } + + static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits + + // find nt's number in g + size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); + + // NB the start symbol (number 0) is always "A". + int i; + for(i=14; i>=0 && (n>0 || i==14); i--) { + buf[i] = 'A' + n%26; + n = n/26; // shift one digit + } + + return buf+i+1; +} + + + +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq) { + // tally up numbers of choices, and lengths of emitted strings. + // Immediately emit any nonterminals encountered. 
+ HCFChoice** x = seq->items; + + fprintf(file, "1"); + if (*x == NULL) { + // empty sequence + // GF is 1 + return; + } else { + char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0; + HCharset cs; + unsigned int i, cscount=0; + for(; *x; x++) { + switch((*x)->type) { + case HCF_CHAR: + if(!(has_user_data && *(char*)(*x)->user_data == '0')) { + (*length)++; + } + break; + case HCF_END: + break; + case HCF_CHARSET: + cs = (*x)->charset; + for(i=0; i<256; i++) { + if (charset_isset(cs, i)) { + cscount++; + } + } + *count *= cscount; + break; + default: // HCF_CHOICE, non-terminal symbol + fprintf(file, "*%s(t)", nonterminal_name(g, *x)); + break; + } + } + } +} + +// For each nt in g->nts +// For each choice in nt->key->seq +// For all elements in sequence +// Accumulate counts +// Accumulate string lengths +// Emit count*t^length +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { + if (g->nts->used < 1) { + return; + } + + // determine maximum string length of symbol names + int len; + size_t s; + for(len=1, s=26; s < g->nts->used; len++, s*=26); + + // iterate over g->nts + size_t i; + HHashTableEntry *hte; + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + + const HCFChoice *nt = hte->key; + fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); + + + for(HCFSequence **seq = nt->seq; *seq; seq++) { + if (seq != nt->seq) { + fprintf(file, " + "); + } + uint32_t count=1, length=0; + readsequence(file, &count, &length, g, *seq); + if(count == 1) { + if(length == 1) { + fprintf(file, "*t"); + } + if(length > 1) { + fprintf(file, "*t^%d", length); + } + } else if(count > 1) { + if(length == 0) { + fprintf(file, "*%d", count); + } + if(length == 1) { + fprintf(file, "*%d*t", count); + } + if (length > 1) { + fprintf(file, "*%d*t^%d", count, length); + } + } + } + + fprintf(file, "\n"); + } + } +} diff --git a/examples/grammar.h b/examples/grammar.h new 
file mode 100644 index 00000000..b42eced4 --- /dev/null +++ b/examples/grammar.h @@ -0,0 +1,46 @@ +// Generates a system of equations for generating functions from a grammar. +// +// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org> +// + +// Currently does absolutely no elegance, no caching of information, but rather +// just prints the generating functions to a provided FILE*. +// + + +// If a desugared parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. +// +// Use the remaining string to set the preferred name of that parser in the +// generating function. +// + +#ifndef HAMMER_GRAMMAR__H +#define HAMMER_GRAMMAR__H + +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" + + +// Filched from cfgrammar.c, this function extracts the name from user_data if it +// is set; otherwise assigns a name automatically from its position in some +// ordering of non-terminals. +const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt); + +// This function prints out the monomial generated by a single HCFSequence. +// It returns the resulting exponent for t in length and the number of alternatives +// accumulated in count. The monomial is (mostly) printed out to the provided FILE*, +// the caller is responsible for adding a scalar and a power of t to the printout. +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq); + +// This function walks through a grammar and generates an equation for each +// production rule. The results are printed out to the provided FILE*. 
+void h_pprint_gfeqns(FILE *file, const HCFGrammar *g); + + + +#endif diff --git a/examples/ties.c b/examples/ties.c index a379b5c7..09f6b707 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -1,6 +1,3 @@ -// -// Created by Mikael Vejdemo Johansson on 4/7/15. -// // Intention: read in a parser, generate the system of equations for its // generating functions // @@ -8,148 +5,12 @@ #include <inttypes.h> #include "../src/backends/contextfree.h" #include "../src/backends/lr.h" +#include "grammar.h" #include <stdio.h> HAllocator *mm__; -// If a parser has user_data set, the generating function systems will try -// to interpret it as a string: -// -// If this string for an h_ch starts with the character 0, then that character -// will have weight 0 in the generating function. -// -// Use the remaining string to set the preferred name of that parser in the -// generating function. - - - -static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { - if(nt->user_data != NULL) { - if(*(char*)(nt->user_data) != '0') { - // user_data is a non-empty string - return nt->user_data; - } else { - return nt->user_data+1; - } - } - - static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits - - // find nt's number in g - size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); - - // NB the start symbol (number 0) is always "A". - int i; - for(i=14; i>=0 && (n>0 || i==14); i--) { - buf[i] = 'A' + n%26; - n = n/26; // shift one digit - } - - return buf+i+1; -} - - - -void readsequence(FILE *file, uint32_t *count, uint32_t *length, - const HCFGrammar *g, const HCFSequence *seq) { - // tally up numbers of choices, and lengths of emitted strings. - // Immediately emit any nonterminals encountered. 
- HCFChoice** x = seq->items; - - if (*x == NULL) { - return; - } else { - char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0; - fprintf(file, "1"); - HCharset cs; - unsigned int i, cscount=0; - for(; *x; x++) { - switch((*x)->type) { - case HCF_CHAR: - if(!(has_user_data && *(char*)(*x)->user_data == '0')) { - (*length)++; - } - break; - case HCF_END: - break; - case HCF_CHARSET: - cs = (*x)->charset; - for(i=0; i<256; i++) { - if (charset_isset(cs, i)) { - cscount++; - } - } - *count *= cscount; - break; - default: // HCF_CHOICE, non-terminal symbol - fprintf(file, "*%s(t)", nonterminal_name(g, *x)); - break; - } - } - } -} - -// For each nt in g->nts -// For each choice in nt->key->seq -// For all elements in sequence -// Accumulate counts -// Accumulate string lengths -// Emit count*t^length -void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { - if (g->nts->used < 1) { - return; - } - - // determine maximum string length of symbol names - int len; - size_t s; - for(len=1, s=26; s < g->nts->used; len++, s*=26); - - // iterate over g->nts - size_t i; - HHashTableEntry *hte; - for(i=0; i < g->nts->capacity; i++) { - for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if (hte->key == NULL) { - continue; - } - - const HCFChoice *nt = hte->key; - fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); - - - for(HCFSequence **seq = nt->seq; *seq; seq++) { - if (seq != nt->seq) { - fprintf(file, " + "); - } - uint32_t count=1, length=0; - readsequence(file, &count, &length, g, *seq); - if(count == 1) { - if(length == 1) { - fprintf(file, "*t"); - } - if(length > 1) { - fprintf(file, "*t^%d", length); - } - } else if(count > 1) { - if(length == 0) { - fprintf(file, "*%d", count); - } - if(length == 1) { - fprintf(file, "*%d*t", count); - } - if (length > 1) { - fprintf(file, "*%d*t^%d", count, length); - } - } - } - - fprintf(file, "\n"); - } - } -} - HParser* cfExample() { HParser *n = h_ch('n'); HParser *E = h_indirect(); -- GitLab