diff --git a/examples/SConscript b/examples/SConscript index 0932bdacbbf51f4f2faaa73484313abd0eab9ad0..069472164a8255595db0c2d8f9e951ba5fdfe6d3 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -7,4 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2]) \ No newline at end of file +ties = example.Program('ties', ['ties.c', 'grammar.c']) +env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties]) \ No newline at end of file diff --git a/examples/grammar.c b/examples/grammar.c new file mode 100644 index 0000000000000000000000000000000000000000..7638fe99558149d9e2d47e5d1dc7f0299da189c1 --- /dev/null +++ b/examples/grammar.c @@ -0,0 +1,179 @@ +// Generates a system of equations for generating functions from a grammar. +// +// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org> +// + +// If a desugared parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. +// +// Use the remaining string to set the preferred name of that parser in the +// generating function. 
+// + +#include <inttypes.h> +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include "grammar.h" +#include <stdio.h> + +const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { + // if user_data exists and is printable: + if(nt->user_data != NULL && *(char*)(nt->user_data) > ' ' && *(char*)(nt->user_data) < 127) { + if(*(char*)(nt->user_data) != '0') { + // user_data is a non-empty string + return nt->user_data; + } else { + return nt->user_data+1; + } + } + + static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits + + // find nt's number in g + size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); + + // NB the start symbol (number 0) is always "A". + int i; + for(i=14; i>=0 && (n>0 || i==14); i--) { + buf[i] = 'A' + n%26; + n = n/26; // shift one digit + } + + return buf+i+1; +} + + + +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq) { + // tally up numbers of choices, and lengths of emitted strings. + // Immediately emit any nonterminals encountered. 
+ HCFChoice** x = seq->items; + + fprintf(file, "1"); + if (*x == NULL) { + // empty sequence + // GF is 1 + return; + } else { + char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0; + HCharset cs; + unsigned int i, cscount=0; + for(; *x; x++) { + switch((*x)->type) { + case HCF_CHAR: + if(!(has_user_data && *(char*)(*x)->user_data == '0')) { + (*length)++; + } + break; + case HCF_END: + break; + case HCF_CHARSET: + cs = (*x)->charset; + for(i=0; i<256; i++) { + if (charset_isset(cs, i)) { + cscount++; + } + } + *count *= cscount; + break; + default: // HCF_CHOICE, non-terminal symbol + fprintf(file, "*%s", nonterminal_name(g, *x)); + break; + } + } + } +} + +// For each nt in g->nts +// For each choice in nt->key->seq +// For all elements in sequence +// Accumulate counts +// Accumulate string lengths +// Emit count*t^length +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { + if (g->nts->used < 1) { + return; + } + + // determine maximum string length of symbol names + int len; + size_t s; + for(len=1, s=26; s < g->nts->used; len++, s*=26); + + // emit the SageMath ring init string + // iterate over g->nts, output symbols + size_t i; + HHashTableEntry *hte; + fprintf(file, "ring.<t"); + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + const HCFChoice *nt = hte->key; + fprintf(file, ","); + + fprintf(file, "%s", nonterminal_name(g, nt)); + } + } + fprintf(file, "> = QQ[]\n"); + + + // iterate over g->nts + // emit a Sage ideal definition + int j=0; + fprintf(file, "ID = ring.ideal("); + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + + if(j>0) { + fprintf(file, ","); + } + j++; + + const HCFChoice *nt = hte->key; + const char *ntn = nonterminal_name(g, nt); + if(*ntn == 0) { + continue; + } + fprintf(file, "%s - (", ntn); + + + for(HCFSequence **seq = 
nt->seq; *seq; seq++) { + if (seq != nt->seq) { + fprintf(file, " + "); + } + uint32_t count=1, length=0; + readsequence(file, &count, &length, g, *seq); + if(count == 1) { + if(length == 1) { + fprintf(file, "*t"); + } + if(length > 1) { + fprintf(file, "*t^%d", length); + } + } else if(count > 1) { + if(length == 0) { + fprintf(file, "*%d", count); + } + if(length == 1) { + fprintf(file, "*%d*t", count); + } + if (length > 1) { + fprintf(file, "*%d*t^%d", count, length); + } + } + } + + fprintf(file, ")"); + } + } + fprintf(file, ")\n"); +} diff --git a/examples/grammar.h b/examples/grammar.h new file mode 100644 index 0000000000000000000000000000000000000000..b42eced49b4b958a08610aee09e4498a3cc4da05 --- /dev/null +++ b/examples/grammar.h @@ -0,0 +1,46 @@ +// Generates a system of equations for generating functions from a grammar. +// +// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org> +// + +// Currently does absolutely no elegance, no caching of information, but rather +// just prints the generating functions to a provided FILE*. +// + + +// If a desugared parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. +// +// Use the remaining string to set the preferred name of that parser in the +// generating function. +// + +#ifndef HAMMER_GRAMMAR__H +#define HAMMER_GRAMMAR__H + +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" + + +// Filched from cfgrammar.c this function extracts the name from user_data if it +// is set; otherwise assigns a name automatically from its position in some +// ordering of non-terminals. 
+const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt); + +// This function prints out the monomial generated by a single HCFSequence +// It returns the resulting exponent for t in length and the number of alternatives +// accumulated in count. The monomial is (mostly) printed out to the provided FILE*, +// the caller is responsible for adding a scalar and a power of t to the printout. +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq); + +// This function walks through a grammar and generates an equation for each +// production rule. The results are printed out to the provided FILE*. +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g); + + + +#endif diff --git a/examples/ties.c b/examples/ties.c new file mode 100644 index 0000000000000000000000000000000000000000..77d0821212e6d5a6145ee9184cfb26b02be3d786 --- /dev/null +++ b/examples/ties.c @@ -0,0 +1,309 @@ +// Intention: read in a parser, generate the system of equations for its +// generating functions +// + +#include <inttypes.h> +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include "grammar.h" +#include <stdio.h> + + +HAllocator *mm__; + +HParser* cfExample() { + HParser *n = h_ch('n'); + HParser *E = h_indirect(); + HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); + HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); + h_bind_indirect(E, E_); + return E; +} + +// The tie knot parsers below would work better if we could patch the gen.function +// code above to allow user specification of non-default byte string "lengths", +// so that U symbols don't contribute with factors of t to the gen. function. +// +// Alternatively: use multivariate generating functions to spit out different +// variables for different terminals. This gets really messy with bigger alphabets. 
+ +HParser* finkmao() { + HParser *L = h_ch('L'); + HParser *R = h_ch('R'); + HParser *C = h_ch('C'); + HParser *U = h_ch('U'); + HParser *Lnext = h_indirect(); + HParser *Rnext = h_indirect(); + HParser *Cnext = h_indirect(); + HParser *L_ = h_choice(h_sequence(R, Rnext, NULL), + h_sequence(C, Cnext, NULL), + h_sequence(R, C, U, NULL), NULL); + HParser *R_ = h_choice(h_sequence(L, Lnext, NULL), + h_sequence(C, Cnext, NULL), + h_sequence(L, C, U, NULL), NULL); + HParser *C_ = h_choice(h_sequence(R, Rnext, NULL), + h_sequence(L, Lnext, NULL), NULL); + h_bind_indirect(Lnext, L_); + h_bind_indirect(Rnext, R_); + h_bind_indirect(Cnext, C_); + HParser *tie = h_sequence(L, Lnext, NULL); + + h_desugar_augmented(mm__, tie); + + L->desugared->user_data = "L"; + R->desugared->user_data = "R"; + C->desugared->user_data = "C"; + Lnext->desugared->user_data = "Ln"; + Rnext->desugared->user_data = "Rn"; + Cnext->desugared->user_data = "Cn"; + tie->desugared->user_data = "tie"; + U->desugared->user_data = "0U"; + + return tie; +} + +HParser* finkmaoTW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), + NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); + HParser *tuck = h_choice(h_sequence(T, T, U, NULL), + h_sequence(W, W, U, NULL), + NULL); + HParser *pairstar = h_indirect(); + HParser *pstar_ = h_choice(h_sequence(pair, pairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(pairstar, pstar_); + + HParser* tie = h_sequence(prefix, pairstar, tuck, NULL); + h_desugar_augmented(mm__, tie); + + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + pstar_->desugared->user_data = "pairstar"; + tie->desugared->user_data = "tie"; + + return 
tie; +} + +HParser* depth1TW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); + HParser *tuck = h_choice(h_sequence(T, T, U, NULL), + h_sequence(W, W, U, NULL), + NULL); + HParser *tuckpairstar = h_indirect(); + HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_sequence(tuck, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(tuckpairstar, tpstar_); + HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + tpstar_->desugared->user_data = "tuckpairstar"; + tie->desugared->user_data = "tie"; + + return tie; +} + +HParser* depth1() { + HParser *L = h_ch('L'); + HParser *R = h_ch('R'); + HParser *C = h_ch('C'); + HParser *U = h_ch('U'); + HParser *lastR = h_indirect(); + HParser *lastL = h_indirect(); + HParser *lastC = h_indirect(); + HParser *R_ = h_choice(h_sequence(L, R, lastR, NULL), + h_sequence(C, R, lastR, NULL), + h_sequence(L, C, lastC, NULL), + h_sequence(L, C, U, lastC, NULL), + h_sequence(L, C, U, NULL), + h_sequence(C, L, lastL, NULL), + h_sequence(C, L, U, lastL, NULL), + h_sequence(C, L, U, NULL), + NULL); + HParser *L_ = h_choice(h_sequence(R, L, lastR, NULL), + h_sequence(C, L, lastR, NULL), + h_sequence(R, C, lastC, NULL), + h_sequence(R, C, U, lastC, NULL), + h_sequence(R, C, U, NULL), + h_sequence(C, R, lastR, NULL), + h_sequence(C, R, U, lastR, NULL), + h_sequence(C, R, U, NULL), + NULL); + HParser *C_ = h_choice(h_sequence(L, C, lastR, NULL), + h_sequence(R, C, lastR, NULL), + h_sequence(L, R, lastR, NULL), + 
h_sequence(L, R, U, lastR, NULL), + h_sequence(L, R, U, NULL), + h_sequence(R, L, lastL, NULL), + h_sequence(R, L, U, lastL, NULL), + h_sequence(R, L, U, NULL), + NULL); + h_bind_indirect(lastR, R_); + h_bind_indirect(lastL, L_); + h_bind_indirect(lastC, C_); + HParser* tie = h_choice(h_sequence(L, lastL, NULL), + h_sequence(R, lastR, NULL), + h_sequence(C, lastC, NULL), + NULL); + + h_desugar_augmented(mm__, tie); + + L->desugared->user_data = "L"; + R->desugared->user_data = "R"; + C->desugared->user_data = "C"; + U->desugared->user_data = "0U"; + lastL ->desugared->user_data = "Ln"; + lastR->desugared->user_data = "Rn"; + lastC->desugared->user_data = "Cn"; + tie->desugared->user_data = "tie"; + + return tie; +} + +HParser* depthNTW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); + HParser *tstart = h_indirect(); + HParser *tw0 = h_indirect(); + HParser *tw1 = h_indirect(); + HParser *tw2 = h_indirect(); + HParser *wstart = h_indirect(); + HParser *wt0 = h_indirect(); + HParser *wt1 = h_indirect(); + HParser *wt2 = h_indirect(); + + HParser *T_ = h_choice(h_sequence(T, T, tw2, U, NULL), + h_sequence(T, W, tw0, U, NULL), + NULL); + HParser *tw0_ = h_choice(h_sequence(T, T, tw2, U, NULL), + h_sequence(T, W, tw0, U, NULL), + h_sequence(W, T, tw0, U, NULL), + h_sequence(W, W, tw1, U, NULL), + h_sequence(tstart, tw2, U, NULL), + h_sequence(wstart, tw1, U, NULL), + NULL); + HParser *tw1_ = h_choice(h_sequence(T, T, tw0, U, NULL), + h_sequence(T, W, tw1, U, NULL), + h_sequence(W, T, tw1, U, NULL), + h_sequence(W, W, tw2, U, NULL), + h_sequence(tstart, tw0, U, NULL), + h_sequence(wstart, tw2, U, NULL), + NULL); + HParser *tw2_ = h_choice(h_sequence(T, T, tw1, U, NULL), + h_sequence(T, W, tw2, U, NULL), + h_sequence(W, T, tw2, U, NULL), + 
h_sequence(W, W, tw0, U, NULL), + h_sequence(tstart, tw1, U, NULL), + h_sequence(wstart, tw0, U, NULL), + h_epsilon_p(), + NULL); + + HParser *W_ = h_choice(h_sequence(W, W, wt2, U, NULL), + h_sequence(W, T, wt0, U, NULL), + NULL); + HParser *wt0_ = h_choice(h_sequence(W, W, wt2, U, NULL), + h_sequence(W, T, wt0, U, NULL), + h_sequence(T, W, wt0, U, NULL), + h_sequence(T, T, wt1, U, NULL), + h_sequence(wstart, wt2, U, NULL), + h_sequence(tstart, wt1, U, NULL), + NULL); + HParser *wt1_ = h_choice(h_sequence(W, W, wt0, U, NULL), + h_sequence(W, T, wt1, U, NULL), + h_sequence(T, W, wt1, U, NULL), + h_sequence(T, T, wt2, U, NULL), + h_sequence(wstart, wt0, U, NULL), + h_sequence(tstart, wt2, U, NULL), + NULL); + HParser *wt2_ = h_choice(h_sequence(W, W, wt1, U, NULL), + h_sequence(W, T, wt2, U, NULL), + h_sequence(T, W, wt2, U, NULL), + h_sequence(T, T, wt0, U, NULL), + h_sequence(wstart, wt1, U, NULL), + h_sequence(tstart, wt0, U, NULL), + h_epsilon_p(), + NULL); + + h_bind_indirect(tstart, T_); + h_bind_indirect(tw0, tw0_); + h_bind_indirect(tw1, tw1_); + h_bind_indirect(tw2, tw2_); + h_bind_indirect(wstart, W_); + h_bind_indirect(wt0, wt0_); + h_bind_indirect(wt1, wt1_); + h_bind_indirect(wt2, wt2_); + HParser *tuck = h_choice(tstart, wstart, NULL); + + HParser *tuckpairstar = h_indirect(); + HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_sequence(tuck, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(tuckpairstar, tpstar_); + + HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + tpstar_->desugared->user_data = "tuckpairstar"; + tie->desugared->user_data = "tie"; + + return tie; +} + + +int main(int argc, char **argv) { + mm__ = &system_allocator; 
+ + HParser *p = finkmao(); + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); + if (g == NULL) { + fprintf(stderr, "h_cfgrammar failed\n"); + return 1; + } + printf("\n==== Generating functions ====\n"); + h_pprint_gfeqns(stdout, g); + + printf("\n==== Grammar ====\n"); + h_pprint_grammar(stdout, g, 0); +}