diff --git a/.travis.yml b/.travis.yml index 8973c57ee8376d774f79bd15d9ee4cf71ce485f7..e0f5c40338c5636395f603114adfcd9d51d424c1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,5 @@ +sudo: required +dist: trusty language: c compiler: - gcc @@ -8,35 +10,43 @@ matrix: include: - compiler: gcc language: ruby - rvm: ruby-1.9.3-p484 + rvm: ruby-1.9.3-p551 env: BINDINGS=ruby - compiler: clang language: ruby - rvm: ruby-1.9.3-p484 + rvm: ruby-1.9.3-p551 env: BINDINGS=ruby CC=clang - compiler: gcc language: ruby - rvm: ruby-2.0.0-p353 + rvm: ruby-2.0.0-p647 env: BINDINGS=ruby - compiler: clang language: ruby - rvm: ruby-2.0.0-p353 + rvm: ruby-2.0.0-p647 env: BINDINGS=ruby CC=clang - compiler: gcc language: ruby - rvm: ruby-2.1.0 + rvm: ruby-2.1.7 env: BINDINGS=ruby - compiler: clang language: ruby - rvm: ruby-2.1.0 + rvm: ruby-2.1.7 + env: BINDINGS=ruby CC=clang + - compiler: gcc + language: ruby + rvm: ruby-2.2.3 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-2.2.3 env: BINDINGS=ruby CC=clang - compiler: gcc language: python - python: "2.7" + python: "2.7.10" env: BINDINGS=python - compiler: clang language: python - python: "2.7" + python: "2.7.10" env: BINDINGS=python CC=clang - compiler: gcc language: perl @@ -87,24 +97,27 @@ matrix: - compiler: gcc language: cpp env: BINDINGS=cpp - - compiler: gcc + - compiler: clang language: cpp env: BINDINGS=cpp CC=clang before_install: - sudo apt-get update -qq - - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi - - if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi - - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi - - if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi + - sudo apt-get install -y 
lcov + - gem install coveralls-lcov + - if [ "$BINDINGS" != "none" ]; then sudo sh -c 'echo "deb http://archive.ubuntu.com/ubuntu trusty-backports main restricted universe multiverse" >> /etc/apt/sources.list'; sudo apt-get update -qq; sudo apt-get install -yqq swig3.0/trusty-backports; fi + - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -yqq python-dev; fi + - if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -yqq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi install: true before_script: - if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi -script: - - scons bindings=$BINDINGS test +script: + - if [ "$BINDINGS" == "none" ]; then scons test --variant=debug --coverage; else scons bindings=$BINDINGS test; fi +after_success: + - if [ "$BINDINGS" == "none" ]; then if [ "$CC" == "clang" ]; then llvm-cov gcov -o coverage.info build/debug/src/test_suite.gcda; else lcov --capture --directory build/debug/src --output-file coverage.info; fi; fi + - coveralls-lcov coverage.info notifications: irc: channels: - "irc.upstandinghackers.com#hammer" use_notice: true skip_join: true - diff --git a/SConstruct b/SConstruct index a8f7ce8b9d39964458dea9fd1ee1fbe3d0a4b474..1af97ddc56af244e0d36034045abcb56ca8f320b 100644 --- a/SConstruct +++ b/SConstruct @@ -49,7 +49,7 @@ env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backen env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig") env.ScanReplace('libhammer.pc.in') -env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable") +env.MergeFlags("-std=gnu11 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable") if env['PLATFORM'] == 'darwin': env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}') @@ -90,15 +90,18 @@ if GetOption("variant") == 'debug': 
else: env = opt -if GetOption("coverage"): - env.Append(CFLAGS=["-fprofile-arcs", "-ftest-coverage"], - CXXFLAGS=["-fprofile-arcs", "-ftest-coverage"], - LDFLAGS=["-fprofile-arcs", "-ftest-coverage"], - LIBS=['gcov']) - env["CC"] = os.getenv("CC") or env["CC"] env["CXX"] = os.getenv("CXX") or env["CXX"] +if GetOption("coverage"): + env.Append(CFLAGS=["--coverage"], + CXXFLAGS=["--coverage"], + LDFLAGS=["--coverage"]) + if env["CC"] == "gcc": + env.Append(LIBS=['gcov']) + else: + env.ParseConfig('llvm-config --ldflags') + if os.getenv("CC") == "clang" or env['PLATFORM'] == 'darwin': env.Replace(CC="clang", CXX="clang++") diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000000000000000000000000000000000000..2aef9c9423e10b6671bbddb86c4f159335b69f26 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,19 @@ +platform: +- x86 +- x64 +version: 1.0.{build} +os: Visual Studio 2015 +build_script: +- '@echo off' +- setlocal +- ps: >- + If ($env:Platform -Match "x86") { + $env:VCVARS_PLATFORM="x86" + } Else { + $env:VCVARS_PLATFORM="amd64" + } +- call "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" %VCVARS_PLATFORM% +- call tools\windows\build.bat +# FIXME(windows) TODO(uucidl): reactivate examples +# - call tools\windows\build_examples.bat +- exit /b 0 diff --git a/examples/SConscript b/examples/SConscript index 0932bdacbbf51f4f2faaa73484313abd0eab9ad0..069472164a8255595db0c2d8f9e951ba5fdfe6d3 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -7,4 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2]) \ No newline at end of file +ties = example.Program('ties', ['ties.c', 'grammar.c']) +env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties]) \ No newline at end of file diff --git 
a/examples/grammar.c b/examples/grammar.c
new file mode 100644
index 0000000000000000000000000000000000000000..7638fe99558149d9e2d47e5d1dc7f0299da189c1
--- /dev/null
+++ b/examples/grammar.c
@@ -0,0 +1,188 @@
+// Generates a system of equations for generating functions from a grammar.
+//
+// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
+//
+
+// If a desugared parser has user_data set, the generating function systems will try
+// to interpret it as a string:
+//
+// If this string for an h_ch starts with the character 0, then that character
+// will have weight 0 in the generating function.
+//
+// Use the remaining string to set the preferred name of that parser in the
+// generating function.
+//
+
+#include <inttypes.h>
+#include "../src/backends/contextfree.h"
+#include "../src/backends/lr.h"
+#include "grammar.h"
+#include <stdio.h>
+
+// Returns the display name of nonterminal nt within grammar g.
+//
+// If nt->user_data points at a string whose first character is printable
+// ASCII, that string is the name; a leading '0' (the zero-weight marker) is
+// skipped. Otherwise the name is nt's number in g->nts written in base 26
+// with the digits 'A'..'Z'.
+//
+// NB: generated names live in a static buffer that is overwritten by the next
+// call, so copy the result if it must survive another call.
+const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
+  const char *name = nt->user_data;
+
+  // if user_data exists and is printable:
+  if(name != NULL && *name > ' ' && *name < 127) {
+    // skip the zero-weight marker, if present
+    return (*name == '0') ? name+1 : name;
+  }
+
+  static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits
+
+  // find nt's number in g
+  size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);
+
+  // NB the start symbol (number 0) is always "A".
+  int i;
+  for(i=14; i>=0 && (n>0 || i==14); i--) {
+    buf[i] = 'A' + n%26;
+    n = n/26; // shift one digit
+  }
+
+  return buf+i+1;
+}
+
+// Returns nonzero if x carries user_data marking it as a zero-weight symbol,
+// i.e. a string starting with the character '0'.
+static int has_zero_weight(const HCFChoice *x) {
+  const char *tag = x->user_data;
+  return tag != NULL && *tag == '0';
+}
+
+// Tallies up one production right-hand side:
+//   *length  is incremented for every HCF_CHAR that is not zero-weight,
+//   *count   is multiplied by the size of every HCF_CHARSET,
+// and every other symbol that is not HCF_END is emitted as "*<name>" right
+// after the leading "1". The caller appends the scalar and the power of t.
+void readsequence(FILE *file, uint32_t *count, uint32_t *length,
+                  const HCFGrammar *g, const HCFSequence *seq) {
+  fprintf(file, "1");
+
+  // an empty sequence has GF 1: the loop below does not run
+  for(HCFChoice **x = seq->items; *x; x++) {
+    switch((*x)->type) {
+    case HCF_CHAR:
+      // the weight marker is checked per symbol; it may differ between items
+      if(!has_zero_weight(*x)) {
+        (*length)++;
+      }
+      break;
+    case HCF_END:
+      break;
+    case HCF_CHARSET: {
+      // count afresh for each charset so that sizes multiply
+      uint32_t cscount = 0;
+      for(unsigned int i=0; i<256; i++) {
+        if (charset_isset((*x)->charset, i)) {
+          cscount++;
+        }
+      }
+      *count *= cscount;
+      break;
+    }
+    default: // HCF_CHOICE, non-terminal symbol
+      fprintf(file, "*%s", nonterminal_name(g, *x));
+      break;
+    }
+  }
+}
+
+// For each nt in g->nts
+//   For each choice in nt->key->seq
+//     For all elements in sequence
+//       Accumulate counts
+//       Accumulate string lengths
+//     Emit count*t^length
+//
+// Prints a SageMath ring declaration over t and all nonterminal names,
+// followed by an ideal with one generator "name - (sum of monomials)" per
+// nonterminal. Nonterminals whose name is empty are left out of both.
+void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
+  if (g->nts->used < 1) {
+    return;
+  }
+
+  // emit the SageMath ring init string
+  // iterate over g->nts, output symbols
+  size_t i;
+  HHashTableEntry *hte;
+  fprintf(file, "ring.<t");
+  for(i=0; i < g->nts->capacity; i++) {
+    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL) {
+        continue;
+      }
+      const char *ntn = nonterminal_name(g, hte->key);
+      if(*ntn == 0) {
+        continue; // an empty name is not a valid ring variable
+      }
+      fprintf(file, ",%s", ntn);
+    }
+  }
+  fprintf(file, "> = QQ[]\n");
+
+  // iterate over g->nts
+  // emit a Sage ideal definition
+  int j=0;
+  fprintf(file, "ID = ring.ideal(");
+  for(i=0; i < g->nts->capacity; i++) {
+    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL) {
+        continue;
+      }
+
+      const HCFChoice *nt = hte->key;
+      const char *ntn = nonterminal_name(g, nt);
+      if(*ntn == 0) {
+        continue;
+      }
+
+      // separator only between generators that are actually printed
+      if(j>0) {
+        fprintf(file, ",");
+      }
+      j++;
+
+      fprintf(file, "%s - (", ntn);
+
+      for(HCFSequence **seq = nt->seq; *seq; seq++) {
+        if (seq != nt->seq) {
+          fprintf(file, " + ");
+        }
+        uint32_t count=1, length=0;
+        readsequence(file, &count, &length, g, *seq);
+        if(count == 1) {
+          if(length == 1) {
+            fprintf(file, "*t");
+          }
+          if(length > 1) {
+            fprintf(file, "*t^%" PRIu32, length);
+          }
+        } else if(count > 1) {
+          if(length == 0) {
+            fprintf(file, "*%" PRIu32, count);
+          }
+          if(length == 1) {
+            fprintf(file, "*%" PRIu32 "*t", count);
+          }
+          if (length > 1) {
+            fprintf(file, "*%" PRIu32 "*t^%" PRIu32, count, length);
+          }
+        }
+      }
+
+      fprintf(file, ")");
+    }
+  }
+  fprintf(file, ")\n");
+}
diff --git a/examples/grammar.h b/examples/grammar.h
new file mode 100644
index 0000000000000000000000000000000000000000..b42eced49b4b958a08610aee09e4498a3cc4da05
--- /dev/null
+++ b/examples/grammar.h
@@ -0,0 +1,46 @@
+// Generates a system of equations for generating functions from a grammar.
+//
+// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
+//
+
+// Currently does absolutely no elegance, no caching of information, but rather
+// just prints the generating functions to a provided FILE*.
+//
+
+
+// If a desugared parser has user_data set, the generating function systems will try
+// to interpret it as a string:
+//
+// If this string for an h_ch starts with the character 0, then that character
+// will have weight 0 in the generating function.
+//
+// Use the remaining string to set the preferred name of that parser in the
+// generating function.
+//
+
+#ifndef HAMMER_GRAMMAR__H
+#define HAMMER_GRAMMAR__H
+
+#include "../src/backends/contextfree.h"
+#include "../src/backends/lr.h"
+
+
+// Filched from cfgrammar.c this function extracts the name from user_data if it
+// is set; otherwise assigns a name automatically from its position in some
+// ordering of non-terminals.
+const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt); + +// This function prints out the monomial generated by a single HCFSequence. +// It returns the resulting exponent for t in length and the number of alternatives +// accumulated in count. The monomial is (mostly) printed out to the provided FILE*; +// the caller is responsible for adding a scalar and a power of t to the printout. +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq); + +// This function walks through a grammar and generates an equation for each +// production rule. The results are printed out to the provided FILE*. +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g); + + + +#endif diff --git a/examples/ties.c b/examples/ties.c new file mode 100644 index 0000000000000000000000000000000000000000..77d0821212e6d5a6145ee9184cfb26b02be3d786 --- /dev/null +++ b/examples/ties.c @@ -0,0 +1,309 @@ +// Intention: read in a parser, generate the system of equations for its +// generating functions +// + +#include <inttypes.h> +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include "grammar.h" +#include <stdio.h> + + +HAllocator *mm__; + +HParser* cfExample() { + HParser *n = h_ch('n'); + HParser *E = h_indirect(); + HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); + HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); + h_bind_indirect(E, E_); + return E; +} + +// The tie knot parsers below would work better if we could patch the gen.function +// code above to allow user specification of non-default byte string "lengths", +// so that U symbols don't contribute with factors of t to the gen. function. +// +// Alternatively: use multivariate generating functions to spit out different +// variables for different terminals. This gets really messy with bigger alphabets. 
+ +HParser* finkmao() { + HParser *L = h_ch('L'); + HParser *R = h_ch('R'); + HParser *C = h_ch('C'); + HParser *U = h_ch('U'); + HParser *Lnext = h_indirect(); + HParser *Rnext = h_indirect(); + HParser *Cnext = h_indirect(); + HParser *L_ = h_choice(h_sequence(R, Rnext, NULL), + h_sequence(C, Cnext, NULL), + h_sequence(R, C, U, NULL), NULL); + HParser *R_ = h_choice(h_sequence(L, Lnext, NULL), + h_sequence(C, Cnext, NULL), + h_sequence(L, C, U, NULL), NULL); + HParser *C_ = h_choice(h_sequence(R, Rnext, NULL), + h_sequence(L, Lnext, NULL), NULL); + h_bind_indirect(Lnext, L_); + h_bind_indirect(Rnext, R_); + h_bind_indirect(Cnext, C_); + HParser *tie = h_sequence(L, Lnext, NULL); + + h_desugar_augmented(mm__, tie); + + L->desugared->user_data = "L"; + R->desugared->user_data = "R"; + C->desugared->user_data = "C"; + Lnext->desugared->user_data = "Ln"; + Rnext->desugared->user_data = "Rn"; + Cnext->desugared->user_data = "Cn"; + tie->desugared->user_data = "tie"; + U->desugared->user_data = "0U"; + + return tie; +} + +HParser* finkmaoTW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), + NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); + HParser *tuck = h_choice(h_sequence(T, T, U, NULL), + h_sequence(W, W, U, NULL), + NULL); + HParser *pairstar = h_indirect(); + HParser *pstar_ = h_choice(h_sequence(pair, pairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(pairstar, pstar_); + + HParser* tie = h_sequence(prefix, pairstar, tuck, NULL); + h_desugar_augmented(mm__, tie); + + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + pstar_->desugared->user_data = "pairstar"; + tie->desugared->user_data = "tie"; + + return 
tie; +} + +HParser* depth1TW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); + HParser *tuck = h_choice(h_sequence(T, T, U, NULL), + h_sequence(W, W, U, NULL), + NULL); + HParser *tuckpairstar = h_indirect(); + HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_sequence(tuck, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(tuckpairstar, tpstar_); + HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + tpstar_->desugared->user_data = "tuckpairstar"; + tie->desugared->user_data = "tie"; + + return tie; +} + +HParser* depth1() { + HParser *L = h_ch('L'); + HParser *R = h_ch('R'); + HParser *C = h_ch('C'); + HParser *U = h_ch('U'); + HParser *lastR = h_indirect(); + HParser *lastL = h_indirect(); + HParser *lastC = h_indirect(); + HParser *R_ = h_choice(h_sequence(L, R, lastR, NULL), + h_sequence(C, R, lastR, NULL), + h_sequence(L, C, lastC, NULL), + h_sequence(L, C, U, lastC, NULL), + h_sequence(L, C, U, NULL), + h_sequence(C, L, lastL, NULL), + h_sequence(C, L, U, lastL, NULL), + h_sequence(C, L, U, NULL), + NULL); + HParser *L_ = h_choice(h_sequence(R, L, lastR, NULL), + h_sequence(C, L, lastR, NULL), + h_sequence(R, C, lastC, NULL), + h_sequence(R, C, U, lastC, NULL), + h_sequence(R, C, U, NULL), + h_sequence(C, R, lastR, NULL), + h_sequence(C, R, U, lastR, NULL), + h_sequence(C, R, U, NULL), + NULL); + HParser *C_ = h_choice(h_sequence(L, C, lastR, NULL), + h_sequence(R, C, lastR, NULL), + h_sequence(L, R, lastR, NULL), + 
h_sequence(L, R, U, lastR, NULL), + h_sequence(L, R, U, NULL), + h_sequence(R, L, lastL, NULL), + h_sequence(R, L, U, lastL, NULL), + h_sequence(R, L, U, NULL), + NULL); + h_bind_indirect(lastR, R_); + h_bind_indirect(lastL, L_); + h_bind_indirect(lastC, C_); + HParser* tie = h_choice(h_sequence(L, lastL, NULL), + h_sequence(R, lastR, NULL), + h_sequence(C, lastC, NULL), + NULL); + + h_desugar_augmented(mm__, tie); + + L->desugared->user_data = "L"; + R->desugared->user_data = "R"; + C->desugared->user_data = "C"; + U->desugared->user_data = "0U"; + lastL ->desugared->user_data = "Ln"; + lastR->desugared->user_data = "Rn"; + lastC->desugared->user_data = "Cn"; + tie->desugared->user_data = "tie"; + + return tie; +} + +HParser* depthNTW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); + HParser *tstart = h_indirect(); + HParser *tw0 = h_indirect(); + HParser *tw1 = h_indirect(); + HParser *tw2 = h_indirect(); + HParser *wstart = h_indirect(); + HParser *wt0 = h_indirect(); + HParser *wt1 = h_indirect(); + HParser *wt2 = h_indirect(); + + HParser *T_ = h_choice(h_sequence(T, T, tw2, U, NULL), + h_sequence(T, W, tw0, U, NULL), + NULL); + HParser *tw0_ = h_choice(h_sequence(T, T, tw2, U, NULL), + h_sequence(T, W, tw0, U, NULL), + h_sequence(W, T, tw0, U, NULL), + h_sequence(W, W, tw1, U, NULL), + h_sequence(tstart, tw2, U, NULL), + h_sequence(wstart, tw1, U, NULL), + NULL); + HParser *tw1_ = h_choice(h_sequence(T, T, tw0, U, NULL), + h_sequence(T, W, tw1, U, NULL), + h_sequence(W, T, tw1, U, NULL), + h_sequence(W, W, tw2, U, NULL), + h_sequence(tstart, tw0, U, NULL), + h_sequence(wstart, tw2, U, NULL), + NULL); + HParser *tw2_ = h_choice(h_sequence(T, T, tw1, U, NULL), + h_sequence(T, W, tw2, U, NULL), + h_sequence(W, T, tw2, U, NULL), + 
h_sequence(W, W, tw0, U, NULL), + h_sequence(tstart, tw1, U, NULL), + h_sequence(wstart, tw0, U, NULL), + h_epsilon_p(), + NULL); + + HParser *W_ = h_choice(h_sequence(W, W, wt2, U, NULL), + h_sequence(W, T, wt0, U, NULL), + NULL); + HParser *wt0_ = h_choice(h_sequence(W, W, wt2, U, NULL), + h_sequence(W, T, wt0, U, NULL), + h_sequence(T, W, wt0, U, NULL), + h_sequence(T, T, wt1, U, NULL), + h_sequence(wstart, wt2, U, NULL), + h_sequence(tstart, wt1, U, NULL), + NULL); + HParser *wt1_ = h_choice(h_sequence(W, W, wt0, U, NULL), + h_sequence(W, T, wt1, U, NULL), + h_sequence(T, W, wt1, U, NULL), + h_sequence(T, T, wt2, U, NULL), + h_sequence(wstart, wt0, U, NULL), + h_sequence(tstart, wt2, U, NULL), + NULL); + HParser *wt2_ = h_choice(h_sequence(W, W, wt1, U, NULL), + h_sequence(W, T, wt2, U, NULL), + h_sequence(T, W, wt2, U, NULL), + h_sequence(T, T, wt0, U, NULL), + h_sequence(wstart, wt1, U, NULL), + h_sequence(tstart, wt0, U, NULL), + h_epsilon_p(), + NULL); + + h_bind_indirect(tstart, T_); + h_bind_indirect(tw0, tw0_); + h_bind_indirect(tw1, tw1_); + h_bind_indirect(tw2, tw2_); + h_bind_indirect(wstart, W_); + h_bind_indirect(wt0, wt0_); + h_bind_indirect(wt1, wt1_); + h_bind_indirect(wt2, wt2_); + HParser *tuck = h_choice(tstart, wstart, NULL); + + HParser *tuckpairstar = h_indirect(); + HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_sequence(tuck, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(tuckpairstar, tpstar_); + + HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + tpstar_->desugared->user_data = "tuckpairstar"; + tie->desugared->user_data = "tie"; + + return tie; +} + + +int main(int argc, char **argv) { + mm__ = &system_allocator; 
+ + HParser *p = finkmao(); + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); + if (g == NULL) { + fprintf(stderr, "h_cfgrammar failed\n"); + return 1; + } + printf("\n==== Generating functions ====\n"); + h_pprint_gfeqns(stdout, g); + + printf("\n==== Grammar ====\n"); + h_pprint_grammar(stdout, g, 0); +} diff --git a/src/SConscript b/src/SConscript index e192b05e182b0020ac7f931f68244b300b93b9bc..dd6b61628b943a79c14ecf250f89dd4e7e063bca 100644 --- a/src/SConscript +++ b/src/SConscript @@ -5,8 +5,10 @@ Import('env testruns') dist_headers = [ "hammer.h", "allocator.h", + "compiler_specifics.h", "glue.h", - "internal.h" + "internal.h", + "platform.h" ] parsers_headers = [ @@ -61,6 +63,7 @@ misc_hammer_parts = [ 'desugar.c', 'glue.c', 'hammer.c', + 'platform_bsdlike.c', 'pprint.c', 'registry.c', 'system_allocator.c'] @@ -86,7 +89,7 @@ env.Install("$pkgconfigpath", "../../../libhammer.pc") testenv = env.Clone() testenv.ParseConfig('pkg-config --cflags --libs glib-2.0') testenv.Append(LIBS=['hammer'], LIBPATH=['.']) -ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c']) +ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS="--coverage" if testenv.GetOption("coverage") else None) ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path])) AlwaysBuild(ctest) testruns.append(ctest) diff --git a/src/allocator.c b/src/allocator.c index 80fa92172eb9f0c785fdc4f94960d34cf6d0ff75..258edfa5643a6123a4ca3ce640b06e89bd39a5a2 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -49,6 +49,10 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) { block_size = 4096; struct HArena_ *ret = h_new(struct HArena_, 1); struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size); + if (!link) { + // TODO: error-reporting -- let user know that arena link couldn't be allocated + return NULL; + } memset(link, 0, 
sizeof(struct arena_link) + block_size); link->free = block_size; link->used = 0; @@ -76,6 +80,10 @@ void* h_arena_malloc(HArena *arena, size_t size) { arena->used += size; arena->wasted += sizeof(struct arena_link*); void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*)); + if (!link) { + // TODO: error-reporting -- let user know that arena link couldn't be allocated + return NULL; + } memset(link, 0, size + sizeof(struct arena_link*)); *(struct arena_link**)link = arena->head->next; arena->head->next = (struct arena_link*)link; @@ -83,6 +91,10 @@ void* h_arena_malloc(HArena *arena, size_t size) { } else { // we just need to allocate an ordinary new block. struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size); + if (!link) { + // TODO: error-reporting -- let user know that arena link couldn't be allocated + return NULL; + } memset(link, 0, sizeof(struct arena_link) + arena->block_size); link->free = arena->block_size - size; link->used = size; diff --git a/src/backends/contextfree.h b/src/backends/contextfree.h index ab04ab523064fba731d2cf292bcc1a7bd28a9faf..29b51a08ac3c39251170ce50f6c85448f5adf65f 100644 --- a/src/backends/contextfree.h +++ b/src/backends/contextfree.h @@ -18,10 +18,11 @@ struct HCFStack_ { HCFChoice *last_completed; // Last completed choice. // XXX is last_completed still needed? HCFChoice *prealloc; // If not NULL, will be used for the outermost choice. 
+ char error; }; #ifndef UNUSED -#define UNUSED __attribute__((unused)) +#define UNUSED H_GCC_ATTRIBUTE((unused)) #endif static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) UNUSED; @@ -33,6 +34,7 @@ static HCFStack* h_cfstack_new(HAllocator *mm__) { stack->cap = 4; stack->stack = h_new(HCFChoice*, stack->cap); stack->prealloc = NULL; + stack->error = 0; return stack; } @@ -55,8 +57,12 @@ static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFCh for (int j = 0;; j++) { if (cur_top->seq[i]->items[j] == NULL) { cur_top->seq[i]->items = mm__->realloc(mm__, cur_top->seq[i]->items, sizeof(HCFChoice*) * (j+2)); + if (!cur_top->seq[i]->items) { + stk__->error = 1; + } cur_top->seq[i]->items[j] = item; cur_top->seq[i]->items[j+1] = NULL; + assert(!stk__->error); return; } } @@ -111,8 +117,11 @@ static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) { assert(stk__->cap > 0); stk__->cap *= 2; stk__->stack = mm__->realloc(mm__, stk__->stack, stk__->cap * sizeof(HCFChoice*)); + if (!stk__->stack) { + stk__->error = 1; + } } - assert(stk__->cap >= 1); + assert(stk__->cap >= 1 && !stk__->error); stk__->stack[stk__->count++] = choice; } @@ -121,6 +130,10 @@ static inline void h_cfstack_begin_seq(HAllocator *mm__, HCFStack *stk__) { for (int i = 0;; i++) { if (top->seq[i] == NULL) { top->seq = mm__->realloc(mm__, top->seq, sizeof(HCFSequence*) * (i+2)); + if (!top->seq) { + stk__->error = 1; + return; + } HCFSequence *seq = top->seq[i] = h_new(HCFSequence, 1); top->seq[i+1] = NULL; seq->items = h_new(HCFChoice*, 1); diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 14f64cd1a23cf2276a1377e0d1b78c3a24125ed8..b82ef71c477128728db39d4ac72ef8d4ab0dc56c 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -52,7 +52,7 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg, if (xAy->type != HCF_CHOICE) { return; } - // XXX CHARSET? 
+ // NB: nothing to do on quasi-terminal CHARSET which carries no list of rhs's HArena *arena = eg->arena; @@ -91,7 +91,7 @@ static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym) HHashSet *cs = h_hashtable_get(eg->corr, sym); if (!cs) { - cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol); + cs = h_hashset_new(arena, h_eq_ptr, h_hash_ptr); h_hashtable_put(eg->corr, sym, cs); } h_hashset_put(cs, esym); @@ -208,6 +208,46 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p, && state == endstate); } +// variant of match_production where the production lhs is a charset +// [..x..] -> x +static bool match_charset_production(const HLRTable *table, HLREnhGrammar *eg, + const HCFChoice *lhs, HCFChoice *rhs, + size_t endstate) +{ + assert(lhs->type == HCF_CHARSET); + assert(rhs->type == HCF_CHAR); + + if(!charset_isset(lhs->charset, rhs->chr)) + return false; + + // determine the enhanced-grammar right-hand side and check end state + HLRTransition *t = h_hashtable_get(eg->smap, lhs); + assert(t != NULL); + return (follow_transition(table, t->from, rhs) == endstate); +} + +// check whether any production for sym (enhanced-grammar) matches the given +// (original-grammar) rhs and terminates in the given end state. 
+static bool match_any_production(const HLRTable *table, HLREnhGrammar *eg, + const HCFChoice *sym, HCFChoice **rhs, + size_t endstate) +{ + assert(sym->type == HCF_CHOICE || sym->type == HCF_CHARSET); + + if(sym->type == HCF_CHOICE) { + for(HCFSequence **p=sym->seq; *p; p++) { + if(match_production(eg, (*p)->items, rhs, endstate)) + return true; + } + } else { // HCF_CHARSET + assert(rhs[0] != NULL); + assert(rhs[1] == NULL); + return match_charset_production(table, eg, sym, rhs[0], endstate); + } + + return false; +} + // desugar parser with a fresh start symbol // this guarantees that the start symbol will not occur in any productions HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser) @@ -286,14 +326,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs); assert(lhss != NULL); H_FOREACH_KEY(lhss, HCFChoice *lhs) - assert(lhs->type == HCF_CHOICE); // XXX could be CHARSET? - - for(HCFSequence **p=lhs->seq; *p; p++) { - HCFChoice **rhs = (*p)->items; - if(!match_production(eg, rhs, item->rhs, state)) { - continue; - } - + if(match_any_production(table, eg, lhs, item->rhs, state)) { // the left-hand symbol's follow set is this production's // contribution to the lookahead const HStringMap *fs = h_follow(1, eg->grammar, lhs); @@ -304,7 +337,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) // for each lookahead symbol, put action into table cell if(terminals_put(table->tmap[state], fs, action) < 0) inadeq = true; - } H_END_FOREACH // enhanced production + } + H_END_FOREACH // enhanced production H_END_FOREACH // reducible item if(inadeq) { @@ -331,7 +365,10 @@ void h_lalr_free(HParser *parser) HParserBackendVTable h__lalr_backend_vtable = { .compile = h_lalr_compile, .parse = h_lr_parse, - .free = h_lalr_free + .free = h_lalr_free, + .parse_start = h_lr_parse_start, + .parse_chunk = h_lr_parse_chunk, + .parse_finish = h_lr_parse_finish }; @@ 
-340,8 +377,6 @@ HParserBackendVTable h__lalr_backend_vtable = { // dummy! int test_lalr(void) { - HAllocator *mm__ = &system_allocator; - /* E -> E '-' T | T @@ -356,44 +391,24 @@ int test_lalr(void) h_bind_indirect(E, E_); HParser *p = E; - printf("\n==== G R A M M A R ====\n"); - HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); - if (g == NULL) { - fprintf(stderr, "h_cfgrammar failed\n"); + HCFGrammar *g = h_pprint_lr_info(stdout, p); + if(!g) return 1; - } - h_pprint_grammar(stdout, g, 0); - - printf("\n==== D F A ====\n"); - HLRDFA *dfa = h_lr0_dfa(g); - if (dfa) { - h_pprint_lrdfa(stdout, g, dfa, 0); - } else { - fprintf(stderr, "h_lalr_dfa failed\n"); - } - - printf("\n==== L R ( 0 ) T A B L E ====\n"); - HLRTable *table0 = h_lr0_table(g, dfa); - if (table0) { - h_pprint_lrtable(stdout, g, table0, 0); - } else { - fprintf(stderr, "h_lr0_table failed\n"); - } - h_lrtable_free(table0); - printf("\n==== L A L R T A B L E ====\n"); + fprintf(stdout, "\n==== L A L R T A B L E ====\n"); if (h_compile(p, PB_LALR, NULL)) { - fprintf(stderr, "does not compile\n"); + fprintf(stdout, "does not compile\n"); return 2; } h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0); - printf("\n==== P A R S E R E S U L T ====\n"); + fprintf(stdout, "\n==== P A R S E R E S U L T ====\n"); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13); if (res) { h_pprint(stdout, res->ast, 0, 2); } else { - printf("no parse\n"); + fprintf(stdout, "no parse\n"); } + return 0; } diff --git a/src/backends/llk.c b/src/backends/llk.c index 89151e6f7fb3cc605ca6cbb7e498cdb1490776ea..0ab4610a29a1fcdefd1ca163ea2be8785b3ed0e6 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -12,6 +12,7 @@ static const size_t DEFAULT_KMAX = 1; * maps lookahead strings to productions (HCFSequence). 
*/ typedef struct HLLkTable_ { + size_t kmax; HHashTable *rows; HCFChoice *start; // start symbol HArena *arena; @@ -188,6 +189,7 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HStringMap *row, */ static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table) { + table->kmax = kmax; table->start = g->start; // iterate over g->nts @@ -259,56 +261,172 @@ void h_llk_free(HParser *parser) /* LL(k) driver */ -HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +typedef struct { + HArena *arena; // will hold the results + HArena *tarena; // tmp, deleted after parse + HSlist *stack; + HCountedArray *seq; // accumulates current parse result + + uint8_t *buf; // for lookahead across chunk boundaries + // allocated to size 2*kmax + // new chunk starts at index kmax + // ( 0 ... kmax ... 2*kmax-1 ) + // \_old_/\______new_______/ + HInputStream win; // win.length is set to 0 when not in use +} HLLkState; + +// in order to construct the parse tree, we delimit the symbol stack into +// frames corresponding to production right-hand sides. since only left-most +// derivations are produced this linearization is unique. +// the 'mark' allocated below simply reserves a memory address to use as the +// frame delimiter. +// nonterminals, instead of being popped and forgotten, are put back onto the +// stack below the mark to tell us which validations and semantic actions to +// execute on their corresponding result. +// also on the stack below the mark, we store the previously accumulated +// value for the surrounding production. 
+static void const * const MARK = &MARK; // stack frame delimiter + +static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser) { const HLLkTable *table = parser->backend_data; assert(table != NULL); - HArena *arena = h_new_arena(mm__, 0); // will hold the results - HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse - HSlist *stack = h_slist_new(tarena); - HCountedArray *seq = h_carray_new(arena); // accumulates current parse result - - // in order to construct the parse tree, we delimit the symbol stack into - // frames corresponding to production right-hand sides. since only left-most - // derivations are produced this linearization is unique. - // the 'mark' allocated below simply reserves a memory address to use as the - // frame delimiter. - // nonterminals, instead of being popped and forgotten, are put back onto the - // stack below the mark to tell us which validations and semantic actions to - // execute on their corresponding result. - // also on the stack below the mark, we store the previously accumulated - // value for the surrounding production. - void *mark = h_arena_malloc(tarena, 1); + HLLkState *s = h_new(HLLkState, 1); + s->arena = h_new_arena(mm__, 0); + s->tarena = h_new_arena(mm__, 0); + s->stack = h_slist_new(s->tarena); + s->seq = h_carray_new(s->arena); + s->buf = h_arena_malloc(s->tarena, 2 * table->kmax); + + s->win.input = s->buf; + s->win.length = 0; // unused // initialize with the start symbol on the stack. 
- h_slist_push(stack, table->start); + h_slist_push(s->stack, table->start); + + return s; +} + +// helper: add new input to the lookahead window +static void append_win(size_t kmax, HLLkState *s, HInputStream *stream) +{ + assert(stream->bit_offset == 0); + assert(s->win.input == s->buf); + assert(s->win.length == kmax); + assert(s->win.index < kmax); + + size_t n = stream->length - stream->index; // bytes to copy + if(n > kmax) + n = kmax; + + memcpy(s->buf + kmax, stream->input + stream->index, n); + s->win.length += n; +} + +// helper: save old input to the lookahead window +static void save_win(size_t kmax, HLLkState *s, HInputStream *stream) +{ + assert(stream->bit_offset == 0); + + size_t len = stream->length - stream->index; + assert(len < kmax); + + if(len == 0) { + // stream empty? nothing to do. + return; + } else if(s->win.length > 0) { + // window active? should contain all of stream. + assert(s->win.length == kmax + len); + assert(s->win.index <= kmax); + + // shift contents down: + // + // (0 kmax ) + // ... \_old_/\_new_/ ... + // + // (0 kmax ) + // ... \_old_/\_new_/ ... + // + s->win.pos += len; // position of the window shifts up + len = s->win.length - s->win.index; + assert(len <= kmax); + memmove(s->buf + kmax - len, s->buf + s->win.index, len); + } else { + // window not active? save stream to window. 
+ // buffer starts kmax bytes below chunk boundary + s->win.pos = stream->pos - kmax; + memcpy(s->buf + kmax - len, stream->input + stream->index, len); + } + + // metadata + s->win = *stream; + s->win.input = s->buf; + s->win.index = kmax - len; + s->win.length = kmax; +} + +// returns partial result or NULL (no parse) +static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, + HInputStream* chunk) +{ + HParsedToken *tok = NULL; // will hold result token + HCFChoice *x = NULL; // current symbol (from top of stack) + HInputStream *stream; + + assert(chunk->index == 0); + assert(chunk->bit_offset == 0); + + const HLLkTable *table = parser->backend_data; + assert(table != NULL); + + HArena *arena = s->arena; + HArena *tarena = s->tarena; + HSlist *stack = s->stack; + HCountedArray *seq = s->seq; + size_t kmax = table->kmax; + + if(!seq) + return NULL; // parse already failed + + if(s->win.length > 0) { + append_win(kmax, s, chunk); + stream = &s->win; + } else { + stream = chunk; + } // when we empty the stack, the parse is complete. 
while(!h_slist_empty(stack)) { + tok = NULL; + // pop top of stack for inspection - HCFChoice *x = h_slist_pop(stack); + x = h_slist_pop(stack); assert(x != NULL); - if(x != mark && x->type == HCF_CHOICE) { + if(x != MARK && x->type == HCF_CHOICE) { // x is a nonterminal; apply the appropriate production and continue - // push stack frame - h_slist_push(stack, seq); // save current partial value - h_slist_push(stack, x); // save the nonterminal - h_slist_push(stack, mark); // frame delimiter - - // open a fresh result sequence - seq = h_carray_new(arena); - // look up applicable production in parse table const HCFSequence *p = h_llk_lookup(table, x, stream); if(p == NULL) goto no_parse; + if(p == NEED_INPUT) { + save_win(kmax, s, chunk); + goto need_input; + } // an infinite loop case that shouldn't happen assert(!p->items[0] || p->items[0] != x); + // push stack frame + h_slist_push(stack, seq); // save current partial value + h_slist_push(stack, x); // save the nonterminal + h_slist_push(stack, (void *)MARK); // frame delimiter + + // open a fresh result sequence + seq = h_carray_new(arena); + // push production's rhs onto the stack (in reverse order) HCFChoice **s; for(s = p->items; *s; s++); @@ -319,15 +437,13 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* } // the top of stack is such that there will be a result... - HParsedToken *tok; // will hold result token tok = h_arena_malloc(arena, sizeof(HParsedToken)); - tok->index = stream->index; - tok->bit_offset = stream->bit_offset; - if(x == mark) { + if(x == MARK) { // hit stack frame boundary... 
// wrap the accumulated parse result, this sequence is finished tok->token_type = TT_SEQUENCE; tok->seq = seq; + // XXX would have to set token pos but we've forgotten pos of seq // recover original nonterminal and result sequence x = h_slist_pop(stack); @@ -337,18 +453,31 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* else { // x is a terminal or simple charset; match against input + tok->index = stream->pos + stream->index; + tok->bit_offset = stream->bit_offset; + // consume the input token uint8_t input = h_read_bits(stream, 8, false); + // when old chunk consumed from window, switch to new chunk + if(s->win.length > 0 && s->win.index >= kmax) { + s->win.length = 0; // disable the window + stream = chunk; + } + switch(x->type) { case HCF_END: if(!stream->overrun) goto no_parse; + if(!stream->last_chunk) + goto need_input; h_arena_free(arena, tok); tok = NULL; break; case HCF_CHAR: + if(stream->overrun) + goto need_input; if(input != x->chr) goto no_parse; tok->token_type = TT_UINT; @@ -357,7 +486,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* case HCF_CHARSET: if(stream->overrun) - goto no_parse; + goto need_input; if(!charset_isset(x->charset, input)) goto no_parse; tok->token_type = TT_UINT; @@ -373,8 +502,16 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* // 'tok' has been parsed; process it // perform token reshape if indicated - if(x->reshape) - tok = (HParsedToken *)x->reshape(make_result(arena, tok), x->user_data); + if(x->reshape) { + HParsedToken *t = x->reshape(make_result(arena, tok), x->user_data); + if(t) { + t->index = tok->index; + t->bit_offset = tok->bit_offset; + } else { + h_arena_free(arena, tok); + } + tok = t; + } // call validation and semantic action, if present if(x->pred && !x->pred(make_result(tarena, tok), x->user_data)) @@ -386,24 +523,82 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* 
h_carray_append(seq, tok); } + // success // since we started with a single nonterminal on the stack, seq should // contain exactly the parse result. assert(seq->used == 1); - h_delete_arena(tarena); - return make_result(arena, seq->elements[0]); + return seq; no_parse: - h_delete_arena(tarena); h_delete_arena(arena); + s->arena = NULL; return NULL; + + need_input: + if(stream->last_chunk) + goto no_parse; + if(tok) + h_arena_free(arena, tok); // no result, yet + h_slist_push(stack, x); // try this symbol again next time + return seq; } +static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s) +{ + HParseResult *res = NULL; + + if(s->seq) { + assert(s->seq->used == 1); + res = make_result(s->arena, s->seq->elements[0]); + } + + h_delete_arena(s->tarena); + h_free(s); + return res; +} + +HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +{ + HLLkState *s = llk_parse_start_(mm__, parser); + + assert(stream->last_chunk); + s->seq = llk_parse_chunk_(s, parser, stream); + + HParseResult *res = llk_parse_finish_(mm__, s); + if(res) + res->bit_length = stream->index * 8 + stream->bit_offset; + + return res; +} + +void h_llk_parse_start(HSuspendedParser *s) +{ + s->backend_state = llk_parse_start_(s->mm__, s->parser); +} + +bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input) +{ + HLLkState *state = s->backend_state; + + state->seq = llk_parse_chunk_(state, s->parser, input); + + return (state->seq == NULL || h_slist_empty(state->stack)); +} + +HParseResult *h_llk_parse_finish(HSuspendedParser *s) +{ + return llk_parse_finish_(s->mm__, s->backend_state); +} HParserBackendVTable h__llk_backend_vtable = { .compile = h_llk_compile, .parse = h_llk_parse, - .free = h_llk_free + .free = h_llk_free, + + .parse_start = h_llk_parse_start, + .parse_chunk = h_llk_parse_chunk, + .parse_finish = h_llk_parse_finish }; diff --git a/src/backends/lr.c b/src/backends/lr.c index 
e7f237756361303102440700af4ceb5fcfb5abdf..fb256c0bfafa0b6c53b32307bea64f61d4885919 100644 --- a/src/backends/lr.c +++ b/src/backends/lr.c @@ -163,7 +163,7 @@ HLRAction *h_reduce_action(HArena *arena, const HLRItem *item) } // adds 'new' to the branches of 'action' -// returns a 'action' if it is already of type HLR_CONFLICT +// returns 'action' if it is already of type HLR_CONFLICT // allocates a new HLRAction otherwise HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new) { @@ -199,15 +199,14 @@ bool h_lrtable_row_empty(const HLRTable *table, size_t i) /* LR driver */ -HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, - const HInputStream *stream) +static +HLREngine *h_lrengine_new_(HArena *arena, HArena *tarena, const HLRTable *table) { HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); engine->table = table; engine->state = 0; engine->stack = h_slist_new(tarena); - engine->input = *stream; engine->merged[0] = NULL; engine->merged[1] = NULL; engine->arena = arena; @@ -216,6 +215,14 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, return engine; } +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, + const HInputStream *stream) +{ + HLREngine *engine = h_lrengine_new_(arena, tarena, table); + engine->input = *stream; + return engine; +} + static const HLRAction * terminal_lookup(const HLREngine *engine, const HInputStream *stream) { @@ -260,6 +267,8 @@ static HParsedToken *consume_input(HLREngine *engine) v = h_arena_malloc(engine->arena, sizeof(HParsedToken)); v->token_type = TT_UINT; v->uint = c; + v->index = engine->input.pos + engine->input.index - 1; + v->bit_offset = engine->input.bit_offset; } return v; @@ -302,18 +311,28 @@ bool h_lrengine_step(HLREngine *engine, const HLRAction *action) value->index = v->index; value->bit_offset = v->bit_offset; } else { - // XXX how to get the position in this case? 
+ // result position is current input position XXX ? + value->index = engine->input.pos + engine->input.index; + value->bit_offset = engine->input.bit_offset; } // perform token reshape if indicated - if(symbol->reshape) - value = (HParsedToken *)symbol->reshape(make_result(arena, value), symbol->user_data); + if(symbol->reshape) { + v = symbol->reshape(make_result(arena, value), symbol->user_data); + if(v) { + v->index = value->index; + v->bit_offset = value->bit_offset; + } else { + h_arena_free(arena, value); + } + value = v; + } // call validation and semantic action, if present if(symbol->pred && !symbol->pred(make_result(tarena, value), symbol->user_data)) return false; // validation failed -> no parse; terminate if(symbol->action) - value = (HParsedToken *)symbol->action(make_result(arena, value), symbol->user_data); + value = symbol->action(make_result(arena, value), symbol->user_data); // this is LR, building a right-most derivation bottom-up, so no reduce can // follow a reduce. 
we can also assume no conflict follows for GLR if we @@ -351,7 +370,9 @@ HParseResult *h_lrengine_result(HLREngine *engine) // on top of the stack is the start symbol's semantic value assert(!h_slist_empty(engine->stack)); HParsedToken *tok = engine->stack->head->elem; - return make_result(engine->arena, tok); + HParseResult *res = make_result(engine->arena, tok); + res->bit_length = (engine->input.pos + engine->input.index) * 8; + return res; } else { return NULL; } @@ -377,7 +398,53 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* return result; } +void h_lr_parse_start(HSuspendedParser *s) +{ + HLRTable *table = s->parser->backend_data; + assert(table != NULL); + + HArena *arena = h_new_arena(s->mm__, 0); // will hold the results + HArena *tarena = h_new_arena(s->mm__, 0); // tmp, deleted after parse + HLREngine *engine = h_lrengine_new_(arena, tarena, table); + s->backend_state = engine; +} + +bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream) +{ + HLREngine *engine = s->backend_state; + engine->input = *stream; + + bool run = true; + while(run) { + // check input against table to determine which action to take + const HLRAction *action = h_lrengine_action(engine); + if(action == NEED_INPUT) { + // XXX assume lookahead 1 + assert(engine->input.length - engine->input.index == 0); + break; + } + + // execute action + run = h_lrengine_step(engine, action); + if(engine->input.overrun && !engine->input.last_chunk) + break; + } + + *stream = engine->input; + return !run; // done if engine no longer running +} + +HParseResult *h_lr_parse_finish(HSuspendedParser *s) +{ + HLREngine *engine = s->backend_state; + + HParseResult *result = h_lrengine_result(engine); + if(!result) + h_delete_arena(engine->arena); + h_delete_arena(engine->tarena); + return result; +} /* Pretty-printers */ @@ -536,3 +603,35 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, fputc('\n', f); #endif } + +HCFGrammar 
*h_pprint_lr_info(FILE *f, HParser *p) +{ + HAllocator *mm__ = &system_allocator; + + fprintf(f, "\n==== G R A M M A R ====\n"); + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); + if (g == NULL) { + fprintf(f, "h_cfgrammar failed\n"); + return NULL; + } + h_pprint_grammar(f, g, 0); + + fprintf(f, "\n==== D F A ====\n"); + HLRDFA *dfa = h_lr0_dfa(g); + if (dfa) { + h_pprint_lrdfa(f, g, dfa, 0); + } else { + fprintf(f, "h_lalr_dfa failed\n"); + } + + fprintf(f, "\n==== L R ( 0 ) T A B L E ====\n"); + HLRTable *table0 = h_lr0_table(g, dfa); + if (table0) { + h_pprint_lrtable(f, g, table0, 0); + } else { + fprintf(f, "h_lr0_table failed\n"); + } + h_lrtable_free(table0); + + return g; +} diff --git a/src/backends/lr.h b/src/backends/lr.h index 8f1eadd9059330b23c77e58aedfd680690b07950..724d126ce106e6ed98f86fd7e30c1d42938dd1cd 100644 --- a/src/backends/lr.h +++ b/src/backends/lr.h @@ -134,6 +134,9 @@ const HLRAction *h_lrengine_action(const HLREngine *engine); bool h_lrengine_step(HLREngine *engine, const HLRAction *action); HParseResult *h_lrengine_result(HLREngine *engine); HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); +void h_lr_parse_start(HSuspendedParser *s); +bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream); +HParseResult *h_lr_parse_finish(HSuspendedParser *s); HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item); @@ -143,5 +146,6 @@ void h_pprint_lrdfa(FILE *f, const HCFGrammar *g, const HLRDFA *dfa, unsigned int indent); void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, unsigned int indent); +HCFGrammar *h_pprint_lr_info(FILE *f, HParser *p); #endif diff --git a/src/backends/lr0.c b/src/backends/lr0.c index 1c86484e61300ec40362a9abb47105424ddff2b9..a02df9e11da52ca3835e390487062a5a76ea0a31 100644 --- a/src/backends/lr0.c +++ b/src/backends/lr0.c @@ -30,9 
+30,9 @@ static void expand_to_closure(HCFGrammar *g, HHashSet *items) HCFChoice *sym = item->rhs[item->mark]; // symbol after mark // if there is a non-terminal after the mark, follow it + // and add items corresponding to the productions of sym // NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here - if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) { - // add items corresponding to the productions of sym + if(sym != NULL) { if(sym->type == HCF_CHOICE) { for(HCFSequence **p=sym->seq; *p; p++) { HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0); @@ -41,7 +41,7 @@ static void expand_to_closure(HCFGrammar *g, HHashSet *items) h_slist_push(work, it); } } - } else { // HCF_CHARSET + } else if(sym->type == HCF_CHARSET) { for(unsigned int i=0; i<256; i++) { if(charset_isset(sym->charset, i)) { // XXX allocate these single-character symbols statically somewhere @@ -93,8 +93,8 @@ HLRDFA *h_lr0_dfa(HCFGrammar *g) // compute closure // if destination is a new state: // add it to state set - // add transition to it // add it to the work list + // add transition to it while(!h_slist_empty(work)) { size_t state_idx = (uintptr_t)h_slist_pop(work); diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 33082c6c278beb09b2abf767e5314d18ab471db4..f21e22ab00cdf7bd2fe906ddc9b82dd3c66382c3 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -3,7 +3,7 @@ #include "../internal.h" #include "../parsers/parser_internal.h" -// short-hand for creating cache values (regular case) +// short-hand for creating lowlevel parse cache values (parse result case) static HParserCacheValue * cached_result(HParseState *state, HParseResult *result) { HParserCacheValue *ret = a_new(HParserCacheValue, 1); @@ -13,7 +13,7 @@ HParserCacheValue * cached_result(HParseState *state, HParseResult *result) { return ret; } -// short-hand for caching parse results (left recursion case) +// short-hand for creating lowlevel parse cache values (left recursion 
case) static HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) { HParserCacheValue *ret = a_new(HParserCacheValue, 1); @@ -126,7 +126,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) h_hashtable_put(state->recursion_heads, &k->input_pos, head); HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); if (!old_cached || PC_LEFT == old_cached->value_type) - errx(1, "impossible match"); + h_platform_errx(1, "impossible match"); HParseResult *old_res = old_cached->right; // rewind the input @@ -148,7 +148,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) state->input_stream = cached->input_stream; return cached->right; } else { - errx(1, "impossible match"); + h_platform_errx(1, "impossible match"); } } } else { @@ -173,7 +173,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab return grow(k, state, growable->head); } } else { - errx(1, "lrAnswer with no head"); + h_platform_errx(1, "lrAnswer with no head"); } } @@ -181,25 +181,34 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab HParseResult* h_do_parse(const HParser* parser, HParseState *state) { HParserCacheKey *key = a_new(HParserCacheKey, 1); key->input_pos = state->input_stream; key->parser = parser; - HParserCacheValue *m = recall(key, state); + HParserCacheValue *m = NULL; + if (parser->vtable->higher) { + m = recall(key, state); + } // check to see if there is already a result for this object... 
if (!m) { // It doesn't exist, so create a dummy result to cache - HLeftRec *base = a_new(HLeftRec, 1); - base->seed = NULL; base->rule = parser; base->head = NULL; - h_slist_push(state->lr_stack, base); - // cache it - h_hashtable_put(state->cache, key, cached_lr(state, base)); - // parse the input + HLeftRec *base = NULL; + // But only cache it now if there's some chance it could grow; primitive parsers can't + if (parser->vtable->higher) { + base = a_new(HLeftRec, 1); + base->seed = NULL; base->rule = parser; base->head = NULL; + h_slist_push(state->lr_stack, base); + // cache it + h_hashtable_put(state->cache, key, cached_lr(state, base)); + // parse the input + } HParseResult *tmp_res = perform_lowlevel_parse(state, parser); - // the base variable has passed equality tests with the cache - h_slist_pop(state->lr_stack); - // update the cached value to our new position - HParserCacheValue *cached = h_hashtable_get(state->cache, key); - assert(cached != NULL); - cached->input_stream = state->input_stream; + if (parser->vtable->higher) { + // the base variable has passed equality tests with the cache + h_slist_pop(state->lr_stack); + // update the cached value to our new position + HParserCacheValue *cached = h_hashtable_get(state->cache, key); + assert(cached != NULL); + cached->input_stream = state->input_stream; + } // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one - if (NULL == base->head) { + if (!base || NULL == base->head) { h_hashtable_put(state->cache, key, cached_result(state, tmp_res)); return tmp_res; } else { diff --git a/src/backends/regex.c b/src/backends/regex.c index c4f6a2bfffbcd65640febe01813694694583d6c6..f6494fa98afea084ab347511ec0f25dc0e11379c 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -184,11 +184,15 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ -void svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) { +bool 
svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) { if (ctx->stack_count + addl >= ctx->stack_capacity) { ctx->stack = mm__->realloc(mm__, ctx->stack, sizeof(*ctx->stack) * (ctx->stack_capacity *= 2)); - // TODO: check for realloc failure + if (!ctx->stack) { + return false; + } + return true; } + return true; } HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, const uint8_t *input, int len) { @@ -204,7 +208,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, for (cur = trace; cur; cur = cur->next) { switch (cur->opcode) { case SVM_PUSH: - svm_stack_ensure_cap(mm__, &ctx, 1); + if (!svm_stack_ensure_cap(mm__, &ctx, 1)) { + goto fail; + } tmp_res = a_new(HParsedToken, 1); tmp_res->token_type = TT_MARK; tmp_res->index = cur->input_pos; @@ -264,7 +270,9 @@ uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* e size_t array_size = (prog->action_count + 1) * 2; // action_count+1 is a // power of two prog->actions = prog->allocator->realloc(prog->allocator, prog->actions, array_size * sizeof(*prog->actions)); - // TODO: Handle the allocation failed case nicely. + if (!prog->actions) { + longjmp(prog->except, 1); + } } HSVMAction *action = &prog->actions[prog->action_count]; @@ -280,7 +288,9 @@ uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg) { size_t array_size = (prog->length + 1) * 2; // action_count+1 is a // power of two prog->insns = prog->allocator->realloc(prog->allocator, prog->insns, array_size * sizeof(*prog->insns)); - // TODO: Handle the allocation failed case nicely. 
+ if (!prog->insns) { + longjmp(prog->except, 1); + } } prog->insns[prog->length].op = op; @@ -360,12 +370,16 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params prog->insns = NULL; prog->actions = NULL; prog->allocator = mm__; + if (setjmp(prog->except)) { + return false; + } if (!h_compile_regex(prog, parser)) { h_free(prog->insns); h_free(prog->actions); h_free(prog); return 2; } + memset(prog->except, 0, sizeof(prog->except)); h_rvm_insert_insn(prog, RVM_ACCEPT, 0); parser->backend_data = prog; return 0; diff --git a/src/backends/regex.h b/src/backends/regex.h index 4ea85a884b3fdec764f9ad94fc4c48880a0a6ea2..0c51fd0971b375860759fde73beebdce98c622e1 100644 --- a/src/backends/regex.h +++ b/src/backends/regex.h @@ -7,6 +7,8 @@ #ifndef HAMMER_BACKEND_REGEX__H #define HAMMER_BACKEND_REGEX__H +#include <setjmp.h> + // each insn is an 8-bit opcode and a 16-bit parameter // [a] are actions; they add an instruction to the stackvm that is being output. // [m] are match ops; they can either succeed or fail, depending on the current character @@ -57,6 +59,7 @@ struct HRVMProg_ { size_t action_count; HRVMInsn *insns; HSVMAction *actions; + jmp_buf except; }; // Returns true IFF the provided parser could be compiled. diff --git a/src/backends/regex_debug.c b/src/backends/regex_debug.c index 5ca6ca42624a8149eb472e5fabaac59ca1ec248d..5207d9e19a320df5620276a5a0503ffad854ddc0 100644 --- a/src/backends/regex_debug.c +++ b/src/backends/regex_debug.c @@ -1,15 +1,17 @@ // Intended to be included from regex_debug.c -#define _GNU_SOURCE -#include <stdio.h> +#include "../platform.h" #include <stdlib.h> +#define USE_DLADDR (0) - +#if USE_DLADDR // This is some spectacularly non-portable code... but whee! #include <dlfcn.h> -char* getsym(void* addr) { +#endif + +char* getsym(HSVMActionFunc addr) { char* retstr; -#if 0 +#if USE_DLADDR // This will be fixed later. 
Dl_info dli; if (dladdr(addr, &dli) != 0 && dli.dli_sname != NULL) { @@ -19,7 +21,7 @@ char* getsym(void* addr) { return retstr; } else #endif - if (asprintf(&retstr, "%p", addr) > 0) + if (h_platform_asprintf(&retstr, "%p", addr) > 0) return retstr; else return NULL; @@ -59,7 +61,7 @@ void dump_rvm_prog(HRVMProg *prog) { symref = getsym(prog->actions[insn->arg].action); // TODO: somehow format the argument to action printf("%s\n", symref); - free(symref); + (&system_allocator)->free(&system_allocator, symref); break; case RVM_MATCH: { uint8_t low, high; @@ -95,7 +97,7 @@ void dump_svm_prog(HRVMProg *prog, HRVMTrace *trace) { symref = getsym(prog->actions[trace->arg].action); // TODO: somehow format the argument to action printf("%s\n", symref); - free(symref); + (&system_allocator)->free(&system_allocator, symref); break; default: printf("\n"); diff --git a/src/benchmark.c b/src/benchmark.c index ce416dad99fcb39504f9dd2bb2d2ba21f6b0071e..b6a2876fa0a1a85711c610b1d2bc5f1143c77f87 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -1,19 +1,10 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include <time.h> #include <string.h> #include "hammer.h" #include "internal.h" - -#ifdef __MACH__ -#include <mach/clock.h> -#include <mach/mach.h> -#endif - -#ifdef __NetBSD__ -#include <sys/resource.h> -#endif +#include "platform.h" static const char* HParserBackendNames[] = { "Packrat", @@ -23,38 +14,6 @@ static const char* HParserBackendNames[] = { "GLR" }; -void h_benchmark_clock_gettime(struct timespec *ts) { - if (ts == NULL) - return; -#ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time - /* - * This returns real time, not CPU time. 
See http://stackoverflow.com/a/6725161 - * Possible solution: http://stackoverflow.com/a/11659289 - */ - clock_serv_t cclock; - mach_timespec_t mts; - host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); - clock_get_time(cclock, &mts); - mach_port_deallocate(mach_task_self(), cclock); - ts->tv_sec = mts.tv_sec; - ts->tv_nsec = mts.tv_nsec; -#elif defined(__NetBSD__) - // NetBSD doesn't have CLOCK_THREAD_CPUTIME_ID. We'll use getrusage instead - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); - ts->tv_nsec = (rusage.ru_utime.tv_usec + rusage.ru_stime.tv_usec) * 1000; - // not going to overflow; can be at most 2e9-2 - ts->tv_sec = rusage.ru_utime.tv_sec + rusage.ru_utime.tv_sec; - if (ts->tv_nsec >= 1000000000) { - ts->tv_nsec -= 1000000000; // subtract a second - ts->tv_sec += 1; // add it back. - } - assert (ts->tv_nsec <= 1000000000); -#else - clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts); -#endif -} - /* Usage: Create your parser (i.e., const HParser*), and an array of test cases @@ -107,21 +66,21 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest HParseResult *res = h_parse(parser, tc->input, tc->length); char* res_unamb; if (res != NULL) { - res_unamb = h_write_result_unamb(res->ast); + res_unamb = h_write_result_unamb(res->ast); } else - res_unamb = NULL; + res_unamb = NULL; if ((res_unamb == NULL && tc->output_unambiguous != NULL) - || (res_unamb != NULL && strcmp(res_unamb, tc->output_unambiguous) != 0)) { - // test case failed... - fprintf(stderr, "Parsing with %s failed\n", HParserBackendNames[backend]); - // We want to run all testcases, for purposes of generating a - // report. (eg, if users are trying to fix a grammar for a - // faster backend) - tc_failed++; - ret->results[backend].failed_testcases++; + || (res_unamb != NULL && strcmp(res_unamb, tc->output_unambiguous) != 0)) { + // test case failed... 
+ fprintf(stderr, "Parsing with %s failed\n", HParserBackendNames[backend]); + // We want to run all testcases, for purposes of generating a + // report. (eg, if users are trying to fix a grammar for a + // faster backend) + tc_failed++; + ret->results[backend].failed_testcases++; } h_parse_result_free(res); - free(res_unamb); + (&system_allocator)->free(&system_allocator, res_unamb); } if (tc_failed > 0) { @@ -135,20 +94,16 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest for (tc = testcases; tc->input != NULL; tc++) { // The goal is to run each testcase for at least 50ms each - // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer) int count = 1, cur; - struct timespec ts_start, ts_end; int64_t time_diff; do { - count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway. - h_benchmark_clock_gettime(&ts_start); - for (cur = 0; cur < count; cur++) { - h_parse_result_free(h_parse(parser, tc->input, tc->length)); - } - h_benchmark_clock_gettime(&ts_end); - - // time_diff is in ns - time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); + count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway. 
+ struct HStopWatch stopwatch; + h_platform_stopwatch_reset(&stopwatch); + for (cur = 0; cur < count; cur++) { + h_parse_result_free(h_parse(parser, tc->input, tc->length)); + } + time_diff = h_platform_stopwatch_ns(&stopwatch); } while (time_diff < 100000000); ret->results[backend].cases[cur_case].parse_time = (time_diff / count); ret->results[backend].cases[cur_case].length = tc->length; diff --git a/src/bindings/cpp/hammer/hammer.hpp b/src/bindings/cpp/hammer/hammer.hpp index b9de6716a93a844227bd5317699a5fb0726c66a2..f3939ab351865b3aa5a78540b3aa5450a20c5dee 100644 --- a/src/bindings/cpp/hammer/hammer.hpp +++ b/src/bindings/cpp/hammer/hammer.hpp @@ -2,6 +2,7 @@ #define HAMMER_HAMMER__HPP #include "../../../hammer.h" +#include "../../../internal.h" #include <string> #include <stdint.h> #include <cstdarg> @@ -49,7 +50,7 @@ namespace hammer { std::string asUnambiguous() { char* buf = h_write_result_unamb(token); std::string s = std::string(buf); - free(buf); + (&system_allocator)->free(&system_allocator, buf); return s; } }; diff --git a/src/bindings/cpp/hammer/hammer_test.hpp b/src/bindings/cpp/hammer/hammer_test.hpp index 77e6daa3fccd31d296d3d364a75bcc0cce5d7354..f3ab77a377d569a3b42255b050e7d6d8324837aa 100644 --- a/src/bindings/cpp/hammer/hammer_test.hpp +++ b/src/bindings/cpp/hammer/hammer_test.hpp @@ -5,7 +5,7 @@ #include <gtest/gtest.h> #include <hammer/hammer.hpp> -#define HAMMER_DECL_UNUSED __attribute__((unused)) +#define HAMMER_DECL_UNUSED H_GCC_ATTRIBUTE((unused)) static ::testing::AssertionResult ParseFails (hammer::Parser parser, const std::string &input) HAMMER_DECL_UNUSED; diff --git a/src/bindings/desugar-header.pl b/src/bindings/desugar-header.pl index 5bdd11e665b86af623583a94002551795d7b9ade..e836ad7a8bd0af6299008696f22f2f9a549d4b82 100644 --- a/src/bindings/desugar-header.pl +++ b/src/bindings/desugar-header.pl @@ -11,7 +11,7 @@ while(<>) { } elsif (/^HAMMER_FN_DECL\(([^,]*), ([^,]*), ([^)]*)\);/) { print "$1 $2($3);\n"; print "$1 
$2__m(HAllocator* mm__, $3);\n"; - } elsif (/^HAMMER_FN_DECL_VARARGS_ATTR\((__attribute__\(\([^)]*\)\)), ([^,]*), ([^,]*), ([^)]*)\);/) { + } elsif (/^HAMMER_FN_DECL_VARARGS_ATTR\((H_GCC_ATTRIBUTE\(\([^)]*\)\)), ([^,]*), ([^,]*), ([^)]*)\);/) { print "$2 $3($4, ...);\n"; print "$2 $3__m(HAllocator *mm__, $4, ...);\n"; print "$2 $3__a(void* args);\n"; diff --git a/src/bindings/dotnet/SConscript b/src/bindings/dotnet/SConscript index 94f874ee41cc4741cff950ef4a88478dcfc06b31..afa4c30d3d8dcc0b11b502ecec5db8dc126628c4 100644 --- a/src/bindings/dotnet/SConscript +++ b/src/bindings/dotnet/SConscript @@ -27,7 +27,7 @@ csfiles = os.path.join(thisdir, "*.cs") # target to stand in for. hammer_wrap = AlwaysBuild(dotnetenv.Command(['hammer_wrap.c'], swig, ["rm %s/*.cs || true" % (thisdir,), - "swig $SWIGFLAGS $SOURCE"])) + "swig3.0 $SWIGFLAGS $SOURCE"])) libhammer_dotnet = dotnetenv.SharedLibrary(['hammer_dotnet'], hammer_wrap) hammer_dll = AlwaysBuild(dotnetenv.Command(['hammer.dll'], Glob('ext/*.cs'), '$CSC -t:library -unsafe -out:$TARGET %s/*.cs $SOURCE' %(thisdir,))) diff --git a/src/bindings/perl/SConscript b/src/bindings/perl/SConscript index 49b693a7035cabfe1914c0a2fc172d31a07e23dd..8a192a5a3ac05e5b1f83473f13fa3631d252b300 100644 --- a/src/bindings/perl/SConscript +++ b/src/bindings/perl/SConscript @@ -20,7 +20,7 @@ if 'PERL5LIB' in os.environ: swig = ['hammer.i'] -hammer_wrap = perlenv.Command(['hammer_wrap.c', 'hammer.pm'], swig, "swig $SWIGFLAGS $SOURCE") +hammer_wrap = perlenv.Command(['hammer_wrap.c', 'hammer.pm'], swig, "swig3.0 $SWIGFLAGS $SOURCE") makefile = perlenv.Command(['Makefile'], ['Makefile.PL'], "perl $SOURCE CC=" + perlenv['ENV']['CC']) targetdir = os.path.dirname(str(hammer_wrap[0].path)) diff --git a/src/bindings/php/SConscript b/src/bindings/php/SConscript index 34728af238c9a1b3ad478737e997921e8a0ff0b8..6791cbcc46d6c4f67fda5c756d46570ee8347c29 100644 --- a/src/bindings/php/SConscript +++ b/src/bindings/php/SConscript @@ -11,7 +11,7 @@ 
phpenv.Append(LIBS = ['hammer']) phpenv.Append(LIBPATH = ['../../']) swig = ['hammer.i'] -bindings_src = phpenv.Command(['hammer.php', 'hammer_wrap.c', 'php_hammer.h'], swig, 'swig -php -DHAMMER_INTERNAL__NO_STDARG_H -Isrc/ $SOURCE') +bindings_src = phpenv.Command(['hammer.php', 'hammer_wrap.c', 'php_hammer.h'], swig, 'swig3.0 -php -DHAMMER_INTERNAL__NO_STDARG_H -Isrc/ $SOURCE') libhammer_php = phpenv.SharedLibrary('hammer', ['hammer_wrap.c']) Default(swig, bindings_src, libhammer_php) diff --git a/src/bindings/python/SConscript b/src/bindings/python/SConscript index dac2d9596a58fdd2e8dd4edbcde46aa31b4d6024..5c7e4744def2572987be5411ab4542a2431d5cd4 100644 --- a/src/bindings/python/SConscript +++ b/src/bindings/python/SConscript @@ -7,7 +7,7 @@ pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES = 0) swig = pythonenv.Command("hammer.i", "../swig/hammer.i", Copy("$TARGET", "$SOURCE")) setup = ['setup.py'] pydir = os.path.join(env['BUILD_BASE'], 'src/bindings/python') -libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --inplace') +libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --swig=swig3.0 --inplace') Default(libhammer_python) pytestenv = pythonenv.Clone() diff --git a/src/bitwriter.c b/src/bitwriter.c index 451815bd61f31ebd1f225c7f30f7e9f741680b90..74e273448aded2ae40ec99666f230447054c0dd0 100644 --- a/src/bitwriter.c +++ b/src/bitwriter.c @@ -13,10 +13,14 @@ HBitWriter *h_bit_writer_new(HAllocator* mm__) { HBitWriter *writer = h_new(HBitWriter, 1); memset(writer, 0, sizeof(*writer)); writer->buf = mm__->alloc(mm__, writer->capacity = 8); + if (!writer) { + return NULL; + } memset(writer->buf, 0, writer->capacity); writer->mm__ = mm__; writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN; - + writer->error = 0; + return writer; } @@ -37,6 +41,10 @@ static void 
h_bit_writer_reserve(HBitWriter* w, size_t nbits) { size_t old_capacity = w->capacity; while (w->index + nbytes >= w->capacity) { w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2); + if (!w->buf) { + w->error = 1; + return; + } } if (old_capacity != w->capacity) diff --git a/src/cfgrammar.c b/src/cfgrammar.c index a8761b8d537ec236f7a4876e1ad86a30742df988..77e7ecad7ea1a70597a4c7c70ee21d9184a6c672 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -349,6 +349,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en return m->epsilon_branch; } +// A NULL result means no parse. NEED_INPUT means lookahead is too short. void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) { while(m) { @@ -362,9 +363,13 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) // reading bits from it does not consume them from the real input. uint8_t c = h_read_bits(&lookahead, 8, false); - if (lookahead.overrun) { // end of input - // XXX assumption of byte-wise grammar and input - return m->end_branch; + if (lookahead.overrun) { // end of chunk + if (lookahead.last_chunk) { // end of input + // XXX assumption of byte-wise grammar and input + return m->end_branch; + } else { + return NEED_INPUT; + } } // no match yet, descend @@ -672,7 +677,7 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, } -void h_pprint_char(FILE *f, char c) +void h_pprint_char(FILE *f, uint8_t c) { switch(c) { case '"': fputs("\\\"", f); break; @@ -685,12 +690,12 @@ void h_pprint_char(FILE *f, char c) if (isprint((int)c)) { fputc(c, f); } else { - fprintf(f, "\\x%.2X", c); + fprintf(f, "\\x%.2X", (unsigned int)c); } } } -static void pprint_charset_char(FILE *f, char c) +static void pprint_charset_char(FILE *f, uint8_t c) { switch(c) { case '"': fputc(c, f); break; @@ -896,8 +901,8 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, if (map->epsilon_branch) { if (!first) { fputc(sep, 
file); - first=false; } + first=false; if (n==0) { fputs("\"\"", file); } else { @@ -915,8 +920,8 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, if (map->end_branch) { if (!first) { fputs(",\"", file); - first=false; } + first=false; if (n>0) { fputs("\"\"", file); } diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 9cefc62e83f07048dc2a24f0cda1bde28ca72066..2e8ba83cee5c152baae1177ed7b99d45cf11042c 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -56,6 +56,9 @@ bool h_stringmap_empty(const HStringMap *m); static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c) { return h_hashtable_get(m->char_branches, (void *)char_key(c)); } +// dummy return value used by h_stringmap_get_lookahead when out of input +#define NEED_INPUT ((void *)-1) + /* Convert 'parser' into CFG representation by desugaring and compiling the set * of nonterminals. @@ -102,4 +105,4 @@ void h_pprint_stringset(FILE *file, const HStringMap *set, int indent); void h_pprint_stringmap(FILE *file, char sep, void (*valprint)(FILE *f, void *env, void *val), void *env, const HStringMap *map); -void h_pprint_char(FILE *file, char c); +void h_pprint_char(FILE *file, uint8_t c); diff --git a/src/compiler_specifics.h b/src/compiler_specifics.h new file mode 100644 index 0000000000000000000000000000000000000000..ed09d664fa52557ce5505f789c37ffb881a5f753 --- /dev/null +++ b/src/compiler_specifics.h @@ -0,0 +1,16 @@ +#ifndef HAMMER_COMPILER_SPECIFICS__H +#define HAMMER_COMPILER_SPECIFICS__H + +#if defined(__clang__) || defined(__GNUC__) +#define H_GCC_ATTRIBUTE(x) __attribute__(x) +#else +#define H_GCC_ATTRIBUTE(x) +#endif + +#if defined(_MSC_VER) +#define H_MSVC_DECLSPEC(x) __declspec(x) +#else +#define H_MSVC_DECLSPEC(x) +#endif + +#endif diff --git a/src/glue.c b/src/glue.c index cb3a7ce7de4dbc435da4ddefc4dfae956a3a063f..58fe4175d4fd326b62c76449449a74768605ca9e 100644 --- a/src/glue.c +++ b/src/glue.c @@ -106,11 +106,11 @@ HParsedToken 
*h_make_seqn(HArena *arena, size_t n) return ret; } -HParsedToken *h_make_bytes(HArena *arena, size_t len) +HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len) { HParsedToken *ret = h_make_(arena, TT_BYTES); ret->bytes.len = len; - ret->bytes.token = h_arena_malloc(arena, len); + ret->bytes.token = array; return ret; } diff --git a/src/glue.h b/src/glue.h index 6c1c56ca0e368bc407d846f342dd52ba934c9dda..0bbfe9cfa26ec1bb6376ff23aa3b2d6cc3b4e873 100644 --- a/src/glue.h +++ b/src/glue.h @@ -195,7 +195,7 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); HParsedToken *h_make(HArena *arena, HTokenType type, void *value); HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. -HParsedToken *h_make_bytes(HArena *arena, size_t len); +HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len); HParsedToken *h_make_sint(HArena *arena, int64_t val); HParsedToken *h_make_uint(HArena *arena, uint64_t val); @@ -203,7 +203,7 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); #define H_MAKE(TYP, VAL) h_make(p->arena, (HTokenType)TT_ ## TYP, VAL) #define H_MAKE_SEQ() h_make_seq(p->arena) #define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) -#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) +#define H_MAKE_BYTES(VAL, LEN) h_make_bytes(p->arena, VAL, LEN) #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) diff --git a/src/hammer.c b/src/hammer.c index 6bb9ebb4febe53668a91ae9617ba05f2c158023d..70ebc8a4943d8e1b3a25e036a745c2296bf8ddfd 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -17,7 +17,6 @@ #include <assert.h> #include <ctype.h> -#include <err.h> #include <limits.h> #include <stdarg.h> #include <string.h> @@ -44,6 +43,7 @@ typedef struct { +#define DEFAULT_ENDIANNESS (BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN) HParseResult* h_parse(const HParser* parser, const uint8_t* input, 
size_t length) { return h_parse__m(&system_allocator, parser, input, length); @@ -51,12 +51,14 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { // Set up a parse state... HInputStream input_stream = { + .pos = 0, .index = 0, .bit_offset = 0, .overrun = 0, - .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, + .endianness = DEFAULT_ENDIANNESS, .length = length, - .input = input + .input = input, + .last_chunk = true }; return backends[parser->backend]->parse(mm__, parser, &input_stream); @@ -97,3 +99,92 @@ int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, cons parser->backend = backend; return ret; } + + +HSuspendedParser* h_parse_start(const HParser* parser) { + return h_parse_start__m(&system_allocator, parser); +} +HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) { + if(!backends[parser->backend]->parse_start) + return NULL; + + // allocate and init suspended state + HSuspendedParser *s = h_new(HSuspendedParser, 1); + if(!s) + return NULL; + s->mm__ = mm__; + s->parser = parser; + s->backend_state = NULL; + s->done = false; + s->pos = 0; + s->bit_offset = 0; + s->endianness = DEFAULT_ENDIANNESS; + + // backend-specific initialization + // should allocate s->backend_state + backends[parser->backend]->parse_start(s); + + return s; +} + +bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) { + assert(backends[s->parser->backend]->parse_chunk != NULL); + + // no-op if parser is already done + if(s->done) + return true; + + // input + HInputStream input_stream = { + .pos = s->pos, + .index = 0, + .bit_offset = 0, + .overrun = 0, + .endianness = s->endianness, + .length = length, + .input = input, + .last_chunk = false + }; + + // process chunk + s->done = backends[s->parser->backend]->parse_chunk(s, &input_stream); + s->endianness = input_stream.endianness; 
+ s->pos += input_stream.index; + s->bit_offset = input_stream.bit_offset; + + return s->done; +} + +HParseResult* h_parse_finish(HSuspendedParser* s) { + assert(backends[s->parser->backend]->parse_chunk != NULL); + assert(backends[s->parser->backend]->parse_finish != NULL); + + HAllocator *mm__ = s->mm__; + + // signal end of input if parser is not already done + if(!s->done) { + HInputStream empty = { + .pos = s->pos, + .index = 0, + .bit_offset = 0, + .overrun = 0, + .endianness = s->endianness, + .length = 0, + .input = NULL, + .last_chunk = true + }; + + s->done = backends[s->parser->backend]->parse_chunk(s, &empty); + assert(s->done); + } + + // extract result + HParseResult *r = backends[s->parser->backend]->parse_finish(s); + if(r) + r->bit_length = s->pos * 8 + s->bit_offset; + + // NB: backend should have freed backend_state + h_free(s); + + return r; +} diff --git a/src/hammer.h b/src/hammer.h index f893f10df4349d2ccc9d3a1c8c8675f60e014c9b..1be297c7a3b1230f2595ba47366a6591946b8777 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -17,6 +17,9 @@ #ifndef HAMMER_HAMMER__H #define HAMMER_HAMMER__H + +#include "compiler_specifics.h" + #ifndef HAMMER_INTERNAL__NO_STDARG_H #include <stdarg.h> #endif // HAMMER_INTERNAL__NO_STDARG_H @@ -48,6 +51,7 @@ typedef enum HParserBackend_ { typedef enum HTokenType_ { // Before you change the explicit values of these, think of the poor bindings ;_; + TT_INVALID = 0, TT_NONE = 1, TT_BYTES = 2, TT_SINT = 4, @@ -136,6 +140,8 @@ typedef struct HParser_ { HCFChoice *desugared; /* if the parser can be desugared, its desugared form */ } HParser; +typedef struct HSuspendedParser_ HSuspendedParser; + /** * Type of an action to apply to an AST, used in the action() parser. 
* It can be any (user-defined) function that takes a HParseResult* @@ -261,6 +267,27 @@ typedef struct HBenchmarkResults_ { */ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length); +/** + * Initialize a parser for iteratively consuming an input stream in chunks. + * This is only supported by some backends. + * + * Result is NULL if not supported by the backend. + */ +HAMMER_FN_DECL(HSuspendedParser*, h_parse_start, const HParser* parser); + +/** + * Run a suspended parser (as returned by h_parse_start) on a chunk of input. + * + * Returns true if the parser is done (needs no more input). + */ +bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length); + +/** + * Finish an iterative parse. Signals the end of input to the backend and + * returns the parse result. + */ +HParseResult* h_parse_finish(HSuspendedParser* s); + /** * Given a string, returns a parser that parses that string value. * @@ -268,6 +295,8 @@ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* inp */ HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len); +#define h_literal(s) h_token(s, sizeof(s)-1) + /** * Given a single character, returns a parser that parses that * character. @@ -431,7 +460,7 @@ HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p); * * Result token type: TT_SEQUENCE */ -HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HParser* p); +HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_sequence, HParser* p); /** * Given an array of parsers, p_array, apply each parser in order. The @@ -440,7 +469,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa * * Result token type: The type of the first successful parser's result. 
*/ -HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p); +HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_choice, HParser* p); /** * Given a null-terminated list of parsers, match a permutation phrase of these @@ -466,7 +495,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HPars * * Result token type: TT_SEQUENCE */ -HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p); +HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_permutation, HParser* p); /** * Given two parsers, p1 and p2, this parser succeeds in the following diff --git a/src/internal.h b/src/internal.h index 0c4d4dc2739953c3cfffa487ea3bd73993698ebd..776f636811183a4b3e19de9de8c8ce5b2b66f27d 100644 --- a/src/internal.h +++ b/src/internal.h @@ -24,16 +24,21 @@ #define HAMMER_INTERNAL__H #include <stdint.h> #include <assert.h> -#include <err.h> #include <string.h> #include "hammer.h" +#include "platform.h" + +/* "Internal" in this case means "we're not ready to commit + * to a public API." Many structures and routines here will be + * useful in client programs. + */ #ifdef NDEBUG #define assert_message(check, message) do { } while(0) #else #define assert_message(check, message) do { \ if (!(check)) \ - errx(1, "Assertion failed (programmer error): %s", message); \ + h_platform_errx(1, "Assertion failed (programmer error): %s", message); \ } while(0) #endif @@ -67,13 +72,15 @@ typedef struct HCFStack_ HCFStack; typedef struct HInputStream_ { // This should be considered to be a really big value type. const uint8_t *input; + size_t pos; // position of this chunk in a multi-chunk stream size_t index; size_t length; char bit_offset; char margin; // The number of bits on the end that is being read // towards that should be ignored. 
char endianness; - char overrun; + bool overrun; + bool last_chunk; } HInputStream; typedef struct HSlistNode_ { @@ -148,20 +155,20 @@ static inline void h_sarray_clear(HSArray *arr) { typedef unsigned int *HCharset; static inline HCharset new_charset(HAllocator* mm__) { - HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int)); - memset(cs, 0, 256); + HCharset cs = h_new(unsigned int, 256 / (sizeof(unsigned int) * 8)); + memset(cs, 0, 32); // 32 bytes = 256 bits return cs; } static inline int charset_isset(HCharset cs, uint8_t pos) { - return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); + return !!(cs[pos / (sizeof(*cs)*8)] & (1 << (pos % (sizeof(*cs)*8)))); } static inline void charset_set(HCharset cs, uint8_t pos, int val) { - cs[pos / sizeof(*cs)] = + cs[pos / (sizeof(*cs)*8)] = val - ? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) - : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); + ? cs[pos / (sizeof(*cs)*8)] | (1 << (pos % (sizeof(*cs)*8))) + : cs[pos / (sizeof(*cs)*8)] & ~(1 << (pos % (sizeof(*cs)*8))); } typedef unsigned int HHashValue; @@ -205,10 +212,32 @@ struct HParseState_ { HSlist *symbol_table; // its contents are HHashTables }; +struct HSuspendedParser_ { + HAllocator *mm__; + const HParser *parser; + void *backend_state; + bool done; + + // input stream state + size_t pos; + uint8_t bit_offset; + uint8_t endianness; +}; + typedef struct HParserBackendVTable_ { int (*compile)(HAllocator *mm__, HParser* parser, const void* params); HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* stream); void (*free)(HParser* parser); + + void (*parse_start)(HSuspendedParser *s); + // parse_start should allocate s->backend_state. + bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input); + // if parser is done, return true. otherwise: + // parse_chunk MUST consume all input, integrating it into s->backend_state. + // parse_chunk will not be called again after it reports done. 
+ HParseResult *(*parse_finish)(HSuspendedParser *s); + // parse_finish must free s->backend_state. + // parse_finish will not be called before parse_chunk reports done. } HParserBackendVTable; @@ -249,9 +278,9 @@ typedef struct HRecursionHead_ { /* A left recursion. * * Members: - * seed - - * rule - - * head - + * seed - the HResult yielded by rule + * rule - the HParser that produces seed + * head - the */ typedef struct HLeftRec_ { HParseResult *seed; @@ -282,6 +311,7 @@ struct HBitWriter_ { // of used bits in the current byte. i.e., 0 always // means that 8 bits are available for use. char flags; + char error; }; // }}} @@ -389,6 +419,7 @@ struct HParserVtable_ { bool (*isValidCF)(void *env); bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean. void (*desugar)(HAllocator *mm__, HCFStack *stk__, void *env); + bool higher; // false if primitive }; bool h_false(void*); diff --git a/src/parsers/action.c b/src/parsers/action.c index 04eb7a4c85c71f8ea3bc60b3371f052cc43d7603..a32433348c14c4d88b304e0b298e35f06f2dbd2e 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -81,6 +81,7 @@ static const HParserVtable action_vt = { .isValidCF = action_isValidCF, .desugar = desugar_action, .compile_to_rvm = action_ctrvm, + .higher = true, }; HParser* h_action(const HParser* p, const HAction a, void* user_data) { diff --git a/src/parsers/and.c b/src/parsers/and.c index c5c9836db57cc8864f785870a613e2ceb406b28c..e07bc9fcd36c91005d01eeef8d554a8202dce9ae 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -17,6 +17,7 @@ static const HParserVtable and_vt = { revision. --mlp, 18/12/12 */ .isValidCF = h_false, /* despite TODO above, this remains false. 
*/ .compile_to_rvm = h_not_regular, + .higher = true, }; diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c index e8359ab03e06e681061dd75788d6ca6bb6e9b89b..f766774026074d49c9609891638eb33575211918 100644 --- a/src/parsers/attr_bool.c +++ b/src/parsers/attr_bool.c @@ -79,6 +79,7 @@ static const HParserVtable attr_bool_vt = { .isValidCF = ab_isValidCF, .desugar = desugar_ab, .compile_to_rvm = ab_ctrvm, + .higher = true, }; diff --git a/src/parsers/bind.c b/src/parsers/bind.c index f024a82fe9952efa82fed5dcdb4bf28b1d9e8545..7fa821dc7d9837ef717114e2245d7c31fa80cacb 100644 --- a/src/parsers/bind.c +++ b/src/parsers/bind.c @@ -60,6 +60,7 @@ static const HParserVtable bind_vt = { .isValidRegular = h_false, .isValidCF = h_false, .compile_to_rvm = h_not_regular, + .higher = true, }; HParser *h_bind(const HParser *p, HContinuation k, void *env) diff --git a/src/parsers/bits.c b/src/parsers/bits.c index 716524ce61a0a35cb0f9581646f72a0efa491c7f..be8f13f10a65f67e50d134c5f3557a1a7a209d62 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -102,6 +102,7 @@ static const HParserVtable bits_vt = { .isValidCF = h_true, .desugar = desugar_bits, .compile_to_rvm = bits_ctrvm, + .higher = false, }; HParser* h_bits(size_t len, bool sign) { diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c index f114a1fa5dbff8cdbee6bdf22670c271c2044e2e..24ece4bec6f7f80b0905401be6e72b10f73769f8 100644 --- a/src/parsers/butnot.c +++ b/src/parsers/butnot.c @@ -40,6 +40,7 @@ static const HParserVtable butnot_vt = { .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? 
.compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_butnot(const HParser* p1, const HParser* p2) { diff --git a/src/parsers/ch.c b/src/parsers/ch.c index b4386cff2be1e95158776323d50ff76b00f2afd5..3da1091a4b71505aebdc6ed5b396084d12b1fde4 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -47,6 +47,7 @@ static const HParserVtable ch_vt = { .isValidCF = h_true, .desugar = desugar_ch, .compile_to_rvm = ch_ctrvm, + .higher = false, }; HParser* h_ch(const uint8_t c) { diff --git a/src/parsers/charset.c b/src/parsers/charset.c index e1a910f8df149a16cb74fd7c661c5490e6d80198..a4b8c89c7daca326cf77ee9bf5c8ae4660884c56 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -76,6 +76,7 @@ static const HParserVtable charset_vt = { .isValidCF = h_true, .desugar = desugar_charset, .compile_to_rvm = cs_ctrvm, + .higher = false, }; HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { diff --git a/src/parsers/choice.c b/src/parsers/choice.c index bfc3f904f19a6d88ddc0bd77702b2c45f89c2b0f..dd3908ce93168f468ba6e1ff531a59476e404411 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -75,6 +75,7 @@ static const HParserVtable choice_vt = { .isValidCF = choice_isValidCF, .desugar = desugar_choice, .compile_to_rvm = choice_ctrvm, + .higher = true, }; HParser* h_choice(HParser* p, ...) { diff --git a/src/parsers/difference.c b/src/parsers/difference.c index 76a2cc447002da5a0e04119c016f7bf83fec443e..a24f5acf378c6b801677364f7a5902ae49ec60f1 100644 --- a/src/parsers/difference.c +++ b/src/parsers/difference.c @@ -39,6 +39,7 @@ static HParserVtable difference_vt = { .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? 
.compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_difference(const HParser* p1, const HParser* p2) { diff --git a/src/parsers/end.c b/src/parsers/end.c index 30b3ba121a859b87399a59dc04dc86f3a6104a88..85499d9348cd1df6503428a55d7a2ab878d1ef63 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -25,6 +25,7 @@ static const HParserVtable end_vt = { .isValidCF = h_true, .desugar = desugar_end, .compile_to_rvm = end_ctrvm, + .higher = false, }; HParser* h_end_p() { diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c index e3f53ab8225a75bde08ff7e3dd456822e1234b86..cb3abc3d3d2bf84dfe465aaec7833badbec2b5f6 100644 --- a/src/parsers/endianness.c +++ b/src/parsers/endianness.c @@ -46,6 +46,7 @@ static const HParserVtable endianness_vt = { .isValidCF = h_false, .desugar = NULL, .compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_with_endianness(char endianness, const HParser *p) diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index e8ef525ff79d523ab45c6357cfb852a6c3b4dd96..bb6e8beb31cca3ff09a565171b4e554e07f2ffad 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -18,6 +18,7 @@ static const HParserVtable epsilon_vt = { .isValidCF = h_true, .desugar = desugar_epsilon, .compile_to_rvm = epsilon_ctrvm, + .higher = false, }; HParser* h_epsilon_p() { diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index af606b0ea7567cf4e5068260386d907f10e0c8a7..c56802ac0885fc11429925f353a516d622b88a9d 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -49,6 +49,7 @@ static const HParserVtable ignore_vt = { .isValidCF = ignore_isValidCF, .desugar = desugar_ignore, .compile_to_rvm = ignore_ctrvm, + .higher = true, }; HParser* h_ignore(const HParser* p) { diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c index e562136fdf94eb28cf3f0796463d72f22f42932a..07bdc65c16b7eb1fb6b868999482fdb23dffa700 100644 --- a/src/parsers/ignoreseq.c +++ b/src/parsers/ignoreseq.c @@ -103,6 +103,7 @@ static const 
HParserVtable ignoreseq_vt = { .isValidCF = is_isValidCF, .desugar = desugar_ignoreseq, .compile_to_rvm = is_ctrvm, + .higher = true, }; diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index 026286d3eb3d56be961050fc1467ccae1fdc8516..b36cb947921497f89cd35687f9259052e23a343e 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -31,6 +31,7 @@ static const HParserVtable indirect_vt = { .isValidCF = indirect_isValidCF, .desugar = desugar_indirect, .compile_to_rvm = h_not_regular, + .higher = true, }; void h_bind_indirect__m(HAllocator *mm__, HParser* indirect, const HParser* inner) { diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c index 2937993034c9b18a98f8aeeda7a8eaa6014bdd99..49b064218b507ca13d90dcc2ec654c8ae3a9ee19 100644 --- a/src/parsers/int_range.c +++ b/src/parsers/int_range.c @@ -117,6 +117,7 @@ static const HParserVtable int_range_vt = { .isValidCF = h_true, .desugar = desugar_int_range, .compile_to_rvm = ir_ctrvm, + .higher = false, }; HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { diff --git a/src/parsers/many.c b/src/parsers/many.c index 1e3b0221ceae76c782a317e1c5c17b21a496f4a1..6496bbe61860a9a20c83b2eff6ab007c630a3e77 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -59,6 +59,32 @@ static bool many_isValidCF(void *env) { repeat->sep->vtable->isValidCF(repeat->sep->env))); } +// turn (_ x (_ y (_ z ()))) into (x y z) where '_' are optional +static HParsedToken *reshape_many(const HParseResult *p, void *user) +{ + HCountedArray *seq = h_carray_new(p->arena); + + const HParsedToken *tok = p->ast; + while(tok) { + assert(tok->token_type == TT_SEQUENCE); + if(tok->seq->used > 0) { + size_t n = tok->seq->used; + assert(n <= 3); + h_carray_append(seq, tok->seq->elements[n-2]); + tok = tok->seq->elements[n-1]; + } else { + tok = NULL; + } + } + + HParsedToken *res = a_new_(p->arena, HParsedToken, 1); + res->token_type = TT_SEQUENCE; + res->seq = seq; + res->index = 
p->ast->index; + res->bit_offset = p->ast->bit_offset; + return res; +} + static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { // TODO: refactor this. HRepeat *repeat = (HRepeat*)env; @@ -93,7 +119,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { HCFS_BEGIN_CHOICE() { // Mar HCFS_BEGIN_SEQ() { if (repeat->sep != NULL) { - HCFS_DESUGAR(h_ignore__m(mm__, repeat->sep)); + HCFS_DESUGAR(repeat->sep); } //stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry. HCFS_DESUGAR(repeat->p); @@ -108,7 +134,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { //HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p())); } HCFS_END_SEQ(); } - HCFS_THIS_CHOICE->reshape = h_act_flatten; + HCFS_THIS_CHOICE->reshape = reshape_many; } HCFS_END_CHOICE(); } @@ -173,6 +199,7 @@ static const HParserVtable many_vt = { .isValidCF = many_isValidCF, .desugar = desugar_many, .compile_to_rvm = many_ctrvm, + .higher = true, }; HParser* h_many(const HParser* p) { @@ -246,7 +273,7 @@ static HParseResult* parse_length_value(void *env, HParseState *state) { if (!len) return NULL; if (len->ast->token_type != TT_UINT) - errx(1, "Length parser must return an unsigned integer"); + h_platform_errx(1, "Length parser must return an unsigned integer"); // TODO: allocate this using public functions HRepeat repeat = { .p = lv->value, diff --git a/src/parsers/not.c b/src/parsers/not.c index 6c34bad48dc09ca2a290ce351b89e921422da265..8c2003dec77b946c50db3d0f62b7117a8ff12f69 100644 --- a/src/parsers/not.c +++ b/src/parsers/not.c @@ -15,6 +15,7 @@ static const HParserVtable not_vt = { .isValidRegular = h_false, /* see and.c for why */ .isValidCF = h_false, .compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this. 
+ .higher = true, }; HParser* h_not(const HParser* p) { diff --git a/src/parsers/nothing.c b/src/parsers/nothing.c index 120c1e01d0824ab5a70e39f96c2c19657ea0bf18..0a60108bcc2c0fe69a656fb1cfb4f067ff290922 100644 --- a/src/parsers/nothing.c +++ b/src/parsers/nothing.c @@ -22,6 +22,7 @@ static const HParserVtable nothing_vt = { .isValidCF = h_true, .desugar = desugar_nothing, .compile_to_rvm = nothing_ctrvm, + .higher = false, }; HParser* h_nothing_p() { diff --git a/src/parsers/optional.c b/src/parsers/optional.c index ccee53fa864469600db64bb76562ce469559d09e..726606643056b103f9481cb882dadc19417dd607 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -84,6 +84,7 @@ static const HParserVtable optional_vt = { .isValidCF = opt_isValidCF, .desugar = desugar_optional, .compile_to_rvm = opt_ctrvm, + .higher = true, }; HParser* h_optional(const HParser* p) { diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c index 564565af555a0059a8a85773a86f2ae9a320df0f..b16758413eeafe2ce2ae91db2ebbe7593681d3cd 100644 --- a/src/parsers/permutation.c +++ b/src/parsers/permutation.c @@ -104,6 +104,7 @@ static const HParserVtable permutation_vt = { .isValidCF = h_false, .desugar = NULL, .compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_permutation(HParser* p, ...) { diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index 93c0cfb983200a33b7909fd1b2c73114711beac5..30de34a4885d7c56afdf6a0f00e0d34167a08dc2 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -93,6 +93,7 @@ static const HParserVtable sequence_vt = { .isValidCF = sequence_isValidCF, .desugar = desugar_sequence, .compile_to_rvm = sequence_ctrvm, + .higher = true, }; HParser* h_sequence(HParser* p, ...) 
{ diff --git a/src/parsers/token.c b/src/parsers/token.c index d36ec54be4c07a35b729da71455c5bc3b3555cbc..19029726ad11a52fa0eadf62b67a7b15cd2e4744 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -73,6 +73,7 @@ const HParserVtable token_vt = { .isValidCF = h_true, .desugar = desugar_token, .compile_to_rvm = token_ctrvm, + .higher = false, }; HParser* h_token(const uint8_t *str, const size_t len) { diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index e3f3039407eacaa1d24689767a4a1038fce66a93..e085858bcf45f4219f111be3ba1328868a4aad4d 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -18,6 +18,7 @@ static const HParserVtable unimplemented_vt = { .isValidCF = h_false, .desugar = NULL, .compile_to_rvm = h_not_regular, + .higher = true, }; static HParser unimplemented = { diff --git a/src/parsers/value.c b/src/parsers/value.c index 531db7cb5274c30d3d482ee5bc84add58c1e9af7..7fa863a15b4abacdec1f5463f72c335121584c72 100644 --- a/src/parsers/value.c +++ b/src/parsers/value.c @@ -26,6 +26,7 @@ static const HParserVtable put_vt = { .isValidRegular = h_false, .isValidCF = h_false, .compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_put_value(const HParser* p, const char* name) { @@ -55,6 +56,7 @@ static const HParserVtable get_vt = { .isValidRegular = h_false, .isValidCF = h_false, .compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_get_value(const char* name) { diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 04284e86e61d242c58a1c42689607ecfd3794dfe..970a32c8b57209a66f3588bddb4ea30de9f87454 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -75,6 +75,7 @@ static const HParserVtable whitespace_vt = { .isValidCF = ws_isValidCF, .desugar = desugar_whitespace, .compile_to_rvm = ws_ctrvm, + .higher = false, }; HParser* h_whitespace(const HParser* p) { diff --git a/src/parsers/xor.c b/src/parsers/xor.c index 
e031d5d542f80d345324c746e63d255e3b308655..3a3f21d27a928bf6d2d180eeb39763c918275fd0 100644 --- a/src/parsers/xor.c +++ b/src/parsers/xor.c @@ -36,6 +36,7 @@ static const HParserVtable xor_vt = { .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? .compile_to_rvm = h_not_regular, + .higher = true, }; HParser* h_xor(const HParser* p1, const HParser* p2) { diff --git a/src/platform.h b/src/platform.h new file mode 100644 index 0000000000000000000000000000000000000000..e6eb7ec4d97ec5a47d5d613e1a67c5fc4d9f2dd8 --- /dev/null +++ b/src/platform.h @@ -0,0 +1,64 @@ +#ifndef HAMMER_PLATFORM__H +#define HAMMER_PLATFORM__H + +/** + * @file interface between hammer and the operating system / + * underlying platform. + */ + +#include "compiler_specifics.h" + +#include <stdarg.h> +#include <stdint.h> + +/* String Formatting */ + +/** see GNU C asprintf */ +int h_platform_asprintf(char **strp, const char *fmt, ...); + +/** see GNU C vasprintf */ +int h_platform_vasprintf(char **strp, const char *fmt, va_list arg); + +/* Error Reporting */ + +/* BSD errx function, seen in err.h */ +H_MSVC_DECLSPEC(noreturn) \ +void h_platform_errx(int err, const char* format, ...) 
\ + H_GCC_ATTRIBUTE((noreturn, format (printf,2,3))); + +/* Time Measurement */ + +struct HStopWatch; /* forward definition */ + +/* initialize a stopwatch */ +void h_platform_stopwatch_reset(struct HStopWatch* stopwatch); + +/* return difference between last reset point and now */ +int64_t h_platform_stopwatch_ns(struct HStopWatch* stopwatch); + +/* Platform dependent definitions for HStopWatch */ +#if defined(_MSC_VER) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +#undef WIN32_LEAN_AND_MEAN + +struct HStopWatch { + LARGE_INTEGER qpf; + LARGE_INTEGER start; +}; + +#else +/* Unix like platforms */ + +#include <time.h> + +struct HStopWatch { + struct timespec start; +}; + +#endif + +#endif diff --git a/src/platform_bsdlike.c b/src/platform_bsdlike.c new file mode 100644 index 0000000000000000000000000000000000000000..2ccf874264a740e0784e8fba14e2ae78a337fa08 --- /dev/null +++ b/src/platform_bsdlike.c @@ -0,0 +1,83 @@ +#define _GNU_SOURCE // to obtain asprintf/vasprintf +#include "platform.h" + +#include <stdio.h> + +#include <err.h> +#include <stdarg.h> + +#ifdef __MACH__ +#include <mach/clock.h> +#include <mach/mach.h> +#endif + +#ifdef __NetBSD__ +#include <sys/resource.h> +#endif + +int h_platform_asprintf(char **strp, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + int res = h_platform_vasprintf(strp, fmt, ap); + va_end(ap); + return res; +} + +int h_platform_vasprintf(char **strp, const char *fmt, va_list arg) +{ + return vasprintf(strp, fmt, arg); +} + +void h_platform_errx(int err, const char* format, ...) { + va_list ap; + va_start(ap, format); + verrx(err, format, ap); +} + +// TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer) + +static void gettime(struct timespec *ts) { + if (ts == NULL) + return; +#ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time + /* + * This returns real time, not CPU time. 
See http://stackoverflow.com/a/6725161 + * Possible solution: http://stackoverflow.com/a/11659289 + */ + clock_serv_t cclock; + mach_timespec_t mts; + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); + clock_get_time(cclock, &mts); + mach_port_deallocate(mach_task_self(), cclock); + ts->tv_sec = mts.tv_sec; + ts->tv_nsec = mts.tv_nsec; +#elif defined(__NetBSD__) + // NetBSD doesn't have CLOCK_THREAD_CPUTIME_ID. We'll use getrusage instead + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + ts->tv_nsec = (rusage.ru_utime.tv_usec + rusage.ru_stime.tv_usec) * 1000; + // not going to overflow; can be at most 2e9-2 + ts->tv_sec = rusage.ru_utime.tv_sec + rusage.ru_stime.tv_sec; + if (ts->tv_nsec >= 1000000000) { + ts->tv_nsec -= 1000000000; // subtract a second + ts->tv_sec += 1; // add it back. + } + assert (ts->tv_nsec <= 1000000000); +#else + clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts); +#endif +} + +void h_platform_stopwatch_reset(struct HStopWatch* stopwatch) { + gettime(&stopwatch->start); +} + +int64_t h_platform_stopwatch_ns(struct HStopWatch* stopwatch) { + struct timespec ts_now; + gettime(&ts_now); + + // time_diff is in ns + return (int64_t)(ts_now.tv_sec - stopwatch->start.tv_sec) * 1000000000 + + (ts_now.tv_nsec - stopwatch->start.tv_nsec); +} diff --git a/src/platform_win32.c b/src/platform_win32.c new file mode 100644 index 0000000000000000000000000000000000000000..9824b526d0bf0273660088f0cf24cb81507dad82 --- /dev/null +++ b/src/platform_win32.c @@ -0,0 +1,61 @@ +#include "platform.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +int h_platform_asprintf(char**strp, const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + int res = h_platform_vasprintf(strp, fmt, ap); + va_end(ap); + return res; +} + +int h_platform_vasprintf(char**strp, const char *fmt, va_list args) +{ + va_list ap; + va_copy(ap, args); + int non_null_char_count = _vscprintf(fmt, ap); + va_end(ap); + + if (non_null_char_count < 0) { + return -1; + } + + size_t buffer_size = 1 + non_null_char_count; + char* buffer = malloc(buffer_size); + if (!buffer) { return -1; } + va_copy(ap, args); + int res = vsnprintf_s(buffer, buffer_size, non_null_char_count, fmt, ap); + if (res < 0) { + free(buffer); + } else { + buffer[non_null_char_count] = 0; + *strp = buffer; + } + va_end(ap); + + return res; +} + +void h_platform_errx(int err, const char* format, ...) { + // FIXME(windows) TODO(uucidl): to be implemented + ExitProcess(err); +} + +void h_platform_stopwatch_reset(struct HStopWatch* stopwatch) { + QueryPerformanceFrequency(&stopwatch->qpf); + QueryPerformanceCounter(&stopwatch->start); +} + +/* return difference between last reset point and now */ +int64_t h_platform_stopwatch_ns(struct HStopWatch* stopwatch) { + LARGE_INTEGER now; + QueryPerformanceCounter(&now); + + return 1000000000 * (now.QuadPart - stopwatch->start.QuadPart) / stopwatch->qpf.QuadPart; +} diff --git a/src/pprint.c b/src/pprint.c index 8abbf5a7f4771f52d34badc0788969b735e7c893..11ec3d67411df66043ddd7880edeb22cb5d7db51 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -15,7 +15,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#define _GNU_SOURCE +#include "platform.h" + #include <stdio.h> #include <string.h> #include "hammer.h" @@ -67,11 +68,11 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { } break; case TT_USER: - fprintf(stream, "%*sUSER\n", indent, ""); + fprintf(stream, "%*sUSER:%s\n", indent, "", h_get_token_type_name(tok->token_type)); break; default: if(tok->token_type > TT_USER) { - fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER); + fprintf(stream, "%*sUSER:%s %d\n", indent, "", h_get_token_type_name(tok->token_type), tok->token_type-TT_USER); } else { assert_message(0, "Should not reach here."); } @@ -85,25 +86,53 @@ struct result_buf { size_t capacity; }; -static inline void ensure_capacity(struct result_buf *buf, int amt) { - while (buf->len + amt >= buf->capacity) - buf->output = realloc(buf->output, buf->capacity *= 2); +static inline bool ensure_capacity(struct result_buf *buf, int amt) { + while (buf->len + amt >= buf->capacity) { + buf->output = (&system_allocator)->realloc(&system_allocator, buf->output, buf->capacity *= 2); + if (!buf->output) { + return false; + } + } + return true; } -static inline void append_buf(struct result_buf *buf, const char* input, int len) { - ensure_capacity(buf, len); - memcpy(buf->output + buf->len, input, len); - buf->len += len; +static inline bool append_buf(struct result_buf *buf, const char* input, int len) { + if (ensure_capacity(buf, len)) { + memcpy(buf->output + buf->len, input, len); + buf->len += len; + return true; + } else { + return false; + } } -static inline void append_buf_c(struct result_buf *buf, char v) { - ensure_capacity(buf, 1); - buf->output[buf->len++] = v; +static inline bool append_buf_c(struct result_buf *buf, char v) { + if (ensure_capacity(buf, 1)) { + buf->output[buf->len++] = v; + return true; + } else { + return false; + } } -static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { +/** append a formatted string to the result buffer 
*/ +static inline bool append_buf_formatted(struct result_buf *buf, char* format, ...) +{ char* tmpbuf; int len; + bool result; + va_list ap; + + va_start(ap, format); + len = h_platform_vasprintf(&tmpbuf, format, ap); + result = append_buf(buf, tmpbuf, len); + free(tmpbuf); + va_end(ap); + + return result; +} + +static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { if (!tok) { append_buf(buf, "NULL", 4); return; @@ -128,16 +157,12 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { break; case TT_SINT: if (tok->sint < 0) - len = asprintf(&tmpbuf, "s-%#" PRIx64, -tok->sint); + append_buf_formatted(buf, "s-%#" PRIx64, -tok->sint); else - len = asprintf(&tmpbuf, "s%#" PRIx64, tok->sint); - append_buf(buf, tmpbuf, len); - free(tmpbuf); + append_buf_formatted(buf, "s%#" PRIx64, tok->sint); break; case TT_UINT: - len = asprintf(&tmpbuf, "u%#" PRIx64, tok->uint); - append_buf(buf, tmpbuf, len); - free(tmpbuf); + append_buf_formatted(buf, "u%#" PRIx64, tok->uint); break; case TT_ERR: append_buf(buf, "ERR", 3); @@ -161,10 +186,13 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { char* h_write_result_unamb(const HParsedToken* tok) { struct result_buf buf = { - .output = malloc(16), + .output = (&system_allocator)->alloc(&system_allocator, 16), .len = 0, .capacity = 16 }; + if (!buf.output) { + return NULL; + } unamb_sub(tok, &buf); append_buf_c(&buf, 0); return buf.output; diff --git a/src/registry.c b/src/registry.c index 60aa8863e53b5c4c32175adb430fc87df069e901..2ebac1a94ad37a501c7c10a50c3eb6a45fc74dd0 100644 --- a/src/registry.c +++ b/src/registry.c @@ -46,24 +46,31 @@ static int compare_entries(const void* v1, const void* v2) { } HTokenType h_allocate_token_type(const char* name) { - Entry* new_entry = malloc(sizeof(*new_entry)); + Entry* new_entry = (&system_allocator)->alloc(&system_allocator, sizeof(*new_entry)); + if (!new_entry) { + return TT_INVALID; + } new_entry->name = name; new_entry->value = 
0; Entry* probe = *(Entry**)tsearch(new_entry, &tt_registry, compare_entries); if (probe->value != 0) { // Token type already exists... // TODO: treat this as a bug? - free(new_entry); + (&system_allocator)->free(&system_allocator, new_entry); return probe->value; } else { // new value probe->name = strdup(probe->name); // drop ownership of name probe->value = tt_next++; if ((probe->value - TT_START) >= tt_by_id_sz) { - if (tt_by_id_sz == 0) + if (tt_by_id_sz == 0) { tt_by_id = malloc(sizeof(*tt_by_id) * ((tt_by_id_sz = (tt_next - TT_START) * 16))); - else + } else { tt_by_id = realloc(tt_by_id, sizeof(*tt_by_id) * ((tt_by_id_sz *= 2))); + } + if (!tt_by_id) { + return TT_INVALID; + } } assert(probe->value - TT_START < tt_by_id_sz); tt_by_id[probe->value - TT_START] = probe; diff --git a/src/system_allocator.c b/src/system_allocator.c index b34810fa3ba29db6de3c0aa43e74fa29f9aed77b..39a1a7e77040c865f2d4f99977eb264391286bb4 100644 --- a/src/system_allocator.c +++ b/src/system_allocator.c @@ -2,34 +2,82 @@ #include <stdlib.h> #include "internal.h" -//#define DEBUG__MEMFILL 0xFF +// NOTE(uucidl): uncomment to automatically fill up newly allocated blocks +// with this byte: +// #define DEBUG__MEMFILL 0xFF + +#if defined(DEBUG__MEMFILL) +/** + * Blocks allocated by the system_allocator start with this header. + * I.e. the user part of the allocation directly follows. + */ +typedef struct HDebugBlockHeader_ +{ + size_t size; /** size of the user allocation */ +} HDebugBlockHeader; + +#define BLOCK_HEADER_SIZE (sizeof(HDebugBlockHeader)) +#else +#define BLOCK_HEADER_SIZE (0) +#endif + +/** + * Compute the total size needed for a given allocation size. 
+ */ +static inline size_t block_size(size_t alloc_size) { + return BLOCK_HEADER_SIZE + alloc_size; +} + +/** + * Obtain the block containing the user pointer `uptr` + */ +static inline void* block_for_user_ptr(void *uptr) { + return ((char*)uptr) - BLOCK_HEADER_SIZE; +} + +/** + * Obtain the user area of the allocation from a given block + */ +static inline void* user_ptr(void *block) { + return ((char*)block) + BLOCK_HEADER_SIZE; +} static void* system_alloc(HAllocator *allocator, size_t size) { - - void* ptr = malloc(size + sizeof(size_t)); + void *block = malloc(block_size(size)); + if (!block) { + return NULL; + } + void *uptr = user_ptr(block); #ifdef DEBUG__MEMFILL - memset(ptr, DEBUG__MEMFILL, size + sizeof(size_t)); + memset(uptr, DEBUG__MEMFILL, size); + ((HDebugBlockHeader*)block)->size = size; #endif - *(size_t*)ptr = size; - return ptr + sizeof(size_t); + return uptr; } -static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) { - if (ptr == NULL) +static void* system_realloc(HAllocator *allocator, void* uptr, size_t size) { + if (!uptr) { return system_alloc(allocator, size); - ptr = realloc(ptr - sizeof(size_t), size + sizeof(size_t)); - *(size_t*)ptr = size; + } + void* block = realloc(block_for_user_ptr(uptr), block_size(size)); + if (!block) { + return NULL; + } + uptr = user_ptr(block); + #ifdef DEBUG__MEMFILL - size_t old_size = *(size_t*)ptr; + size_t old_size = ((HDebugBlockHeader*)block)->size; if (size > old_size) - memset(ptr+sizeof(size_t)+old_size, DEBUG__MEMFILL, size - old_size); + memset((char*)uptr+old_size, DEBUG__MEMFILL, size - old_size); + ((HDebugBlockHeader*)block)->size = size; #endif - return ptr + sizeof(size_t); + return uptr; } -static void system_free(HAllocator *allocator, void* ptr) { - if (ptr != NULL) - free(ptr - sizeof(size_t)); +static void system_free(HAllocator *allocator, void* uptr) { + if (uptr) { + free(block_for_user_ptr(uptr)); + } } HAllocator system_allocator = { diff --git 
a/src/t_parser.c b/src/t_parser.c index df9567ed201b1d07d1ebdf9e815fd625ba8de5c8..c42eca91321c241a1987b99116c8c90deefbdf64 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -1,6 +1,7 @@ #include <glib.h> #include <string.h> #include "hammer.h" +#include "glue.h" #include "internal.h" #include "test_suite.h" #include "parsers/parser_internal.h" @@ -443,6 +444,143 @@ static void test_rightrec(gconstpointer backend) { g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))"); } +static void test_iterative(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + p = h_token((uint8_t*)"foobar", 6); + g_check_parse_chunks_match(p, be, "foo",3, "bar",3, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "foo",3, "barbaz",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_failed(p, be, "fou",3, "bar",3); + g_check_parse_chunks_failed(p, be, "foo",3, "par",3); + g_check_parse_chunks_failed(p, be, "foo",3, "baz",3); + + p = h_sequence(h_ch('f'), h_token((uint8_t*)"ooba", 4), h_ch('r'), NULL); + g_check_parse_chunks_match(p, be, "foo",3, "bar",3, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunks_match(p, be, "foo",3, "barbaz",6, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunks_failed(p, be, "fou",3, "bar",3); + g_check_parse_chunks_failed(p, be, "foo",3, "par",3); + g_check_parse_chunks_failed(p, be, "foo",3, "baz",3); + + p = h_choice(h_token((uint8_t*)"foobar", 6), + h_token((uint8_t*)"phupar", 6), NULL); + g_check_parse_chunks_match(p, be, "foo",3, "bar",3, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "foo",3, "barbaz",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "phu",3, "par",3, "<70.68.75.70.61.72>"); + g_check_parse_chunks_failed(p, be, "fou",3, "bar",3); + g_check_parse_chunks_failed(p, be, "foo",3, "baz",3); + g_check_parse_chunks_match(p, be, "foobar",6, "",0, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, 
"",0, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_failed(p, be, "foo",3, "",0); + g_check_parse_chunks_failed(p, be, "",0, "foo",3); + + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"uu", 2), NULL), NULL); + g_check_parse_chunks_match(p, be, "f",1, "oo",2, "(u0x66 <6f.6f>)"); + g_check_parse_chunks_match(p, be, "f",1, "uu",2, "(u0x66 <75.75>)"); + g_check_parse_chunks_failed(p, be, "g",1, "oo",2); + g_check_parse_chunks_failed(p, be, "f",1, "ou",2); + g_check_parse_chunks_failed(p, be, "f",1, "uo",2); +} + +static void test_iterative_lookahead(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + // needs 2 lookahead + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"ou", 2), NULL), NULL); + if(h_compile(p, be, (void *)2) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + // partial chunk consumed + g_check_parse_chunks_match_(p, "fo",2, "o",1, "(u0x66 <6f.6f>)"); + g_check_parse_chunks_match_(p, "fo",2, "u",1, "(u0x66 <6f.75>)"); + g_check_parse_chunks_failed_(p, "go",2, "o",1); + g_check_parse_chunks_failed_(p, "fa",2, "u",1); + g_check_parse_chunks_failed_(p, "fo",2, "b",1); +} + +static void test_iterative_result_length(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p = h_token((uint8_t*)"foobar", 6); + + if(h_compile(p, be, NULL) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + HSuspendedParser *s = h_parse_start(p); + if(!s) { + g_test_message("Chunked parsing not available"); + g_test_fail(); + return; + } + h_parse_chunk(s, (uint8_t*)"foo", 3); + h_parse_chunk(s, (uint8_t*)"ba", 2); + h_parse_chunk(s, (uint8_t*)"rbaz", 4); + HParseResult *r = h_parse_finish(s); + if(!r) { + g_test_message("Parse failed"); + g_test_fail(); + return; + } + + g_check_cmp_int64(r->bit_length, ==, 48); +} + +static void 
test_result_length(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p = h_token((uint8_t*)"foo", 3); + + if(h_compile(p, be, NULL) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6); + if(!r) { + g_test_message("Parse failed"); + g_test_fail(); + return; + } + + g_check_cmp_int64(r->bit_length, ==, 24); +} + +static void test_token_position(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p = h_sequence(h_token((uint8_t*)"foo",3), + h_token((uint8_t*)"bar",3), NULL); + + if(h_compile(p, be, NULL) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6); + if(!r) { + g_test_message("Parse failed"); + g_test_fail(); + return; + } + + assert(r->ast != NULL); + HParsedToken *foo = H_INDEX_TOKEN(r->ast, 0); + HParsedToken *bar = H_INDEX_TOKEN(r->ast, 1); + + g_check_cmp_uint64(foo->index, ==, 0); + g_check_cmp_uint64(foo->bit_offset, ==, 0); + g_check_cmp_uint64(bar->index, ==, 3); + g_check_cmp_uint64(bar->bit_offset, ==, 0); +} + static void test_ambiguous(gconstpointer backend) { HParser *d_ = h_ch('d'); HParser *p_ = h_ch('+'); @@ -653,6 +791,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation); g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); + g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length); + //g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); 
g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); @@ -691,6 +831,11 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore); //g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec); g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec); + g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length); + //g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position); + g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative); + g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead); + g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length); g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token); g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch); @@ -703,8 +848,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/uint32", GINT_TO_POINTER(PB_REGULAR), test_uint32); g_test_add_data_func("/core/parser/regex/uint16", GINT_TO_POINTER(PB_REGULAR), test_uint16); g_test_add_data_func("/core/parser/regex/uint8", GINT_TO_POINTER(PB_REGULAR), test_uint8); - g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range); #if 0 + g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range); g_test_add_data_func("/core/parser/regex/float64", GINT_TO_POINTER(PB_REGULAR), test_float64); g_test_add_data_func("/core/parser/regex/float32", GINT_TO_POINTER(PB_REGULAR), test_float32); #endif @@ -728,6 +873,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), 
test_epsilon_p); g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool); g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore); + g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length); + g_test_add_data_func("/core/parser/regex/token_position", GINT_TO_POINTER(PB_REGULAR), test_token_position); g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token); g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch); @@ -767,6 +914,11 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec); g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne); g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); + g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length); + g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position); + g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative); + g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead); + g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length); g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token); g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch); @@ -807,4 +959,6 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne); g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec); g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous); + 
g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length); + g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position); } diff --git a/src/t_regression.c b/src/t_regression.c index d05cbde0d0419addfac081a4a9292bbc18a007c4..aa78f2c8c02ad218db7e89f0543c06ebdc3e7f25 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -95,8 +95,90 @@ static void test_read_bits_48(void) { } } +static void test_llk_zero_end(void) { + HParserBackend be = PB_LLk; + HParser *z = h_ch('\x00'); + HParser *az = h_sequence(h_ch('a'), z, NULL); + HParser *ze = h_sequence(z, h_end_p(), NULL); + HParser *aze = h_sequence(h_ch('a'), z, h_end_p(), NULL); + + // some cases surrounding the bug + g_check_parse_match (z, be, "\x00", 1, "u0"); + g_check_parse_failed(z, be, "", 0); + g_check_parse_match (ze, be, "\x00", 1, "(u0)"); + g_check_parse_failed(ze, be, "\x00b", 2); + g_check_parse_failed(ze, be, "", 0); + g_check_parse_match (az, be, "a\x00", 2, "(u0x61 u0)"); + g_check_parse_match (aze, be, "a\x00", 2, "(u0x61 u0)"); + g_check_parse_failed(aze, be, "a\x00b", 3); + + // the following should not parse but did when the LL(k) backend failed to + // check for the end of input, mistaking it for a zero character. + g_check_parse_failed(az, be, "a", 1); + g_check_parse_failed(aze, be, "a", 1); +} + +static void test_lalr_charset_lhs(void) { + HParserBackend be = PB_LALR; + + HParser *p = h_many(h_choice(h_sequence(h_ch('A'), h_ch('B'), NULL), + h_in((uint8_t*)"AB",2), NULL)); + + // the above would abort because of an unhandled case in trying to resolve + // a conflict where an item's left-hand-side was an HCF_CHARSET. + // however, the compile should fail - the conflict cannot be resolved. + + if(h_compile(p, be, NULL) == 0) { + g_test_message("LALR compile didn't detect ambiguous grammar"); + + // it says it compiled it - well, then it should parse it! 
+ // (this helps us see what it thinks it should be doing.) + g_check_parse_match(p, be, "AA",2, "(u0x41 u0x41)"); + g_check_parse_match(p, be, "AB",2, "((u0x41 u0x42))"); + + g_test_fail(); + return; + } +} + +static void test_cfg_many_seq(void) { + HParser *p = h_many(h_sequence(h_ch('A'), h_ch('B'), NULL)); + + g_check_parse_match(p, PB_LLk, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))"); + g_check_parse_match(p, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))"); + g_check_parse_match(p, PB_GLR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))"); + // these would instead parse as (u0x41 u0x42 u0x41 u0x42) due to a faulty + // reshape on h_many. +} + +static uint8_t test_charset_bits__buf[256]; +static void *test_charset_bits__alloc(HAllocator *allocator, size_t size) +{ + g_check_cmp_uint64(size, ==, 256/8); + assert(size <= 256); + return test_charset_bits__buf; +} +static void test_charset_bits(void) { + // charset would allocate 256 bytes instead of 256 bits (= 32 bytes) + + HAllocator alloc = { + .alloc = test_charset_bits__alloc, + .realloc = NULL, + .free = NULL, + }; + test_charset_bits__buf[32] = 0xAB; + HCharset cs = new_charset(&alloc); + for(size_t i=0; i<32; i++) + g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0); + g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB); +} + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); g_test_add_func("/core/regression/read_bits_48", test_read_bits_48); + g_test_add_func("/core/regression/llk_zero_end", test_llk_zero_end); + g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs); + g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq); + g_test_add_func("/core/regression/charset_bits", test_charset_bits); } diff --git a/src/test_suite.h b/src/test_suite.h index 9a58a20fc40fe266ae286e047dfb81bed09869c8..49f13cf81c50864eb8ae03ed705f582a7dd1ca0f 100644 --- 
a/src/test_suite.h +++ b/src/test_suite.h @@ -21,6 +21,8 @@ #include <stdlib.h> #include <inttypes.h> +#include "internal.h" + // Equivalent to g_assert_*, but not using g_assert... #define g_check_inttype(fmt, typ, n1, op, n2) do { \ typ _n1 = (n1); \ @@ -88,7 +90,8 @@ #define g_check_parse_failed(parser, backend, input, inp_len) do { \ int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ if(skip != 0) { \ - g_test_message("Backend not applicable, skipping test"); \ + g_test_message("Compile failed"); \ + g_test_fail(); \ break; \ } \ const HParseResult *result = h_parse(parser, (const uint8_t*)input, inp_len); \ @@ -101,7 +104,8 @@ #define g_check_parse_ok(parser, backend, input, inp_len) do { \ int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ if(skip) { \ - g_test_message("Backend not applicable, skipping test"); \ + g_test_message("Compile failed"); \ + g_test_fail(); \ break; \ } \ HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \ @@ -122,7 +126,8 @@ #define g_check_parse_match(parser, backend, input, inp_len, result) do { \ int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ if(skip) { \ - g_test_message("Backend not applicable, skipping test"); \ + g_test_message("Compile failed"); \ + g_test_fail(); \ break; \ } \ HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \ @@ -132,7 +137,70 @@ } else { \ char* cres = h_write_result_unamb(res->ast); \ g_check_string(cres, ==, result); \ - free(cres); \ + (&system_allocator)->free(&system_allocator, cres); \ + HArenaStats stats; \ + h_allocator_stats(res->arena, &stats); \ + g_test_message("Parse used %zd bytes, wasted %zd bytes. " \ + "Inefficiency: %5f%%", \ + stats.used, stats.wasted, \ + stats.wasted * 100. 
/ (stats.used+stats.wasted)); \ + h_delete_arena(res->arena); \ + } \ + } while(0) + +#define g_check_parse_chunks_failed(parser, backend, chunk1, c1_len, chunk2, c2_len) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunks_failed_(parser, chunk1, c1_len, chunk2, c2_len); \ + } while(0) + +#define g_check_parse_chunks_failed_(parser, chunk1, c1_len, chunk2, c2_len) do { \ + HSuspendedParser *s = h_parse_start(parser); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + h_parse_chunk(s, (const uint8_t*)chunk2, c2_len); \ + const HParseResult *res = h_parse_finish(s); \ + if (NULL != res) { \ + g_test_message("Check failed: shouldn't have succeeded, but did"); \ + g_test_fail(); \ + } \ + } while(0) + +#define g_check_parse_chunks_match(parser, backend, chunk1, c1_len, chunk2, c2_len, result) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunks_match_(parser, chunk1, c1_len, chunk2, c2_len, result); \ + } while(0) + +#define g_check_parse_chunks_match_(parser, chunk1, c1_len, chunk2, c2_len, result) do { \ + HSuspendedParser *s = h_parse_start(parser); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + h_parse_chunk(s, (const uint8_t*)chunk2, c2_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + char* cres = h_write_result_unamb(res->ast); \ + g_check_string(cres, ==, result); \ + (&system_allocator)->free(&system_allocator, cres); \ HArenaStats stats; \ 
h_allocator_stats(res->arena, &stats); \ g_test_message("Parse used %zd bytes, wasted %zd bytes. " \ diff --git a/tools/windows/README.md b/tools/windows/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3b28eea4af4d8c436d747e3117e947c38411453c --- /dev/null +++ b/tools/windows/README.md @@ -0,0 +1 @@ +Support tools for the Windows (win32/win64) port. \ No newline at end of file diff --git a/tools/windows/build.bat b/tools/windows/build.bat new file mode 100644 index 0000000000000000000000000000000000000000..20f878acab296420cb8b29fa36df74aad26eeb44 --- /dev/null +++ b/tools/windows/build.bat @@ -0,0 +1,47 @@ +@echo off +setlocal + +REM This script must be run after vcvarsall.bat has been run, +REM so that cl.exe is in your path. +where cl.exe || goto vsmissing_err + +REM HEREPATH is <drive_letter>:<script_directory> +set HEREPATH=%~d0%~p0 + +REM Set up SRC, BUILD and CLFLAGS +call %HEREPATH%\env.bat +call %HEREPATH%\clvars.bat + +echo SRC=%SRC%, BUILD=%BUILD% +echo Building with flags: %CLFLAGS% + +pushd %SRC% +mkdir %BUILD%\obj +del /Q %BUILD%\obj\ + +cl.exe -nologo -FC -EHsc -Z7 -Oi -GR- -Gm- %CLFLAGS% -c ^ + @%HEREPATH%\hammer_lib_src_list ^ + -Fo%BUILD%\obj\ +if %errorlevel% neq 0 goto err + +lib.exe %BUILD%\obj\*.obj -OUT:%BUILD%\hammer.lib +if %errorlevel% neq 0 goto err +echo STATIC_LIBRARY %BUILD%\hammer.lib +popd + +REM TODO(uucidl): how to build and run the tests? They are written with glib.h +REM which might be a challenge on windows. On the other hand the API of glib.h +REM does not seem too hard to reimplement. + +echo SUCCESS: Successfully built +endlocal +exit /b 0 + +:vsmissing_err +echo ERROR: CL.EXE missing. Have you run vcvarsall.bat? 
+exit /b 1 + +:err +endlocal +echo ERROR: Failed to build +exit /b %errorlevel% diff --git a/tools/windows/build_examples.bat b/tools/windows/build_examples.bat new file mode 100644 index 0000000000000000000000000000000000000000..c431faebcd29d7b1a1aaeaa77558b948fc3454f0 --- /dev/null +++ b/tools/windows/build_examples.bat @@ -0,0 +1,53 @@ +@echo off +setlocal + +REM This script must be run after vcvarsall.bat has been run, +REM so that cl.exe is in your path. +where cl.exe || goto vsmissing_err + +REM HEREPATH is <drive_letter>:<script_directory> +set HEREPATH=%~d0%~p0 + +REM Set up SRC, BUILD and CLFLAGS +call %HEREPATH%\env.bat +call %HEREPATH%\clvars.bat + +echo SRC=%SRC%, BUILD=%BUILD% +echo CLFLAGS=%CLFLAGS% + +set HAMMERLIB=%BUILD%\hammer.lib + +REM Now let's build some example programs + +cl.exe -nologo %CLFLAGS% examples\base64.c %HAMMERLIB% -Fo%BUILD%\ -Fe%BUILD%\ +if %errorlevel% neq 0 goto err +echo PROGRAM build\base64.exe +cl.exe -nologo %CLFLAGS% examples\base64_sem1.c %HAMMERLIB% -Fo%BUILD%\ -Fe%BUILD%\ +if %errorlevel% neq 0 goto err +echo PROGRAM build\base64_sem1.exe +cl.exe -nologo %CLFLAGS% examples\base64_sem2.c %HAMMERLIB% -Fo%BUILD%\ -Fe%BUILD%\ +if %errorlevel% neq 0 goto err +echo PROGRAM build\base64_sem2.exe + +REM FIXME(windows) TODO(uucidl): dns.c only works on posix +REM cl.exe -nologo %CLFLAGS% examples\dns.c %HAMMERLIB% -Fo%BUILD%\ -Fe%BUILD%\ +REM if %errorlevel% neq 0 goto err +REM echo PROGRAM build\dns.exe + +REM FIXME(windows) TODO(uucidl): grammar.c needs to be fixed +cl.exe -nologo %CLFLAGS% examples\ties.c examples\grammar.c %HAMMERLIB% -Fo%BUILD%\ -Fe%BUILD%\ +if %errorlevel% neq 0 goto err +echo PROGRAM build\ties.exe + +echo SUCCESS: Successfully built +endlocal +exit /b 0 + +:vsmissing_err +echo ERROR: CL.EXE missing. Have you run vcvarsall.bat? 
+exit /b 1 + +:err +echo ERROR: Failed to build +endlocal +exit /b %errorlevel% diff --git a/tools/windows/clvars.bat b/tools/windows/clvars.bat new file mode 100644 index 0000000000000000000000000000000000000000..8e29226871988207083b61197b5efea11c1ebb69 --- /dev/null +++ b/tools/windows/clvars.bat @@ -0,0 +1,59 @@ +REM Don't call me directly +REM Exports CLFLAGS + +REM Start with the most strict warning level +set WARNINGS=-W4 -Wall -WX + +REM c4457 (declaration shadowing function parameter) +REM FIXME(windows) TODO(uucidl): remove occurence of c4457 and reactivate +REM FIXME(windows) TODO(uucidl): remove occurence of c4456 and reactivate +REM see -Wshadow +set WARNINGS=%WARNINGS% -wd4457 -wd4456 + +REM c4701 (potentially unitialized local variable) +REM FIXME(windows) TODO(uucidl): remove occurence of c4701 if possible +set WARNINGS=%WARNINGS% -wd4701 + +REM We disable implicit casting warnings (c4244), as they occur too often here. +REM Its gcc/clang counterpart is Wconversion which does not seem to +REM be enabled by default. +REM See: [[https://gcc.gnu.org/wiki/NewWconversion#Frequently_Asked_Questions]] +REM +REM Likewise for c4242 (conversion with potential loss of data) and c4267 +REM (conversion away from size_t to a smaller type) and c4245 (conversion +REM from int to size_t signed/unsigned mismatch) +set WARNINGS=%WARNINGS% -wd4242 -wd4244 -wd4245 -wd4267 + +REM c4100 (unreferenced formal parameter) is equivalent to -Wno-unused-parameter +set WARNINGS=%WARNINGS% -wd4100 + +REM c4200 (zero-sized array) is a C idiom supported by C99 +set WARNINGS=%WARNINGS% -wd4200 + +REM c4204 (non-constant aggregate initializers) ressembles C99 support +set WARNINGS=%WARNINGS% -wd4204 + +REM c4201 (anonymous unions) ressembles C11 support. 
+REM see -std=gnu99 vs -std=c99 +set WARNINGS=%WARNINGS% -wd4201 + +REM c4820 (warnings about padding) and c4324 (intentional padding) are +REM not useful +set WARNINGS=%WARNINGS% -wd4820 -wd4324 + +REM c4710 (inlining could not be performed) is not useful +set WARNINGS=%WARNINGS% -wd4710 + +REM c4255 ( () vs (void) ambiguity) is not useful +set WARNINGS=%WARNINGS% -wd4255 + +REM c4127 (conditional expression is constant) is not useful +set WARNINGS=%WARNINGS% -wd4127 + +REM c4668 (an undefined symbol in a preprocessor directive) is not useful +set WARNINGS=%WARNINGS% -wd4668 + +REM we use sprintf so this should be enabled +set DEFINES=-D_CRT_SECURE_NO_WARNINGS + +set CLFLAGS=-Od -Z7 %DEFINES% %WARNINGS% -Debug diff --git a/tools/windows/env.bat b/tools/windows/env.bat new file mode 100644 index 0000000000000000000000000000000000000000..4037578cccb96202cdd20541cb84932e22a663ab --- /dev/null +++ b/tools/windows/env.bat @@ -0,0 +1,16 @@ +REM Don't call me directly. +REM +REM Expects HEREPATH (this directory) +REM Exports SRC (hammer's src directory) +REM Exports BUILD (hammer's build directory) + +set TOP=%HEREPATH%..\.. + +REM Get canonical path for TOP +pushd . 
+cd %TOP% +set TOP=%CD% +popd + +set SRC=%TOP%\src +set BUILD=%TOP%\build diff --git a/tools/windows/hammer_lib_src_list b/tools/windows/hammer_lib_src_list new file mode 100644 index 0000000000000000000000000000000000000000..a8a4dc4790e73ed6b64544c196416eaebc1e3db3 --- /dev/null +++ b/tools/windows/hammer_lib_src_list @@ -0,0 +1,42 @@ +platform_win32.c +allocator.c +benchmark.c +bitreader.c +bitwriter.c +cfgrammar.c +desugar.c +glue.c +hammer.c +pprint.c +system_allocator.c +parsers/action.c +parsers/and.c +parsers/attr_bool.c +parsers/butnot.c +parsers/ch.c +parsers/charset.c +parsers/difference.c +parsers/end.c +parsers/endianness.c +parsers/epsilon.c +parsers/ignore.c +parsers/ignoreseq.c +parsers/indirect.c +parsers/int_range.c +parsers/many.c +parsers/not.c +parsers/optional.c +parsers/permutation.c +parsers/sequence.c +parsers/token.c +parsers/unimplemented.c +parsers/whitespace.c +parsers/xor.c +parsers/value.c +backends/packrat.c +backends/llk.c +backends/regex.c +backends/glr.c +backends/lalr.c +backends/lr.c +backends/lr0.c diff --git a/tools/windows/status.bat b/tools/windows/status.bat new file mode 100644 index 0000000000000000000000000000000000000000..4f8bd11f9f7567f32cd17f843d2918ac6fd1e14d --- /dev/null +++ b/tools/windows/status.bat @@ -0,0 +1 @@ +git grep "FIXME(windows)"