diff --git a/SConstruct b/SConstruct index 972199a5eb57d24986090a21ed0535dcab79be6c..f74f8d579850f57dc78d94aff23b3506568a0dbb 100644 --- a/SConstruct +++ b/SConstruct @@ -1,6 +1,7 @@ # -*- python -*- import os import os.path +import platform import sys @@ -44,7 +45,7 @@ env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backen env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig") env.ScanReplace('libhammer.pc.in') -env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes") +env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable") if env['PLATFORM'] == 'darwin': env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}') diff --git a/src/backends/glr.c b/src/backends/glr.c index 353d0e6433100357287e40487299aa8327baad07..e753ea55d938cc07582ceed83db92354cbacf68f 100644 --- a/src/backends/glr.c +++ b/src/backends/glr.c @@ -9,6 +9,9 @@ static bool glr_step(HParseResult **result, HSlist *engines, int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params) { + if (!parser->vtable->isValidCF(parser->env)) { + return -1; + } int result = h_lalr_compile(mm__, parser, params); if(result == -1 && parser->backend_data) { diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 93becf31b23f0ba5a1204441442c96622aa55fe7..14f64cd1a23cf2276a1377e0d1b78c3a24125ed8 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -49,8 +49,9 @@ static inline HLRTransition *transition(HArena *arena, static void transform_productions(const HLRTable *table, HLREnhGrammar *eg, size_t x, HCFChoice *xAy) { - if(xAy->type != HCF_CHOICE) + if (xAy->type != HCF_CHOICE) { return; + } // XXX CHARSET? HArena *arena = eg->arena; @@ -89,7 +90,7 @@ static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym) *esym = *sym; HHashSet *cs = h_hashtable_get(eg->corr, sym); - if(!cs) { + if (!cs) { cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol); h_hashtable_put(eg->corr, sym, cs); } @@ -151,9 +152,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti { int ret = 0; - if(fs->epsilon_branch) { + if (fs->epsilon_branch) { HLRAction *prev = tmap->epsilon_branch; - if(prev && prev != action) { + if (prev && prev != action) { // conflict tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action); ret = -1; @@ -162,9 +163,9 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti } } - if(fs->end_branch) { + if (fs->end_branch) { HLRAction *prev = tmap->end_branch; - if(prev && prev != action) { + if (prev && prev != action) { // conflict tmap->end_branch = h_lr_conflict(tmap->arena, prev, action); ret = -1; @@ -176,13 +177,14 @@ static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *acti H_FOREACH(fs->char_branches, void *key, HStringMap *fs_) HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key); - if(!tmap_) { + if (!tmap_) { tmap_ = h_stringmap_new(tmap->arena); h_hashtable_put(tmap->char_branches, key, tmap_); } - if(terminals_put(tmap_, fs_, action) < 0) + if (terminals_put(tmap_, fs_, action) < 0) { ret = -1; + } H_END_FOREACH return ret; @@ -197,8 +199,9 @@ static bool match_production(HLREnhGrammar *eg, HCFChoice **p, for(; *p && *rhs; p++, rhs++) { HLRTransition *t = h_hashtable_get(eg->smap, *p); assert(t != NULL); - if(!h_eq_symbol(t->symbol, *rhs)) + if (!h_eq_symbol(t->symbol, *rhs)) { return false; + } state = t->to; } return (*p == *rhs // both NULL @@ -231,18 +234,21 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) // build LR(0) table // if necessary, resolve conflicts "by conversion to SLR" + if (!parser->vtable->isValidCF(parser->env)) { + return -1; + } HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser)); if(g == NULL) // backend not suitable (language not context-free) return -1; HLRDFA *dfa = h_lr0_dfa(g); - if(dfa == NULL) { // this should normally not happen + if (dfa == NULL) { // this should normally not happen h_cfgrammar_free(g); return -1; } HLRTable *table = h_lr0_table(g, dfa); - if(table == NULL) { // this should normally not happen + if (table == NULL) { // this should normally not happen h_cfgrammar_free(g); return -1; } @@ -282,10 +288,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) H_FOREACH_KEY(lhss, HCFChoice *lhs) assert(lhs->type == HCF_CHOICE); // XXX could be CHARSET? - for(HCFSequence **p=lhs->seq; *p; p++) { + for(HCFSequence **p=lhs->seq; *p; p++) { HCFChoice **rhs = (*p)->items; - if(!match_production(eg, rhs, item->rhs, state)) + if(!match_production(eg, rhs, item->rhs, state)) { continue; + } // the left-hand symbol's follow set is this production's // contribution to the lookahead @@ -297,11 +304,12 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) // for each lookahead symbol, put action into table cell if(terminals_put(table->tmap[state], fs, action) < 0) inadeq = true; - } H_END_FOREACH // enhanced production + } H_END_FOREACH // enhanced production H_END_FOREACH // reducible item - if(inadeq) + if(inadeq) { h_slist_push(table->inadeq, (void *)(uintptr_t)state); + } } } @@ -350,7 +358,7 @@ int test_lalr(void) printf("\n==== G R A M M A R ====\n"); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); - if(g == NULL) { + if (g == NULL) { fprintf(stderr, "h_cfgrammar failed\n"); return 1; } @@ -358,21 +366,23 @@ int test_lalr(void) printf("\n==== D F A ====\n"); HLRDFA *dfa = h_lr0_dfa(g); - if(dfa) + if (dfa) { h_pprint_lrdfa(stdout, g, dfa, 0); - else + } else { fprintf(stderr, "h_lalr_dfa failed\n"); + } printf("\n==== L R ( 0 ) T A B L E ====\n"); HLRTable *table0 = h_lr0_table(g, dfa); - if(table0) + if (table0) { h_pprint_lrtable(stdout, g, table0, 0); - else + } else { fprintf(stderr, "h_lr0_table failed\n"); + } h_lrtable_free(table0); printf("\n==== L A L R T A B L E ====\n"); - if(h_compile(p, PB_LALR, NULL)) { + if (h_compile(p, PB_LALR, NULL)) { fprintf(stderr, "does not compile\n"); return 2; } @@ -380,10 +390,10 @@ int test_lalr(void) printf("\n==== P A R S E R E S U L T ====\n"); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13); - if(res) + if (res) { h_pprint(stdout, res->ast, 0, 2); - else + } else { printf("no parse\n"); - + } return 0; } diff --git a/src/backends/regex.c b/src/backends/regex.c index a3c073c8914e3b99058106ea3a58780d388bcb59..c4f6a2bfffbcd65640febe01813694694583d6c6 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -35,10 +35,12 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, HRVMTrace *invert_trace(HRVMTrace *trace) { HRVMTrace *last = NULL; - if (!trace) + if (!trace) { return NULL; - if (!trace->next) + } + if (!trace->next) { return trace; + } do { HRVMTrace *next = trace->next; trace->next = last; @@ -83,8 +85,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ h_sarray_clear(heads_n); } memset(insn_seen, 0, prog->length); // no insns seen yet - if (!live_threads) + if (!live_threads) { goto match_fail; + } live_threads = 0; HRVMTrace *tr_head; H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) { @@ -111,8 +114,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ hi = (arg >> 8) & 0xff; lo = arg & 0xff; THREAD.ip++; - if (ch < lo || ch > hi) - ipq_top--; // terminate thread + if (ch < lo || ch > hi) { + ipq_top--; // terminate thread + } goto next_insn; case RVM_GOTO: THREAD.ip = arg; @@ -141,8 +145,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ goto next_insn; case RVM_EOF: THREAD.ip++; - if (off != len) + if (off != len) { ipq_top--; // Terminate thread + } goto next_insn; case RVM_STEP: // save thread @@ -249,8 +254,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* env) { for (uint16_t i = 0; i < prog->action_count; i++) { - if (prog->actions[i].action == action_func && prog->actions[i].env == env) + if (prog->actions[i].action == action_func && prog->actions[i].env == env) { return i; + } } // Ensure that there's room in the action array... if (!(prog->action_count & (prog->action_count + 1))) { @@ -294,8 +300,9 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) { size_t h_svm_count_to_mark(HSVMContext *ctx) { size_t ctm; for (ctm = 0; ctm < ctx->stack_count; ctm++) { - if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) + if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) { return ctm; + } } return ctx->stack_count; } @@ -320,8 +327,10 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) { } bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) { - while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) + while (ctx->stack_count > 0) { + if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) { return true; + } } return false; // no mark found. } @@ -343,8 +352,9 @@ static void h_regex_free(HParser *parser) { } static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) { - if (!parser->vtable->isValidRegular(parser->env)) - return 1; + if (!parser->vtable->isValidRegular(parser->env)) { + return -1; + } HRVMProg *prog = h_new(HRVMProg, 1); prog->length = prog->action_count = 0; prog->insns = NULL; diff --git a/src/benchmark.c b/src/benchmark.c index 632d7db3ba1321b1fb0fa6532f9fb76719725a5b..408bfdb22716a31afb16a2dc75c72cf0fa34da7d 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -80,13 +80,14 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest // Step 1: Compile grammar for given parser... if (h_compile(parser, backend, NULL) == -1) { // backend inappropriate for grammar... - fprintf(stderr, "failed\n"); + fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]); ret->results[backend].compile_success = false; ret->results[backend].n_testcases = 0; ret->results[backend].failed_testcases = 0; ret->results[backend].cases = NULL; continue; } + fprintf(stderr, "Compiled for %s\n", HParserBackendNames[backend]); ret->results[backend].compile_success = true; int tc_failed = 0; // Step 1: verify all test cases. @@ -103,7 +104,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest if ((res_unamb == NULL && tc->output_unambiguous != NULL) || (res_unamb != NULL && strcmp(res_unamb, tc->output_unambiguous) != 0)) { // test case failed... - fprintf(stderr, "failed\n"); + fprintf(stderr, "Parsing with %s failed\n", HParserBackendNames[backend]); // We want to run all testcases, for purposes of generating a // report. (eg, if users are trying to fix a grammar for a // faster backend) @@ -115,7 +116,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest if (tc_failed > 0) { // Can't use this parser; skip to the next - fprintf(stderr, "Backend failed testcases; skipping benchmark\n"); + fprintf(stderr, "%s failed testcases; skipping benchmark\n", HParserBackendNames[backend]); continue; } @@ -140,6 +141,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); } while (time_diff < 100000000); ret->results[backend].cases[cur_case].parse_time = (time_diff / count); + ret->results[backend].cases[cur_case].length = tc->length; cur_case++; } } @@ -148,11 +150,16 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest void h_benchmark_report(FILE* stream, HBenchmarkResults* result) { for (size_t i=0; i<result->len; ++i) { - fprintf(stream, "Backend %zd ... \n", i); + if (result->results[i].cases == NULL) { + fprintf(stream, "Skipping %s because grammar did not compile for it\n", HParserBackendNames[i]); + } else { + fprintf(stream, "Backend %zd (%s) ... \n", i, HParserBackendNames[i]); + } for (size_t j=0; j<result->results[i].n_testcases; ++j) { - if(result->results[i].cases == NULL) + if (result->results[i].cases == NULL) { continue; - fprintf(stream, "Case %zd: %zd ns/parse\n", j, result->results[i].cases[j].parse_time); + } + fprintf(stream, "Case %zd: %zd ns/parse, %zd ns/byte\n", j, result->results[i].cases[j].parse_time, result->results[i].cases[j].parse_time / result->results[i].cases[j].length); } } } diff --git a/src/cfgrammar.c b/src/cfgrammar.c index ab7388302b24c9bb1c8767fc5d66e990fa39c165..a8761b8d537ec236f7a4876e1ad86a30742df988 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -46,11 +46,14 @@ static void collect_geneps(HCFGrammar *grammar); HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) { + if (!parser->vtable->isValidCF(parser->env)) { + return NULL; + } // convert parser to CFG form ("desugar"). HCFChoice *desugared = h_desugar(mm__, NULL, parser); - if(desugared == NULL) + if (desugared == NULL) { return NULL; // -> backend not suitable for this parser - + } return h_cfgrammar_(mm__, desugared); } @@ -61,7 +64,7 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared) // recursively traverse the desugared form and collect all HCFChoices that // represent a nonterminal (type HCF_CHOICE or HCF_CHARSET). collect_nts(g, desugared); - if(h_hashset_empty(g->nts)) { + if (h_hashset_empty(g->nts)) { // desugared is a terminal. wrap it in a singleton HCF_CHOICE. HCFChoice *nt = h_new(HCFChoice, 1); nt->type = HCF_CHOICE; @@ -92,8 +95,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol) HCFSequence **s; // for the rhs (sentential form) of a production HCFChoice **x; // for a symbol in s - if(h_hashset_present(grammar->nts, symbol)) + if (h_hashset_present(grammar->nts, symbol)) { return; // already visited, get out + } switch(symbol->type) { case HCF_CHAR: @@ -127,8 +131,9 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol) /* Increase g->kmax if needed, allocating enough first/follow slots. */ static void ensure_k(HCFGrammar *g, size_t k) { - if(k <= g->kmax) return; - + if (k <= g->kmax) { + return; + } // NB: we don't actually use first/follow[0] but allocate it anyway // so indices of the array correspond neatly to values of k @@ -136,7 +141,7 @@ static void ensure_k(HCFGrammar *g, size_t k) HHashTable **first = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *)); HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *)); - if(g->kmax > 0) { + if (g->kmax > 0) { // we are resizing, copy the old tables over for(size_t i=0; i<=g->kmax; i++) { first[i] = g->first[i]; @@ -181,8 +186,9 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s) { // return true iff all symbols in s derive epsilon for(; *s; s++) { - if(!h_derives_epsilon(g, *s)) + if (!h_derives_epsilon(g, *s)) { return false; + } } return true; } @@ -190,8 +196,9 @@ bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s) /* Populate the geneps member of g; no-op if called multiple times. */ static void collect_geneps(HCFGrammar *g) { - if(g->geneps != NULL) + if (g->geneps != NULL) { return; + } g->geneps = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr); assert(g->geneps != NULL); @@ -206,15 +213,16 @@ static void collect_geneps(HCFGrammar *g) HHashTableEntry *hte; for(i=0; i < g->nts->capacity; i++) { for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + if (hte->key == NULL) { continue; + } const HCFChoice *symbol = hte->key; assert(symbol->type == HCF_CHOICE); // this NT derives epsilon if any one of its productions does. HCFSequence **p; for(p = symbol->seq; *p != NULL; p++) { - if(h_derives_epsilon_seq(g, (*p)->items)) { + if (h_derives_epsilon_seq(g, (*p)->items)) { h_hashset_put(g->geneps, symbol); break; } @@ -262,8 +270,9 @@ static void *combine_stringmap(void *v1, const void *v2) { HStringMap *m1 = v1; const HStringMap *m2 = v2; - if(!m1) + if (!m1) { m1 = h_stringmap_new(m2->arena); + } h_stringmap_update(m1, m2); return m1; @@ -272,12 +281,12 @@ static void *combine_stringmap(void *v1, const void *v2) /* Note: Does *not* reuse submaps from n in building m. */ void h_stringmap_update(HStringMap *m, const HStringMap *n) { - if(n->epsilon_branch) + if (n->epsilon_branch) { m->epsilon_branch = n->epsilon_branch; - - if(n->end_branch) + } + if (n->end_branch) { m->end_branch = n->end_branch; - + } h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches); } @@ -294,24 +303,34 @@ HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m) */ void h_stringmap_replace(HStringMap *m, void *old, void *new) { - if(!old) { - if(m->epsilon_branch) m->epsilon_branch = new; - if(m->end_branch) m->end_branch = new; + if (!old) { + if (m->epsilon_branch) { + m->epsilon_branch = new; + } + if (m->end_branch) { + m->end_branch = new; + } } else { - if(m->epsilon_branch == old) m->epsilon_branch = new; - if(m->end_branch == old) m->end_branch = new; + if (m->epsilon_branch == old) { + m->epsilon_branch = new; + } + if (m->end_branch == old) { + m->end_branch = new; + } } // iterate over m->char_branches const HHashTable *ht = m->char_branches; - for(size_t i=0; i < ht->capacity; i++) { - for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + for (size_t i=0; i < ht->capacity; i++) { + for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { continue; + } HStringMap *m_ = hte->value; - if(m_) + if (m_) { h_stringmap_replace(m_, old, new); + } } } } @@ -319,11 +338,13 @@ void h_stringmap_replace(HStringMap *m, void *old, void *new) void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end) { for(size_t i=0; i<n; i++) { - if(i==n-1 && end && m->end_branch) + if (i==n-1 && end && m->end_branch) { return m->end_branch; + } m = h_stringmap_get_char(m, str[i]); - if(!m) + if (!m) { return NULL; + } } return m->epsilon_branch; } @@ -331,7 +352,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) { while(m) { - if(m->epsilon_branch) { // input matched + if (m->epsilon_branch) { // input matched // assert: another lookahead would not bring a more specific match. // this is for the table generator to ensure. (LLk) return m->epsilon_branch; @@ -341,7 +362,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) // reading bits from it does not consume them from the real input. uint8_t c = h_read_bits(&lookahead, 8, false); - if(lookahead.overrun) { // end of input + if (lookahead.overrun) { // end of input // XXX assumption of byte-wise grammar and input return m->end_branch; } @@ -377,14 +398,15 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x) uint8_t c; // shortcut: first_0(X) is always {""} - if(k==0) + if (k==0) { return g->singleton_epsilon; - + } // memoize via g->first ensure_k(g, k); ret = h_hashtable_get(g->first[k], x); - if(ret != NULL) + if (ret != NULL) { return ret; + } ret = h_stringmap_new(g->arena); assert(ret != NULL); h_hashtable_put(g->first[k], x, ret); @@ -399,7 +421,7 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x) case HCF_CHARSET: c=0; do { - if(charset_isset(x->charset, c)) { + if (charset_isset(x->charset, c)) { h_stringmap_put_char(ret, c, INSET); } } while(c++ < 255); @@ -432,9 +454,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) { // shortcut: the first set of the empty sequence, for any k, is {""} - if(*s == NULL) + if (*s == NULL) { return g->singleton_epsilon; - + } // first_k(X tail) = { a b | a <- first_k(X), b <- first_l(tail), l=k-|a| } HCFChoice *x = s[0]; @@ -443,12 +465,14 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) const HStringMap *first_x = h_first(k, g, x); // shortcut: if first_k(X) = {""}, just return first_k(tail) - if(is_singleton_epsilon(first_x)) + if (is_singleton_epsilon(first_x)) { return h_first_seq(k, g, tail); + } // shortcut: if no elements of first_k(X) have length <k, just return first_k(X) - if(!any_string_shorter(k, first_x)) + if (!any_string_shorter(k, first_x)) { return first_x; + } // create a new result set and build up the set described above HStringMap *ret = h_stringmap_new(g->arena); @@ -468,23 +492,25 @@ static bool is_singleton_epsilon(const HStringMap *m) static bool any_string_shorter(size_t k, const HStringMap *m) { - if(k==0) + if (k==0) { return false; - - if(m->epsilon_branch) + } + if (m->epsilon_branch) { return true; - + } // iterate over m->char_branches const HHashTable *ht = m->char_branches; - for(size_t i=0; i < ht->capacity; i++) { - for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + for (size_t i=0; i < ht->capacity; i++) { + for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { continue; + } HStringMap *m_ = hte->value; // check subtree for strings shorter than k-1 - if(any_string_shorter(k-1, m_)) + if (any_string_shorter(k-1, m_)) { return true; + } } } @@ -494,16 +520,21 @@ static bool any_string_shorter(size_t k, const HStringMap *m) // helper for h_predict static void remove_all_shorter(size_t k, HStringMap *m) { - if(k==0) return; + if (k==0) { + return; + } m->epsilon_branch = NULL; - if(k==1) return; + if (k==1) { + return; + } // iterate over m->char_branches const HHashTable *ht = m->char_branches; - for(size_t i=0; i < ht->capacity; i++) { - for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + for (size_t i=0; i < ht->capacity; i++) { + for (HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { continue; + } remove_all_shorter(k-1, hte->value); // recursion into subtree } } @@ -530,39 +561,41 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) HStringMap *ret; // shortcut: follow_0(X) is always {""} - if(k==0) + if (k==0) { return g->singleton_epsilon; - + } // memoize via g->follow ensure_k(g, k); ret = h_hashtable_get(g->follow[k], x); - if(ret != NULL) + if (ret != NULL) { return ret; + } ret = h_stringmap_new(g->arena); assert(ret != NULL); h_hashtable_put(g->follow[k], x, ret); // if X is the start symbol, the end token is in its follow set - if(x == g->start) + if (x == g->start) { h_stringmap_put_end(ret, INSET); - + } // iterate over g->nts size_t i; HHashTableEntry *hte; - for(i=0; i < g->nts->capacity; i++) { - for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + for (i=0; i < g->nts->capacity; i++) { + for (hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { continue; + } HCFChoice *a = (void *)hte->key; // production's left-hand symbol assert(a->type == HCF_CHOICE); // iterate over the productions for A HCFSequence **p; - for(p=a->seq; *p; p++) { + for (p=a->seq; *p; p++) { HCFChoice **s = (*p)->items; // production's right-hand side - for(; *s; s++) { - if(*s == x) { // occurance found + for (; *s; s++) { + if (*s == x) { // occurance found HCFChoice **tail = s+1; const HStringMap *first_tail = h_first_seq(k, g, tail); @@ -604,12 +637,12 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, size_t k, const HStringMap *as, StringSetFun f, HCFChoice **tail) { - if(as->epsilon_branch) { + if (as->epsilon_branch) { // for a="", add f_k(tail) to ret h_stringmap_update(ret, f(k, g, tail)); } - if(as->end_branch) { + if (as->end_branch) { // for a="$", nothing can follow; just add "$" to ret // NB: formally, "$" is considered to be of length k h_stringmap_put_end(ret, INSET); @@ -619,8 +652,9 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, const HHashTable *ht = as->char_branches; for(size_t i=0; i < ht->capacity; i++) { for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + if (hte->key == NULL) { continue; + } uint8_t c = key_char((HCharKey)hte->key); // follow the branch to find the set { a' | t a' <- as } @@ -648,7 +682,7 @@ void h_pprint_char(FILE *f, char c) case '\n': fputs("\\n", f); break; case '\r': fputs("\\r", f); break; default: - if(isprint((int)c)) { + if (isprint((int)c)) { fputc(c, f); } else { fprintf(f, "\\x%.2X", c); @@ -672,11 +706,11 @@ static void pprint_charset(FILE *f, const HCharset cs) fputc('[', f); for(i=0; i<256; i++) { - if(charset_isset(cs, i)) { + if (charset_isset(cs, i)) { pprint_charset_char(f, i); // detect ranges - if(i+2<256 && charset_isset(cs, i+1) && charset_isset(cs, i+2)) { + if (i+2<256 && charset_isset(cs, i+1) && charset_isset(cs, i+2)) { fputc('-', f); for(; i<256 && charset_isset(cs, i); i++); i--; // back to the last in range @@ -708,8 +742,9 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x) { fputc('"', f); for(; *x; x++) { - if((*x)->type != HCF_CHAR) + if ((*x)->type != HCF_CHAR) { break; + } h_pprint_char(f, (*x)->chr); } fputc('"', f); @@ -739,13 +774,14 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq) { HCFChoice **x = seq->items; - if(*x == NULL) { // the empty sequence + if (*x == NULL) { // the empty sequence fputs("\"\"", f); } else { while(*x) { - if(x != seq->items) fputc(' ', f); // internal separator - - if((*x)->type == HCF_CHAR) { + if (x != seq->items) { + fputc(' ', f); // internal separator + } + if ((*x)->type == HCF_CHAR) { // condense character strings x = pprint_string(f, x); } else { @@ -781,7 +817,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt, assert(nt->type == HCF_CHOICE); HCFSequence **p = nt->seq; - if(*p == NULL) return; // shouldn't happen + if (*p == NULL) { + return; // shouldn't happen + } pprint_sequence(f, g, *p++); // print first production on the same line for(; *p; p++) { // print the rest below with "or" bars for(i=0; i<column; i++) fputc(' ', f); // indent @@ -792,8 +830,9 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt, void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent) { - if(g->nts->used < 1) + if (g->nts->used < 1) { return; + } // determine maximum string length of symbol names int len; @@ -805,8 +844,9 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent) HHashTableEntry *hte; for(i=0; i < g->nts->capacity; i++) { for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + if (hte->key == NULL) { continue; + } const HCFChoice *a = hte->key; // production's left-hand symbol assert(a->type == HCF_CHOICE); @@ -828,10 +868,12 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in const HCFChoice *a = NULL; for(i=0; i < set->capacity; i++) { for(hte = &set->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + if (hte->key == NULL) { continue; - if(a != NULL) // we're not on the first element + } + if(a != NULL) { // we're not on the first element fputc(',', file); + } a = hte->key; // production's left-hand symbol @@ -851,9 +893,12 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, { assert(n < BUFSIZE-4); - if(map->epsilon_branch) { - if(!first) fputc(sep, file); first=false; - if(n==0) { + if (map->epsilon_branch) { + if (!first) { + fputc(sep, file); + first=false; + } + if (n==0) { fputs("\"\"", file); } else { fputs("\"", file); @@ -861,20 +906,27 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, fputs("\"", file); } - if(valprint) { + if (valprint) { fputc(':', file); valprint(file, env, map->epsilon_branch); } } - if(map->end_branch) { - if(!first) fputs(",\"", file); first=false; - if(n>0) fputs("\"\"", file); + if (map->end_branch) { + if (!first) { + fputs(",\"", file); + first=false; + } + if (n>0) { + fputs("\"\"", file); + } fwrite(prefix, 1, n, file); - if(n>0) fputs("\"\"", file); + if (n>0) { + fputs("\"\"", file); + } fputs("$", file); - if(valprint) { + if (valprint) { fputc(':', file); valprint(file, env, map->end_branch); } @@ -886,8 +938,9 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, HHashTableEntry *hte; for(i=0; i < ht->capacity; i++) { for(hte = &ht->contents[i]; hte; hte = hte->next) { - if(hte->key == NULL) + if (hte->key == NULL) { continue; + } uint8_t c = key_char((HCharKey)hte->key); HStringMap *ends = hte->value; @@ -901,10 +954,11 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, case '\n': prefix[n_++] = '\\'; prefix[n_++] = 'n'; break; case '\r': prefix[n_++] = '\\'; prefix[n_++] = 'r'; break; default: - if(isprint(c)) + if (isprint(c)) { prefix[n_++] = c; - else + } else { n_ += sprintf(prefix+n_, "\\x%.2X", c); + } } first = pprint_stringmap_elems(file, first, prefix, n_, diff --git a/src/datastructures.c b/src/datastructures.c index 45a7eba768e4fa0fa25b42aca1eebc5b8946e740..141adcd5ffa9df4d9a4a81269704a2361432266e 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -157,30 +157,67 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) { for (hte = &ht->contents[hashval & (ht->capacity - 1)]; hte != NULL; hte = hte->next) { - if (hte->key == NULL) + if (hte->key == NULL) { continue; - if (hte->hashval != hashval) + } + if (hte->hashval != hashval) { continue; - if (ht->equalFunc(key, hte->key)) + } + if (ht->equalFunc(key, hte->key)) { return hte->value; + } } return NULL; } +void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry); + +void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) { + bool do_resize = false; + size_t old_capacity = ht->capacity; + while (n * 1.3 > ht->capacity) { + ht->capacity *= 2; + do_resize = true; + } + if (!do_resize) + return; + HHashTableEntry *old_contents = ht->contents; + HHashTableEntry *new_contents = h_arena_malloc(ht->arena, sizeof(HHashTableEntry) * ht->capacity); + ht->contents = new_contents; + ht->used = 0; + memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity); + for (size_t i = 0; i < old_capacity; ++i) + for (HHashTableEntry *entry = &old_contents[i]; + entry; + entry = entry->next) + if (entry->key) + h_hashtable_put_raw(ht, entry); + //h_arena_free(ht->arena, old_contents); +} + void h_hashtable_put(HHashTable* ht, const void* key, void* value) { // # Start with a rebalancing - //h_hashtable_ensure_capacity(ht, ht->used + 1); + h_hashtable_ensure_capacity(ht, ht->used + 1); HHashValue hashval = ht->hashFunc(key); + HHashTableEntry entry = { + .key = key, + .value = value, + .hashval = hashval + }; + h_hashtable_put_raw(ht, &entry); +} + +void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) { #ifdef CONSISTENCY_CHECK assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 #endif - HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + HHashTableEntry *hte = &ht->contents[new_entry->hashval & (ht->capacity - 1)]; if (hte->key != NULL) { for(;;) { // check each link, stay on last if not found - if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) + if (hte->hashval == new_entry->hashval && ht->equalFunc(new_entry->key, hte->key)) goto insert_here; if (hte->next == NULL) break; @@ -196,9 +233,9 @@ void h_hashtable_put(HHashTable* ht, const void* key, void* value) { ht->used++; insert_here: - hte->key = key; - hte->value = value; - hte->hashval = hashval; + hte->key = new_entry->key; + hte->value = new_entry->value; + hte->hashval = new_entry->hashval; } void h_hashtable_update(HHashTable *dst, const HHashTable *src) { diff --git a/src/desugar.c b/src/desugar.c index 5ef8f9b95deb9c440c54fc568b73453ad5c69946..b2865f912afaf0c35a63984dea1c58a35917e778 100644 --- a/src/desugar.c +++ b/src/desugar.c @@ -8,14 +8,16 @@ HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) { if (nstk__ == NULL) { nstk__ = h_cfstack_new(mm__); } - if(nstk__->prealloc == NULL) + if (nstk__->prealloc == NULL) { nstk__->prealloc = h_new(HCFChoice, 1); + } // we're going to do something naughty and cast away the const to memoize assert(parser->vtable->desugar != NULL); ((HParser *)parser)->desugared = nstk__->prealloc; parser->vtable->desugar(mm__, nstk__, parser->env); - if (stk__ == NULL) + if (stk__ == NULL) { h_cfstack_free(mm__, nstk__); + } } else if (stk__ != NULL) { HCFS_APPEND(parser->desugared); } diff --git a/src/hammer.h b/src/hammer.h index dc403c0c407fc6f786a3ce96cacea858ec6190ea..f0ac6866731f59e824de55422d3a6e105d357c83 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -46,6 +46,14 @@ typedef enum HParserBackend_ { PB_MAX = PB_GLR } HParserBackend; +static const char* HParserBackendNames[] = { + "Packrat", + "Regular", + "LL(k)", + "LALR", + "GLR" +}; + typedef enum HTokenType_ { // Before you change the explicit values of these, think of the poor bindings ;_; TT_NONE = 1, @@ -178,6 +186,7 @@ typedef struct HCaseResult_ { #else HResultTiming timestamp; #endif + size_t length; } HCaseResult; typedef struct HBackendResults_ { diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index c91eaab5905229b178fa0c888dce1fa056babd88..026286d3eb3d56be961050fc1467ccae1fdc8516 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -1,16 +1,28 @@ #include "parser_internal.h" +typedef struct HIndirectEnv_ { + const HParser* parser; + bool touched; +} HIndirectEnv; + static HParseResult* parse_indirect(void* env, HParseState* state) { - return h_do_parse(env, state); + return h_do_parse(((HIndirectEnv*)env)->parser, state); } static bool indirect_isValidCF(void *env) { - HParser *p = (HParser*)env; - return p->vtable->isValidCF(p->env); + HIndirectEnv *ie = (HIndirectEnv*)env; + if (ie->touched) + return true; + ie->touched = true; + const HParser *p = ie->parser; + // self->vtable->isValidCF = h_true; + bool ret = p->vtable->isValidCF(p->env); + ie->touched = false; + return ret; } static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) { - HCFS_DESUGAR( (HParser *)env ); + HCFS_DESUGAR( ((HIndirectEnv *)env)->parser ); } static const HParserVtable indirect_vt = { @@ -27,12 +39,15 @@ void h_bind_indirect__m(HAllocator *mm__, HParser* indirect, const HParser* inne void h_bind_indirect(HParser* indirect, const HParser* inner) { assert_message(indirect->vtable == &indirect_vt, "You can only bind an indirect parser"); - indirect->env = (void*)inner; + ((HIndirectEnv*)indirect->env)->parser = inner; } HParser* h_indirect() { return h_indirect__m(&system_allocator); } HParser* h_indirect__m(HAllocator* mm__) { - return h_new_parser(mm__, &indirect_vt, NULL); + HIndirectEnv *env = h_new(HIndirectEnv, 1); + env->parser = NULL; + env->touched = false; + return h_new_parser(mm__, &indirect_vt, env); }