diff --git a/src/backends/glr.c b/src/backends/glr.c index 44b0c50cafd08486866eedf17e29c50236434f9b..ea69ea37ebb9275387b2eb67cf0e21fc64ac8960 100644 --- a/src/backends/glr.c +++ b/src/backends/glr.c @@ -225,6 +225,8 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* HLREngine *engine = h_slist_pop(engines); const HLRAction *action = h_lrengine_action(engine); glr_step(&result, engback, engine, action); + // XXX detect ambiguous results - two engines terminating at the same pos + // -> kill both engines, i.e. ignore if there is a later unamb. success } // swap the lists diff --git a/src/backends/lalr.c b/src/backends/lalr.c index ba10e0ca8587c866b72e96d03c1351a9a037d8a2..79a2eca50c1690fab329870bf3e8f7e994927ad3 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -338,7 +338,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) const HStringMap *fs = h_follow(1, eg->grammar, lhs); assert(fs != NULL); assert(fs->epsilon_branch == NULL); - assert(!h_stringmap_empty(fs)); + // NB: there is a case where fs can be empty: when reducing by lhs + // would lead to certain parse failure, by means of h_nothing_p() + // for instance. in that case, the below code correctly adds no + // reduce action. + assert(!h_stringmap_empty(fs)); // XXX // for each lookahead symbol, put action into table cell if(terminals_put(table->tmap[state], fs, action) < 0) @@ -351,6 +355,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) h_slist_push(table->inadeq, (void *)(uintptr_t)state); } } + + h_cfgrammar_free(eg->grammar); } h_cfgrammar_free(g); diff --git a/src/cfgrammar.c b/src/cfgrammar.c index aea45a452987a5d5e7728146a84253ad0d23f416..bd69588a70395e007cd8cf83eaa5a3cce69e771e 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -6,10 +6,25 @@ #include <ctype.h> +// type of pairs used as memoization keys by h_follow and h_first +struct k_nt {size_t k; const HCFChoice *nt;}; + // a special map value for use when the map is used to represent a set static void * const INSET = (void *)(uintptr_t)1; +static bool eq_k_nt(const void *p, const void *q) +{ + const struct k_nt *a=p, *b=q; + return a->k == b->k && a->nt == b->nt; +} + +static HHashValue hash_k_nt(const void *p) +{ + const struct k_nt *x = p; + return h_hash_ptr(x->nt) * x->k; +} + HCFGrammar *h_cfgrammar_new(HAllocator *mm__) { HCFGrammar *g = h_new(HCFGrammar, 1); @@ -20,14 +35,17 @@ HCFGrammar *h_cfgrammar_new(HAllocator *mm__) g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr); g->start = NULL; g->geneps = NULL; - g->first = NULL; - g->follow = NULL; - g->kmax = 0; // will be increased as needed by ensure_k + g->first = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt); + g->follow = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt); HStringMap *eps = h_stringmap_new(g->arena); h_stringmap_put_epsilon(eps, INSET); g->singleton_epsilon = eps; + HStringMap *end = h_stringmap_new(g->arena); + h_stringmap_put_end(end, INSET); + g->singleton_end = end; + return g; } @@ -132,42 +150,6 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol) } } -/* Increase g->kmax if needed, allocating enough first/follow slots. */ -static void ensure_k(HCFGrammar *g, size_t k) -{ - if (k <= g->kmax) { - return; - } - // NB: we don't actually use first/follow[0] but allocate it anyway - // so indices of the array correspond neatly to values of k - - // allocate the new arrays - HHashTable **first = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *)); - HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *)); - - if (g->kmax > 0) { - // we are resizing, copy the old tables over - for(size_t i=0; i<=g->kmax; i++) { - first[i] = g->first[i]; - follow[i] = g->follow[i]; - } - } else { - // we are initializing, allocate the first (in fact, dummy) tables - first[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr); - follow[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr); - } - - // allocate the new tables - for(size_t i=g->kmax+1; i<=k; i++) { - first[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr); - follow[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr); - } - - g->first = first; - g->follow = follow; - g->kmax = k; -} - bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol) { // XXX this can now also be implemented in terms of h_first: @@ -314,6 +296,7 @@ HStringMap *h_stringmap_new(HArena *a) m->end_branch = NULL; m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr); m->arena = a; + m->taint = false; return m; } @@ -470,30 +453,65 @@ bool h_stringmap_empty(const HStringMap *m) && h_hashtable_empty(m->char_branches)); } -const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x) +static bool eq_stringmap(const void *a, const void *b) +{ + return h_stringmap_equal(a, b); +} + +bool h_stringmap_equal(const HStringMap *a, const HStringMap *b) { + if (a->epsilon_branch != b->epsilon_branch) + return false; + if (a->end_branch != b->end_branch) + return false; + return h_hashtable_equal(a->char_branches, b->char_branches, eq_stringmap); +} + +// helper for h_follow and h_first +bool workset_equal(HHashTable *a, HHashTable *b) +{ + if (a == NULL || b == NULL) + return (a == b); + else + return h_hashtable_equal(a, b, eq_stringmap); +} + +static const HStringMap * +h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s); + +static const HStringMap * +h_first_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x) +{ + HHashTable *ws = *pws; HStringMap *ret; HCFSequence **p; uint8_t c; + struct k_nt kx = {k,x}; + struct k_nt *pkx = NULL; + bool taint = false; // shortcut: first_0(X) is always {""} if (k==0) { return g->singleton_epsilon; } - // memoize via g->first - ensure_k(g, k); - ret = h_hashtable_get(g->first[k], x); + // shortcut: first_k($) is always {$} + if (x->type == HCF_END) { + return g->singleton_end; + } + + // check memoization and workset + ret = h_hashtable_get(g->first, &kx); + if (ret == NULL && ws != NULL) + ret = h_hashtable_get(ws, &kx); if (ret != NULL) { return ret; } + + // not found, create result ret = h_stringmap_new(g->arena); assert(ret != NULL); - h_hashtable_put(g->first[k], x, ret); switch(x->type) { - case HCF_END: - h_stringmap_put_end(ret, INSET); - break; case HCF_CHAR: h_stringmap_put_char(ret, x->chr, INSET); break; @@ -507,30 +525,75 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x) break; case HCF_CHOICE: // this is a nonterminal + + // to avoid recursive loops, taint ret and place it in workset + ret->taint = true; + if (ws == NULL) + ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt); + pkx = h_arena_malloc(g->arena, sizeof kx); + *pkx = kx; + h_hashtable_put(ws, pkx, ret); + // return the union of the first sets of all productions - for(p=x->seq; *p; ++p) - h_stringmap_update(ret, h_first_seq(k, g, (*p)->items)); + for(p=x->seq; *p; ++p) { + const HStringMap *first_rhs = h_first_seq_work(k, g, pws, (*p)->items); + assert(ws == *pws); // call above did not change the workset pointer + taint |= first_rhs->taint; + h_stringmap_update(ret, first_rhs); + } break; default: // should not be reached - assert_message(0, "unknown HCFChoice type"); + assert_message(0, "unexpected HCFChoice type"); + } + + // immediately memoize ret and remove it from ws if untainted by recursion + if (!taint) { + if (pkx == NULL) { + pkx = h_arena_malloc(g->arena, sizeof kx); + *pkx = kx; + } else if (ws != NULL) { + // we already had a key, so ret might (will) be in ws; remove it. + h_hashtable_del(ws, pkx); + } + ret->taint = false; + h_hashtable_put(g->first, pkx, ret); } return ret; } +const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x) +{ + HHashTable *ws, *bak; + const HStringMap *ret; + + // fixpoint iteration on workset + ws = NULL; + do { + bak = ws; + ws = NULL; + ret = h_first_work(k, g, &ws, x); + } while(!workset_equal(ws, bak)); + + assert(ret != NULL); + return ret; +} + // helpers for h_first_seq, definitions below static bool is_singleton_epsilon(const HStringMap *m); static bool any_string_shorter(size_t k, const HStringMap *m); // pointer to functions like h_first_seq -typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **); +typedef const HStringMap * + (*StringSetFun)(size_t, HCFGrammar *, HHashTable **, HCFChoice **); // helper for h_first_seq and h_follow -static void stringset_extend(HCFGrammar *g, HStringMap *ret, +static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret, size_t k, const HStringMap *as, StringSetFun f, HCFChoice **tail); -const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) +static const HStringMap * +h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s) { // shortcut: the first set of the empty sequence, for any k, is {""} if (*s == NULL) { @@ -541,11 +604,11 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) HCFChoice *x = s[0]; HCFChoice **tail = s+1; - const HStringMap *first_x = h_first(k, g, x); + const HStringMap *first_x = h_first_work(k, g, pws, x); // shortcut: if first_k(X) = {""}, just return first_k(tail) if (is_singleton_epsilon(first_x)) { - return h_first_seq(k, g, tail); + return h_first_seq_work(k, g, pws, tail); } // shortcut: if no elements of first_k(X) have length <k, just return first_k(X) @@ -557,11 +620,28 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) HStringMap *ret = h_stringmap_new(g->arena); // extend the elements of first_k(X) up to length k from tail - stringset_extend(g, ret, k, first_x, h_first_seq, tail); + ret->taint = stringset_extend(g, pws, ret, k, first_x, h_first_seq_work, tail); return ret; } +const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s) +{ + HHashTable *ws, *bak; + const HStringMap *ret; + + // fixpoint iteration on workset + ws = NULL; + do { + bak = ws; + ws = NULL; + ret = h_first_seq_work(k, g, &ws, s); + } while(!workset_equal(ws, bak)); + + assert(ret != NULL); + return ret; +} + static bool is_singleton_epsilon(const HStringMap *m) { return ( m->epsilon_branch @@ -620,13 +700,25 @@ static void remove_all_shorter(size_t k, HStringMap *m) } // h_follow adapted to the signature of StringSetFun -static inline -const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s) +static const HStringMap * +h_follow_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s) { + assert(pws == NULL); return h_follow(k, g, *s); } -const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) +static const HStringMap * +h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x); + +// h_follow_work adapted to the signature of StringSetFun +static const HStringMap * +h_follow_work_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s) +{ + return h_follow_work(k, g, pws, *s); +} + +static const HStringMap * +h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x) { // consider all occurances of X in g // the follow set of X is the union of: @@ -638,28 +730,45 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) // { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| } HStringMap *ret; + HHashTable *ws = *pws; + struct k_nt kx = {k,x}; + struct k_nt *pkx; + bool taint = false; // shortcut: follow_0(X) is always {""} if (k==0) { return g->singleton_epsilon; } - // memoize via g->follow - ensure_k(g, k); - ret = h_hashtable_get(g->follow[k], x); + + // check memoization and workset + ret = h_hashtable_get(g->follow, &kx); + if (ret == NULL && ws != NULL) + ret = h_hashtable_get(ws, &kx); if (ret != NULL) { return ret; } + + // not found, create result ret = h_stringmap_new(g->arena); assert(ret != NULL); - h_hashtable_put(g->follow[k], x, ret); + + // to avoid recursive loops, taint ret and place it in workset + ret->taint = true; + if (ws == NULL) + ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt); + pkx = h_arena_malloc(g->arena, sizeof kx); + *pkx = kx; + h_hashtable_put(ws, pkx, ret); // if X is the start symbol, the end token is in its follow set if (x == g->start) { h_stringmap_put_end(ret, INSET); } - // iterate over g->nts + + // iterate over g->nts, looking for X size_t i; HHashTableEntry *hte; + int x_found=0; for (i=0; i < g->nts->capacity; i++) { for (hte = &g->nts->contents[i]; hte; hte = hte->next) { if (hte->key == NULL) { @@ -674,22 +783,49 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) HCFChoice **s = (*p)->items; // production's right-hand side for (; *s; s++) { - if (*s == x) { // occurance found + if (*s == x) { // occurrence found + x_found=1; HCFChoice **tail = s+1; const HStringMap *first_tail = h_first_seq(k, g, tail); // extend the elems of first_k(tail) up to length k from follow(A) - stringset_extend(g, ret, k, first_tail, h_follow_, &a); + taint |= stringset_extend(g, pws, ret, k, + first_tail, h_follow_work_, &a); } } } } } + assert(x_found || x == g->start); // no orphan non-terminals + + // immediately memoize ret and remove it from ws if untainted by recursion + if (!taint) { + ret->taint = false; + h_hashtable_del(ws, pkx); + h_hashtable_put(g->follow, pkx, ret); + } return ret; } +const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) +{ + HHashTable *ws, *bak; + const HStringMap *ret; + + // fixpoint iteration on workset + ws = NULL; + do { + bak = ws; + ws = NULL; + ret = h_follow_work(k, g, &ws, x); + } while(!workset_equal(ws, bak)); + + assert(ret != NULL); + return ret; +} + HStringMap *h_predict(size_t k, HCFGrammar *g, const HCFChoice *A, const HCFSequence *rhs) { @@ -703,7 +839,7 @@ HStringMap *h_predict(size_t k, HCFGrammar *g, // casting the const off of A below. note: stringset_extend does // not touch this argument, only passes it through to h_follow // in this case, which accepts it, once again, as const. - stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A); + stringset_extend(g, NULL, ret, k, first_rhs, h_follow_, (HCFChoice **)&A); // make sure there are only strings of length _exactly_ k remove_all_shorter(k, ret); @@ -712,13 +848,17 @@ HStringMap *h_predict(size_t k, HCFGrammar *g, } // add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret -static void stringset_extend(HCFGrammar *g, HStringMap *ret, +static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret, size_t k, const HStringMap *as, StringSetFun f, HCFChoice **tail) { + bool taint = false; + if (as->epsilon_branch) { // for a="", add f_k(tail) to ret - h_stringmap_update(ret, f(k, g, tail)); + const HStringMap *f_tail = f(k, g, pws, tail); + taint |= f_tail->taint; + h_stringmap_update(ret, f_tail); } if (as->end_branch) { @@ -745,9 +885,11 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, HStringMap *ret_ = h_stringmap_new(g->arena); h_stringmap_put_after(ret, c, ret_); - stringset_extend(g, ret_, k-1, as_, f, tail); + taint |= stringset_extend(g, pws, ret_, k-1, as_, f, tail); } } + + return taint; } @@ -892,13 +1034,15 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt, fputs(name, f); i += strlen(name); for(; i<column; i++) fputc(' ', f); - fputs(" ->", f); assert(nt->type == HCF_CHOICE); HCFSequence **p = nt->seq; if (*p == NULL) { - return; // shouldn't happen + fputs(" -x\n", f); // empty choice, e.g. h_nothing_p() + return; } + + fputs(" ->", f); pprint_sequence(f, g, *p++); // print first production on the same line for(; *p; p++) { // print the rest below with "or" bars for(i=0; i<column; i++) fputc(' ', f); // indent @@ -909,6 +1053,8 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt, void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent) { + HAllocator *mm__ = g->mm__; + if (g->nts->used < 1) { return; } @@ -916,11 +1062,12 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent) // determine maximum string length of symbol names int len; size_t s; - for(len=1, s=26; s < g->nts->used; len++, s*=26); + for(len=1, s=26; s < g->nts->used; len++, s*=26); - // iterate over g->nts + // iterate over g->nts and collect its entries in an ordered array size_t i; HHashTableEntry *hte; + const HCFChoice **arr = h_new(const HCFChoice *, g->nts->used); for(i=0; i < g->nts->capacity; i++) { for(hte = &g->nts->contents[i]; hte; hte = hte->next) { if (hte->key == NULL) { @@ -929,9 +1076,16 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent) const HCFChoice *a = hte->key; // production's left-hand symbol assert(a->type == HCF_CHOICE); - pprint_ntrules(file, g, a, indent, len); + size_t id = (uintptr_t)hte->value; // nonterminal id + assert(id < g->nts->used); + arr[id] = a; } } + + // print rules in alphabetical order + for(i=0; i < g->nts->used; i++) + pprint_ntrules(file, g, arr[i], indent, len); + h_free(arr); } void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent) diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 2e8ba83cee5c152baae1177ed7b99d45cf11042c..8945ecb97d0adc1aa1f69391f54726a156c91211 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -8,15 +8,15 @@ typedef struct HCFGrammar_ { HHashSet *nts; // HCFChoices, each representing the alternative // productions for one nonterminal HHashSet *geneps; // set of NTs that can generate the empty string - HHashTable **first; // memoized first sets of the grammar's symbols - HHashTable **follow; // memoized follow sets of the grammar's NTs - size_t kmax; // maximum lookahead depth allocated + HHashTable *first; // memoized first sets of the grammar's symbols + HHashTable *follow; // memoized follow sets of the grammar's NTs HArena *arena; HAllocator *mm__; - // constant set containing only the empty string. - // this is only a member of HCFGrammar because it needs a pointer to arena. + // constant sets containing only the empty string or end symbol. + // these are only members of HCFGrammar because they need a pointer to arena. const struct HStringMap_ *singleton_epsilon; + const struct HStringMap_ *singleton_end; } HCFGrammar; @@ -37,6 +37,7 @@ typedef struct HStringMap_ { void *end_branch; // points to leaf value HHashTable *char_branches; // maps to inner nodes (HStringMaps) HArena *arena; + bool taint; // for use by h_follow() and h_first() } HStringMap; HStringMap *h_stringmap_new(HArena *a); @@ -52,6 +53,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead); bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end); bool h_stringmap_present_epsilon(const HStringMap *m); bool h_stringmap_empty(const HStringMap *m); +bool h_stringmap_equal(const HStringMap *a, const HStringMap *b); static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c) { return h_hashtable_get(m->char_branches, (void *)char_key(c)); } diff --git a/src/datastructures.c b/src/datastructures.c index 6971e0e0bee2fc8bbc644a0c6d9f9967d4ab6a01..8a09b5ce755e8880542f02c82b3b3e0db4f2fa48 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -387,16 +387,18 @@ static bool hte_same_length(HHashTableEntry *xs, HHashTableEntry *ys) { } // helper for hte_equal: are all elements of xs present in ys? -static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) +static bool hte_subset(HEqualFunc eq, HEqualFunc value_eq, + HHashTableEntry *xs, HHashTableEntry *ys) { for(; xs; xs=xs->next) { if(xs->key == NULL) continue; // element not present HHashTableEntry *hte; for(hte=ys; hte; hte=hte->next) { - if(hte->key == xs->key) break; // assume an element is equal to itself + // assume an element is equal to itself + if(hte->key == xs->key && hte->value == xs->value) break; if(hte->hashval != xs->hashval) continue; // shortcut - if(eq(hte->key, xs->key)) break; + if(eq(hte->key, xs->key) && value_eq(hte->value, xs->value)) break; } if(hte == NULL) return false; // element not found } @@ -404,19 +406,20 @@ static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) } // compare two lists of HHashTableEntries -static inline bool hte_equal(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) { - return (hte_same_length(xs, ys) && hte_subset(eq, xs, ys)); +static inline bool hte_equal(HEqualFunc eq, HEqualFunc value_eq, + HHashTableEntry *xs, HHashTableEntry *ys) { + return (hte_same_length(xs, ys) && hte_subset(eq, value_eq, xs, ys)); } -/* Set equality of HHashSets. +/* Equality of HHashTables. * Obviously, 'a' and 'b' must use the same equality function. * Not strictly necessary, but we also assume the same hash function. */ -bool h_hashset_equal(const HHashSet *a, const HHashSet *b) { +bool h_hashtable_equal(const HHashSet *a, const HHashSet *b, HEqualFunc value_eq) { if(a->capacity == b->capacity) { // iterate over the buckets in parallel for(size_t i=0; i < a->capacity; i++) { - if(!hte_equal(a->equalFunc, &a->contents[i], &b->contents[i])) + if(!hte_equal(a->equalFunc, value_eq, &a->contents[i], &b->contents[i])) return false; } } else { @@ -426,6 +429,18 @@ bool h_hashset_equal(const HHashSet *a, const HHashSet *b) { return true; } +static bool eq_dontcare(const void *p, const void *q) { + return true; +} + +/* Set equality of HHashSets. + * Obviously, 'a' and 'b' must use the same equality function. + * Not strictly necessary, but we also assume the same hash function. + */ +bool h_hashset_equal(const HHashSet *a, const HHashSet *b) { + return h_hashtable_equal(a, b, eq_dontcare); +} + bool h_eq_ptr(const void *p, const void *q) { return (p==q); } diff --git a/src/internal.h b/src/internal.h index 07420681275a989925a08f6c596e3bc4a59202c1..f25d18ba4d1f42df96f77f79b53115be302c0490 100644 --- a/src/internal.h +++ b/src/internal.h @@ -382,6 +382,7 @@ int h_hashtable_present(const HHashTable *ht, const void *key); void h_hashtable_del(HHashTable *ht, const void *key); void h_hashtable_free(HHashTable *ht); static inline bool h_hashtable_empty(const HHashTable *ht) { return (ht->used == 0); } +bool h_hashtable_equal(const HHashTable *a, const HHashTable *b, HEqualFunc value_eq); typedef HHashTable HHashSet; #define h_hashset_new(a,eq,hash) h_hashtable_new(a,eq,hash) diff --git a/src/t_regression.c b/src/t_regression.c index b276f320f6dc83118d049dff3293640c52b64552..4ff3f96263e28ad4002cd2bfc00c58be7effc1dd 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -384,6 +384,60 @@ static void test_issue91() { g_check_cmp_int(r, ==, -2); } +static void test_issue92() { + HParser *a = h_ch('a'); + HParser *b = h_ch('b'); + + HParser *str_a = h_indirect(); + HParser *str_b = h_choice(h_sequence(b, str_a, NULL), str_a, NULL); + //h_sequence(h_optional(b), str_a, NULL); // this works + HParser *str_a_ = h_optional(h_sequence(a, str_b, NULL)); + HParser *str = str_a; + h_bind_indirect(str_a, str_a_); + /* + * grammar generated from the above: + * + * A -> B -- "augmented" with a fresh start symbol + * B -> C -- B = str_a + * | "" + * C -> "a" D -- C = h_sequence(a, str_b) + * D -> E -- D = str_b + * | B + * E -> "b" B -- E = h_sequence(b, str_a) + * + * transformed to the following "enhanced grammar": + * + * S -> 0B3 + * 0B3 -> 0C2 + * | "" + * 1B4 -> 1C2 + * | "" + * 6B8 -> 6C2 + * | "" (*) here + * 0C2 -> "a" 1D7 + * 1C2 -> "a" 1D7 + * 6C2 -> "a" 1D7 + * 1D7 -> 1E5 + * | 1B4 + * 1E5 -> "b" 6B8 + */ + + /* + * the following call would cause an assertion failure. + * + * assertion "!h_stringmap_empty(fs)" failed: file + * "src/backends/lalr.c", line 341, function "h_lalr_compile" + * + * the bug happens when trying to compute h_follow() for 6B8 in state 6, + * production "" (*). intermediate results could end up in the memoization + * table and be treated as final by later calls to h_follow(). the problem + * could appear or not depending on the order of nonterminals (i.e. pointers) + * in a hashtable. + */ + int r = h_compile(str, PB_LALR, NULL); + g_check_cmp_int(r, ==, 0); +} + static void test_issue83() { HParser *p = h_sequence(h_sequence(NULL, NULL), h_nothing_p(), NULL); /* @@ -421,5 +475,6 @@ void register_regression_tests(void) { g_test_add_func("/core/regression/flatten_null", test_flatten_null); //XXX g_test_add_func("/core/regression/ast_length_index", test_ast_length_index); g_test_add_func("/core/regression/issue91", test_issue91); + g_test_add_func("/core/regression/issue92", test_issue92); g_test_add_func("/core/regression/issue83", test_issue83); }