From 62f651cd909d72f203bc9bd7e88607835b082c37 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Thu, 20 Feb 2020 13:35:11 +0100 Subject: [PATCH] "taint" memoized follow sets that came from intermediate results tentative commit. this should solve issue 92, but isn't working, yet. the idea is to taint any memoized follow set that hit a recursive loop, directly or indirectly. the recursive calls (h_follow_rec) are allowed to use these intermediates, but propagate the taint. the top-level routine (h_follow) places the original taint on its result and may remove it at the end. it will not return memoized results that are tainted (but compute them afresh). --- src/cfgrammar.c | 115 ++++++++++++++++++++++++++++++++++-------------- src/cfgrammar.h | 1 + 2 files changed, 82 insertions(+), 34 deletions(-) diff --git a/src/cfgrammar.c b/src/cfgrammar.c index 9697e12b..22442376 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -240,6 +240,7 @@ HStringMap *h_stringmap_new(HArena *a) m->end_branch = NULL; m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr); m->arena = a; + m->taint = false; return m; } @@ -452,7 +453,7 @@ static bool any_string_shorter(size_t k, const HStringMap *m); typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **); // helper for h_first_seq and h_follow -static void stringset_extend(HCFGrammar *g, HStringMap *ret, +static bool stringset_extend(HCFGrammar *g, HStringMap *ret, size_t k, const HStringMap *as, StringSetFun f, HCFChoice **tail); @@ -546,44 +547,25 @@ static void remove_all_shorter(size_t k, HStringMap *m) } // h_follow adapted to the signature of StringSetFun -static inline -const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s) +static const HStringMap * +h_follow_(size_t k, HCFGrammar *g, HCFChoice **s) { return h_follow(k, g, *s); } -const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) -{ - // consider all occurances of X in g - // the follow set of X 
is the union of: - // {$} if X is the start symbol - // given a production "A -> alpha X tail": - // first_k(tail follow_k(A)) - - // first_k(tail follow_k(A)) = - // { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| } +static const HStringMap *h_follow_rec(size_t k, HCFGrammar *g, HCFChoice **s); - HStringMap *ret; - - // shortcut: follow_0(X) is always {""} - if (k==0) { - return g->singleton_epsilon; - } - // memoize via g->follow - ensure_k(g, k); - ret = h_hashtable_get(g->follow[k], x); - if (ret != NULL) { - return ret; - } - ret = h_stringmap_new(g->arena); - assert(ret != NULL); - h_hashtable_put(g->follow[k], x, ret); +static bool +follow_work(size_t k, HCFGrammar *g, const HCFChoice *x, HStringMap *ret) +{ + bool taint = false; // if X is the start symbol, the end token is in its follow set if (x == g->start) { h_stringmap_put_end(ret, INSET); } - // iterate over g->nts + + // iterate over g->nts, looking for X size_t i; HHashTableEntry *hte; int x_found=0; @@ -608,7 +590,7 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) const HStringMap *first_tail = h_first_seq(k, g, tail); // extend the elems of first_k(tail) up to length k from follow(A) - stringset_extend(g, ret, k, first_tail, h_follow_, &a); + taint |= stringset_extend(g, ret, k, first_tail, h_follow_rec, &a); } } } @@ -616,7 +598,66 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) } assert(x_found || x == g->start); // no orphan non-terminals - h_hashtable_del(g->follow[k], x); + return taint; +} + +// inner (recursive) variant of h_follow with the StringSetFun signature; may return a tainted memoized set and marks its own result with the taint reported by follow_work +static const HStringMap *h_follow_rec(size_t k, HCFGrammar *g, HCFChoice **s) +{ + HStringMap *ret; + HCFChoice *x = *s; + + // shortcut: follow_0(X) is always {""} + if (k==0) { + return g->singleton_epsilon; + } + + // memoize via g->follow + assert(k <= g->kmax); + ret = h_hashtable_get(g->follow[k], x); + if (ret != NULL) { // return regardless of taint; NOTE(review): this may be a tainted leftover from an earlier top-level h_follow call -- confirm that is intended + return ret; + } + ret = 
h_stringmap_new(g->arena); + assert(ret != NULL); + h_hashtable_put(g->follow[k], x, ret); + + ret->taint = true; + ret->taint = follow_work(k, g, x, ret); + return ret; +} + +const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) +{ + // consider all occurrences of X in g + // the follow set of X is the union of: + // {$} if X is the start symbol + // given a production "A -> alpha X tail": + // first_k(tail follow_k(A)) + + // first_k(tail follow_k(A)) = + // { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| } + + HStringMap *ret; + + // shortcut: follow_0(X) is always {""} + if (k==0) { + return g->singleton_epsilon; + } + + // memoize via g->follow, but never return a tainted entry; recompute it instead + ensure_k(g, k); + ret = h_hashtable_get(g->follow[k], x); + if (ret != NULL && !ret->taint) { + return ret; + } + ret = h_stringmap_new(g->arena); + assert(ret != NULL); + h_hashtable_put(g->follow[k], x, ret); + + ret->taint = true; + follow_work(k, g, x, ret); + ret->taint = false; return ret; } @@ -643,13 +684,17 @@ HStringMap *h_predict(size_t k, HCFGrammar *g, } // add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret -static void stringset_extend(HCFGrammar *g, HStringMap *ret, +static bool stringset_extend(HCFGrammar *g, HStringMap *ret, size_t k, const HStringMap *as, StringSetFun f, HCFChoice **tail) { + bool taint = false; + if (as->epsilon_branch) { // for a="", add f_k(tail) to ret - h_stringmap_update(ret, f(k, g, tail)); + const HStringMap *f_tail = f(k, g, tail); + taint |= f_tail->taint; + h_stringmap_update(ret, f_tail); } if (as->end_branch) { @@ -676,9 +721,11 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, HStringMap *ret_ = h_stringmap_new(g->arena); h_stringmap_put_after(ret, c, ret_); - stringset_extend(g, ret_, k-1, as_, f, tail); + taint |= stringset_extend(g, ret_, k-1, as_, f, tail); } } + + return taint; } diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 2e8ba83c..6068a2bc 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -37,6 +37,7 @@ 
typedef struct HStringMap_ { void *end_branch; // points to leaf value HHashTable *char_branches; // maps to inner nodes (HStringMaps) HArena *arena; + bool taint; // for use by h_follow() and h_first() } HStringMap; HStringMap *h_stringmap_new(HArena *a); -- GitLab