diff --git a/src/backends/llk.c b/src/backends/llk.c index 69fb7efa979152fc1b9eaa0cc84f0be1ae99a9ed..27258e62db9ee53c75a0f19a7b4e0917d3a3f266 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -81,28 +81,6 @@ void h_llktable_free(HLLkTable *table) h_free(table); } -/* Compute the predict_k set of production "A -> rhs". - * Always returns a newly-allocated HCFStringMap. - */ -HCFStringMap *h_predict(size_t k, HCFGrammar *g, - const HCFChoice *A, const HCFSequence *rhs) -{ - assert(k==1); // XXX - HCFStringMap *ret = h_stringmap_new(g->arena); - - // predict(A -> rhs) = first(rhs) u follow(A) if "" can be derived from rhs - // predict(A -> rhs) = first(rhs) otherwise - - h_stringmap_update(ret, h_first_seq(k, g, rhs->items)); - if(h_derives_epsilon_seq(g, rhs->items)) - h_stringmap_update(ret, h_follow(k, g, A)); - - // make sure there are only strings of length _exactly_ k - ret->epsilon_branch = NULL; - - return ret; -} - void *const CONFLICT = (void *)(uintptr_t)(-1); // helper for stringmap_merge @@ -113,7 +91,7 @@ static void *combine_entries(HHashSet *workset, void *dst, const void *src) if(dst == CONFLICT) { // previous conflict h_hashset_put(workset, src); - } else if(dst == src) { // new conflict + } else if(dst != src) { // new conflict h_hashset_put(workset, dst); h_hashset_put(workset, src); dst = CONFLICT; @@ -133,6 +111,12 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap * combine_entries(workset, dst->epsilon_branch, src->epsilon_branch); else dst->epsilon_branch = src->epsilon_branch; + } else { + // if there is a non-conflicting value on the left (dst) side, it means + // that prediction is already unambiguous. we can drop the right (src) + // side we were going to extend with. + if(dst->epsilon_branch && dst->epsilon_branch != CONFLICT) + return; } if(src->end_branch) { @@ -164,9 +148,6 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap * } } -void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq); -void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x); - /* Generate entries for the production "A" in the given table row. */ static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row, const HCFChoice *A) @@ -181,6 +162,8 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row, // run until workset exhausted or kmax hit size_t k; for(k=1; k<=kmax; k++) { + printf("k=%lu\n", k); // XXX debug + // allocate a fresh workset for the next round HHashSet *nextset = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr); @@ -199,29 +182,30 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HCFStringMap *row, HCFStringMap *pred = h_predict(k, g, A, rhs); h_stringmap_replace(pred, NULL, rhs); + // XXX debug + printf("predict("); + h_pprint_sequence(stdout, g, rhs); + printf(") = "); + h_pprint_stringset(stdout, pred, 0); + // merge predict set into the row // accumulates conflicts in new workset stringmap_merge(nextset, row, pred); - - // XXX debug - if(A == g->start) { - printf("predict("); - pprint_sequence(stdout, g, rhs); - printf(" ) = "); - h_pprint_stringset(stdout, g, pred, 0); - } } } // XXX debug - if(A == g->start) { - printf("row("); - pprint_symbol(stdout, g, A); - printf(") = "); - h_pprint_stringset(stdout, g, row, 0); + printf("row("); + h_pprint_symbol(stdout, g, A); + printf(") = "); + h_pprint_stringset(stdout, row, 0); + if(h_stringmap_get(row, (uint8_t *)"a", 1, false)) { + printf(" a -> "); + h_pprint_sequence(stdout, g, h_stringmap_get(row, (uint8_t *)"a", 1, false)); + printf("\n"); } // switch to the updated workset - h_hashtable_free(workset); + h_hashset_free(workset); workset = nextset; // if the workset is empty, row is without conflict; we're done @@ -473,9 +457,9 @@ int test_llk(void) */ HParser *X = h_optional(h_ch('x')); - HParser *Y = h_sequence(h_ch('y'), NULL); - HParser *A = h_sequence(X, Y, h_ch('a'), NULL); - HParser *B = h_sequence(Y, h_ch('b'), NULL); + //HParser *Y = h_epsilon_p(); //h_sequence(h_ch('y'), NULL); + HParser *A = h_sequence(X, h_ch('a'), NULL); + HParser *B = h_sequence(h_ch('b'), NULL); HParser *p = h_choice(A, B, NULL); HCFGrammar *g = h_cfgrammar(&system_allocator, p); @@ -489,16 +473,16 @@ int test_llk(void) printf("derive epsilon: "); h_pprint_symbolset(stdout, g, g->geneps, 0); printf("first(A) = "); - h_pprint_stringset(stdout, g, h_first(3, g, g->start), 0); + h_pprint_stringset(stdout, h_first(3, g, g->start), 0); //printf("follow(C) = "); - //h_pprint_stringset(stdout, g, h_follow(3, g, h_desugar(&system_allocator, c)), 0); + //h_pprint_stringset(stdout, h_follow(3, g, h_desugar(&system_allocator, c)), 0); - if(h_compile(p, PB_LLk, NULL)) { + if(h_compile(p, PB_LLk, (void *)2)) { fprintf(stderr, "does not compile\n"); return 2; } - HParseResult *res = h_parse(p, (uint8_t *)"xa", 2); + HParseResult *res = h_parse(p, (uint8_t *)"ab", 2); if(res) h_pprint(stdout, res->ast, 0, 2); else diff --git a/src/cfgrammar.c b/src/cfgrammar.c index 1721122e8a3a34496f09949800ca85ff024ef101..d774dd0938d2168b640089c094e17614e40d9380 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -437,7 +437,23 @@ static bool any_string_shorter(size_t k, const HCFStringMap *m) return false; } -const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x); +// helper for h_predict +static void remove_all_shorter(size_t k, HCFStringMap *m) +{ + if(k==0) return; + m->epsilon_branch = NULL; + if(k==1) return; + + // iterate over m->char_branches + const HHashTable *ht = m->char_branches; + for(size_t i=0; i < ht->capacity; i++) { + for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) { + if(hte->key == NULL) + continue; + remove_all_shorter(k-1, hte->value); // recursion into subtree + } + } +} // h_follow adapted to the signature of StringSetFun static inline const HCFStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s) @@ -507,6 +523,23 @@ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x) return ret; } +HCFStringMap *h_predict(size_t k, HCFGrammar *g, + const HCFChoice *A, const HCFSequence *rhs) +{ + HCFStringMap *ret = h_stringmap_new(g->arena); + + // predict_k(A -> rhs) = + // { ab | a <- first_k(rhs), b <- follow_k(A), |ab|=k } + + const HCFStringMap *first_rhs = h_first_seq(k, g, rhs->items); + stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A); + + // make sure there are only strings of length _exactly_ k + remove_all_shorter(k, ret); + + return ret; +} + // add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret static void stringset_extend(HCFGrammar *g, HCFStringMap *ret, size_t k, const HCFStringMap *as, @@ -624,7 +657,7 @@ static HCFChoice **pprint_string(FILE *f, HCFChoice **x) return x; } -void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x) +void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x) { switch(x->type) { case HCF_CHAR: @@ -643,32 +676,37 @@ void pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x) } } -void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq) +void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq) { HCFChoice **x = seq->items; if(*x == NULL) { // the empty sequence - fputs(" \"\"", f); + fputs("\"\"", f); } else { while(*x) { - fputc(' ', f); // separator + if(x != seq->items) fputc(' ', f); // internal separator if((*x)->type == HCF_CHAR) { // condense character strings x = pprint_string(f, x); } else { - pprint_symbol(f, g, *x); + h_pprint_symbol(f, g, *x); x++; } } } +} +// adds some separators expected below +static void pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq) +{ + fputc(' ', f); + h_pprint_sequence(f, g, seq); fputc('\n', f); } -static -void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt, - int indent, int len) +static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt, + int indent, int len) { int i; int column = indent + len; @@ -738,7 +776,7 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in a = hte->key; // production's left-hand symbol - pprint_symbol(file, g, a); + h_pprint_symbol(file, g, a); } } @@ -801,7 +839,7 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, return first; } -void h_pprint_stringset(FILE *file, const HCFGrammar *g, const HCFStringMap *set, int indent) +void h_pprint_stringset(FILE *file, const HCFStringMap *set, int indent) { int j; for(j=0; j<indent; j++) fputc(' ', file); diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 8dc4449ac91d2f98a3a4fbabb38a8b18e9e446bd..d2270ff08dbe296cf9d5d6d0152ccd307e77a019 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -50,7 +50,7 @@ void *h_stringmap_get(const HCFStringMap *m, const uint8_t *str, size_t n, bool bool h_stringmap_present(const HCFStringMap *m, const uint8_t *str, size_t n, bool end); bool h_stringmap_present_epsilon(const HCFStringMap *m); -static inline void *h_stringmap_get_char(const HCFStringMap *m, const uint8_t c) +static inline HCFStringMap *h_stringmap_get_char(const HCFStringMap *m, const uint8_t c) { return h_hashtable_get(m->char_branches, (void *)char_key(c)); } @@ -80,8 +80,16 @@ const HCFStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s); /* Compute follow_k set of symbol x. Memoized. */ const HCFStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x); +/* Compute the predict_k set of production "A -> rhs". + * Always returns a newly-allocated HCFStringMap. + */ +HCFStringMap *h_predict(size_t k, HCFGrammar *g, + const HCFChoice *A, const HCFSequence *rhs); + /* Pretty-printers for grammars and associated data. */ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent); +void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq); +void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x); void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent); -void h_pprint_stringset(FILE *file, const HCFGrammar *g, const HCFStringMap *set, int indent); +void h_pprint_stringset(FILE *file, const HCFStringMap *set, int indent); diff --git a/src/parsers/optional.c b/src/parsers/optional.c index 6cb5331adb3cfd0a36390528c42cfda82ca3c74a..87ba541b91310bae7bd20b30fe2b9387501c0045 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -25,7 +25,6 @@ static bool opt_isValidCF(void *env) { static HParsedToken* reshape_optional(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); - assert(p->ast->seq->used > 0); HParsedToken *res = p->ast->seq->elements[0]; if(res)