diff --git a/src/cfgrammar.c b/src/cfgrammar.c
index 77e7ecad7ea1a70597a4c7c70ee21d9184a6c672..aea45a452987a5d5e7728146a84253ad0d23f416 100644
--- a/src/cfgrammar.c
+++ b/src/cfgrammar.c
@@ -42,6 +42,7 @@ void h_cfgrammar_free(HCFGrammar *g)
 // helpers
 static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
 static void collect_geneps(HCFGrammar *grammar);
+static void eliminate_dead_rules(HCFGrammar *g);
 
 
 HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
@@ -83,6 +84,9 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
     g->start = desugared;
   }
 
+  // simplifications
+  eliminate_dead_rules(g);
+
   // determine which nonterminals generate epsilon
   collect_geneps(g);
 
@@ -232,6 +236,87 @@ static void collect_geneps(HCFGrammar *g)
   } while(g->geneps->used != prevused);
 }
 
+/* Returns true iff x occurs in s before the NULL entry that ends the scan. */
+static bool mentions_symbol(HCFChoice **s, const HCFChoice *x)
+{
+  for(; *s; s++) {
+    if (*s == x)
+      return true;
+  }
+  return false;
+}
+
+/*
+ * Removes, from every nonterminal in g->nts, each production whose item list
+ * contains x. The surviving productions may be reordered: the last one is
+ * moved into the slot of the removed one.
+ */
+static void remove_productions_with(HCFGrammar *g, const HCFChoice *x)
+{
+  HHashTableEntry *hte;
+  const HCFChoice *symbol;
+  size_t i;
+
+  for(i=0; i < g->nts->capacity; i++) {
+    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL)
+        continue;
+      symbol = hte->key;
+      assert(symbol->type == HCF_CHOICE);
+
+      HCFSequence **p, **q;
+      for(p = symbol->seq; *p != NULL; ) {
+        if (mentions_symbol((*p)->items, x)) {
+          // remove production p
+          for(q=p; *(q+1) != NULL; q++); // q = last production
+          *p = *q; // move q over p
+          *q = NULL; // delete old q
+        } else {
+          p++;
+        }
+      }
+    }
+  }
+}
+
+/*
+ * Repeatedly looks for a nonterminal in g->nts that has no productions,
+ * deletes it from g->nts and removes every production mentioning it, until
+ * no such nonterminal remains. g->nts is then rebuilt from g->start.
+ */
+static void eliminate_dead_rules(HCFGrammar *g)
+{
+  HHashTableEntry *hte;
+  const HCFChoice *symbol = NULL; // always assigned before found becomes true
+  size_t i;
+  bool found;
+
+  do {
+    found = false;
+    for(i=0; !found && i < g->nts->capacity; i++) {
+      for(hte = &g->nts->contents[i]; !found && hte; hte = hte->next) {
+        if (hte->key == NULL)
+          continue;
+        symbol = hte->key;
+        assert(symbol->type == HCF_CHOICE);
+
+        // this NT is dead if it has no productions
+        if (*symbol->seq == NULL)
+          found = true;
+      }
+    }
+    if (found) {
+      h_hashtable_del(g->nts, symbol);
+      remove_productions_with(g, symbol);
+    }
+  } while(found); // until nothing left to remove
+
+  // rebuild g->nts. there may now be symbols that no longer appear in any
+  // productions. we also might have removed g->start.
+  g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
+  collect_nts(g, g->start);
+}
+
 
 HStringMap *h_stringmap_new(HArena *a)
 {
diff --git a/src/t_regression.c b/src/t_regression.c
index 7a04bc6e7b89646edc172c50b4078cbd162431d1..b276f320f6dc83118d049dff3293640c52b64552 100644
--- a/src/t_regression.c
+++ b/src/t_regression.c
@@ -384,6 +384,30 @@ static void test_issue91() {
   g_check_cmp_int(r, ==, -2);
 }
 
+static void test_issue83() {
+  HParser *p = h_sequence(h_sequence(NULL, NULL), h_nothing_p(), NULL);
+  /*
+   * A -> B
+   * B -> C D
+   * C -> ""
+   * D -x
+   *
+   * (S) -> 0B1
+   * 0B1 -> 0C2 2D3
+   * 0C2 -> "" (*) h_follow()
+   * 2D3 -x
+   */
+
+  /*
+   * similar to issue 91, this would cause the same assertion failure, but for
+   * a different reason. the follow set of 0C2 above is equal to the first set
+   * of 2D3, but 2D3 is an empty choice. The first set of an empty choice
+   * is legitimately empty. the assertion in h_lalr_compile() missed this case.
+   */
+  int r = h_compile(p, PB_LALR, NULL);
+  g_check_cmp_int(r, ==, 0);
+}
+
 void register_regression_tests(void) {
   g_test_add_func("/core/regression/bug118", test_bug118);
   g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
@@ -397,4 +421,5 @@ void register_regression_tests(void) {
   g_test_add_func("/core/regression/flatten_null", test_flatten_null);
   //XXX g_test_add_func("/core/regression/ast_length_index", test_ast_length_index);
   g_test_add_func("/core/regression/issue91", test_issue91);
+  g_test_add_func("/core/regression/issue83", test_issue83);
 }