Review note: reshape_many() in src/parsers/many.c was tightened during review
(lower bound on the element count, extra comments), so the new-blob id on that
file's index line predates the change. Indentation in this patch was
reconstructed from a whitespace-collapsed copy.

diff --git a/src/backends/lalr.c b/src/backends/lalr.c
index 14f64cd1a23cf2276a1377e0d1b78c3a24125ed8..272f00d6c598e59bdf253363c8f4440e619e47e3 100644
--- a/src/backends/lalr.c
+++ b/src/backends/lalr.c
@@ -52,7 +52,7 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
   if (xAy->type != HCF_CHOICE) {
     return;
   }
-  // XXX CHARSET?
+  // NB: nothing to do on quasi-terminal CHARSET which carries no list of rhs's
 
   HArena *arena = eg->arena;
 
@@ -286,14 +286,28 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
       HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
       assert(lhss != NULL);
       H_FOREACH_KEY(lhss, HCFChoice *lhs)
-        assert(lhs->type == HCF_CHOICE); // XXX could be CHARSET?
-
-        for(HCFSequence **p=lhs->seq; *p; p++) {
-          HCFChoice **rhs = (*p)->items;
-          if(!match_production(eg, rhs, item->rhs, state)) {
-            continue;
-          }
-
+        assert(lhs->type == HCF_CHOICE || lhs->type == HCF_CHARSET);
+
+        bool match = false;
+        if(lhs->type == HCF_CHOICE) {
+          for(HCFSequence **p=lhs->seq; *p; p++) {
+            HCFChoice **rhs = (*p)->items;
+            if(match_production(eg, rhs, item->rhs, state)) {
+              match = true;
+              break;
+            }
+          }
+        } else { // HCF_CHARSET
+          assert(item->rhs[0] != NULL);
+          assert(item->rhs[1] == NULL);
+          assert(item->rhs[0]->type == HCF_CHAR);
+          HLRTransition *t = h_hashtable_get(eg->smap, lhs);
+          assert(t != NULL);
+          match = (t->to == state
+                   && charset_isset(lhs->charset, item->rhs[0]->chr));
+        }
+
+        if(match) {
           // the left-hand symbol's follow set is this production's
           // contribution to the lookahead
           const HStringMap *fs = h_follow(1, eg->grammar, lhs);
@@ -304,7 +318,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
           // for each lookahead symbol, put action into table cell
           if(terminals_put(table->tmap[state], fs, action) < 0)
             inadeq = true;
-        } H_END_FOREACH // enhanced production
+        }
+      H_END_FOREACH // enhanced production
     H_END_FOREACH // reducible item
 
     if(inadeq) {
diff --git a/src/cfgrammar.c b/src/cfgrammar.c
index a8761b8d537ec236f7a4876e1ad86a30742df988..6cd87ee574d2819ec7a54f6b0d140add1c52209e 100644
--- a/src/cfgrammar.c
+++ b/src/cfgrammar.c
@@ -672,7 +672,7 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
 }
 
 
-void h_pprint_char(FILE *f, char c)
+void h_pprint_char(FILE *f, uint8_t c)
 {
   switch(c) {
   case '"': fputs("\\\"", f); break;
@@ -685,12 +685,12 @@ void h_pprint_char(FILE *f, char c)
     if (isprint((int)c)) {
       fputc(c, f);
     } else {
-      fprintf(f, "\\x%.2X", c);
+      fprintf(f, "\\x%.2X", (unsigned int)c);
     }
   }
 }
 
-static void pprint_charset_char(FILE *f, char c)
+static void pprint_charset_char(FILE *f, uint8_t c)
 {
   switch(c) {
   case '"': fputc(c, f); break;
@@ -896,8 +896,8 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
   if (map->epsilon_branch) {
     if (!first) {
       fputc(sep, file);
-      first=false;
     }
+    first=false;
     if (n==0) {
       fputs("\"\"", file);
     } else {
@@ -915,8 +915,8 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep,
   if (map->end_branch) {
     if (!first) {
       fputs(",\"", file);
-      first=false;
     }
+    first=false;
     if (n>0) {
       fputs("\"\"", file);
     }
diff --git a/src/cfgrammar.h b/src/cfgrammar.h
index 9cefc62e83f07048dc2a24f0cda1bde28ca72066..0d31ef57116fadad6700241cf805bf325f550c69 100644
--- a/src/cfgrammar.h
+++ b/src/cfgrammar.h
@@ -102,4 +102,4 @@ void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
 void h_pprint_stringmap(FILE *file, char sep,
                         void (*valprint)(FILE *f, void *env, void *val), void *env,
                         const HStringMap *map);
-void h_pprint_char(FILE *file, char c);
+void h_pprint_char(FILE *file, uint8_t c);
diff --git a/src/parsers/many.c b/src/parsers/many.c
index 51d733fcf87e3191e6f413a9513ac7900d29d8f2..cae2b0eade03450cae13f48e8f53c37db4237721 100644
--- a/src/parsers/many.c
+++ b/src/parsers/many.c
@@ -59,6 +59,38 @@ static bool many_isValidCF(void *env) {
               repeat->sep->vtable->isValidCF(repeat->sep->env)));
 }
 
+// Reshape the nested result of the grammar built by desugar_many into one flat
+// sequence: turn (_ x (_ y (_ z ()))) into (x y z) where '_' are optional.
+// Every non-empty level is a sequence whose last entry is the rest of the list
+// and whose second-to-last entry is the element to keep; an optional leading
+// '_' entry is dropped. The 'user' argument is unused.
+static HParsedToken *reshape_many(const HParseResult *p, void *user)
+{
+  HCountedArray *seq = h_carray_new(p->arena);
+
+  const HParsedToken *tok = p->ast;
+  while(tok) {
+    assert(tok->token_type == TT_SEQUENCE);
+    if(tok->seq->used > 0) {
+      size_t n = tok->seq->used;
+      // n is unsigned: for n < 2 the index n-2 below would wrap around
+      assert(n >= 2 && n <= 3);
+      h_carray_append(seq, tok->seq->elements[n-2]);
+      tok = tok->seq->elements[n-1];
+    } else {
+      tok = NULL;
+    }
+  }
+
+  // NOTE(review): dereferences p->ast although the loop above tolerates NULL
+  HParsedToken *res = a_new_(p->arena, HParsedToken, 1);
+  res->token_type = TT_SEQUENCE;
+  res->seq = seq;
+  res->index = p->ast->index;
+  res->bit_offset = p->ast->bit_offset;
+  return res;
+}
+
 static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
   // TODO: refactor this.
   HRepeat *repeat = (HRepeat*)env;
@@ -93,7 +125,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
       HCFS_BEGIN_CHOICE() { // Mar
         HCFS_BEGIN_SEQ() {
           if (repeat->sep != NULL) {
-            HCFS_DESUGAR(h_ignore__m(mm__, repeat->sep));
+            HCFS_DESUGAR(repeat->sep);
           }
           //stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry.
           HCFS_DESUGAR(repeat->p);
@@ -108,7 +140,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
         //HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p()));
       } HCFS_END_SEQ();
     }
-    HCFS_THIS_CHOICE->reshape = h_act_flatten;
+    HCFS_THIS_CHOICE->reshape = reshape_many;
   } HCFS_END_CHOICE();
 }
 
diff --git a/src/t_regression.c b/src/t_regression.c
index 1eeaf8c2bed340fd103c696b1e386d529647d1ed..4097fb6cf1a64392438e9dddb4ac17427e5e0065 100644
--- a/src/t_regression.c
+++ b/src/t_regression.c
@@ -118,9 +118,33 @@ static void test_llk_zero_end(void) {
   g_check_parse_failed(aze, be, "a", 1);
 }
 
+static void test_lalr_charset_lhs(void) {
+  HParserBackend be = PB_LALR;
+
+  HParser *p = h_choice(h_ch('A'), h_uint8(), NULL);
+
+  // the above would fail to compile because of an unhandled case in trying
+  // to resolve a conflict where an item's left-hand-side was an HCF_CHARSET.
+
+  g_check_parse_match(p, be, "A",1, "u0x41");
+  g_check_parse_match(p, be, "B",1, "u0x42");
+}
+
+static void test_cfg_many_seq(void) {
+  HParser *p = h_many(h_sequence(h_ch('A'), h_ch('B'), NULL));
+
+  g_check_parse_match(p, PB_LLk, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
+  g_check_parse_match(p, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
+  g_check_parse_match(p, PB_GLR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
+  // these would instead parse as (u0x41 u0x42 u0x41 u0x42) due to a faulty
+  // reshape on h_many.
+}
+
 void register_regression_tests(void) {
   g_test_add_func("/core/regression/bug118", test_bug118);
   g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
   g_test_add_func("/core/regression/read_bits_48", test_read_bits_48);
   g_test_add_func("/core/regression/llk_zero_end", test_llk_zero_end);
+  g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs);
+  g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq);
 }