From 4b5a93666723380e26e58ea23fad5c244b6ee66a Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Wed, 12 Jun 2013 16:38:50 +0200 Subject: [PATCH] handle charsets --- src/backends/lalr.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 8178f2d2..66fe42c7 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -229,15 +229,33 @@ static HHashSet *closure(HCFGrammar *g, const HHashSet *items) HCFChoice *sym = item->rhs[item->mark]; // symbol after mark // if there is a non-terminal after the mark, follow it - // XXX: do we have to count HCF_CHARSET as nonterminal? - if(sym != NULL && sym->type == HCF_CHOICE) { + // NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here + if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) { // add items corresponding to the productions of sym - for(HCFSequence **p=sym->seq; *p; p++) { - HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0); - if(!h_hashset_present(ret, it)) { - h_hashset_put(ret, it); - h_slist_push(work, it); + if(sym->type == HCF_CHOICE) { + for(HCFSequence **p=sym->seq; *p; p++) { + HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0); + if(!h_hashset_present(ret, it)) { + h_hashset_put(ret, it); + h_slist_push(work, it); + } + } + } else { // HCF_CHARSET + for(unsigned int i=0; i<256; i++) { + if(charset_isset(sym->charset, i)) { + HCFChoice **rhs = h_arena_malloc(arena, 2 * sizeof(HCFChoice *)); + rhs[0] = h_arena_malloc(arena, sizeof(HCFChoice)); + rhs[0]->type = HCF_CHAR; + rhs[0]->chr = i; + rhs[1] = NULL; + HLRItem *it = h_lritem_new(arena, sym, rhs, 0); + h_hashset_put(ret, it); + // single-character item needs no further work + } } + // if sym is a non-terminal, we need a reshape on it + // this seems as good a place as any to set it + sym->reshape = h_act_first; } // if sym derives epsilon, also advance over it @@ -615,8 +633,6 @@ h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) } } -// XXX also, what about charsets!? - HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) { HLRTable *table = parser->backend_data; @@ -884,7 +900,7 @@ int test_lalr(void) */ // XXX make LALR example - HParser *X = h_optional(h_ch('x')); + HParser *X = h_optional(h_in((uint8_t *)"rst", 3)); HParser *Y = h_sequence(h_ch('y'), h_ch('y'), NULL); HParser *A = h_sequence(X, Y, h_ch('a'), NULL); HParser *B = h_sequence(Y, h_ch('b'), NULL); @@ -921,7 +937,7 @@ int test_lalr(void) h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0); printf("\n==== P A R S E R E S U L T ====\n"); - HParseResult *res = h_parse(p, (uint8_t *)"xyya", 4); + HParseResult *res = h_parse(p, (uint8_t *)"syya", 4); if(res) h_pprint(stdout, res->ast, 0, 2); else -- GitLab