diff --git a/src/backends/glr.c b/src/backends/glr.c index 7a5f8f512bc54bf6cef8ef299b11a9691c398ec8..864ecaac5601f8ea87fa67643b416e20c52128bc 100644 --- a/src/backends/glr.c +++ b/src/backends/glr.c @@ -119,11 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = { // XXX TODO -// - split tables into -// - one mapping input bytes to actions (shift or reduce or conflict) -// - one mapping reduced-to lhs nonterminals to shift states -// - can there still be conflicts here? -// - use HStringMap to represent lookahead sets and the "piggyback" table // - implement engine merging // - triggered when two enter the same state // - old stacks (/engines?) saved diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 39c4afd80c5cb29cb062b56b32d66d28aadfed3b..242988eeed435fb77742ed92e9acbb91c4a67e66 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -13,9 +13,23 @@ static inline size_t seqsize(void *p_) return n+1; } +static HLRAction * +lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) +{ + switch(symbol->type) { + case HCF_END: + return table->tmap[state]->end_branch; + case HCF_CHAR: + return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false); + default: + // nonterminal case + return h_hashtable_get(table->ntmap[state], symbol); + } +} + static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A) { - HLRAction *action = h_hashtable_get(table->rows[x], A); + HLRAction *action = lrtable_lookup(table, x, A); assert(action != NULL); assert(action->type == HLR_SHIFT); return action->nextstate; @@ -130,21 +144,48 @@ static inline bool has_conflicts(HLRTable *table) return !h_slist_empty(table->inadeq); } -// place a new terminal entry in tbl; records conflicts in tbl->inadeq +// for each lookahead symbol (fs), put action into tmap // returns 0 on success, -1 on conflict // ignores forall entries -static int terminal_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action) +static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action) { - HLRAction *prev = h_hashtable_get(tbl->rows[state], x); - if(prev && prev != action) { - // conflict - action = h_lr_conflict(tbl->arena, prev, action); - h_hashtable_put(tbl->rows[state], x, action); - return -1; - } else { - h_hashtable_put(tbl->rows[state], x, action); - return 0; + int ret = 0; + + if(fs->epsilon_branch) { + HLRAction *prev = tmap->epsilon_branch; + if(prev && prev != action) { + // conflict + tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action); + ret = -1; + } else { + tmap->epsilon_branch = action; + } + } + + if(fs->end_branch) { + HLRAction *prev = tmap->end_branch; + if(prev && prev != action) { + // conflict + tmap->end_branch = h_lr_conflict(tmap->arena, prev, action); + ret = -1; + } else { + tmap->end_branch = action; + } } + + H_FOREACH(fs->char_branches, void *key, HStringMap *fs_) + HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key); + + if(!tmap_) { + tmap_ = h_stringmap_new(tmap->arena); + h_hashtable_put(tmap->char_branches, key, tmap_); + } + + if(terminals_put(tmap_, fs_, action) < 0) + ret = -1; + H_END_FOREACH + + return ret; } // check whether a sequence of enhanced-grammar symbols (p) matches the given @@ -254,23 +295,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) assert(!h_stringmap_empty(fs)); // for each lookahead symbol, put action into table cell - if(fs->end_branch) { - HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice)); - terminal->type = HCF_END; - if(terminal_put(table, state, terminal, action) < 0) - inadeq = true; - } - H_FOREACH(fs->char_branches, void *key, HStringMap *m) - if(!m->epsilon_branch) - continue; - - HCFChoice *terminal = h_arena_malloc(arena, sizeof(HCFChoice)); - terminal->type = HCF_CHAR; - terminal->chr = key_char((HCharKey)key); - - if(terminal_put(table, state, terminal, action) < 0) - inadeq = true; - H_END_FOREACH // lookahead character + if(terminals_put(table->tmap[state], fs, action) < 0) + inadeq = true; } H_END_FOREACH // enhanced production H_END_FOREACH // reducible item @@ -306,6 +332,8 @@ HParserBackendVTable h__lalr_backend_vtable = { // dummy! int test_lalr(void) { + HAllocator *mm__ = &system_allocator; + /* E -> E '-' T | T @@ -321,7 +349,7 @@ int test_lalr(void) HParser *p = E; printf("\n==== G R A M M A R ====\n"); - HCFGrammar *g = h_cfgrammar(&system_allocator, p); + HCFGrammar *g = h_cfgrammar_(mm__, augment(mm__, p)); if(g == NULL) { fprintf(stderr, "h_cfgrammar failed\n"); return 1; diff --git a/src/backends/lr.c b/src/backends/lr.c index cdd2a35ab3173241f91b0eae8028d2e09221056a..66a76b7ca92162a3a6a7bdfc23f593c4fdc4423a 100644 --- a/src/backends/lr.c +++ b/src/backends/lr.c @@ -1,4 +1,5 @@ #include <assert.h> +#include <ctype.h> #include "../parsers/parser_internal.h" #include "lr.h" @@ -118,14 +119,16 @@ HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows) HLRTable *ret = h_new(HLRTable, 1); ret->nrows = nrows; - ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *)); + ret->ntmap = h_arena_malloc(arena, nrows * sizeof(HHashTable *)); + ret->tmap = h_arena_malloc(arena, nrows * sizeof(HStringMap *)); ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *)); ret->inadeq = h_slist_new(arena); ret->arena = arena; ret->mm__ = mm__; for(size_t i=0; i<nrows; i++) { - ret->rows[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); + ret->ntmap[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); + ret->tmap[i] = h_stringmap_new(arena); ret->forall[i] = NULL; } @@ -186,6 +189,12 @@ HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new) return action; } +bool h_lrtable_row_empty(const HLRTable *table, size_t i) +{ + return (h_hashtable_empty(table->ntmap[i]) + && h_stringmap_empty(table->tmap[i])); +} + /* LR driver */ @@ -214,10 +223,14 @@ terminal_lookup(const HLREngine *engine, const HCFChoice *symbol) assert(state < table->nrows); if(table->forall[state]) { - assert(h_hashtable_empty(table->rows[state])); // that would be a conflict + assert(h_lrtable_row_empty(table, state)); // that would be a conflict return table->forall[state]; } else { - return h_hashtable_get(table->rows[state], symbol); + // XXX use the lookahead stream directly here (cf. llk) + if(symbol->type == HCF_END) + return table->tmap[state]->end_branch; + else + return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false); } } @@ -228,12 +241,9 @@ nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol) size_t state = engine->state; assert(state < table->nrows); - if(table->forall[state]) { - assert(h_hashtable_empty(table->rows[state])); // that would be a conflict - return table->forall[state]; - } else { - return h_hashtable_get(table->rows[state], symbol); - } + assert(!table->forall[state]); // contains only reduce entries + // we are only looking for shifts + return h_hashtable_get(table->ntmap[state], symbol); } const HLRAction *h_lrengine_action(const HLREngine *engine) @@ -500,6 +510,19 @@ void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action) } } +static void valprint_lraction(FILE *file, void *env, void *val) +{ + const HLRAction *action = val; + const HCFGrammar *grammar = env; + pprint_lraction(file, grammar, action); +} + +static void pprint_lrtable_terminals(FILE *file, const HCFGrammar *g, + const HStringMap *map) +{ + h_pprint_stringmap(file, ' ', valprint_lraction, (void *)g, map); +} + void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, unsigned int indent) { @@ -507,18 +530,19 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, for(unsigned int j=0; j<indent; j++) fputc(' ', f); fprintf(f, "%4lu:", i); if(table->forall[i]) { - fputs(" - ", f); + fputc(' ', f); pprint_lraction(f, g, table->forall[i]); - fputs(" -", f); - if(!h_hashtable_empty(table->rows[i])) + if(!h_lrtable_row_empty(table, i)) fputs(" !!", f); } - H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action) + H_FOREACH(table->ntmap[i], HCFChoice *symbol, HLRAction *action) fputc(' ', f); // separator h_pprint_symbol(f, g, symbol); fputc(':', f); pprint_lraction(f, g, action); H_END_FOREACH + fputc(' ', f); // separator + pprint_lrtable_terminals(f, g, table->tmap[i]); fputc('\n', f); } diff --git a/src/backends/lr.h b/src/backends/lr.h index ee0c1f3ee4e9895df63d9a3bb1e51e65ef819d0b..ca8418e8bfdefa568d748d12259a4be2520491c6 100644 --- a/src/backends/lr.h +++ b/src/backends/lr.h @@ -48,8 +48,9 @@ typedef struct HLRAction_ { } HLRAction; typedef struct HLRTable_ { - size_t nrows; - HHashTable **rows; // map symbols to HLRActions + size_t nrows; // dimension of the pointer arrays below + HHashTable **ntmap; // map nonterminal symbols to HLRActions, per row + HStringMap **tmap; // map lookahead strings to HLRActions, per row HLRAction **forall; // shortcut to set an action for an entire row HCFChoice *start; // start symbol HSlist *inadeq; // indices of any inadequate states @@ -110,6 +111,7 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, HLRAction *h_reduce_action(HArena *arena, const HLRItem *item); HLRAction *h_shift_action(HArena *arena, size_t nextstate); HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new); +bool h_lrtable_row_empty(const HLRTable *table, size_t i); bool h_eq_symbol(const void *p, const void *q); bool h_eq_lr_itemset(const void *p, const void *q); diff --git a/src/backends/lr0.c b/src/backends/lr0.c index 1bd63e54d483179dc675989bdea93170b79478ab..5add53a212b2000e54fb6e8b2d50fae41514c535 100644 --- a/src/backends/lr0.c +++ b/src/backends/lr0.c @@ -166,7 +166,18 @@ void put_shift(HLRTable *table, size_t state, const HCFChoice *symbol, size_t nextstate) { HLRAction *action = h_shift_action(table->arena, nextstate); - h_hashtable_put(table->rows[state], symbol, action); + + switch(symbol->type) { + case HCF_END: + h_stringmap_put_end(table->tmap[state], action); + break; + case HCF_CHAR: + h_stringmap_put_char(table->tmap[state], symbol->chr, action); + break; + default: + // nonterminal case + h_hashtable_put(table->ntmap[state], symbol, action); + } } HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa) @@ -210,7 +221,8 @@ HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa) // check for shift/reduce conflict with other entries // NOTE: these are not recorded as HLR_CONFLICTs at this point - if(!h_hashtable_empty(table->rows[i])) + + if(!h_lrtable_row_empty(table, i)) inadeq = true; } H_END_FOREACH diff --git a/src/cfgrammar.c b/src/cfgrammar.c index 199ef5f1295b96774d1cbd6b3d1499a8f739b9b1..b01c44c1cf8c25430faacc86a8a776a03817f350 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -813,27 +813,43 @@ void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, in #define BUFSIZE 512 static bool -pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, - const HStringMap *set) +pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, + void (*valprint)(FILE *f, void *env, void *val), void *env, + const HStringMap *map) { assert(n < BUFSIZE-4); - if(set->epsilon_branch) { - if(!first) fputc(',', file); first=false; - if(n==0) - fputs("''", file); - else + if(map->epsilon_branch) { + if(!first) fputc(sep, file); first=false; + if(n==0) { + fputs("\"\"", file); + } else { + fputs("\"", file); fwrite(prefix, 1, n, file); + fputs("\"", file); + } + + if(valprint) { + fputc(':', file); + valprint(file, env, map->epsilon_branch); + } } - if(set->end_branch) { - if(!first) fputc(',', file); first=false; + if(map->end_branch) { + if(!first) fputs(",\"", file); first=false; + if(n>0) fputs("\"\"", file); fwrite(prefix, 1, n, file); - fputc('$', file); + if(n>0) fputs("\"\"", file); + fputs("$", file); + + if(valprint) { + fputc(':', file); + valprint(file, env, map->end_branch); + } } - // iterate over set->char_branches - HHashTable *ht = set->char_branches; + // iterate over map->char_branches + HHashTable *ht = map->char_branches; size_t i; HHashTableEntry *hte; for(i=0; i < ht->capacity; i++) { @@ -859,20 +875,28 @@ pprint_stringset_elems(FILE *file, bool first, char *prefix, size_t n, n_ += sprintf(prefix+n_, "\\x%.2X", c); } - first = pprint_stringset_elems(file, first, prefix, n_, ends); + first = pprint_stringmap_elems(file, first, prefix, n_, + sep, valprint, env, ends); } } return first; } +void h_pprint_stringmap(FILE *file, char sep, + void (*valprint)(FILE *f, void *env, void *val), void *env, + const HStringMap *map) +{ + char buf[BUFSIZE]; + pprint_stringmap_elems(file, true, buf, 0, sep, valprint, env, map); +} + void h_pprint_stringset(FILE *file, const HStringMap *set, int indent) { int j; for(j=0; j<indent; j++) fputc(' ', file); - char buf[BUFSIZE]; fputc('{', file); - pprint_stringset_elems(file, true, buf, 0, set); + h_pprint_stringmap(file, ',', NULL, NULL, set); fputs("}\n", file); } diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 57f6f68bf55ed574edbc6596f4c1321b24f39b58..1f52bdd34ccaef9612e303ad023ac8a0fe4f9a5b 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -97,4 +97,7 @@ void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq); void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x); void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent); void h_pprint_stringset(FILE *file, const HStringMap *set, int indent); +void h_pprint_stringmap(FILE *file, char sep, + void (*valprint)(FILE *f, void *env, void *val), void *env, + const HStringMap *map); void h_pprint_char(FILE *file, char c);