diff --git a/src/Makefile b/src/Makefile index 1a2bff3530ed897f50814fdfae949ff7d8ef635b..380436ae42f951dedccb8b5c7e9757fd025b5476 100644 --- a/src/Makefile +++ b/src/Makefile @@ -43,6 +43,8 @@ HAMMER_PARTS := \ benchmark.o \ cfgrammar.o \ glue.o \ + backends/lr.o \ + backends/lr0.o \ $(PARSERS:%=parsers/%.o) \ $(BACKENDS:%=backends/%.o) diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 79e03c75a4bf9e16b4daae83e6dac93a2469344b..fa67e5a7d54870e5a98543c34b2e303a028f682d 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -1,437 +1,6 @@ #include <assert.h> -#include "../internal.h" -#include "../cfgrammar.h" -#include "../parsers/parser_internal.h" #include "contextfree.h" - - - -/* Data structures */ - -typedef HHashSet HLRState; // states are sets of LRItems - -typedef struct HLRDFA_ { - size_t nstates; - const HLRState **states; // array of size nstates - HSlist *transitions; -} HLRDFA; - -typedef struct HLRTransition_ { - size_t from; // index into 'states' array - const HCFChoice *symbol; - size_t to; // index into 'states' array -} HLRTransition; - -typedef struct HLRItem_ { - HCFChoice *lhs; - HCFChoice **rhs; // NULL-terminated - size_t len; // number of elements in rhs - size_t mark; -} HLRItem; - -typedef struct HLRAction_ { - enum {HLR_SHIFT, HLR_REDUCE} type; - union { - size_t nextstate; // used with SHIFT - struct { - HCFChoice *lhs; // symbol carrying semantic actions etc. - size_t length; // # of symbols in rhs -#ifndef NDEBUG - HCFChoice **rhs; // NB: the rhs symbols are not needed for the parse -#endif - } production; // used with REDUCE - }; -} HLRAction; - -typedef struct HLRTable_ { - size_t nrows; - HHashTable **rows; // map symbols to HLRActions - HLRAction **forall; // shortcut to set an action for an entire row - HCFChoice *start; // start symbol - HSlist *inadeq; // indices of any inadequate states - HArena *arena; - HAllocator *mm__; -} HLRTable; - -typedef struct HLREnhGrammar_ { - HCFGrammar *grammar; // enhanced grammar - HHashTable *tmap; // maps transitions to enhanced-grammar symbols - HHashTable *smap; // maps enhanced-grammar symbols to transitions - HHashTable *corr; // maps symbols to sets of corresponding e. symbols - HArena *arena; -} HLREnhGrammar; - -typedef struct HLREngine_ { - const HLRTable *table; - HSlist *left; // left stack; reductions happen here - HSlist *right; // right stack; input appears here - size_t state; - bool running; - HArena *arena; // will hold the results - HArena *tarena; // tmp, deleted after parse -} HLREngine; - - -// XXX move to internal.h or something -// XXX replace other hashtable iterations with this -#define H_FOREACH_(HT) { \ - const HHashTable *ht__ = HT; \ - for(size_t i__=0; i__ < ht__->capacity; i__++) { \ - for(HHashTableEntry *hte__ = &ht__->contents[i__]; \ - hte__; \ - hte__ = hte__->next) { \ - if(hte__->key == NULL) continue; - -#define H_FOREACH_KEY(HT, KEYVAR) H_FOREACH_(HT) \ - const KEYVAR = hte__->key; - -#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_KEY(HT, KEYVAR) \ - VALVAR = hte__->value; - -#define H_END_FOREACH \ - } \ - } \ - } - -// compare symbols - terminals by value, others by pointer -static bool eq_symbol(const void *p, const void *q) -{ - const HCFChoice *x=p, *y=q; - return (x==y - || (x->type==HCF_END && y->type==HCF_END) - || (x->type==HCF_CHAR && y->type==HCF_CHAR && x->chr==y->chr)); -} - -// hash symbols - terminals by value, others by pointer -static HHashValue hash_symbol(const void *p) -{ - const HCFChoice *x=p; - if(x->type == HCF_END) - return 0; - else if(x->type == HCF_CHAR) - return x->chr * 33; - else - return h_hash_ptr(p); -} - -// compare LALR items by value -static bool eq_lalr_item(const void *p, const void *q) -{ - const HLRItem *a=p, *b=q; - - if(!eq_symbol(a->lhs, b->lhs)) return false; - if(a->mark != b->mark) return false; - if(a->len != b->len) return false; - - for(size_t i=0; i<a->len; i++) - if(!eq_symbol(a->rhs[i], b->rhs[i])) return false; - - return true; -} - -// compare LALR item sets (DFA states) -static inline bool eq_lalr_itemset(const void *p, const void *q) -{ - return h_hashset_equal(p, q); -} - -// hash LALR items -static inline HHashValue hash_lalr_item(const void *p) -{ - const HLRItem *x = p; - HHashValue hash = 0; - - hash += hash_symbol(x->lhs); - for(HCFChoice **p=x->rhs; *p; p++) - hash += hash_symbol(*p); - hash += x->mark; - - return hash; -} - -// hash LALR item sets (DFA states) - hash the elements and sum -static HHashValue hash_lalr_itemset(const void *p) -{ - HHashValue hash = 0; - - H_FOREACH_KEY((const HHashSet *)p, HLRItem *item) - hash += hash_lalr_item(item); - H_END_FOREACH - - return hash; -} - -HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark) -{ - HLRItem *ret = h_arena_malloc(a, sizeof(HLRItem)); - - size_t len = 0; - for(HCFChoice **p=rhs; *p; p++) len++; - assert(mark <= len); - - ret->lhs = lhs; - ret->rhs = rhs; - ret->len = len; - ret->mark = mark; - - return ret; -} - -static inline HLRState *h_lrstate_new(HArena *arena) -{ - return h_hashset_new(arena, eq_lalr_item, hash_lalr_item); -} - -HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows) -{ - HArena *arena = h_new_arena(mm__, 0); // default blocksize - assert(arena != NULL); - - HLRTable *ret = h_new(HLRTable, 1); - ret->nrows = nrows; - ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *)); - ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *)); - ret->inadeq = h_slist_new(arena); - ret->arena = arena; - ret->mm__ = mm__; - - for(size_t i=0; i<nrows; i++) { - ret->rows[i] = h_hashtable_new(arena, eq_symbol, hash_symbol); - ret->forall[i] = NULL; - } - - return ret; -} - -void h_lrtable_free(HLRTable *table) -{ - HAllocator *mm__ = table->mm__; - h_delete_arena(table->arena); - h_free(table); -} - - - -/* Constructing the characteristic automaton (handle recognizer) */ - -static HLRItem *advance_mark(HArena *arena, const HLRItem *item) -{ - assert(item->rhs[item->mark] != NULL); - HLRItem *ret = h_arena_malloc(arena, sizeof(HLRItem)); - *ret = *item; - ret->mark++; - return ret; -} - -static void expand_to_closure(HCFGrammar *g, HHashSet *items) -{ - HAllocator *mm__ = g->mm__; - HArena *arena = g->arena; - HSlist *work = h_slist_new(arena); - - // initialize work list with items - H_FOREACH_KEY(items, HLRItem *item) - h_slist_push(work, (void *)item); - H_END_FOREACH - - while(!h_slist_empty(work)) { - const HLRItem *item = h_slist_pop(work); - HCFChoice *sym = item->rhs[item->mark]; // symbol after mark - - // if there is a non-terminal after the mark, follow it - // NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here - if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) { - // add items corresponding to the productions of sym - if(sym->type == HCF_CHOICE) { - for(HCFSequence **p=sym->seq; *p; p++) { - HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0); - if(!h_hashset_present(items, it)) { - h_hashset_put(items, it); - h_slist_push(work, it); - } - } - } else { // HCF_CHARSET - for(unsigned int i=0; i<256; i++) { - if(charset_isset(sym->charset, i)) { - // XXX allocate these single-character symbols statically somewhere - HCFChoice **rhs = h_new(HCFChoice *, 2); - rhs[0] = h_new(HCFChoice, 1); - rhs[0]->type = HCF_CHAR; - rhs[0]->chr = i; - rhs[1] = NULL; - HLRItem *it = h_lritem_new(arena, sym, rhs, 0); - h_hashset_put(items, it); - // single-character item needs no further work - } - } - // if sym is a non-terminal, we need a reshape on it - // this seems as good a place as any to set it - sym->reshape = h_act_first; - } - } - } -} - -HLRDFA *h_lr0_dfa(HCFGrammar *g) -{ - HArena *arena = g->arena; - - HHashSet *states = h_hashset_new(arena, eq_lalr_itemset, hash_lalr_itemset); - // maps itemsets to assigned array indices - HSlist *transitions = h_slist_new(arena); - - // list of states that need to be processed - // to save lookups, we push two elements per state, the itemset and its - // assigned index. - HSlist *work = h_slist_new(arena); - - // make initial state (kernel) - HLRState *start = h_lrstate_new(arena); - assert(g->start->type == HCF_CHOICE); - for(HCFSequence **p=g->start->seq; *p; p++) - h_hashset_put(start, h_lritem_new(arena, g->start, (*p)->items, 0)); - expand_to_closure(g, start); - h_hashtable_put(states, start, 0); - h_slist_push(work, start); - h_slist_push(work, 0); - - // while work to do (on some state) - // determine edge symbols - // for each edge symbol: - // advance respective items -> destination state (kernel) - // compute closure - // if destination is a new state: - // add it to state set - // add transition to it - // add it to the work list - - while(!h_slist_empty(work)) { - size_t state_idx = (uintptr_t)h_slist_pop(work); - HLRState *state = h_slist_pop(work); - - // maps edge symbols to neighbor states (item sets) of s - HHashTable *neighbors = h_hashtable_new(arena, eq_symbol, hash_symbol); - - // iterate over state (closure) and generate neighboring sets - H_FOREACH_KEY(state, HLRItem *item) - HCFChoice *sym = item->rhs[item->mark]; // symbol after mark - - if(sym != NULL) { // mark was not at the end - // find or create prospective neighbor set - HLRState *neighbor = h_hashtable_get(neighbors, sym); - if(neighbor == NULL) { - neighbor = h_lrstate_new(arena); - h_hashtable_put(neighbors, sym, neighbor); - } - - // ...and add the advanced item to it - h_hashset_put(neighbor, advance_mark(arena, item)); - } - H_END_FOREACH - - // merge expanded neighbor sets into the set of existing states - H_FOREACH(neighbors, HCFChoice *symbol, HLRState *neighbor) - expand_to_closure(g, neighbor); - - // look up existing state, allocate new if not found - size_t neighbor_idx; - if(!h_hashset_present(states, neighbor)) { - neighbor_idx = states->used; - h_hashtable_put(states, neighbor, (void *)(uintptr_t)neighbor_idx); - h_slist_push(work, neighbor); - h_slist_push(work, (void *)(uintptr_t)neighbor_idx); - } else { - neighbor_idx = (uintptr_t)h_hashtable_get(states, neighbor); - } - - // add transition "state --symbol--> neighbor" - HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition)); - t->from = state_idx; - t->to = neighbor_idx; - t->symbol = symbol; - h_slist_push(transitions, t); - H_END_FOREACH - } // end while(work) - - // fill DFA struct - HLRDFA *dfa = h_arena_malloc(arena, sizeof(HLRDFA)); - dfa->nstates = states->used; - dfa->states = h_arena_malloc(arena, dfa->nstates*sizeof(HLRState *)); - H_FOREACH(states, HLRState *state, void *v) - size_t idx = (uintptr_t)v; - dfa->states[idx] = state; - H_END_FOREACH - dfa->transitions = transitions; - - return dfa; -} - - - -/* LR(0) table generation */ - -static HLRAction *shift_action(HArena *arena, size_t nextstate) -{ - HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction)); - action->type = HLR_SHIFT; - action->nextstate = nextstate; - return action; -} - -static HLRAction *reduce_action(HArena *arena, const HLRItem *item) -{ - HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction)); - action->type = HLR_REDUCE; - action->production.lhs = item->lhs; - action->production.length = item->len; -#ifndef NDEBUG - action->production.rhs = item->rhs; -#endif - return action; -} - -HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa) -{ - HAllocator *mm__ = g->mm__; - - HLRTable *table = h_lrtable_new(mm__, dfa->nstates); - HArena *arena = table->arena; - - // remember start symbol - table->start = g->start; - - // add shift entries - for(HSlistNode *x = dfa->transitions->head; x; x = x->next) { - // for each transition x-A->y, add "shift, goto y" to table entry (x,A) - HLRTransition *t = x->elem; - - HLRAction *action = shift_action(arena, t->to); - h_hashtable_put(table->rows[t->from], t->symbol, action); - } - - // add reduce entries, record inadequate states - for(size_t i=0; i<dfa->nstates; i++) { - // find reducible items in state - H_FOREACH_KEY(dfa->states[i], HLRItem *item) - if(item->mark == item->len) { // mark at the end - // check for conflicts - // XXX store more informative stuff in the inadeq records? - if(table->forall[i]) { - // reduce/reduce conflict with a previous item - h_slist_push(table->inadeq, (void *)(uintptr_t)i); - } else if(!h_hashtable_empty(table->rows[i])) { - // shift/reduce conflict with one of the row's entries - h_slist_push(table->inadeq, (void *)(uintptr_t)i); - } - - // set reduce action for the entire row - table->forall[i] = reduce_action(arena, item); - } - H_END_FOREACH - } - - return table; -} +#include "lr.h" @@ -499,19 +68,7 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg, xAy->seq = seq; } -static bool eq_transition(const void *p, const void *q) -{ - const HLRTransition *a=p, *b=q; - return (a->from == b->from && a->to == b->to && eq_symbol(a->symbol, b->symbol)); -} - -static HHashValue hash_transition(const void *p) -{ - const HLRTransition *t = p; - return (hash_symbol(t->symbol) + t->from + t->to); // XXX ? -} - -HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym) +static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym) { HArena *arena = eg->arena; HCFChoice *esym = h_arena_malloc(arena, sizeof(HCFChoice)); @@ -519,13 +76,14 @@ HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym) HHashSet *cs = h_hashtable_get(eg->corr, sym); if(!cs) { - cs = h_hashset_new(arena, eq_symbol, hash_symbol); + cs = h_hashset_new(arena, h_eq_symbol, h_hash_symbol); h_hashtable_put(eg->corr, sym, cs); } h_hashset_put(cs, esym); return esym; } + static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa, const HLRTable *table) { @@ -533,9 +91,9 @@ static HLREnhGrammar *enhance_grammar(const HCFGrammar *g, const HLRDFA *dfa, HArena *arena = g->arena; HLREnhGrammar *eg = h_arena_malloc(arena, sizeof(HLREnhGrammar)); - eg->tmap = h_hashtable_new(arena, eq_transition, hash_transition); + eg->tmap = h_hashtable_new(arena, h_eq_transition, h_hash_transition); eg->smap = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr); - eg->corr = h_hashtable_new(arena, eq_symbol, hash_symbol); + eg->corr = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); // XXX must use h_eq/hash_ptr for symbols! so enhanced CHARs are different eg->arena = arena; @@ -590,14 +148,14 @@ int h_lrtable_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action) // check whether a sequence of enhanced-grammar symbols (p) matches the given // (original-grammar) production rhs and terminates in the given end state. -bool match_production(HLREnhGrammar *eg, HCFChoice **p, - HCFChoice **rhs, size_t endstate) +static bool match_production(HLREnhGrammar *eg, HCFChoice **p, + HCFChoice **rhs, size_t endstate) { size_t state = endstate; // initialized to end in case of empty rhs for(; *p && *rhs; p++, rhs++) { HLRTransition *t = h_hashtable_get(eg->smap, *p); assert(t != NULL); - if(!eq_symbol(t->symbol, *rhs)) + if(!h_eq_symbol(t->symbol, *rhs)) return false; state = t->to; } @@ -673,7 +231,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) continue; // action to place in the table cells indicated by lookahead - HLRAction *action = reduce_action(arena, item); + HLRAction *action = h_reduce_action(arena, item); // find all LR(0)-enhanced productions matching item HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs); @@ -729,304 +287,6 @@ void h_lalr_free(HParser *parser) -/* LR driver */ - -const HLRAction * -h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) -{ - assert(state < table->nrows); - if(table->forall[state]) { - assert(h_hashtable_empty(table->rows[state])); // that would be a conflict - return table->forall[state]; - } else { - return h_hashtable_get(table->rows[state], symbol); - } -} - -HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table) -{ - HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); - - engine->table = table; - engine->left = h_slist_new(tarena); - engine->right = h_slist_new(tarena); - engine->state = 0; - engine->running = 1; - engine->arena = arena; - engine->tarena = tarena; - - return engine; -} - -void h_lrengine_step(HLREngine *engine, HInputStream *stream) -{ - // short-hand names - HSlist *left = engine->left; - HSlist *right = engine->right; - HArena *arena = engine->arena; - HArena *tarena = engine->tarena; - - // stack layout: - // on the left stack, we put pairs: (saved state, semantic value) - // on the right stack, we put pairs: (symbol, semantic value) - - // make sure there is input on the right stack - if(h_slist_empty(right)) { - // XXX use statically-allocated terminal symbols - HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); - HParsedToken *v; - - uint8_t c = h_read_bits(stream, 8, false); - - if(stream->overrun) { // end of input - x->type = HCF_END; - v = NULL; - } else { - x->type = HCF_CHAR; - x->chr = c; - v = h_arena_malloc(arena, sizeof(HParsedToken)); - v->token_type = TT_UINT; - v->uint = c; - } - - h_slist_push(right, v); - h_slist_push(right, x); - } - - // peek at input symbol on the right side - HCFChoice *symbol = right->head->elem; - - // table lookup - const HLRAction *action = h_lr_lookup(engine->table, engine->state, symbol); - if(action == NULL) { - // no handle recognizable in input, terminate - engine->running = false; - return; - } - - if(action->type == HLR_SHIFT) { - h_slist_push(left, (void *)(uintptr_t)engine->state); - h_slist_pop(right); // symbol (discard) - h_slist_push(left, h_slist_pop(right)); // semantic value - engine->state = action->nextstate; - } else { - assert(action->type == HLR_REDUCE); - size_t len = action->production.length; - HCFChoice *symbol = action->production.lhs; - - // semantic value of the reduction result - HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken)); - value->token_type = TT_SEQUENCE; - value->seq = h_carray_new_sized(arena, len); - - // pull values off the left stack, rewinding state accordingly - HParsedToken *v = NULL; - for(size_t i=0; i<len; i++) { - v = h_slist_pop(left); - engine->state = (uintptr_t)h_slist_pop(left); - - // collect values in result sequence - value->seq->elements[len-1-i] = v; - value->seq->used++; - } - if(v) { - // result position equals position of left-most symbol - value->index = v->index; - value->bit_offset = v->bit_offset; - } else { - // XXX how to get the position in this case? - } - - // perform token reshape if indicated - if(symbol->reshape) - value = (HParsedToken *)symbol->reshape(make_result(arena, value)); - - // call validation and semantic action, if present - if(symbol->pred && !symbol->pred(make_result(tarena, value))) { - // validation failed -> no parse; terminate - engine->running = false; - return; - } - if(symbol->action) - value = (HParsedToken *)symbol->action(make_result(arena, value)); - - // push result (value, symbol) onto the right stack - h_slist_push(right, value); - h_slist_push(right, symbol); - } -} - -HParseResult *h_lrengine_result(HLREngine *engine) -{ - // parsing was successful iff the start symbol is on top of the right stack - if(h_slist_pop(engine->right) == engine->table->start) { - // next on the right stack is the start symbol's semantic value - assert(!h_slist_empty(engine->right)); - HParsedToken *tok = h_slist_pop(engine->right); - return make_result(engine->arena, tok); - } else { - return NULL; - } -} - -HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) -{ - HLRTable *table = parser->backend_data; - if(!table) - return NULL; - - HArena *arena = h_new_arena(mm__, 0); // will hold the results - HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse - HLREngine *engine = h_lrengine_new(arena, tarena, table); - - // run while the recognizer finds handles in the input - while(engine->running) - h_lrengine_step(engine, stream); - - HParseResult *result = h_lrengine_result(engine); - if(!result) - h_delete_arena(arena); - h_delete_arena(tarena); - return result; -} - - - -/* Pretty-printers */ - -void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item) -{ - h_pprint_symbol(f, g, item->lhs); - fputs(" ->", f); - - HCFChoice **x = item->rhs; - HCFChoice **mark = item->rhs + item->mark; - if(*x == NULL) { - fputc('.', f); - } else { - while(*x) { - if(x == mark) - fputc('.', f); - else - fputc(' ', f); - - if((*x)->type == HCF_CHAR) { - // condense character strings - fputc('"', f); - h_pprint_char(f, (*x)->chr); - for(x++; *x; x++) { - if(x == mark) - break; - if((*x)->type != HCF_CHAR) - break; - h_pprint_char(f, (*x)->chr); - } - fputc('"', f); - } else { - h_pprint_symbol(f, g, *x); - x++; - } - } - if(x == mark) - fputs(".", f); - } -} - -void h_pprint_lrstate(FILE *f, const HCFGrammar *g, - const HLRState *state, unsigned int indent) -{ - bool first = true; - H_FOREACH_KEY(state, HLRItem *item) - if(!first) - for(unsigned int i=0; i<indent; i++) fputc(' ', f); - first = false; - h_pprint_lritem(f, g, item); - fputc('\n', f); - H_END_FOREACH -} - -static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t) -{ - fputs("-", f); - h_pprint_symbol(f, g, t->symbol); - fprintf(f, "->%lu", t->to); -} - -void h_pprint_lrdfa(FILE *f, const HCFGrammar *g, - const HLRDFA *dfa, unsigned int indent) -{ - for(size_t i=0; i<dfa->nstates; i++) { - unsigned int indent2 = indent + fprintf(f, "%4lu: ", i); - h_pprint_lrstate(f, g, dfa->states[i], indent2); - for(HSlistNode *x = dfa->transitions->head; x; x = x->next) { - const HLRTransition *t = x->elem; - if(t->from == i) { - for(unsigned int i=0; i<indent2-2; i++) fputc(' ', f); - pprint_transition(f, g, t); - fputc('\n', f); - } - } - } -} - -void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action) -{ - if(action->type == HLR_SHIFT) { - fprintf(f, "s%lu", action->nextstate); - } else { - fputs("r(", f); - h_pprint_symbol(f, g, action->production.lhs); - fputs(" -> ", f); -#ifdef NDEBUG - // if we can't print the production, at least print its length - fprintf(f, "[%lu]", action->production.length); -#else - HCFSequence seq = {action->production.rhs}; - h_pprint_sequence(f, g, &seq); -#endif - fputc(')', f); - } -} - -void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, - unsigned int indent) -{ - for(size_t i=0; i<table->nrows; i++) { - for(unsigned int j=0; j<indent; j++) fputc(' ', f); - fprintf(f, "%4lu:", i); - if(table->forall[i]) { - fputs(" - ", f); - pprint_lraction(f, g, table->forall[i]); - fputs(" -", f); - if(!h_hashtable_empty(table->rows[i])) - fputs(" !!", f); - } - H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action) - fputc(' ', f); // separator - h_pprint_symbol(f, g, symbol); - fputc(':', f); - if(table->forall[i]) { - fputc(action->type == HLR_SHIFT? 's' : 'r', f); - fputc('/', f); - fputc(table->forall[i]->type == HLR_SHIFT? 's' : 'r', f); - } else { - pprint_lraction(f, g, action); - } - H_END_FOREACH - fputc('\n', f); - } - -#if 0 - fputs("inadeq=", f); - for(HSlistNode *x=table->inadeq->head; x; x=x->next) { - fprintf(f, "%lu ", (uintptr_t)x->elem); - } - fputc('\n', f); -#endif -} - - - HParserBackendVTable h__lalr_backend_vtable = { .compile = h_lalr_compile, .parse = h_lr_parse, diff --git a/src/backends/lr.c b/src/backends/lr.c new file mode 100644 index 0000000000000000000000000000000000000000..c3062d5c652a0f76a0fdf18195af7583ae7856f6 --- /dev/null +++ b/src/backends/lr.c @@ -0,0 +1,458 @@ +#include <assert.h> +#include "../parsers/parser_internal.h" +#include "lr.h" + + + +/* Comparison and hashing functions */ + +// compare symbols - terminals by value, others by pointer +bool h_eq_symbol(const void *p, const void *q) +{ + const HCFChoice *x=p, *y=q; + return (x==y + || (x->type==HCF_END && y->type==HCF_END) + || (x->type==HCF_CHAR && y->type==HCF_CHAR && x->chr==y->chr)); +} + +// hash symbols - terminals by value, others by pointer +HHashValue h_hash_symbol(const void *p) +{ + const HCFChoice *x=p; + if(x->type == HCF_END) + return 0; + else if(x->type == HCF_CHAR) + return x->chr * 33; + else + return h_hash_ptr(p); +} + +// compare LR items by value +static bool eq_lr_item(const void *p, const void *q) +{ + const HLRItem *a=p, *b=q; + + if(!h_eq_symbol(a->lhs, b->lhs)) return false; + if(a->mark != b->mark) return false; + if(a->len != b->len) return false; + + for(size_t i=0; i<a->len; i++) + if(!h_eq_symbol(a->rhs[i], b->rhs[i])) return false; + + return true; +} + +// hash LALR items +static inline HHashValue hash_lr_item(const void *p) +{ + const HLRItem *x = p; + HHashValue hash = 0; + + hash += h_hash_symbol(x->lhs); + for(HCFChoice **p=x->rhs; *p; p++) + hash += h_hash_symbol(*p); + hash += x->mark; + + return hash; +} + +// compare item sets (DFA states) +bool h_eq_lr_itemset(const void *p, const void *q) +{ + return h_hashset_equal(p, q); +} + +// hash LR item sets (DFA states) - hash the elements and sum +HHashValue h_hash_lr_itemset(const void *p) +{ + HHashValue hash = 0; + + H_FOREACH_KEY((const HHashSet *)p, HLRItem *item) + hash += hash_lr_item(item); + H_END_FOREACH + + return hash; +} + +bool h_eq_transition(const void *p, const void *q) +{ + const HLRTransition *a=p, *b=q; + return (a->from == b->from && a->to == b->to && h_eq_symbol(a->symbol, b->symbol)); +} + +HHashValue h_hash_transition(const void *p) +{ + const HLRTransition *t = p; + return (h_hash_symbol(t->symbol) + t->from + t->to); // XXX ? +} + + + +/* Constructors */ + +HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark) +{ + HLRItem *ret = h_arena_malloc(a, sizeof(HLRItem)); + + size_t len = 0; + for(HCFChoice **p=rhs; *p; p++) len++; + assert(mark <= len); + + ret->lhs = lhs; + ret->rhs = rhs; + ret->len = len; + ret->mark = mark; + + return ret; +} + +HLRState *h_lrstate_new(HArena *arena) +{ + return h_hashset_new(arena, eq_lr_item, hash_lr_item); +} + +HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows) +{ + HArena *arena = h_new_arena(mm__, 0); // default blocksize + assert(arena != NULL); + + HLRTable *ret = h_new(HLRTable, 1); + ret->nrows = nrows; + ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *)); + ret->forall = h_arena_malloc(arena, nrows * sizeof(HLRAction *)); + ret->inadeq = h_slist_new(arena); + ret->arena = arena; + ret->mm__ = mm__; + + for(size_t i=0; i<nrows; i++) { + ret->rows[i] = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); + ret->forall[i] = NULL; + } + + return ret; +} + +void h_lrtable_free(HLRTable *table) +{ + HAllocator *mm__ = table->mm__; + h_delete_arena(table->arena); + h_free(table); +} + +HLRAction *h_shift_action(HArena *arena, size_t nextstate) +{ + HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction)); + action->type = HLR_SHIFT; + action->nextstate = nextstate; + return action; +} + +HLRAction *h_reduce_action(HArena *arena, const HLRItem *item) +{ + HLRAction *action = h_arena_malloc(arena, sizeof(HLRAction)); + action->type = HLR_REDUCE; + action->production.lhs = item->lhs; + action->production.length = item->len; +#ifndef NDEBUG + action->production.rhs = item->rhs; +#endif + return action; +} + + + +/* LR driver */ + +const HLRAction * +h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) +{ + assert(state < table->nrows); + if(table->forall[state]) { + assert(h_hashtable_empty(table->rows[state])); // that would be a conflict + return table->forall[state]; + } else { + return h_hashtable_get(table->rows[state], symbol); + } +} + +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table) +{ + HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); + + engine->table = table; + engine->left = h_slist_new(tarena); + engine->right = h_slist_new(tarena); + engine->state = 0; + engine->running = 1; + engine->arena = arena; + engine->tarena = tarena; + + return engine; +} + +void h_lrengine_step(HLREngine *engine, HInputStream *stream) +{ + // short-hand names + HSlist *left = engine->left; + HSlist *right = engine->right; + HArena *arena = engine->arena; + HArena *tarena = engine->tarena; + + // stack layout: + // on the left stack, we put pairs: (saved state, semantic value) + // on the right stack, we put pairs: (symbol, semantic value) + + // make sure there is input on the right stack + if(h_slist_empty(right)) { + // XXX use statically-allocated terminal symbols + HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); + HParsedToken *v; + + uint8_t c = h_read_bits(stream, 8, false); + + if(stream->overrun) { // end of input + x->type = HCF_END; + v = NULL; + } else { + x->type = HCF_CHAR; + x->chr = c; + v = h_arena_malloc(arena, sizeof(HParsedToken)); + v->token_type = TT_UINT; + v->uint = c; + } + + h_slist_push(right, v); + h_slist_push(right, x); + } + + // peek at input symbol on the right side + HCFChoice *symbol = right->head->elem; + + // table lookup + const HLRAction *action = h_lr_lookup(engine->table, engine->state, symbol); + if(action == NULL) { + // no handle recognizable in input, terminate + engine->running = false; + return; + } + + if(action->type == HLR_SHIFT) { + h_slist_push(left, (void *)(uintptr_t)engine->state); + h_slist_pop(right); // symbol (discard) + h_slist_push(left, h_slist_pop(right)); // semantic value + engine->state = action->nextstate; + } else { + assert(action->type == HLR_REDUCE); + size_t len = action->production.length; + HCFChoice *symbol = action->production.lhs; + + // semantic value of the reduction result + HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken)); + value->token_type = TT_SEQUENCE; + value->seq = h_carray_new_sized(arena, len); + + // pull values off the left stack, rewinding state accordingly + HParsedToken *v = NULL; + for(size_t i=0; i<len; i++) { + v = h_slist_pop(left); + engine->state = (uintptr_t)h_slist_pop(left); + + // collect values in result sequence + value->seq->elements[len-1-i] = v; + value->seq->used++; + } + if(v) { + // result position equals position of left-most symbol + value->index = v->index; + value->bit_offset = v->bit_offset; + } else { + // XXX how to get the position in this case? + } + + // perform token reshape if indicated + if(symbol->reshape) + value = (HParsedToken *)symbol->reshape(make_result(arena, value)); + + // call validation and semantic action, if present + if(symbol->pred && !symbol->pred(make_result(tarena, value))) { + // validation failed -> no parse; terminate + engine->running = false; + return; + } + if(symbol->action) + value = (HParsedToken *)symbol->action(make_result(arena, value)); + + // push result (value, symbol) onto the right stack + h_slist_push(right, value); + h_slist_push(right, symbol); + } +} + +HParseResult *h_lrengine_result(HLREngine *engine) +{ + // parsing was successful iff the start symbol is on top of the right stack + if(h_slist_pop(engine->right) == engine->table->start) { + // next on the right stack is the start symbol's semantic value + assert(!h_slist_empty(engine->right)); + HParsedToken *tok = h_slist_pop(engine->right); + return make_result(engine->arena, tok); + } else { + return NULL; + } +} + +HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +{ + HLRTable *table = parser->backend_data; + if(!table) + return NULL; + + HArena *arena = h_new_arena(mm__, 0); // will hold the results + HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse + HLREngine *engine = h_lrengine_new(arena, tarena, table); + + // run while the recognizer finds handles in the input + while(engine->running) + h_lrengine_step(engine, stream); + + HParseResult *result = h_lrengine_result(engine); + if(!result) + h_delete_arena(arena); + h_delete_arena(tarena); + return result; +} + + + +/* Pretty-printers */ + +void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item) +{ + h_pprint_symbol(f, g, item->lhs); + fputs(" ->", f); + + HCFChoice **x = item->rhs; + HCFChoice **mark = item->rhs + item->mark; + if(*x == NULL) { + fputc('.', f); + } else { + while(*x) { + if(x == mark) + fputc('.', f); + else + fputc(' ', f); + + if((*x)->type == HCF_CHAR) { + // condense character strings + fputc('"', f); + h_pprint_char(f, (*x)->chr); + for(x++; *x; x++) { + if(x == mark) + break; + if((*x)->type != HCF_CHAR) + break; + h_pprint_char(f, (*x)->chr); + } + fputc('"', f); + } else { + h_pprint_symbol(f, g, *x); + x++; + } + } + if(x == mark) + fputs(".", f); + } +} + +void h_pprint_lrstate(FILE *f, const HCFGrammar *g, + const HLRState *state, unsigned int indent) +{ + bool first = true; + H_FOREACH_KEY(state, HLRItem *item) + if(!first) + for(unsigned int i=0; i<indent; i++) fputc(' ', f); + first = false; + h_pprint_lritem(f, g, item); + fputc('\n', f); + H_END_FOREACH +} + +static void pprint_transition(FILE *f, const HCFGrammar *g, const HLRTransition *t) +{ + fputs("-", f); + h_pprint_symbol(f, g, t->symbol); + fprintf(f, "->%lu", t->to); +} + +void h_pprint_lrdfa(FILE *f, const HCFGrammar *g, + const HLRDFA *dfa, unsigned int indent) +{ + for(size_t i=0; i<dfa->nstates; i++) { + unsigned int indent2 = indent + fprintf(f, "%4lu: ", i); + h_pprint_lrstate(f, g, dfa->states[i], indent2); + for(HSlistNode *x = dfa->transitions->head; x; x = x->next) { + const HLRTransition *t = x->elem; + if(t->from == i) { + for(unsigned int i=0; i<indent2-2; i++) fputc(' ', f); + pprint_transition(f, g, t); + fputc('\n', f); + } + } + } +} + +void pprint_lraction(FILE *f, const HCFGrammar *g, const HLRAction *action) +{ + if(action->type == HLR_SHIFT) { + fprintf(f, "s%lu", action->nextstate); + } else { + fputs("r(", f); + h_pprint_symbol(f, g, action->production.lhs); + fputs(" -> ", f); +#ifdef NDEBUG + // if we can't print the production, at least print its length + fprintf(f, "[%lu]", action->production.length); +#else + HCFSequence seq = {action->production.rhs}; + h_pprint_sequence(f, g, &seq); +#endif + fputc(')', f); + } +} + +void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, + unsigned int indent) +{ + for(size_t i=0; i<table->nrows; i++) { + for(unsigned int j=0; j<indent; j++) fputc(' ', f); + fprintf(f, "%4lu:", i); + if(table->forall[i]) { + fputs(" - ", f); + pprint_lraction(f, g, table->forall[i]); + fputs(" -", f); + if(!h_hashtable_empty(table->rows[i])) + fputs(" !!", f); + } + H_FOREACH(table->rows[i], HCFChoice *symbol, HLRAction *action) + fputc(' ', f); // separator + h_pprint_symbol(f, g, symbol); + fputc(':', f); + if(table->forall[i]) { + fputc(action->type == HLR_SHIFT? 's' : 'r', f); + fputc('/', f); + fputc(table->forall[i]->type == HLR_SHIFT? 's' : 'r', f); + } else { + pprint_lraction(f, g, action); + } + H_END_FOREACH + fputc('\n', f); + } + +#if 0 + fputs("inadeq=", f); + for(HSlistNode *x=table->inadeq->head; x; x=x->next) { + fprintf(f, "%lu ", (uintptr_t)x->elem); + } + fputc('\n', f); +#endif +} diff --git a/src/backends/lr.h b/src/backends/lr.h new file mode 100644 index 0000000000000000000000000000000000000000..afd4042636f7fc47e677a0c32dd8b35dd8673159 --- /dev/null +++ b/src/backends/lr.h @@ -0,0 +1,131 @@ +#ifndef HAMMER_BACKENDS_LR__H +#define HAMMER_BACKENDS_LR__H + +#include "../hammer.h" +#include "../cfgrammar.h" +#include "../internal.h" + + +typedef HHashSet HLRState; // states are sets of LRItems + +typedef struct HLRDFA_ { + size_t nstates; + const HLRState **states; // array of size nstates + HSlist *transitions; +} HLRDFA; + +typedef struct HLRTransition_ { + size_t from; // index into 'states' array + const HCFChoice *symbol; + size_t to; // index into 'states' array +} HLRTransition; + +typedef struct HLRItem_ { + HCFChoice *lhs; + HCFChoice **rhs; // NULL-terminated + size_t len; // number of elements in rhs + size_t mark; +} HLRItem; + +typedef struct HLRAction_ { + enum {HLR_SHIFT, HLR_REDUCE} type; + union { + size_t nextstate; // used with SHIFT + struct { + HCFChoice *lhs; // symbol carrying semantic actions etc. + size_t length; // # of symbols in rhs +#ifndef NDEBUG + HCFChoice **rhs; // NB: the rhs symbols are not needed for the parse +#endif + } production; // used with REDUCE + }; +} HLRAction; + +typedef struct HLRTable_ { + size_t nrows; + HHashTable **rows; // map symbols to HLRActions + HLRAction **forall; // shortcut to set an action for an entire row + HCFChoice *start; // start symbol + HSlist *inadeq; // indices of any inadequate states + HArena *arena; + HAllocator *mm__; +} HLRTable; + +typedef struct HLREnhGrammar_ { + HCFGrammar *grammar; // enhanced grammar + HHashTable *tmap; // maps transitions to enhanced-grammar symbols + HHashTable *smap; // maps enhanced-grammar symbols to transitions + HHashTable *corr; // maps symbols to sets of corresponding e. symbols + HArena *arena; +} HLREnhGrammar; + +typedef struct HLREngine_ { + const HLRTable *table; + HSlist *left; // left stack; reductions happen here + HSlist *right; // right stack; input appears here + size_t state; + bool running; + HArena *arena; // will hold the results + HArena *tarena; // tmp, deleted after parse +} HLREngine; + + +// XXX move to internal.h or something +// XXX replace other hashtable iterations with this +#define H_FOREACH_(HT) { \ + const HHashTable *ht__ = HT; \ + for(size_t i__=0; i__ < ht__->capacity; i__++) { \ + for(HHashTableEntry *hte__ = &ht__->contents[i__]; \ + hte__; \ + hte__ = hte__->next) { \ + if(hte__->key == NULL) continue; + +#define H_FOREACH_KEY(HT, KEYVAR) H_FOREACH_(HT) \ + const KEYVAR = hte__->key; + +#define H_FOREACH(HT, KEYVAR, VALVAR) H_FOREACH_KEY(HT, KEYVAR) \ + VALVAR = hte__->value; + +#define H_END_FOREACH \ + } \ + } \ + } + + + +HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark); +HLRState *h_lrstate_new(HArena *arena); +HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows); +void h_lrtable_free(HLRTable *table); +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table); +HLRAction *h_reduce_action(HArena *arena, const HLRItem *item); +HLRAction *h_shift_action(HArena *arena, size_t nextstate); + +bool h_eq_symbol(const void *p, const void *q); +bool h_eq_lr_itemset(const void *p, const void *q); +bool h_eq_transition(const void *p, const void *q); +HHashValue h_hash_symbol(const void *p); +HHashValue h_hash_lr_itemset(const void *p); +HHashValue h_hash_transition(const void *p); + +HLRDFA *h_lr0_dfa(HCFGrammar *g); +HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa); +int h_lrtable_put(HLRTable *tbl, size_t state, HCFChoice *x, HLRAction *action); + +int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params); +void h_lalr_free(HParser *parser); + +const HLRAction *h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol); +void h_lrengine_step(HLREngine *engine, HInputStream *stream); +HParseResult *h_lrengine_result(HLREngine *engine); +HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); + +void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item); +void h_pprint_lrstate(FILE *f, const HCFGrammar *g, + const HLRState *state, unsigned int indent); +void h_pprint_lrdfa(FILE *f, const HCFGrammar *g, + const HLRDFA *dfa, unsigned int indent); +void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, + unsigned int indent); + +#endif diff --git a/src/backends/lr0.c b/src/backends/lr0.c new file mode 100644 index 0000000000000000000000000000000000000000..aab2ad19bd98af5c5406fe270318d6a6e2dced3f --- /dev/null +++ b/src/backends/lr0.c @@ -0,0 +1,205 @@ +#include <assert.h> +#include "lr.h" + + + +/* Constructing the characteristic automaton (handle recognizer) */ + +static HLRItem *advance_mark(HArena *arena, const HLRItem *item) +{ + assert(item->rhs[item->mark] != NULL); + HLRItem *ret = h_arena_malloc(arena, sizeof(HLRItem)); + *ret = *item; + ret->mark++; + return ret; +} + +static void expand_to_closure(HCFGrammar *g, HHashSet *items) +{ + HAllocator *mm__ = g->mm__; + HArena *arena = g->arena; + HSlist *work = h_slist_new(arena); + + // initialize work list with items + H_FOREACH_KEY(items, HLRItem *item) + h_slist_push(work, (void *)item); + H_END_FOREACH + + while(!h_slist_empty(work)) { + const HLRItem *item = h_slist_pop(work); + HCFChoice *sym = item->rhs[item->mark]; // symbol after mark + + // if there is a non-terminal after the mark, follow it + // NB: unlike LLk, we do consider HCF_CHARSET a non-terminal here + if(sym != NULL && (sym->type==HCF_CHOICE || sym->type==HCF_CHARSET)) { + // add items corresponding to the productions of sym + if(sym->type == HCF_CHOICE) { + for(HCFSequence **p=sym->seq; *p; p++) { + HLRItem *it = h_lritem_new(arena, sym, (*p)->items, 0); + if(!h_hashset_present(items, it)) { + h_hashset_put(items, it); + h_slist_push(work, it); + } + } + } else { // HCF_CHARSET + for(unsigned int i=0; i<256; i++) { + if(charset_isset(sym->charset, i)) { + // XXX allocate these single-character symbols statically somewhere + HCFChoice **rhs = h_new(HCFChoice *, 2); + rhs[0] = h_new(HCFChoice, 1); + rhs[0]->type = HCF_CHAR; + rhs[0]->chr = i; + rhs[1] = NULL; + HLRItem *it = h_lritem_new(arena, sym, rhs, 0); + h_hashset_put(items, it); + // single-character item needs no further work + } + } + // if sym is a non-terminal, we need a reshape on it + // this seems as good a place as any to set it + sym->reshape = h_act_first; + } + } + } +} + +HLRDFA *h_lr0_dfa(HCFGrammar *g) +{ + HArena *arena = g->arena; + + HHashSet *states = h_hashset_new(arena, h_eq_lr_itemset, h_hash_lr_itemset); + // maps itemsets to assigned array indices + HSlist *transitions = h_slist_new(arena); + + // list of states that need to be processed + // to save lookups, we push two elements per state, the itemset and its + // assigned index. + HSlist *work = h_slist_new(arena); + + // make initial state (kernel) + HLRState *start = h_lrstate_new(arena); + assert(g->start->type == HCF_CHOICE); + for(HCFSequence **p=g->start->seq; *p; p++) + h_hashset_put(start, h_lritem_new(arena, g->start, (*p)->items, 0)); + expand_to_closure(g, start); + h_hashtable_put(states, start, 0); + h_slist_push(work, start); + h_slist_push(work, 0); + + // while work to do (on some state) + // determine edge symbols + // for each edge symbol: + // advance respective items -> destination state (kernel) + // compute closure + // if destination is a new state: + // add it to state set + // add transition to it + // add it to the work list + + while(!h_slist_empty(work)) { + size_t state_idx = (uintptr_t)h_slist_pop(work); + HLRState *state = h_slist_pop(work); + + // maps edge symbols to neighbor states (item sets) of s + HHashTable *neighbors = h_hashtable_new(arena, h_eq_symbol, h_hash_symbol); + + // iterate over state (closure) and generate neighboring sets + H_FOREACH_KEY(state, HLRItem *item) + HCFChoice *sym = item->rhs[item->mark]; // symbol after mark + + if(sym != NULL) { // mark was not at the end + // find or create prospective neighbor set + HLRState *neighbor = h_hashtable_get(neighbors, sym); + if(neighbor == NULL) { + neighbor = h_lrstate_new(arena); + h_hashtable_put(neighbors, sym, neighbor); + } + + // ...and add the advanced item to it + h_hashset_put(neighbor, advance_mark(arena, item)); + } + H_END_FOREACH + + // merge expanded neighbor sets into the set of existing states + H_FOREACH(neighbors, HCFChoice *symbol, HLRState *neighbor) + expand_to_closure(g, neighbor); + + // look up existing state, allocate new if not found + size_t neighbor_idx; + if(!h_hashset_present(states, neighbor)) { + neighbor_idx = states->used; + h_hashtable_put(states, neighbor, (void *)(uintptr_t)neighbor_idx); + h_slist_push(work, neighbor); + h_slist_push(work, (void *)(uintptr_t)neighbor_idx); + } else { + neighbor_idx = (uintptr_t)h_hashtable_get(states, neighbor); + } + + // add transition "state --symbol--> neighbor" + HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition)); + t->from = state_idx; + t->to = neighbor_idx; + t->symbol = symbol; + h_slist_push(transitions, t); + H_END_FOREACH + } // end while(work) + + // fill DFA struct + HLRDFA *dfa = h_arena_malloc(arena, sizeof(HLRDFA)); + dfa->nstates = states->used; + dfa->states = h_arena_malloc(arena, dfa->nstates*sizeof(HLRState *)); + H_FOREACH(states, HLRState *state, void *v) + size_t idx = (uintptr_t)v; + dfa->states[idx] = state; + H_END_FOREACH + dfa->transitions = transitions; + + return dfa; +} + + + +/* LR(0) table generation */ + +HLRTable *h_lr0_table(HCFGrammar *g, const HLRDFA *dfa) +{ + HAllocator *mm__ = g->mm__; + + HLRTable *table = h_lrtable_new(mm__, dfa->nstates); + HArena *arena = table->arena; + + // remember start symbol + table->start = g->start; + + // add shift entries + for(HSlistNode *x = dfa->transitions->head; x; x = x->next) { + // for each transition x-A->y, add "shift, goto y" to table entry (x,A) + HLRTransition *t = x->elem; + + HLRAction *action = h_shift_action(arena, t->to); + h_hashtable_put(table->rows[t->from], t->symbol, action); + } + + // add reduce entries, record inadequate states + for(size_t i=0; i<dfa->nstates; i++) { + // find reducible items in state + H_FOREACH_KEY(dfa->states[i], HLRItem *item) + if(item->mark == item->len) { // mark at the end + // check for conflicts + // XXX store more informative stuff in the inadeq records? + if(table->forall[i]) { + // reduce/reduce conflict with a previous item + h_slist_push(table->inadeq, (void *)(uintptr_t)i); + } else if(!h_hashtable_empty(table->rows[i])) { + // shift/reduce conflict with one of the row's entries + h_slist_push(table->inadeq, (void *)(uintptr_t)i); + } + + // set reduce action for the entire row + table->forall[i] = h_reduce_action(arena, item); + } + H_END_FOREACH + } + + return table; +}