From 3bb26162c35db804f49a148011f06c828419b028 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Fri, 7 Jun 2013 22:39:09 +0200
Subject: [PATCH] end-of-day scratch commit (LALR table generation)

---
 src/backends/lalr.c | 162 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 142 insertions(+), 20 deletions(-)

diff --git a/src/backends/lalr.c b/src/backends/lalr.c
index 7a07c3d4..b3d9b5bb 100644
--- a/src/backends/lalr.c
+++ b/src/backends/lalr.c
@@ -4,17 +4,6 @@
 #include "../parsers/parser_internal.h"
 
 
-// PLAN:
-// data structures:
-// - LR table is an array of hashtables that map grammar symbols (HCFChoice)
-//   to LRActions.
-
-// build LR(0) DFA
-// extend with lookahead information by either:
-// - reworking algorithm to propagate lookahead ("simple LALR generation")
-// - follow sets of enhanced grammar ("conversion to SLR")
-
-
 /* Constructing the characteristic automaton (handle recognizer) */
 
 // - states are hashsets containing LRItems
@@ -288,17 +277,150 @@ HLRDFA *h_lalr_dfa(HCFGrammar *g)
 
 /* LALR table generation */
 
+// A single parse table entry: shift to another state, or reduce by a
+// production.
+typedef struct HLRAction_ {
+  enum {HLR_SHIFT, HLR_REDUCE} type;
+  union {
+    size_t nextstate;   // used with shift
+    struct {
+      HCFChoice *lhs;
+      HCFChoice **rhs;
+    } production;       // used with reduce
+  };
+} HLRAction;
+
+// The parse table: one hashtable of actions per row.
+typedef struct HLRTable_ {
+  size_t     nrows;
+  HHashTable **rows;    // map symbols to HLRActions
+  HCFChoice  *start;    // start symbol
+  HArena     *arena;    // rows, their hashtables and the actions live here
+  HAllocator *mm__;     // allocator the table struct was obtained from
+} HLRTable;
+
+// Create a table with 'nrows' empty rows.
+// The struct itself comes from mm__; the row array and the row hashtables
+// are allocated from a new arena that the table keeps in 'arena'.
+HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows)
+{
+  HArena *arena = h_new_arena(mm__, 0);    // default blocksize
+  assert(arena != NULL);
+
+  HLRTable *ret = h_new(HLRTable, 1);
+  ret->nrows = nrows;
+  ret->rows = h_arena_malloc(arena, nrows * sizeof(HHashTable *));
+  ret->start = NULL;    // not determined yet; never leave it indeterminate
+  ret->arena = arena;
+  ret->mm__ = mm__;
+
+  for(size_t i=0; i<nrows; i++)
+    ret->rows[i] = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
+
+  return ret;
+}
+
+// Release a table made by h_lrtable_new: first its arena, then the struct.
+void h_lrtable_free(HLRTable *table)
+{
+  HAllocator *mm__ = table->mm__;    // h_free refers to 'mm__'
+  h_delete_arena(table->arena);
+  h_free(table);
+}
+
+// Build the transformed grammar used for the lookahead computation.
+// Still a skeleton: it creates one (empty) hashtable per DFA state in 'syms'
+// and walks the productions of every nonterminal of g without emitting
+// anything. Returns the grammar obtained from h_cfgrammar_new, unmodified.
+static HCFGrammar *transform_grammar(const HCFGrammar *g, const HLRTable *table,
+                                     const HLRDFA *dfa, HHashTable **syms)
+{
+  HCFGrammar *gt = h_cfgrammar_new(g->mm__);
+  HArena *arena = gt->arena;
+
+  // old grammar symbol ->
+  //HHashTable *map = h_hashtable_new(
+
+  for(size_t i=0; i<dfa->nstates; i++) {
+    const HLRState *state = dfa->states[i];
+
+    syms[i] = h_hashtable_new(arena, h_eq_ptr, h_hash_ptr);
+
+
+  }
+
+  // iterate over g->nts
+  const HHashTable *ht = g->nts;
+  for(size_t i=0; i < ht->capacity; i++) {
+    for(HHashTableEntry *hte = &ht->contents[i]; hte; hte = hte->next) {
+      if(hte->key == NULL)
+        continue;
+
+      const HCFChoice *A = hte->key;
+
+      // iterate over the productions of A
+      for(HCFSequence **p=A->seq; *p; p++) {
+        // find all transitions marked by A
+        // yields xAy -> rhs'
+        // trace rhs starting in state x and following the transitions
+      }
+    }
+  }
+
+  return gt;
+}
+
+// Compile 'parser' for the LALR backend (work in progress).
+// Returns -1 if no grammar, DFA or transformed grammar could be built, and 0
+// otherwise. So far only the shift entries of the table are generated.
 int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
 {
-  // generate grammar
-  // construct dfa / determine lookahead
-  // extract table
-  //   create an array of hashtables, one per state
-  //   for each transition a--S-->b:
-  //     add "shift, goto b" to table entry (a,S)
-  //   for each state:
-  //     add reduce entries for its accepting items
-  return -1;
+  // generate CFG from parser
+  // construct LR(0) DFA
+  // build parse table, shift-entries only
+  //   for each transition a--S-->b, add "shift, goto b" to table entry (a,S)
+  // determine lookahead "by conversion to SLR"
+  //   transform grammar to encode transitions in symbols
+  //   -> lookahead for an item is the transformed left-hand side's follow set
+  // finish table; for each state:
+  //   add reduce entries for its accepting items
+  //   in case of conflict, add lookahead info
+
+  HCFGrammar *g = h_cfgrammar(mm__, parser);
+  if(g == NULL)     // backend not suitable (language not context-free)
+    return -1;
+
+  HLRDFA *dfa = h_lalr_dfa(g);
+  if(dfa == NULL) { // this should actually not happen
+    h_cfgrammar_free(g);    // do not leak the grammar on the error path
+    return -1;
+  }
+
+  // create table with shift actions
+  HLRTable *table = h_lrtable_new(mm__, dfa->nstates);
+  for(HSlistNode *x = dfa->transitions->head; x; x = x->next) {
+    HLRTransition *t = x->elem;
+    HLRAction *action = h_arena_malloc(table->arena, sizeof(HLRAction));
+    action->type = HLR_SHIFT;
+    action->nextstate = t->to;
+    h_hashtable_put(table->rows[t->from], t->symbol, action);
+  }
+
+  // mapping (state,item)-pairs to the symbols of the new grammar
+  HHashTable **syms = h_arena_malloc(g->arena, dfa->nstates * sizeof(HHashTable *));
+  // XXX use a different arena for this (and other things)
+
+  HCFGrammar *gt = transform_grammar(g, table, dfa, syms);
+  if(gt == NULL) {  // this should actually not happen
+    h_lrtable_free(table);
+    h_cfgrammar_free(g);    // syms was allocated from g->arena
+    return -1;
+  }
+
+  // XXX fill in reduce actions
+  // XXX on success, table, gt, g and dfa are neither stored nor released yet
+
+  return 0;
 }
 
 void h_lalr_free(HParser *parser)
-- 
GitLab