From c32cf709b2e51924f8f1f91693febc1856fedc43 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Fri, 21 Jun 2013 20:11:19 +0200 Subject: [PATCH] eliminate the right stack; work with the HInputStream directly --- src/backends/glr.c | 15 ++++---- src/backends/lr.c | 87 +++++++++++++++++++++++----------------------- src/backends/lr.h | 12 +++---- 3 files changed, 54 insertions(+), 60 deletions(-) diff --git a/src/backends/glr.c b/src/backends/glr.c index d460e8af..7a5f8f51 100644 --- a/src/backends/glr.c +++ b/src/backends/glr.c @@ -29,15 +29,14 @@ HLREngine *fork_engine(const HLREngine *engine) HLREngine *eng2 = h_arena_malloc(engine->tarena, sizeof(HLREngine)); eng2->table = engine->table; eng2->state = engine->state; + eng2->input = engine->input; - // shallow-copy the stacks + // shallow-copy the stack // this works because h_slist_push and h_slist_pop never modify // the underlying structure of HSlistNodes, only the head pointer. // in fact, this gives us prefix sharing for free. - eng2->left = h_arena_malloc(engine->tarena, sizeof(HSlist)); - eng2->right = h_arena_malloc(engine->tarena, sizeof(HSlist)); - *eng2->left = *engine->left; - *eng2->right = *engine->right; + eng2->stack = h_arena_malloc(engine->tarena, sizeof(HSlist)); + *eng2->stack = *engine->stack; eng2->arena = engine->arena; eng2->tarena = engine->tarena; @@ -54,7 +53,7 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse HSlist *engines = h_slist_new(tarena); - h_slist_push(engines, h_lrengine_new(arena, tarena, table)); + h_slist_push(engines, h_lrengine_new(arena, tarena, table, stream)); HParseResult *result = NULL; while(result == NULL && !h_slist_empty(engines)) { @@ -75,7 +74,7 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* continue; } - const HLRAction *action = h_lrengine_action(engine, stream); + const HLRAction *action = h_lrengine_action(engine); // fork engine on conflicts if(action && action->type == HLR_CONFLICT) { @@ -120,8 +119,6 @@ HParserBackendVTable h__glr_backend_vtable = { // XXX TODO -// - eliminate right stack by always doing a shift after reduce -// (shift should always follow reduce because rightmost) // - split tables into // - one mapping input bytes to actions (shift or reduce or conflict) // - one mapping reduced-to lhs nonterminals to shift states diff --git a/src/backends/lr.c b/src/backends/lr.c index 2603ff2a..f33aab82 100644 --- a/src/backends/lr.c +++ b/src/backends/lr.c @@ -202,65 +202,64 @@ h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) } } -HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table) +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, + const HInputStream *stream) { HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); engine->table = table; engine->state = 0; engine->run = true; - engine->left = h_slist_new(tarena); - engine->right = h_slist_new(tarena); + engine->stack = h_slist_new(tarena); + engine->input = *stream; engine->arena = arena; engine->tarena = tarena; return engine; } -const HLRAction *h_lrengine_action(HLREngine *engine, HInputStream *stream) +const HLRAction *h_lrengine_action(const HLREngine *engine) { - HSlist *right = engine->right; - HArena *arena = engine->arena; HArena *tarena = engine->tarena; - // make sure there is input on the right stack - if(h_slist_empty(right)) { - // XXX use statically-allocated terminal symbols - HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); - HParsedToken *v; - - uint8_t c = h_read_bits(stream, 8, false); + // XXX use statically-allocated terminal symbols + HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); - if(stream->overrun) { // end of input - x->type = HCF_END; - v = NULL; - } else { - x->type = HCF_CHAR; - x->chr = c; - v = h_arena_malloc(arena, sizeof(HParsedToken)); - v->token_type = TT_UINT; - v->uint = c; - } + HInputStream lookahead = engine->input; + uint8_t c = h_read_bits(&lookahead, 8, false); - h_slist_push(right, v); - h_slist_push(right, x); + if(lookahead.overrun) { // end of input + x->type = HCF_END; + } else { + x->type = HCF_CHAR; + x->chr = c; } - // peek at input symbol on the right side - HCFChoice *symbol = right->head->elem; + return h_lr_lookup(engine->table, engine->state, x); +} + +static HParsedToken *consume_input(HLREngine *engine) +{ + HParsedToken *v; - // table lookup - const HLRAction *action = h_lr_lookup(engine->table, engine->state, symbol); + uint8_t c = h_read_bits(&engine->input, 8, false); - return action; + if(engine->input.overrun) { // end of input + v = NULL; + } else { + v = h_arena_malloc(engine->arena, sizeof(HParsedToken)); + v->token_type = TT_UINT; + v->uint = c; + } + + return v; } // run LR parser for one round; returns false when finished static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action) { // short-hand names - HSlist *left = engine->left; - HSlist *right = engine->right; + HSlist *stack = engine->stack; HArena *arena = engine->arena; HArena *tarena = engine->tarena; @@ -278,11 +277,11 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action) value->token_type = TT_SEQUENCE; value->seq = h_carray_new_sized(arena, len); - // pull values off the left stack, rewinding state accordingly + // pull values off the stack, rewinding state accordingly HParsedToken *v = NULL; for(size_t i=0; i<len; i++) { - v = h_slist_drop(left); - engine->state = (uintptr_t)h_slist_drop(left); + v = h_slist_drop(stack); + engine->state = (uintptr_t)h_slist_drop(stack); // collect values in result sequence value->seq->elements[len-1-i] = v; @@ -315,17 +314,17 @@ static bool h_lrengine_step_(HLREngine *engine, const HLRAction *action) assert(shift->type == HLR_SHIFT); // piggy-back the shift right here, never touching the input - h_slist_push(left, (void *)(uintptr_t)engine->state); - h_slist_push(left, value); + h_slist_push(stack, (void *)(uintptr_t)engine->state); + h_slist_push(stack, value); engine->state = shift->nextstate; if(symbol == engine->table->start) return false; // reduced to start symbol; accept! } else { assert(action->type == HLR_SHIFT); - h_slist_push(left, (void *)(uintptr_t)engine->state); - h_slist_drop(right); // symbol (discard) - h_slist_push(left, h_slist_drop(right)); // semantic value + HParsedToken *value = consume_input(engine); + h_slist_push(stack, (void *)(uintptr_t)engine->state); + h_slist_push(stack, value); engine->state = action->nextstate; } @@ -341,9 +340,9 @@ void h_lrengine_step(HLREngine *engine, const HLRAction *action) HParseResult *h_lrengine_result(HLREngine *engine) { // parsing was successful iff after a shift the engine is back in state 0 - if(engine->state == 0 && !h_slist_empty(engine->left)) { + if(engine->state == 0 && !h_slist_empty(engine->stack)) { // on top of the stack is the start symbol's semantic value - HParsedToken *tok = engine->left->head->elem; + HParsedToken *tok = engine->stack->head->elem; return make_result(engine->arena, tok); } else { return NULL; @@ -358,11 +357,11 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* HArena *arena = h_new_arena(mm__, 0); // will hold the results HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse - HLREngine *engine = h_lrengine_new(arena, tarena, table); + HLREngine *engine = h_lrengine_new(arena, tarena, table, stream); // iterate engine to completion while(engine->run) - h_lrengine_step(engine, h_lrengine_action(engine, stream)); + h_lrengine_step(engine, h_lrengine_action(engine)); HParseResult *result = h_lrengine_result(engine); if(!result) diff --git a/src/backends/lr.h b/src/backends/lr.h index 5e2f0329..f76bd33f 100644 --- a/src/backends/lr.h +++ b/src/backends/lr.h @@ -70,11 +70,8 @@ typedef struct HLREngine_ { size_t state; bool run; - // stack layout: - // on the left stack, we put pairs: (saved state, semantic value) - // on the right stack, we put pairs: (symbol, semantic value) - HSlist *left; // left stack; reductions happen here - HSlist *right; // right stack; input appears here + HSlist *stack; // holds pairs: (saved state, semantic value) + HInputStream input; HArena *arena; // will hold the results HArena *tarena; // tmp, deleted after parse @@ -108,7 +105,8 @@ HLRItem *h_lritem_new(HArena *a, HCFChoice *lhs, HCFChoice **rhs, size_t mark); HLRState *h_lrstate_new(HArena *arena); HLRTable *h_lrtable_new(HAllocator *mm__, size_t nrows); void h_lrtable_free(HLRTable *table); -HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table); +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, + const HInputStream *stream); HLRAction *h_reduce_action(HArena *arena, const HLRItem *item); HLRAction *h_shift_action(HArena *arena, size_t nextstate); HLRAction *h_lr_conflict(HArena *arena, HLRAction *action, HLRAction *new); @@ -128,7 +126,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params); void h_lalr_free(HParser *parser); const HLRAction *h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol); -const HLRAction *h_lrengine_action(HLREngine *engine, HInputStream *stream); +const HLRAction *h_lrengine_action(const HLREngine *engine); void h_lrengine_step(HLREngine *engine, const HLRAction *action); HParseResult *h_lrengine_result(HLREngine *engine); HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); -- GitLab