diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 7a1c04ac2b7334683d0fb9a2453eb6f4554407c4..79e03c75a4bf9e16b4daae83e6dac93a2469344b 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -61,6 +61,16 @@ typedef struct HLREnhGrammar_ { HArena *arena; } HLREnhGrammar; +typedef struct HLREngine_ { + const HLRTable *table; + HSlist *left; // left stack; reductions happen here + HSlist *right; // right stack; input appears here + size_t state; + bool running; + HArena *arena; // will hold the results + HArena *tarena; // tmp, deleted after parse +} HLREngine; + // XXX move to internal.h or something // XXX replace other hashtable iterations with this @@ -733,118 +743,149 @@ h_lr_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) } } -HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table) { - HLRTable *table = parser->backend_data; - if(!table) - return NULL; + HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); - HArena *arena = h_new_arena(mm__, 0); // will hold the results - HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse - HSlist *left = h_slist_new(tarena); // left stack; reductions happen here - HSlist *right = h_slist_new(tarena); // right stack; input appears here + engine->table = table; + engine->left = h_slist_new(tarena); + engine->right = h_slist_new(tarena); + engine->state = 0; + engine->running = 1; + engine->arena = arena; + engine->tarena = tarena; + + return engine; +} + +void h_lrengine_step(HLREngine *engine, HInputStream *stream) +{ + // short-hand names + HSlist *left = engine->left; + HSlist *right = engine->right; + HArena *arena = engine->arena; + HArena *tarena = engine->tarena; // stack layout: // on the left stack, we put pairs: (saved state, semantic value) // on the right stack, we put pairs: (symbol, semantic value) - // run while the recognizer finds handles in the input - size_t state = 0; - while(1) { - // make sure there is input on the right stack - if(h_slist_empty(right)) { - // XXX use statically-allocated terminal symbols - HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); - HParsedToken *v; - - uint8_t c = h_read_bits(stream, 8, false); - - if(stream->overrun) { // end of input - x->type = HCF_END; - v = NULL; - } else { - x->type = HCF_CHAR; - x->chr = c; - v = h_arena_malloc(arena, sizeof(HParsedToken)); - v->token_type = TT_UINT; - v->uint = c; - } + // make sure there is input on the right stack + if(h_slist_empty(right)) { + // XXX use statically-allocated terminal symbols + HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); + HParsedToken *v; + + uint8_t c = h_read_bits(stream, 8, false); - h_slist_push(right, v); - h_slist_push(right, x); + if(stream->overrun) { // end of input + x->type = HCF_END; + v = NULL; + } else { + x->type = HCF_CHAR; + x->chr = c; + v = h_arena_malloc(arena, sizeof(HParsedToken)); + v->token_type = TT_UINT; + v->uint = c; } - // peek at input symbol on the right side - HCFChoice *symbol = right->head->elem; + h_slist_push(right, v); + h_slist_push(right, x); + } - // table lookup - const HLRAction *action = h_lr_lookup(table, state, symbol); - if(action == NULL) - break; // no handle recognizable in input, terminate parsing + // peek at input symbol on the right side + HCFChoice *symbol = right->head->elem; - if(action->type == HLR_SHIFT) { - h_slist_push(left, (void *)(uintptr_t)state); - h_slist_pop(right); // symbol (discard) - h_slist_push(left, h_slist_pop(right)); // semantic value - state = action->nextstate; - } else { - assert(action->type == HLR_REDUCE); - size_t len = action->production.length; - HCFChoice *symbol = action->production.lhs; - - // semantic value of the reduction result - HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken)); - value->token_type = TT_SEQUENCE; - value->seq = h_carray_new_sized(arena, len); - - // pull values off the left stack, rewinding state accordingly - HParsedToken *v = NULL; - for(size_t i=0; i<len; i++) { - v = h_slist_pop(left); - state = (uintptr_t)h_slist_pop(left); - - // collect values in result sequence - value->seq->elements[len-1-i] = v; - value->seq->used++; - } - if(v) { - // result position equals position of left-most symbol - value->index = v->index; - value->bit_offset = v->bit_offset; - } else { - // XXX how to get the position in this case? - } + // table lookup + const HLRAction *action = h_lr_lookup(engine->table, engine->state, symbol); + if(action == NULL) { + // no handle recognizable in input, terminate + engine->running = false; + return; + } - // perform token reshape if indicated - if(symbol->reshape) - value = (HParsedToken *)symbol->reshape(make_result(arena, value)); + if(action->type == HLR_SHIFT) { + h_slist_push(left, (void *)(uintptr_t)engine->state); + h_slist_pop(right); // symbol (discard) + h_slist_push(left, h_slist_pop(right)); // semantic value + engine->state = action->nextstate; + } else { + assert(action->type == HLR_REDUCE); + size_t len = action->production.length; + HCFChoice *symbol = action->production.lhs; + + // semantic value of the reduction result + HParsedToken *value = h_arena_malloc(arena, sizeof(HParsedToken)); + value->token_type = TT_SEQUENCE; + value->seq = h_carray_new_sized(arena, len); + + // pull values off the left stack, rewinding state accordingly + HParsedToken *v = NULL; + for(size_t i=0; i<len; i++) { + v = h_slist_pop(left); + engine->state = (uintptr_t)h_slist_pop(left); + + // collect values in result sequence + value->seq->elements[len-1-i] = v; + value->seq->used++; + } + if(v) { + // result position equals position of left-most symbol + value->index = v->index; + value->bit_offset = v->bit_offset; + } else { + // XXX how to get the position in this case? + } - // call validation and semantic action, if present - if(symbol->pred && !symbol->pred(make_result(tarena, value))) - break; // validation failed -> no parse - if(symbol->action) - value = (HParsedToken *)symbol->action(make_result(arena, value)); + // perform token reshape if indicated + if(symbol->reshape) + value = (HParsedToken *)symbol->reshape(make_result(arena, value)); - // push result (value, symbol) onto the right stack - h_slist_push(right, value); - h_slist_push(right, symbol); + // call validation and semantic action, if present + if(symbol->pred && !symbol->pred(make_result(tarena, value))) { + // validation failed -> no parse; terminate + engine->running = false; + return; } - } - + if(symbol->action) + value = (HParsedToken *)symbol->action(make_result(arena, value)); + // push result (value, symbol) onto the right stack + h_slist_push(right, value); + h_slist_push(right, symbol); + } +} +HParseResult *h_lrengine_result(HLREngine *engine) +{ // parsing was successful iff the start symbol is on top of the right stack - HParseResult *result = NULL; - if(h_slist_pop(right) == table->start) { + if(h_slist_pop(engine->right) == engine->table->start) { // next on the right stack is the start symbol's semantic value - assert(!h_slist_empty(right)); - HParsedToken *tok = h_slist_pop(right); - result = make_result(arena, tok); + assert(!h_slist_empty(engine->right)); + HParsedToken *tok = h_slist_pop(engine->right); + return make_result(engine->arena, tok); } else { - h_delete_arena(arena); - result = NULL; + return NULL; } +} +HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +{ + HLRTable *table = parser->backend_data; + if(!table) + return NULL; + + HArena *arena = h_new_arena(mm__, 0); // will hold the results + HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse + HLREngine *engine = h_lrengine_new(arena, tarena, table); + + // run while the recognizer finds handles in the input + while(engine->running) + h_lrengine_step(engine, stream); + + HParseResult *result = h_lrengine_result(engine); + if(!result) + h_delete_arena(arena); h_delete_arena(tarena); return result; }