diff --git a/src/backends/llk.c b/src/backends/llk.c index 4f73c469829f6cb7a86d0f3edc1a07ea25753943..c0cf6afef75aae37c9b9479cdc44223244326ab7 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -21,7 +21,7 @@ typedef struct HLLkTable_ { /* Interface to look up an entry in the parse table. */ const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x, - HInputStream lookahead) + const HInputStream *stream) { const HStringMap *row = h_hashtable_get(table->rows, x); assert(row != NULL); // the table should have one row for each nonterminal @@ -29,28 +29,7 @@ const HCFSequence *h_llk_lookup(const HLLkTable *table, const HCFChoice *x, assert(!row->epsilon_branch); // would match without looking at the input // XXX cases where this could be useful? - const HStringMap *m = row; - while(m) { - if(m->epsilon_branch) { // input matched - // assert: another lookahead would not bring a more specific match. - // this is for the table generator to ensure. - return m->epsilon_branch; - } - - // note the lookahead stream is passed by value, i.e. a copy. - // reading bits from it does not consume them from the real input. - uint8_t c = h_read_bits(&lookahead, 8, false); - - if(lookahead.overrun) { // end of input - // XXX assumption of byte-wise grammar and input - return m->end_branch; - } - - // no match yet, descend - m = h_stringmap_get_char(m, c); - } - - return NULL; + return h_stringmap_get_lookahead(row, *stream); } /* Allocate a new parse table. 
*/ @@ -321,7 +300,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* seq = h_carray_new(arena); // look up applicable production in parse table - const HCFSequence *p = h_llk_lookup(table, x, *stream); + const HCFSequence *p = h_llk_lookup(table, x, stream); if(p == NULL) goto no_parse; diff --git a/src/backends/lr.c b/src/backends/lr.c index 66a76b7ca92162a3a6a7bdfc23f593c4fdc4423a..ca45582d65a11be833ba701827e6d2c51f0f3428 100644 --- a/src/backends/lr.c +++ b/src/backends/lr.c @@ -216,7 +216,7 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, } static const HLRAction * -terminal_lookup(const HLREngine *engine, const HCFChoice *symbol) +terminal_lookup(const HLREngine *engine, const HInputStream *stream) { const HLRTable *table = engine->table; size_t state = engine->state; @@ -226,11 +226,7 @@ terminal_lookup(const HLREngine *engine, const HCFChoice *symbol) assert(h_lrtable_row_empty(table, state)); // that would be a conflict return table->forall[state]; } else { - // XXX use the lookahead stream directly here (cf. 
llk) - if(symbol->type == HCF_END) - return table->tmap[state]->end_branch; - else - return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false); + return h_stringmap_get_lookahead(table->tmap[state], *stream); // *stream is copied into the callee; the engine's input is not consumed } } @@ -248,22 +244,7 @@ nonterminal_lookup(const HLREngine *engine, const HCFChoice *symbol) const HLRAction *h_lrengine_action(const HLREngine *engine) { - HArena *tarena = engine->tarena; - - // XXX use statically-allocated terminal symbols - HCFChoice *x = h_arena_malloc(tarena, sizeof(HCFChoice)); - - HInputStream lookahead = engine->input; - uint8_t c = h_read_bits(&lookahead, 8, false); - - if(lookahead.overrun) { // end of input - x->type = HCF_END; - } else { - x->type = HCF_CHAR; - x->chr = c; - } - - return terminal_lookup(engine, x); + return terminal_lookup(engine, &engine->input); } static HParsedToken *consume_input(HLREngine *engine) diff --git a/src/cfgrammar.c b/src/cfgrammar.c index b01c44c1cf8c25430faacc86a8a776a03817f350..196d9d3c8b6ee9cb77b24a98ff365b8b4634ac45 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -321,6 +321,31 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en return m->epsilon_branch; } +void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) +{ + while(m) { + if(m->epsilon_branch) { // input matched: this node carries a value, so stop descending + // assumption: reading further lookahead would not yield a more specific match. + // the table generator is responsible for ensuring this. (LLk) + return m->epsilon_branch; + } + + // note: the lookahead stream is passed by value, i.e. this function works on a copy. + // reading bits from it does not consume them from the caller's real input. 
+ uint8_t c = h_read_bits(&lookahead, 8, false); + + if(lookahead.overrun) { // end of input (overrun flag checked right after the 8-bit read) + // XXX assumes byte-wise grammar and input (lookahead is read 8 bits at a time) + return m->end_branch; + } + + // no match yet, descend into the child map for byte c (the loop ends if this yields NULL) + m = h_stringmap_get_char(m, c); + } + + return NULL; // no match: ran out of map nodes to descend into +} + bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end) { return (h_stringmap_get(m, str, n, end) != NULL); diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 1f52bdd34ccaef9612e303ad023ac8a0fe4f9a5b..193f8ca327d2f9c0b74518b9942b5fe3f37c407b 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -47,6 +47,7 @@ void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v); void h_stringmap_update(HStringMap *m, const HStringMap *n); void h_stringmap_replace(HStringMap *m, void *old, void *new); void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end); +void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead); bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end); bool h_stringmap_present_epsilon(const HStringMap *m); bool h_stringmap_empty(const HStringMap *m);