From 4f455aa97e41d51ad49ed43fa853190659797b3b Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Mon, 7 Sep 2015 15:33:50 +0200 Subject: [PATCH] fix result bit_length for LL(k) --- src/backends/llk.c | 53 ++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/backends/llk.c b/src/backends/llk.c index 1cfaab86..ec5f7f71 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -266,6 +266,7 @@ typedef struct { HArena *tarena; // tmp, deleted after parse HSlist *stack; HCountedArray *seq; // accumulates current parse result + size_t index; // input position in bytes uint8_t *buf; // for lookahead across chunk boundaries // allocated to size 2*kmax @@ -297,6 +298,7 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser) s->tarena = h_new_arena(mm__, 0); s->stack = h_slist_new(s->tarena); s->seq = h_carray_new(s->arena); + s->index = 0; s->buf = h_arena_malloc(s->tarena, 2 * table->kmax); s->win.input = s->buf; @@ -348,10 +350,13 @@ static void save_win(size_t kmax, HLLkState *s, HInputStream *stream) // (0 kmax ) // ... \_old_/\_new_/ ... // + s->index += len; // position of the window shifts up len = s->win.length - s->win.index; + assert(len <= kmax); memmove(s->buf + kmax - len, s->buf + s->win.index, len); } else { // window not active? save stream to window. + s->index -= kmax; // window starts kmax bytes below next chunk memcpy(s->buf + kmax - len, stream->input + stream->index, len); } @@ -362,25 +367,16 @@ static void save_win(size_t kmax, HLLkState *s, HInputStream *stream) s->win.length = kmax; } -// helper: read from window until old chunk gone, then switch to stream -static uint8_t consume_input(size_t kmax, HLLkState *s, HInputStream *stream) -{ - if(s->win.length > 0) { - uint8_t b = h_read_bits(&s->win, 8, false); - if(s->win.index >= kmax) // old chunk consumed! - s->win.length = 0; // disable the window - return b; - } else { - return h_read_bits(stream, 8, false); - } -} - // returns partial result or NULL (no parse) static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, - HInputStream* stream) + HInputStream* chunk) { HParsedToken *tok = NULL; // will hold result token HCFChoice *x = NULL; // current symbol (from top of stack) + HInputStream *stream; + + assert(chunk->index == 0); + assert(chunk->bit_offset == 0); const HLLkTable *table = parser->backend_data; assert(table != NULL); @@ -389,12 +385,17 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, HArena *tarena = s->tarena; HSlist *stack = s->stack; HCountedArray *seq = s->seq; + size_t kmax = table->kmax; if(!seq) return NULL; // parse already failed - if(s->win.length > 0) - append_win(table->kmax, s, stream); + if(s->win.length > 0) { + append_win(kmax, s, chunk); + stream = &s->win; + } else { + stream = chunk; + } // when we empty the stack, the parse is complete. while(!h_slist_empty(stack)) { @@ -408,12 +409,11 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // x is a nonterminal; apply the appropriate production and continue // look up applicable production in parse table - HInputStream *lookup_stream = s->win.length > 0 ? &s->win : stream; - const HCFSequence *p = h_llk_lookup(table, x, lookup_stream); + const HCFSequence *p = h_llk_lookup(table, x, stream); if(p == NULL) goto no_parse; if(p == NEED_INPUT) { - save_win(table->kmax, s, stream); + save_win(kmax, s, chunk); goto need_input; } @@ -439,7 +439,7 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // the top of stack is such that there will be a result... tok = h_arena_malloc(arena, sizeof(HParsedToken)); - tok->index = stream->index; + tok->index = s->index + stream->index; tok->bit_offset = stream->bit_offset; if(x == MARK) { // hit stack frame boundary... @@ -456,7 +456,14 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // x is a terminal or simple charset; match against input // consume the input token - uint8_t input = consume_input(table->kmax, s, stream); + uint8_t input = h_read_bits(stream, 8, false); + + // when old chunk consumed from window, switch to new chunk + if(s->win.length > 0 && s->win.index >= kmax) { + s->win.length = 0; // disable the window + s->index += kmax; // new chunk starts kmax bytes above the window + stream = chunk; + } switch(x->type) { case HCF_END: @@ -512,11 +519,13 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // since we started with a single nonterminal on the stack, seq should // contain exactly the parse result. assert(seq->used == 1); + s->index += stream->index; return seq; no_parse: h_delete_arena(arena); s->arena = NULL; + s->index += stream->index; return NULL; need_input: @@ -525,6 +534,7 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, if(tok) h_arena_free(arena, tok); // no result, yet h_slist_push(stack, x); // try this symbol again next time + s->index += stream->index; return seq; } @@ -535,6 +545,7 @@ static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s) if(s->seq) { assert(s->seq->used == 1); res = make_result(s->arena, s->seq->elements[0]); + res->bit_length = s->index*8; } h_delete_arena(s->tarena); -- GitLab