diff --git a/src/backends/llk.c b/src/backends/llk.c index 898bfdcb2d1bfdcba989eef58e29fa00a9600081..865c30e65f64f7c56802f6f486d5b421fa8fcfd6 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -266,7 +266,6 @@ typedef struct { HArena *tarena; // tmp, deleted after parse HSlist *stack; HCountedArray *seq; // accumulates current parse result - size_t index; // input position in bytes uint8_t *buf; // for lookahead across chunk boundaries // allocated to size 2*kmax @@ -298,7 +297,6 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser) s->tarena = h_new_arena(mm__, 0); s->stack = h_slist_new(s->tarena); s->seq = h_carray_new(s->arena); - s->index = 0; s->buf = h_arena_malloc(s->tarena, 2 * table->kmax); s->win.input = s->buf; @@ -350,13 +348,14 @@ static void save_win(size_t kmax, HLLkState *s, HInputStream *stream) // (0 kmax ) // ... \_old_/\_new_/ ... // - s->index += len; // position of the window shifts up + s->win.pos += len; // position of the window shifts up len = s->win.length - s->win.index; assert(len <= kmax); memmove(s->buf + kmax - len, s->buf + s->win.index, len); } else { // window not active? save stream to window. - s->index -= kmax; // window starts kmax bytes below next chunk + // buffer starts kmax bytes below chunk boundary + s->win.pos = stream->pos - kmax; memcpy(s->buf + kmax - len, stream->input + stream->index, len); } @@ -439,7 +438,7 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // the top of stack is such that there will be a result... tok = h_arena_malloc(arena, sizeof(HParsedToken)); - tok->index = s->index + stream->index; + tok->index = stream->pos + stream->index; tok->bit_offset = stream->bit_offset; if(x == MARK) { // hit stack frame boundary... @@ -461,7 +460,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // when old chunk consumed from window, switch to new chunk if(s->win.length > 0 && s->win.index >= kmax) { s->win.length = 0; // disable the window - s->index += kmax; // new chunk starts kmax bytes above the window stream = chunk; } @@ -519,13 +517,11 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // since we started with a single nonterminal on the stack, seq should // contain exactly the parse result. assert(seq->used == 1); - s->index += stream->index; return seq; no_parse: h_delete_arena(arena); s->arena = NULL; - s->index += stream->index; return NULL; need_input: @@ -534,7 +530,6 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, if(tok) h_arena_free(arena, tok); // no result, yet h_slist_push(stack, x); // try this symbol again next time - s->index += stream->index; return seq; } @@ -545,7 +540,6 @@ static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s) if(s->seq) { assert(s->seq->used == 1); res = make_result(s->arena, s->seq->elements[0]); - res->bit_length = s->index*8; } h_delete_arena(s->tarena); @@ -560,7 +554,11 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* assert(stream->last_chunk); s->seq = llk_parse_chunk_(s, parser, stream); - return llk_parse_finish_(mm__, s); + HParseResult *res = llk_parse_finish_(mm__, s); + if(res) + res->bit_length = stream->index * 8 + stream->bit_offset; + + return res; } void h_llk_parse_start(HSuspendedParser *s) @@ -568,29 +566,17 @@ void h_llk_parse_start(HSuspendedParser *s) s->backend_state = llk_parse_start_(s->mm__, s->parser); } -void h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input) +bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input) { HLLkState *state = s->backend_state; state->seq = llk_parse_chunk_(state, s->parser, input); + + return (state->seq == NULL || h_slist_empty(state->stack)); } HParseResult *h_llk_parse_finish(HSuspendedParser *s) { - HLLkState *state = s->backend_state; - HInputStream empty = { - .index = 0, - .bit_offset = 0, - .overrun = 0, - .endianness = s->endianness, - .length = 0, - .input = NULL, - .last_chunk = true - }; - - // signal end of input (no-op parse already done) - state->seq = llk_parse_chunk_(state, s->parser, &empty); - return llk_parse_finish_(s->mm__, s->backend_state); } diff --git a/src/hammer.c b/src/hammer.c index 3422422717fb65417c0165d55ce9c03333755dcd..70ebc8a4943d8e1b3a25e036a745c2296bf8ddfd 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -51,6 +51,7 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { // Set up a parse state... HInputStream input_stream = { + .pos = 0, .index = 0, .bit_offset = 0, .overrun = 0, @@ -114,6 +115,9 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) { s->mm__ = mm__; s->parser = parser; s->backend_state = NULL; + s->done = false; + s->pos = 0; + s->bit_offset = 0; s->endianness = DEFAULT_ENDIANNESS; // backend-specific initialization @@ -126,8 +130,13 @@ HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) { bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) { assert(backends[s->parser->backend]->parse_chunk != NULL); + // no-op if parser is already done + if(s->done) + return true; + // input HInputStream input_stream = { + .pos = s->pos, .index = 0, .bit_offset = 0, .overrun = 0, @@ -138,19 +147,43 @@ bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) { }; // process chunk - backends[s->parser->backend]->parse_chunk(s, &input_stream); + s->done = backends[s->parser->backend]->parse_chunk(s, &input_stream); s->endianness = input_stream.endianness; + s->pos += input_stream.index; + s->bit_offset = input_stream.bit_offset; - return !input_stream.overrun; // parser wants no more input? done. + return s->done; } HParseResult* h_parse_finish(HSuspendedParser* s) { + assert(backends[s->parser->backend]->parse_chunk != NULL); assert(backends[s->parser->backend]->parse_finish != NULL); HAllocator *mm__ = s->mm__; + // signal end of input if parser is not already done + if(!s->done) { + HInputStream empty = { + .pos = s->pos, + .index = 0, + .bit_offset = 0, + .overrun = 0, + .endianness = s->endianness, + .length = 0, + .input = NULL, + .last_chunk = true + }; + + s->done = backends[s->parser->backend]->parse_chunk(s, &empty); + assert(s->done); + } + + // extract result HParseResult *r = backends[s->parser->backend]->parse_finish(s); - // NB: backend should have freed backend_state + if(r) + r->bit_length = s->pos * 8 + s->bit_offset; + + // NB: backend should have freed backend_state h_free(s); return r; diff --git a/src/internal.h b/src/internal.h index b81b50c9b5a66942df7e935f031940dee8a7f78f..b11186dd7f39c16d0e5485ce7beb111527e550ba 100644 --- a/src/internal.h +++ b/src/internal.h @@ -72,6 +72,7 @@ typedef struct HCFStack_ HCFStack; typedef struct HInputStream_ { // This should be considered to be a really big value type. const uint8_t *input; + size_t pos; // position of this chunk in a multi-chunk stream size_t index; size_t length; char bit_offset; @@ -215,8 +216,11 @@ struct HSuspendedParser_ { HAllocator *mm__; const HParser *parser; void *backend_state; + bool done; - // the only part of HInputStream that carries across chunks + // input stream state + size_t pos; + uint8_t bit_offset; uint8_t endianness; }; @@ -227,12 +231,13 @@ typedef struct HParserBackendVTable_ { void (*parse_start)(HSuspendedParser *s); // parse_start should allocate s->backend_state. - void (*parse_chunk)(HSuspendedParser *s, HInputStream *input); - // when parse_chunk leaves input.overrun unset, parse is done. else: + bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input); + // if parser is done, return true. otherwise: // parse_chunk MUST consume all input, integrating it into s->backend_state. - // calling parse_chunk again after parse is done should have no effect. + // parse_chunk will not be called again after it reports done. HParseResult *(*parse_finish)(HSuspendedParser *s); // parse_finish must free s->backend_state. + // parse_finish will not be called before parse_chunk reports done. } HParserBackendVTable;