diff --git a/src/backends/lalr.c b/src/backends/lalr.c index 14f64cd1a23cf2276a1377e0d1b78c3a24125ed8..975735a16dab016576c625d5bb541d8028b776f7 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -52,7 +52,7 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg, if (xAy->type != HCF_CHOICE) { return; } - // XXX CHARSET? + // NB: nothing to do on quasi-terminal CHARSET which carries no list of rhs's HArena *arena = eg->arena; @@ -286,14 +286,28 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs); assert(lhss != NULL); H_FOREACH_KEY(lhss, HCFChoice *lhs) - assert(lhs->type == HCF_CHOICE); // XXX could be CHARSET? - - for(HCFSequence **p=lhs->seq; *p; p++) { - HCFChoice **rhs = (*p)->items; - if(!match_production(eg, rhs, item->rhs, state)) { - continue; - } - + assert(lhs->type == HCF_CHOICE || lhs->type == HCF_CHARSET); + + bool match = false; + if(lhs->type == HCF_CHOICE) { + for(HCFSequence **p=lhs->seq; *p; p++) { + HCFChoice **rhs = (*p)->items; + if(match_production(eg, rhs, item->rhs, state)) { + match = true; + break; + } + } + } else { // HCF_CHARSET + assert(item->rhs[0] != NULL); + assert(item->rhs[1] == NULL); + assert(item->rhs[0]->type == HCF_CHAR); + HLRTransition *t = h_hashtable_get(eg->smap, lhs); + assert(t != NULL); + match = (t->to == state + && charset_isset(lhs->charset, item->rhs[0]->chr)); + } + + if(match) { // the left-hand symbol's follow set is this production's // contribution to the lookahead const HStringMap *fs = h_follow(1, eg->grammar, lhs); @@ -304,7 +318,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) // for each lookahead symbol, put action into table cell if(terminals_put(table->tmap[state], fs, action) < 0) inadeq = true; - } H_END_FOREACH // enhanced production + } + H_END_FOREACH // enhanced production H_END_FOREACH // reducible item if(inadeq) { @@ -331,7 +346,10 @@ void h_lalr_free(HParser *parser) HParserBackendVTable h__lalr_backend_vtable = { .compile = h_lalr_compile, .parse = h_lr_parse, - .free = h_lalr_free + .free = h_lalr_free, + .parse_start = h_lr_parse_start, + .parse_chunk = h_lr_parse_chunk, + .parse_finish = h_lr_parse_finish }; @@ -340,8 +358,6 @@ HParserBackendVTable h__lalr_backend_vtable = { // dummy! int test_lalr(void) { - HAllocator *mm__ = &system_allocator; - /* E -> E '-' T | T @@ -356,44 +372,24 @@ int test_lalr(void) h_bind_indirect(E, E_); HParser *p = E; - printf("\n==== G R A M M A R ====\n"); - HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); - if (g == NULL) { - fprintf(stderr, "h_cfgrammar failed\n"); + HCFGrammar *g = h_pprint_lr_info(stdout, p); + if(!g) return 1; - } - h_pprint_grammar(stdout, g, 0); - - printf("\n==== D F A ====\n"); - HLRDFA *dfa = h_lr0_dfa(g); - if (dfa) { - h_pprint_lrdfa(stdout, g, dfa, 0); - } else { - fprintf(stderr, "h_lalr_dfa failed\n"); - } - - printf("\n==== L R ( 0 ) T A B L E ====\n"); - HLRTable *table0 = h_lr0_table(g, dfa); - if (table0) { - h_pprint_lrtable(stdout, g, table0, 0); - } else { - fprintf(stderr, "h_lr0_table failed\n"); - } - h_lrtable_free(table0); - printf("\n==== L A L R T A B L E ====\n"); + fprintf(stdout, "\n==== L A L R T A B L E ====\n"); if (h_compile(p, PB_LALR, NULL)) { - fprintf(stderr, "does not compile\n"); + fprintf(stdout, "does not compile\n"); return 2; } h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0); - printf("\n==== P A R S E R E S U L T ====\n"); + fprintf(stdout, "\n==== P A R S E R E S U L T ====\n"); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13); if (res) { h_pprint(stdout, res->ast, 0, 2); } else { - printf("no parse\n"); + fprintf(stdout, "no parse\n"); } + return 0; } diff --git a/src/backends/llk.c b/src/backends/llk.c index afccb7456904ec63fc649dad3a24c8dd6eeeb754..865c30e65f64f7c56802f6f486d5b421fa8fcfd6 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -12,6 +12,7 @@ static const size_t DEFAULT_KMAX = 1; * maps lookahead strings to productions (HCFSequence). */ typedef struct HLLkTable_ { + size_t kmax; HHashTable *rows; HCFChoice *start; // start symbol HArena *arena; @@ -188,6 +189,7 @@ static int fill_table_row(size_t kmax, HCFGrammar *g, HStringMap *row, */ static int fill_table(size_t kmax, HCFGrammar *g, HLLkTable *table) { + table->kmax = kmax; table->start = g->start; // iterate over g->nts @@ -259,56 +261,172 @@ void h_llk_free(HParser *parser) /* LL(k) driver */ -HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +typedef struct { + HArena *arena; // will hold the results + HArena *tarena; // tmp, deleted after parse + HSlist *stack; + HCountedArray *seq; // accumulates current parse result + + uint8_t *buf; // for lookahead across chunk boundaries + // allocated to size 2*kmax + // new chunk starts at index kmax + // ( 0 ... kmax ... 2*kmax-1 ) + // \_old_/\______new_______/ + HInputStream win; // win.length is set to 0 when not in use +} HLLkState; + +// in order to construct the parse tree, we delimit the symbol stack into +// frames corresponding to production right-hand sides. since only left-most +// derivations are produced this linearization is unique. +// the 'mark' allocated below simply reserves a memory address to use as the +// frame delimiter. +// nonterminals, instead of being popped and forgotten, are put back onto the +// stack below the mark to tell us which validations and semantic actions to +// execute on their corresponding result. +// also on the stack below the mark, we store the previously accumulated +// value for the surrounding production. +static void const * const MARK = &MARK; // stack frame delimiter + +static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser) { const HLLkTable *table = parser->backend_data; assert(table != NULL); - HArena *arena = h_new_arena(mm__, 0); // will hold the results - HArena *tarena = h_new_arena(mm__, 0); // tmp, deleted after parse - HSlist *stack = h_slist_new(tarena); - HCountedArray *seq = h_carray_new(arena); // accumulates current parse result - - // in order to construct the parse tree, we delimit the symbol stack into - // frames corresponding to production right-hand sides. since only left-most - // derivations are produced this linearization is unique. - // the 'mark' allocated below simply reserves a memory address to use as the - // frame delimiter. - // nonterminals, instead of being popped and forgotten, are put back onto the - // stack below the mark to tell us which validations and semantic actions to - // execute on their corresponding result. - // also on the stack below the mark, we store the previously accumulated - // value for the surrounding production. - void *mark = h_arena_malloc(tarena, 1); + HLLkState *s = h_new(HLLkState, 1); + s->arena = h_new_arena(mm__, 0); + s->tarena = h_new_arena(mm__, 0); + s->stack = h_slist_new(s->tarena); + s->seq = h_carray_new(s->arena); + s->buf = h_arena_malloc(s->tarena, 2 * table->kmax); + + s->win.input = s->buf; + s->win.length = 0; // unused // initialize with the start symbol on the stack. - h_slist_push(stack, table->start); + h_slist_push(s->stack, table->start); + + return s; +} + +// helper: add new input to the lookahead window +static void append_win(size_t kmax, HLLkState *s, HInputStream *stream) +{ + assert(stream->bit_offset == 0); + assert(s->win.input == s->buf); + assert(s->win.length == kmax); + assert(s->win.index < kmax); + + size_t n = stream->length - stream->index; // bytes to copy + if(n > kmax) + n = kmax; + + memcpy(s->buf + kmax, stream->input + stream->index, n); + s->win.length += n; +} + +// helper: save old input to the lookahead window +static void save_win(size_t kmax, HLLkState *s, HInputStream *stream) +{ + assert(stream->bit_offset == 0); + + size_t len = stream->length - stream->index; + assert(len < kmax); + + if(len == 0) { + // stream empty? nothing to do. + return; + } else if(s->win.length > 0) { + // window active? should contain all of stream. + assert(s->win.length == kmax + len); + assert(s->win.index <= kmax); + + // shift contents down: + // + // (0 kmax ) + // ... \_old_/\_new_/ ... + // + // (0 kmax ) + // ... \_old_/\_new_/ ... + // + s->win.pos += len; // position of the window shifts up + len = s->win.length - s->win.index; + assert(len <= kmax); + memmove(s->buf + kmax - len, s->buf + s->win.index, len); + } else { + // window not active? save stream to window. + // buffer starts kmax bytes below chunk boundary + s->win.pos = stream->pos - kmax; + memcpy(s->buf + kmax - len, stream->input + stream->index, len); + } + + // metadata + s->win = *stream; + s->win.input = s->buf; + s->win.index = kmax - len; + s->win.length = kmax; +} + +// returns partial result or NULL (no parse) +static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, + HInputStream* chunk) +{ + HParsedToken *tok = NULL; // will hold result token + HCFChoice *x = NULL; // current symbol (from top of stack) + HInputStream *stream; + + assert(chunk->index == 0); + assert(chunk->bit_offset == 0); + + const HLLkTable *table = parser->backend_data; + assert(table != NULL); + + HArena *arena = s->arena; + HArena *tarena = s->tarena; + HSlist *stack = s->stack; + HCountedArray *seq = s->seq; + size_t kmax = table->kmax; + + if(!seq) + return NULL; // parse already failed + + if(s->win.length > 0) { + append_win(kmax, s, chunk); + stream = &s->win; + } else { + stream = chunk; + } // when we empty the stack, the parse is complete. while(!h_slist_empty(stack)) { + tok = NULL; + // pop top of stack for inspection - HCFChoice *x = h_slist_pop(stack); + x = h_slist_pop(stack); assert(x != NULL); - if(x != mark && x->type == HCF_CHOICE) { + if(x != MARK && x->type == HCF_CHOICE) { // x is a nonterminal; apply the appropriate production and continue - // push stack frame - h_slist_push(stack, seq); // save current partial value - h_slist_push(stack, x); // save the nonterminal - h_slist_push(stack, mark); // frame delimiter - - // open a fresh result sequence - seq = h_carray_new(arena); - // look up applicable production in parse table const HCFSequence *p = h_llk_lookup(table, x, stream); if(p == NULL) goto no_parse; + if(p == NEED_INPUT) { + save_win(kmax, s, chunk); + goto need_input; + } // an infinite loop case that shouldn't happen assert(!p->items[0] || p->items[0] != x); + // push stack frame + h_slist_push(stack, seq); // save current partial value + h_slist_push(stack, x); // save the nonterminal + h_slist_push(stack, (void *)MARK); // frame delimiter + + // open a fresh result sequence + seq = h_carray_new(arena); + // push production's rhs onto the stack (in reverse order) HCFChoice **s; for(s = p->items; *s; s++); @@ -319,11 +437,10 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* } // the top of stack is such that there will be a result... - HParsedToken *tok; // will hold result token tok = h_arena_malloc(arena, sizeof(HParsedToken)); - tok->index = stream->index; + tok->index = stream->pos + stream->index; tok->bit_offset = stream->bit_offset; - if(x == mark) { + if(x == MARK) { // hit stack frame boundary... // wrap the accumulated parse result, this sequence is finished tok->token_type = TT_SEQUENCE; @@ -340,17 +457,25 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* // consume the input token uint8_t input = h_read_bits(stream, 8, false); + // when old chunk consumed from window, switch to new chunk + if(s->win.length > 0 && s->win.index >= kmax) { + s->win.length = 0; // disable the window + stream = chunk; + } + switch(x->type) { case HCF_END: if(!stream->overrun) goto no_parse; + if(!stream->last_chunk) + goto need_input; h_arena_free(arena, tok); tok = NULL; break; case HCF_CHAR: if(stream->overrun) - goto no_parse; + goto need_input; if(input != x->chr) goto no_parse; tok->token_type = TT_UINT; @@ -359,7 +484,7 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* case HCF_CHARSET: if(stream->overrun) - goto no_parse; + goto need_input; if(!charset_isset(x->charset, input)) goto no_parse; tok->token_type = TT_UINT; @@ -388,24 +513,82 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* h_carray_append(seq, tok); } + // success // since we started with a single nonterminal on the stack, seq should // contain exactly the parse result. assert(seq->used == 1); - h_delete_arena(tarena); - return make_result(arena, seq->elements[0]); + return seq; no_parse: - h_delete_arena(tarena); h_delete_arena(arena); + s->arena = NULL; return NULL; + + need_input: + if(stream->last_chunk) + goto no_parse; + if(tok) + h_arena_free(arena, tok); // no result, yet + h_slist_push(stack, x); // try this symbol again next time + return seq; } +static HParseResult *llk_parse_finish_(HAllocator *mm__, HLLkState *s) +{ + HParseResult *res = NULL; + + if(s->seq) { + assert(s->seq->used == 1); + res = make_result(s->arena, s->seq->elements[0]); + } + + h_delete_arena(s->tarena); + h_free(s); + return res; +} + +HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) +{ + HLLkState *s = llk_parse_start_(mm__, parser); + + assert(stream->last_chunk); + s->seq = llk_parse_chunk_(s, parser, stream); + + HParseResult *res = llk_parse_finish_(mm__, s); + if(res) + res->bit_length = stream->index * 8 + stream->bit_offset; + + return res; +} + +void h_llk_parse_start(HSuspendedParser *s) +{ + s->backend_state = llk_parse_start_(s->mm__, s->parser); +} + +bool h_llk_parse_chunk(HSuspendedParser *s, HInputStream *input) +{ + HLLkState *state = s->backend_state; + + state->seq = llk_parse_chunk_(state, s->parser, input); + + return (state->seq == NULL || h_slist_empty(state->stack)); +} + +HParseResult *h_llk_parse_finish(HSuspendedParser *s) +{ + return llk_parse_finish_(s->mm__, s->backend_state); +} HParserBackendVTable h__llk_backend_vtable = { .compile = h_llk_compile, .parse = h_llk_parse, - .free = h_llk_free + .free = h_llk_free, + + .parse_start = h_llk_parse_start, + .parse_chunk = h_llk_parse_chunk, + .parse_finish = h_llk_parse_finish }; diff --git a/src/backends/lr.c b/src/backends/lr.c index e7f237756361303102440700af4ceb5fcfb5abdf..3f99eb513ad59a0ba0bd59e886ed67413f01f08e 100644 --- a/src/backends/lr.c +++ b/src/backends/lr.c @@ -199,15 +199,14 @@ bool h_lrtable_row_empty(const HLRTable *table, size_t i) /* LR driver */ -HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, - const HInputStream *stream) +static +HLREngine *h_lrengine_new_(HArena *arena, HArena *tarena, const HLRTable *table) { HLREngine *engine = h_arena_malloc(tarena, sizeof(HLREngine)); engine->table = table; engine->state = 0; engine->stack = h_slist_new(tarena); - engine->input = *stream; engine->merged[0] = NULL; engine->merged[1] = NULL; engine->arena = arena; @@ -216,6 +215,14 @@ HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, return engine; } +HLREngine *h_lrengine_new(HArena *arena, HArena *tarena, const HLRTable *table, + const HInputStream *stream) +{ + HLREngine *engine = h_lrengine_new_(arena, tarena, table); + engine->input = *stream; + return engine; +} + static const HLRAction * terminal_lookup(const HLREngine *engine, const HInputStream *stream) { @@ -351,7 +358,9 @@ HParseResult *h_lrengine_result(HLREngine *engine) // on top of the stack is the start symbol's semantic value assert(!h_slist_empty(engine->stack)); HParsedToken *tok = engine->stack->head->elem; - return make_result(engine->arena, tok); + HParseResult *res = make_result(engine->arena, tok); + res->bit_length = (engine->input.pos + engine->input.index) * 8; + return res; } else { return NULL; } @@ -377,7 +386,53 @@ HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* return result; } +void h_lr_parse_start(HSuspendedParser *s) +{ + HLRTable *table = s->parser->backend_data; + assert(table != NULL); + + HArena *arena = h_new_arena(s->mm__, 0); // will hold the results + HArena *tarena = h_new_arena(s->mm__, 0); // tmp, deleted after parse + HLREngine *engine = h_lrengine_new_(arena, tarena, table); + + s->backend_state = engine; +} + +bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream) +{ + HLREngine *engine = s->backend_state; + engine->input = *stream; + + bool run = true; + while(run) { + // check input against table to determine which action to take + const HLRAction *action = h_lrengine_action(engine); + if(action == NEED_INPUT) { + // XXX assume lookahead 1 + assert(engine->input.length - engine->input.index == 0); + break; + } + + // execute action + run = h_lrengine_step(engine, action); + if(engine->input.overrun && !engine->input.last_chunk) + break; + } + + *stream = engine->input; + return !run; // done if engine no longer running +} + +HParseResult *h_lr_parse_finish(HSuspendedParser *s) +{ + HLREngine *engine = s->backend_state; + HParseResult *result = h_lrengine_result(engine); + if(!result) + h_delete_arena(engine->arena); + h_delete_arena(engine->tarena); + return result; +} /* Pretty-printers */ @@ -536,3 +591,35 @@ void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, fputc('\n', f); #endif } + +HCFGrammar *h_pprint_lr_info(FILE *f, HParser *p) +{ + HAllocator *mm__ = &system_allocator; + + fprintf(f, "\n==== G R A M M A R ====\n"); + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); + if (g == NULL) { + fprintf(f, "h_cfgrammar failed\n"); + return NULL; + } + h_pprint_grammar(f, g, 0); + + fprintf(f, "\n==== D F A ====\n"); + HLRDFA *dfa = h_lr0_dfa(g); + if (dfa) { + h_pprint_lrdfa(f, g, dfa, 0); + } else { + fprintf(f, "h_lalr_dfa failed\n"); + } + + fprintf(f, "\n==== L R ( 0 ) T A B L E ====\n"); + HLRTable *table0 = h_lr0_table(g, dfa); + if (table0) { + h_pprint_lrtable(f, g, table0, 0); + } else { + fprintf(f, "h_lr0_table failed\n"); + } + h_lrtable_free(table0); + + return g; +} diff --git a/src/backends/lr.h b/src/backends/lr.h index 8f1eadd9059330b23c77e58aedfd680690b07950..724d126ce106e6ed98f86fd7e30c1d42938dd1cd 100644 --- a/src/backends/lr.h +++ b/src/backends/lr.h @@ -134,6 +134,9 @@ const HLRAction *h_lrengine_action(const HLREngine *engine); bool h_lrengine_step(HLREngine *engine, const HLRAction *action); HParseResult *h_lrengine_result(HLREngine *engine); HParseResult *h_lr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); +void h_lr_parse_start(HSuspendedParser *s); +bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream); +HParseResult *h_lr_parse_finish(HSuspendedParser *s); HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream); void h_pprint_lritem(FILE *f, const HCFGrammar *g, const HLRItem *item); @@ -143,5 +146,6 @@ void h_pprint_lrdfa(FILE *f, const HCFGrammar *g, const HLRDFA *dfa, unsigned int indent); void h_pprint_lrtable(FILE *f, const HCFGrammar *g, const HLRTable *table, unsigned int indent); +HCFGrammar *h_pprint_lr_info(FILE *f, HParser *p); #endif diff --git a/src/cfgrammar.c b/src/cfgrammar.c index a8761b8d537ec236f7a4876e1ad86a30742df988..77e7ecad7ea1a70597a4c7c70ee21d9184a6c672 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -349,6 +349,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en return m->epsilon_branch; } +// A NULL result means no parse. NEED_INPUT means lookahead is too short. void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) { while(m) { @@ -362,9 +363,13 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) // reading bits from it does not consume them from the real input. uint8_t c = h_read_bits(&lookahead, 8, false); - if (lookahead.overrun) { // end of input - // XXX assumption of byte-wise grammar and input - return m->end_branch; + if (lookahead.overrun) { // end of chunk + if (lookahead.last_chunk) { // end of input + // XXX assumption of byte-wise grammar and input + return m->end_branch; + } else { + return NEED_INPUT; + } } // no match yet, descend @@ -672,7 +677,7 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret, } -void h_pprint_char(FILE *f, char c) +void h_pprint_char(FILE *f, uint8_t c) { switch(c) { case '"': fputs("\\\"", f); break; @@ -685,12 +690,12 @@ void h_pprint_char(FILE *f, char c) if (isprint((int)c)) { fputc(c, f); } else { - fprintf(f, "\\x%.2X", c); + fprintf(f, "\\x%.2X", (unsigned int)c); } } } -static void pprint_charset_char(FILE *f, char c) +static void pprint_charset_char(FILE *f, uint8_t c) { switch(c) { case '"': fputc(c, f); break; @@ -896,8 +901,8 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, if (map->epsilon_branch) { if (!first) { fputc(sep, file); - first=false; } + first=false; if (n==0) { fputs("\"\"", file); } else { @@ -915,8 +920,8 @@ pprint_stringmap_elems(FILE *file, bool first, char *prefix, size_t n, char sep, if (map->end_branch) { if (!first) { fputs(",\"", file); - first=false; } + first=false; if (n>0) { fputs("\"\"", file); } diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 9cefc62e83f07048dc2a24f0cda1bde28ca72066..2e8ba83cee5c152baae1177ed7b99d45cf11042c 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -56,6 +56,9 @@ bool h_stringmap_empty(const HStringMap *m); static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c) { return h_hashtable_get(m->char_branches, (void *)char_key(c)); } +// dummy return value used by h_stringmap_get_lookahead when out of input +#define NEED_INPUT ((void *)-1) + /* Convert 'parser' into CFG representation by desugaring and compiling the set * of nonterminals. @@ -102,4 +105,4 @@ void h_pprint_stringset(FILE *file, const HStringMap *set, int indent); void h_pprint_stringmap(FILE *file, char sep, void (*valprint)(FILE *f, void *env, void *val), void *env, const HStringMap *map); -void h_pprint_char(FILE *file, char c); +void h_pprint_char(FILE *file, uint8_t c); diff --git a/src/hammer.c b/src/hammer.c index 443c77b790b10b5592958a55e7d457bc695c030a..70ebc8a4943d8e1b3a25e036a745c2296bf8ddfd 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -43,6 +43,7 @@ typedef struct { +#define DEFAULT_ENDIANNESS (BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN) HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { return h_parse__m(&system_allocator, parser, input, length); @@ -50,12 +51,14 @@ HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { // Set up a parse state... HInputStream input_stream = { + .pos = 0, .index = 0, .bit_offset = 0, .overrun = 0, - .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, + .endianness = DEFAULT_ENDIANNESS, .length = length, - .input = input + .input = input, + .last_chunk = true }; return backends[parser->backend]->parse(mm__, parser, &input_stream); @@ -96,3 +99,92 @@ int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, cons parser->backend = backend; return ret; } + + +HSuspendedParser* h_parse_start(const HParser* parser) { + return h_parse_start__m(&system_allocator, parser); +} +HSuspendedParser* h_parse_start__m(HAllocator* mm__, const HParser* parser) { + if(!backends[parser->backend]->parse_start) + return NULL; + + // allocate and init suspended state + HSuspendedParser *s = h_new(HSuspendedParser, 1); + if(!s) + return NULL; + s->mm__ = mm__; + s->parser = parser; + s->backend_state = NULL; + s->done = false; + s->pos = 0; + s->bit_offset = 0; + s->endianness = DEFAULT_ENDIANNESS; + + // backend-specific initialization + // should allocate s->backend_state + backends[parser->backend]->parse_start(s); + + return s; +} + +bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length) { + assert(backends[s->parser->backend]->parse_chunk != NULL); + + // no-op if parser is already done + if(s->done) + return true; + + // input + HInputStream input_stream = { + .pos = s->pos, + .index = 0, + .bit_offset = 0, + .overrun = 0, + .endianness = s->endianness, + .length = length, + .input = input, + .last_chunk = false + }; + + // process chunk + s->done = backends[s->parser->backend]->parse_chunk(s, &input_stream); + s->endianness = input_stream.endianness; + s->pos += input_stream.index; + s->bit_offset = input_stream.bit_offset; + + return s->done; +} + +HParseResult* h_parse_finish(HSuspendedParser* s) { + assert(backends[s->parser->backend]->parse_chunk != NULL); + assert(backends[s->parser->backend]->parse_finish != NULL); + + HAllocator *mm__ = s->mm__; + + // signal end of input if parser is not already done + if(!s->done) { + HInputStream empty = { + .pos = s->pos, + .index = 0, + .bit_offset = 0, + .overrun = 0, + .endianness = s->endianness, + .length = 0, + .input = NULL, + .last_chunk = true + }; + + s->done = backends[s->parser->backend]->parse_chunk(s, &empty); + assert(s->done); + } + + // extract result + HParseResult *r = backends[s->parser->backend]->parse_finish(s); + if(r) + r->bit_length = s->pos * 8 + s->bit_offset; + + // NB: backend should have freed backend_state + h_free(s); + + return r; +} diff --git a/src/hammer.h b/src/hammer.h index 42c73458a4d0e513f4400e1a3c6790e9cc736a9e..1be297c7a3b1230f2595ba47366a6591946b8777 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -140,6 +140,8 @@ typedef struct HParser_ { HCFChoice *desugared; /* if the parser can be desugared, its desugared form */ } HParser; +typedef struct HSuspendedParser_ HSuspendedParser; + /** * Type of an action to apply to an AST, used in the action() parser. * It can be any (user-defined) function that takes a HParseResult* @@ -265,6 +267,27 @@ typedef struct HBenchmarkResults_ { */ HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length); +/** + * Initialize a parser for iteratively consuming an input stream in chunks. + * This is only supported by some backends. + * + * Result is NULL if not supported by the backend. + */ +HAMMER_FN_DECL(HSuspendedParser*, h_parse_start, const HParser* parser); + +/** + * Run a suspended parser (as returned by h_parse_start) on a chunk of input. + * + * Returns true if the parser is done (needs no more input). + */ +bool h_parse_chunk(HSuspendedParser* s, const uint8_t* input, size_t length); + +/** + * Finish an iterative parse. Signals the end of input to the backend and + * returns the parse result. + */ +HParseResult* h_parse_finish(HSuspendedParser* s); + /** * Given a string, returns a parser that parses that string value. * diff --git a/src/internal.h b/src/internal.h index 9aac4ee7dbaa4c4a1b8e87785b98f22265f37c71..b11186dd7f39c16d0e5485ce7beb111527e550ba 100644 --- a/src/internal.h +++ b/src/internal.h @@ -72,13 +72,15 @@ typedef struct HCFStack_ HCFStack; typedef struct HInputStream_ { // This should be considered to be a really big value type. const uint8_t *input; + size_t pos; // position of this chunk in a multi-chunk stream size_t index; size_t length; char bit_offset; char margin; // The number of bits on the end that is being read // towards that should be ignored. char endianness; - char overrun; + bool overrun; + bool last_chunk; } HInputStream; typedef struct HSlistNode_ { @@ -210,10 +212,32 @@ struct HParseState_ { HSlist *symbol_table; // its contents are HHashTables }; +struct HSuspendedParser_ { + HAllocator *mm__; + const HParser *parser; + void *backend_state; + bool done; + + // input stream state + size_t pos; + uint8_t bit_offset; + uint8_t endianness; +}; + typedef struct HParserBackendVTable_ { int (*compile)(HAllocator *mm__, HParser* parser, const void* params); HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HInputStream* stream); void (*free)(HParser* parser); + + void (*parse_start)(HSuspendedParser *s); + // parse_start should allocate s->backend_state. + bool (*parse_chunk)(HSuspendedParser *s, HInputStream *input); + // if parser is done, return true. otherwise: + // parse_chunk MUST consume all input, integrating it into s->backend_state. + // parse_chunk will not be called again after it reports done. + HParseResult *(*parse_finish)(HSuspendedParser *s); + // parse_finish must free s->backend_state. + // parse_finish will not be called before parse_chunk reports done. } HParserBackendVTable; diff --git a/src/parsers/many.c b/src/parsers/many.c index 51d733fcf87e3191e6f413a9513ac7900d29d8f2..cae2b0eade03450cae13f48e8f53c37db4237721 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -59,6 +59,32 @@ static bool many_isValidCF(void *env) { repeat->sep->vtable->isValidCF(repeat->sep->env))); } +// turn (_ x (_ y (_ z ()))) into (x y z) where '_' are optional +static HParsedToken *reshape_many(const HParseResult *p, void *user) +{ + HCountedArray *seq = h_carray_new(p->arena); + + const HParsedToken *tok = p->ast; + while(tok) { + assert(tok->token_type == TT_SEQUENCE); + if(tok->seq->used > 0) { + size_t n = tok->seq->used; + assert(n <= 3); + h_carray_append(seq, tok->seq->elements[n-2]); + tok = tok->seq->elements[n-1]; + } else { + tok = NULL; + } + } + + HParsedToken *res = a_new_(p->arena, HParsedToken, 1); + res->token_type = TT_SEQUENCE; + res->seq = seq; + res->index = p->ast->index; + res->bit_offset = p->ast->bit_offset; + return res; +} + static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { // TODO: refactor this. HRepeat *repeat = (HRepeat*)env; @@ -93,7 +119,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { HCFS_BEGIN_CHOICE() { // Mar HCFS_BEGIN_SEQ() { if (repeat->sep != NULL) { - HCFS_DESUGAR(h_ignore__m(mm__, repeat->sep)); + HCFS_DESUGAR(repeat->sep); } //stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry. HCFS_DESUGAR(repeat->p); @@ -108,7 +134,7 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { //HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p())); } HCFS_END_SEQ(); } - HCFS_THIS_CHOICE->reshape = h_act_flatten; + HCFS_THIS_CHOICE->reshape = reshape_many; } HCFS_END_CHOICE(); } diff --git a/src/t_parser.c b/src/t_parser.c index df9567ed201b1d07d1ebdf9e815fd625ba8de5c8..3d54ff6bf3bd6d75fcb9c73732a7a04025d5bcf3 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -443,6 +443,115 @@ static void test_rightrec(gconstpointer backend) { g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))"); } +static void test_iterative(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + p = h_token((uint8_t*)"foobar", 6); + g_check_parse_chunks_match(p, be, "foo",3, "bar",3, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "foo",3, "barbaz",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_failed(p, be, "fou",3, "bar",3); + g_check_parse_chunks_failed(p, be, "foo",3, "par",3); + g_check_parse_chunks_failed(p, be, "foo",3, "baz",3); + + p = h_sequence(h_ch('f'), h_token((uint8_t*)"ooba", 4), h_ch('r'), NULL); + g_check_parse_chunks_match(p, be, "foo",3, "bar",3, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunks_match(p, be, "foo",3, "barbaz",6, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunks_failed(p, be, "fou",3, "bar",3); + g_check_parse_chunks_failed(p, be, "foo",3, "par",3); + g_check_parse_chunks_failed(p, be, "foo",3, "baz",3); + + p = h_choice(h_token((uint8_t*)"foobar", 6), + h_token((uint8_t*)"phupar", 6), NULL); + g_check_parse_chunks_match(p, be, "foo",3, "bar",3, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "foo",3, "barbaz",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "phu",3, "par",3, "<70.68.75.70.61.72>"); + g_check_parse_chunks_failed(p, be, "fou",3, "bar",3); + g_check_parse_chunks_failed(p, be, "foo",3, "baz",3); + g_check_parse_chunks_match(p, be, "foobar",6, "",0, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_match(p, be, "",0, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunks_failed(p, be, "foo",3, "",0); + g_check_parse_chunks_failed(p, be, "",0, "foo",3); + + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"uu", 2), NULL), NULL); + g_check_parse_chunks_match(p, be, "f",1, "oo",2, "(u0x66 <6f.6f>)"); + g_check_parse_chunks_match(p, be, "f",1, "uu",2, "(u0x66 <75.75>)"); + g_check_parse_chunks_failed(p, be, "g",1, "oo",2); + g_check_parse_chunks_failed(p, be, "f",1, "ou",2); + g_check_parse_chunks_failed(p, be, "f",1, "uo",2); +} + +static void test_iterative_lookahead(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + // needs 2 lookahead + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"ou", 2), NULL), NULL); + if(h_compile(p, be, (void *)2) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + // partial chunk consumed + g_check_parse_chunks_match_(p, "fo",2, "o",1, "(u0x66 <6f.6f>)"); + g_check_parse_chunks_match_(p, "fo",2, "u",1, "(u0x66 <6f.75>)"); + g_check_parse_chunks_failed_(p, "go",2, "o",1); + g_check_parse_chunks_failed_(p, "fa",2, "u",1); + g_check_parse_chunks_failed_(p, "fo",2, "b",1); +} + +static void test_iterative_result_length(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p = h_token((uint8_t*)"foobar", 6); + + if(h_compile(p, be, NULL) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + HSuspendedParser *s = h_parse_start(p); + if(!s) { + g_test_message("Chunked parsing not available"); + g_test_fail(); + return; + } + h_parse_chunk(s, (uint8_t*)"foo", 3); + h_parse_chunk(s, (uint8_t*)"ba", 2); + h_parse_chunk(s, (uint8_t*)"rbaz", 4); + HParseResult *r = h_parse_finish(s); + if(!r) { + g_test_message("Parse failed"); + g_test_fail(); + return; + } + + g_check_cmp_int64(r->bit_length, ==, 48); +} + +static void test_result_length(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p = h_token((uint8_t*)"foo", 3); + + if(h_compile(p, be, NULL) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6); + if(!r) { + g_test_message("Parse failed"); + g_test_fail(); + return; + } + + g_check_cmp_int64(r->bit_length, ==, 24); +} + static void test_ambiguous(gconstpointer backend) { HParser *d_ = h_ch('d'); HParser *p_ = h_ch('+'); @@ -653,6 +762,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation); g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); + g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); @@ -691,6 +801,10 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore); //g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec); g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec); + g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length); + g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative); + g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead); + g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length); g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token); g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch); @@ -703,8 +817,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/uint32", GINT_TO_POINTER(PB_REGULAR), test_uint32); g_test_add_data_func("/core/parser/regex/uint16", GINT_TO_POINTER(PB_REGULAR), test_uint16); g_test_add_data_func("/core/parser/regex/uint8", GINT_TO_POINTER(PB_REGULAR), test_uint8); - g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range); #if 0 + g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range); g_test_add_data_func("/core/parser/regex/float64", GINT_TO_POINTER(PB_REGULAR), test_float64); g_test_add_data_func("/core/parser/regex/float32", GINT_TO_POINTER(PB_REGULAR), test_float32); #endif @@ -728,6 +842,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/epsilon_p", GINT_TO_POINTER(PB_REGULAR), test_epsilon_p); g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool); g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore); + g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length); g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token); g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch); @@ -767,6 +882,10 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec); g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne); g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); + g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length); + g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative); + g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead); + g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length); g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token); g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch); @@ -807,4 +926,5 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne); g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec); g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous); + g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length); } diff --git a/src/t_regression.c b/src/t_regression.c index c124d6b07f76360d8f0bc07e1a53ef9cf879083d..0cc0eb1eb23c735165da575491cfe83b0c5d442e 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -138,10 +138,34 @@ static void test_wrong_bit_length(void) { h_parse_result_free(r); } +static void test_lalr_charset_lhs(void) { + HParserBackend be = PB_LALR; + + HParser *p = h_choice(h_ch('A'), h_uint8(), NULL); + + // the above would fail to compile because of an unhandled case in trying + // to resolve a conflict where an item's left-hand-side was an HCF_CHARSET. + + g_check_parse_match(p, be, "A",1, "u0x41"); + g_check_parse_match(p, be, "B",1, "u0x42"); +} + +static void test_cfg_many_seq(void) { + HParser *p = h_many(h_sequence(h_ch('A'), h_ch('B'), NULL)); + + g_check_parse_match(p, PB_LLk, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))"); + g_check_parse_match(p, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))"); + g_check_parse_match(p, PB_GLR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))"); + // these would instead parse as (u0x41 u0x42 u0x41 u0x42) due to a faulty + // reshape on h_many. +} + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); g_test_add_func("/core/regression/read_bits_48", test_read_bits_48); g_test_add_func("/core/regression/llk_zero_end", test_llk_zero_end); g_test_add_func("/core/regression/wrong_bit_length", test_wrong_bit_length); + g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs); + g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq); } diff --git a/src/test_suite.h b/src/test_suite.h index 82fe495522116a8fe9ff4ab11f9f65904f3fd34c..49f13cf81c50864eb8ae03ed705f582a7dd1ca0f 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -90,7 +90,8 @@ #define g_check_parse_failed(parser, backend, input, inp_len) do { \ int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ if(skip != 0) { \ - g_test_message("Backend not applicable, skipping test"); \ + g_test_message("Compile failed"); \ + g_test_fail(); \ break; \ } \ const HParseResult *result = h_parse(parser, (const uint8_t*)input, inp_len); \ @@ -103,7 +104,8 @@ #define g_check_parse_ok(parser, backend, input, inp_len) do { \ int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ if(skip) { \ - g_test_message("Backend not applicable, skipping test"); \ + g_test_message("Compile failed"); \ + g_test_fail(); \ break; \ } \ HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \ @@ -124,7 +126,8 @@ #define g_check_parse_match(parser, backend, input, inp_len, result) do { \ int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ if(skip) { \ - g_test_message("Backend not applicable, skipping test"); \ + g_test_message("Compile failed"); \ + g_test_fail(); \ break; \ } \ HParseResult *res = h_parse(parser, (const uint8_t*)input, inp_len); \ @@ -145,6 +148,69 @@ } \ } while(0) +#define g_check_parse_chunks_failed(parser, backend, chunk1, c1_len, chunk2, c2_len) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunks_failed_(parser, chunk1, c1_len, chunk2, c2_len); \ + } while(0) + +#define g_check_parse_chunks_failed_(parser, chunk1, c1_len, chunk2, c2_len) do { \ + HSuspendedParser *s = h_parse_start(parser); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + h_parse_chunk(s, (const uint8_t*)chunk2, c2_len); \ + const HParseResult *res = h_parse_finish(s); \ + if (NULL != res) { \ + g_test_message("Check failed: shouldn't have succeeded, but did"); \ + g_test_fail(); \ + } \ + } while(0) + +#define g_check_parse_chunks_match(parser, backend, chunk1, c1_len, chunk2, c2_len, result) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunks_match_(parser, chunk1, c1_len, chunk2, c2_len, result); \ + } while(0) + +#define g_check_parse_chunks_match_(parser, chunk1, c1_len, chunk2, c2_len, result) do { \ + HSuspendedParser *s = h_parse_start(parser); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + h_parse_chunk(s, (const uint8_t*)chunk2, c2_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + char* cres = h_write_result_unamb(res->ast); \ + g_check_string(cres, ==, result); \ + (&system_allocator)->free(&system_allocator, cres); \ + HArenaStats stats; \ + h_allocator_stats(res->arena, &stats); \ + g_test_message("Parse used %zd bytes, wasted %zd bytes. " \ + "Inefficiency: %5f%%", \ + stats.used, stats.wasted, \ + stats.wasted * 100. / (stats.used+stats.wasted)); \ + h_delete_arena(res->arena); \ + } \ + } while(0) + #define g_check_hashtable_present(table, key) do { \ if(!h_hashtable_present(table, key)) { \ g_test_message("Check failed: key should have been in table, but wasn't"); \