diff --git a/SConstruct b/SConstruct index 9349525eb22aa978f1a64015dbf3f6ce6cca97e3..0aa5670f6c3020afbea5290b16ce2b82e0aac5b2 100644 --- a/SConstruct +++ b/SConstruct @@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function import os import os.path import platform +import subprocess import sys default_install_dir='/usr/local' @@ -21,6 +22,17 @@ tools = ['default', 'scanreplace'] if 'dotnet' in ARGUMENTS.get('bindings', []): tools.append('csharp/mono') +# add the clang tool if necessary +if os.getenv('CC') == 'clang' or platform.system() == 'Darwin': + tools.append('clang') +else: + # try to detect if cc happens to be clang by inspecting --version + cc = os.getenv('CC') or 'cc' + ver = subprocess.run([cc, '--version'], capture_output=True).stdout + if b'clang' in ver.split(): + tools.append('clang') + os.environ['CC'] = cc # make sure we call it as we saw it + envvars = {'PATH' : os.environ['PATH']} if 'PKG_CONFIG_PATH' in os.environ: envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH'] @@ -99,10 +111,7 @@ AddOption('--tests', env['CC'] = os.getenv('CC') or env['CC'] env['CXX'] = os.getenv('CXX') or env['CXX'] - -if os.getenv('CC') == 'clang' or env['PLATFORM'] == 'darwin': - env.Replace(CC='clang', - CXX='clang++') +env['CFLAGS'] = os.getenv('CFLAGS') or env['CFLAGS'] # Language standard and warnings if env['CC'] == 'cl': diff --git a/src/allocator.c b/src/allocator.c index 93c94e1de7d79cf35542faf54e67938f5a0d5a4a..b4dd94f3401b5aa3ab329935f4971a5dc2ee4c4d 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -61,7 +61,14 @@ static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero); void* h_alloc(HAllocator* mm__, size_t size) { void *p = mm__->alloc(mm__, size); if(!p) - h_platform_errx(1, "memory allocation failed (%uB requested)\n", (unsigned int)size); + h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size); + return p; +} + +void* h_realloc(HAllocator* mm__, void* ptr, size_t size) { + void *p = 
mm__->realloc(mm__, ptr, size); + if(!p) + h_platform_errx(1, "memory reallocation failed (%zuB requested)\n", size); return p; } diff --git a/src/allocator.h b/src/allocator.h index 8ebde72308997aa5ac56885d1fa5d45078659170..f98ec899bb872acb105e7b16120f448d34c2d8ca 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -48,6 +48,7 @@ typedef struct HAllocator_ { } HAllocator; void* h_alloc(HAllocator* allocator, size_t size) ATTR_MALLOC(2); +void* h_realloc(HAllocator* allocator, void* ptr, size_t size); typedef struct HArena_ HArena ; // hidden implementation diff --git a/src/backends/packrat.c b/src/backends/packrat.c index d4e918dfe72411d2a9a6c0bd4db7df7d5bcf1784..e69e02b4dc5e522cf2ff7f28caac8536d7691fdd 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -34,37 +34,39 @@ HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) { return ret; } -// Really library-internal tool to perform an uncached parse, and handle any common error-handling. -static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) { - // TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when. - HParseResult *tmp_res; - if (parser) { - HInputStream bak = state->input_stream; - tmp_res = parser->vtable->parse(parser->env, state); - if (tmp_res) { - tmp_res->arena = state->arena; - if (!state->input_stream.overrun) { - size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); - if (tmp_res->bit_length == 0) { // Don't modify if forwarding. - tmp_res->bit_length = bit_length; - } - if (tmp_res->ast && tmp_res->ast->bit_length != 0) { - ((HParsedToken*)(tmp_res->ast))->bit_length = bit_length; - } - } else - tmp_res->bit_length = 0; - } - } else - tmp_res = NULL; - if (state->input_stream.overrun) - return NULL; // overrun is always failure. 
-#ifdef CONSISTENCY_CHECK - if (!tmp_res) { - state->input_stream = INVALID; - state->input_stream.input = key->input_pos.input; +// internal helper to perform an uncached parse and common error-handling +static inline +HParseResult *perform_lowlevel_parse(HParseState *state, const HParser *parser) +{ + HParseResult *res; + HInputStream bak; + size_t len; + + if (!parser) + return NULL; + + bak = state->input_stream; + res = parser->vtable->parse(parser->env, state); + + if (!res) + return NULL; // NB: input position is considered invalid on failure + + // combinators' parse functions by design do not have to check for overrun. + // turn such bogus successes into parse failure. + if (state->input_stream.overrun) { + res->bit_length = 0; + return NULL; } -#endif - return tmp_res; + + // update result length + res->arena = state->arena; + len = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); + if (res->bit_length == 0) // Don't modify if forwarding. + res->bit_length = len; + if (res->ast && res->ast->bit_length != 0) + ((HParsedToken *)(res->ast))->bit_length = len; + + return res; } HParserCacheValue* recall(HParserCacheKey *k, HParseState *state, HHashValue keyhash) { @@ -326,6 +328,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr parse_state->arena = arena; parse_state->symbol_table = NULL; HParseResult *res = h_do_parse(parser, parse_state); + *input_stream = parse_state->input_stream; h_slist_free(parse_state->lr_stack); h_hashtable_free(parse_state->recursion_heads); // tear down the parse state @@ -336,10 +339,118 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr return res; } +// The following naive implementation of the iterative (chunked) parsing API +// concatenates chunks and blindly re-runs the full parse on every call to +// h_packrat_parse_chunk. 
+// +// NB: A full implementation will still have to concatenate the chunks to +// support arbitrary backtracking, but should be able to save much, if not all, of +// the HParseState between calls. +// Cutting unneeded past input should also be possible but is complicated by +// the fact that only higher-order combinators are saved to the packrat cache, +// so former input to bare primitive combinators must remain available. +// +// Note: The iterative API expects us to always consume an entire input chunk +// when we suspend, even if packrat later backtracks into it. We will produce +// the correct parse result and accurately consume from a final chunk, but all +// earlier chunks will be reported as fully consumed and as being part of the +// HParseResult in terms of its bit_length field. + +void h_packrat_parse_start(HSuspendedParser *s) +{ + // nothing to do here, we allocate lazily below +} + +bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input) +{ + HAllocator *mm__ = s->mm__; + HParseResult *res; + HInputStream *cat; + size_t newlen; + + if (s->backend_state == NULL) { // this is the first chunk + // attempt to finish the parse on just the given input. 
+ res = h_packrat_parse(mm__, s->parser, input); + if (input->last_chunk || !input->overrun) { + s->backend_state = res; // pass on the result + return true; // and signal we're done + } + + // we ran out of input and are expecting more + // allocate and initialize an input stream to concatenate the chunks + cat = h_new(HInputStream, 1); + *cat = *input; + cat->input = h_alloc(mm__, input->length); + memcpy((void *)cat->input, input->input, input->length); + s->backend_state = cat; + + goto suspend; + } + + // we have received additional input - append it to the saved stream + cat = s->backend_state; + assert(input->pos == cat->length); + if (input->length > SIZE_MAX - cat->length) + h_platform_errx(1, "input length would overflow"); + newlen = cat->length + input->length; + cat->input = h_realloc(mm__, (void *)cat->input, newlen); + memcpy((void *)cat->input + cat->length, input->input, input->length); + cat->length = newlen; + cat->last_chunk = input->last_chunk; + + // reset our input stream and call the parser on it (again) + cat->index = 0; + cat->bit_offset = 0; + cat->margin = 0; + cat->endianness = DEFAULT_ENDIANNESS; + cat->overrun = false; + res = h_packrat_parse(mm__, s->parser, cat); + assert(cat->index <= cat->length); + input->overrun = cat->overrun; + + // suspend if the parser still needs more input + if (input->overrun && !input->last_chunk) + goto suspend; + // otherwise the parse is finished... + + // report final input position + if (cat->index < input->pos) { // parser just needed some lookahead + input->index = 0; // don't consume this last chunk + input->bit_offset = 0; + input->margin = 0; + } else { + input->index = cat->index - input->pos; + input->bit_offset = cat->bit_offset; + input->margin = cat->margin; + input->endianness = cat->endianness; + } + + // clean up and return the result + h_free((void *)cat->input); + h_free(cat); + s->backend_state = res; + + return true; // don't call me again. 
+ +suspend: + input->index = input->length; // consume the entire chunk on suspend + input->margin = 0; + input->bit_offset = 0; + return false; // come back with more input. +} + +HParseResult *h_packrat_parse_finish(HSuspendedParser *s) +{ + return s->backend_state; +} + HParserBackendVTable h__packrat_backend_vtable = { .compile = h_packrat_compile, .parse = h_packrat_parse, .free = h_packrat_free, + .parse_start = h_packrat_parse_start, + .parse_chunk = h_packrat_parse_chunk, + .parse_finish = h_packrat_parse_finish, /* Name/param resolution functions */ .backend_short_name = "packrat", .backend_description = "Packrat parser with Warth's recursion", diff --git a/src/hammer.c b/src/hammer.c index 50453392aad5d997849b508fb900ea2f5637823e..648dbe2773a754103d6e85d750a9e2bf5a367316 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -558,8 +558,6 @@ HParserBackendWithParams * h_get_backend_with_params_by_name(const char *name_wi } -#define DEFAULT_ENDIANNESS (BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN) - HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { return h_parse__m(&system_allocator, parser, input, length); } diff --git a/src/internal.h b/src/internal.h index c2244d4f81f91c889156aee89248c9ed932cc1a5..a53c5914ad9ee2d5996dfe6c2d76aa51242ef43b 100644 --- a/src/internal.h +++ b/src/internal.h @@ -69,6 +69,8 @@ extern HAllocator system_allocator; typedef struct HCFStack_ HCFStack; +#define DEFAULT_ENDIANNESS (BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN) + typedef struct HInputStream_ { // This should be considered to be a really big value type. 
const uint8_t *input; @@ -368,12 +370,14 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p); void h_skip_bits(HInputStream* state, size_t count); void h_seek_bits(HInputStream* state, size_t pos); static inline size_t h_input_stream_pos(HInputStream* state) { - assert(state->index < SIZE_MAX / 8); - return state->index * 8 + state->bit_offset + state->margin; + assert(state->pos <= SIZE_MAX - state->index); + assert(state->pos + state->index < SIZE_MAX / 8); + return (state->pos + state->index) * 8 + state->bit_offset + state->margin; } static inline size_t h_input_stream_length(HInputStream *state) { - assert(state->length <= SIZE_MAX / 8); - return state->length * 8; + assert(state->pos <= SIZE_MAX - state->length); + assert(state->pos + state->length <= SIZE_MAX / 8); + return (state->pos + state->length) * 8; } // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); diff --git a/src/parsers/and.c b/src/parsers/and.c index e07bc9fcd36c91005d01eeef8d554a8202dce9ae..e27b59a6add6d3371fb5d714644423c910751e5e 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -3,18 +3,18 @@ static HParseResult *parse_and(void* env, HParseState* state) { HInputStream bak = state->input_stream; HParseResult *res = h_do_parse((HParser*)env, state); + if (!res) + return NULL; // propagate failed input state, esp. overrun state->input_stream = bak; - if (res) - return make_result(state->arena, NULL); - return NULL; + return make_result(state->arena, NULL); } static const HParserVtable and_vt = { .parse = parse_and, .isValidRegular = h_false, /* TODO: strictly speaking this should be regular, - but it will be a huge amount of work and difficult - to get right, so we're leaving it for a future - revision. --mlp, 18/12/12 */ + but it will be a huge amount of work and + difficult to get right, so we're leaving it for + a future revision. 
--mlp, 18/12/12 */ .isValidCF = h_false, /* despite TODO above, this remains false. */ .compile_to_rvm = h_not_regular, .higher = true, diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c index 24ece4bec6f7f80b0905401be6e72b10f73769f8..1e809f5ebc141ec0dc6bef978f5badb68aa9a28b 100644 --- a/src/parsers/butnot.c +++ b/src/parsers/butnot.c @@ -5,7 +5,6 @@ typedef struct { const HParser *p2; } HTwoParsers; - static HParseResult* parse_butnot(void *env, HParseState *state) { HTwoParsers *parsers = (HTwoParsers*)env; // cache the initial state of the input stream @@ -19,15 +18,18 @@ static HParseResult* parse_butnot(void *env, HParseState *state) { HInputStream after_p1_state = state->input_stream; state->input_stream = start_state; HParseResult *r2 = h_do_parse(parsers->p2, state); - // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases + // don't touch the input state (overrun flag) if we must suspend + if (want_suspend(state)) { + return NULL; + } + // in all other cases, the input stream should be in the post-p1 state state->input_stream = after_p1_state; - // if p2 failed, restore post-p1 state and bail out early if (NULL == r2) { return r1; } size_t r1len = token_length(r1); size_t r2len = token_length(r2); - // if both match but p1's text is shorter than than p2's (or the same length), fail + // if both match but p1's text no longer than p2's, fail if (r1len <= r2len) { return NULL; } else { diff --git a/src/parsers/choice.c b/src/parsers/choice.c index bb6d9c94f5f80c9bc9e85b0af71f125dc2ddc3d7..882ecbf85162c6ad11493141a0a406f9dd5c2a29 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -30,6 +30,8 @@ static HParseResult* parse_choice(void *env, HParseState *state) { HParseResult *tmp = h_do_parse(s->p_array[i], state); if (NULL != tmp) return tmp; + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag } // nothing succeeded, so fail return NULL; diff --git a/src/parsers/difference.c 
b/src/parsers/difference.c index a24f5acf378c6b801677364f7a5902ae49ec60f1..bdc7a40a1bc23ba60306d5de6a421dfaa363f0e1 100644 --- a/src/parsers/difference.c +++ b/src/parsers/difference.c @@ -18,9 +18,12 @@ static HParseResult* parse_difference(void *env, HParseState *state) { HInputStream after_p1_state = state->input_stream; state->input_stream = start_state; HParseResult *r2 = h_do_parse(parsers->p2, state); - // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases + // don't touch the input state (overrun flag) if we must suspend + if (want_suspend(state)) { + return NULL; + } + // in all other cases, the input stream should be in the post-p1 state state->input_stream = after_p1_state; - // if p2 failed, restore post-p1 state and bail out early if (NULL == r2) { return r1; } @@ -34,7 +37,7 @@ static HParseResult* parse_difference(void *env, HParseState *state) { } } -static HParserVtable difference_vt = { +static const HParserVtable difference_vt = { .parse = parse_difference, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? 
diff --git a/src/parsers/end.c b/src/parsers/end.c index 35e4186d430d8b48fe5cd1e41552403d6f95e562..754bb7f5353e5e06e3fa61f6444244c982187bc1 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -1,13 +1,19 @@ #include "parser_internal.h" -static HParseResult* parse_end(void *env, HParseState *state) { - if (state->input_stream.index == state->input_stream.length) { +static HParseResult* parse_end(void *env, HParseState *state) +{ + if (state->input_stream.index < state->input_stream.length) + return NULL; + + assert(state->input_stream.index == state->input_stream.length); + if (state->input_stream.last_chunk) { HParseResult *ret = a_new(HParseResult, 1); ret->ast = NULL; ret->bit_length = 0; ret->arena = state->arena; return ret; } else { + state->input_stream.overrun = true; // need more input return NULL; } } diff --git a/src/parsers/many.c b/src/parsers/many.c index 655dd1497667f44b3de6694a0109c6f58804167d..f879211f6aa5d750ab9b78216b65696cfb707cb5 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -21,17 +21,16 @@ static HParseResult *parse_many(void* env, HParseState *state) { if (count > 0 && env_->sep != NULL) { HParseResult *sep = h_do_parse(env_->sep, state); if (!sep) - goto err0; + goto stop; } HParseResult *elem = h_do_parse(env_->p, state); if (!elem) - goto err0; + goto stop; if (elem->ast) h_carray_append(seq, (void*)elem->ast); count++; } - if (count < env_->count) - goto err; + assert(count == env_->count); succ: ; // necessary for the label to be here... 
HParsedToken *res = a_new(HParsedToken, 1); @@ -41,13 +40,13 @@ static HParseResult *parse_many(void* env, HParseState *state) { res->bit_length = 0; res->bit_offset = 0; return make_result(state->arena, res); - err0: + stop: + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag if (count >= env_->count) { state->input_stream = bak; goto succ; } - err: - state->input_stream = bak; return NULL; } diff --git a/src/parsers/not.c b/src/parsers/not.c index 8c2003dec77b946c50db3d0f62b7117a8ff12f69..a64f917725a17eafcd7eb558a96c73494a04e9ba 100644 --- a/src/parsers/not.c +++ b/src/parsers/not.c @@ -4,10 +4,11 @@ static HParseResult* parse_not(void* env, HParseState* state) { HInputStream bak = state->input_stream; if (h_do_parse((HParser*)env, state)) return NULL; - else { - state->input_stream = bak; - return make_result(state->arena, NULL); - } + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag + // regular parse failure -> success + state->input_stream = bak; + return make_result(state->arena, NULL); } static const HParserVtable not_vt = { diff --git a/src/parsers/optional.c b/src/parsers/optional.c index 6a2789e2d0a86a8c4e6b141825fd3abc075af5d0..e37446a43dcdbb193b7068e6597eee12878d3a36 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -6,6 +6,8 @@ static HParseResult* parse_optional(void* env, HParseState* state) { HParseResult *res0 = h_do_parse((HParser*)env, state); if (res0) return res0; + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag state->input_stream = bak; HParsedToken *ast = a_new(HParsedToken, 1); ast->token_type = TT_NONE; diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index ebc5f4b32992d77cae4e6b512d17e3a39729e14f..945d48f695b3470a3c25a8642c1a3aa9656d9710 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -1,10 +1,10 @@ /* * NOTE: This is an internal header and installed for use by 
extensions. The * API is not guaranteed stable. -*/ + */ -#ifndef HAMMER_PARSE_INTERNAL__H -#define HAMMER_PARSE_INTERNAL__H +#ifndef HAMMER_PARSER_INTERNAL__H +#define HAMMER_PARSER_INTERNAL__H #include "../hammer.h" #include "../internal.h" #include "../backends/regex.h" @@ -32,6 +32,11 @@ static inline size_t token_length(HParseResult *pr) { } } +// tell if we should suspend a parser, i.e. overrun occurred before final input +static inline bool want_suspend(HParseState *state) { + return state->input_stream.overrun && !state->input_stream.last_chunk; +} + /* Epsilon rules happen during desugaring. This handles them. */ static inline void desugar_epsilon(HAllocator *mm__, HCFStack *stk__, void *env) { HCFS_BEGIN_CHOICE() { @@ -41,4 +46,4 @@ static inline void desugar_epsilon(HAllocator *mm__, HCFStack *stk__, void *env) } HCFS_END_CHOICE(); } -#endif // HAMMER_PARSE_INTERNAL__H +#endif // HAMMER_PARSER_INTERNAL__H diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c index a50f4c1582af85434e4d9856407677273eba496c..f6f170a6093001d0d0c9941c4885e237445ddee2 100644 --- a/src/parsers/permutation.c +++ b/src/parsers/permutation.c @@ -29,6 +29,9 @@ static int parse_permutation_tail(const HSequence *s, if(set[i]) { match = h_do_parse(ps[i], state); + if (want_suspend(state)) + return 0; // bail out early, leaving overrun flag + // save result if(match) seq->elements[i] = (void *)match->ast; diff --git a/src/parsers/seek.c b/src/parsers/seek.c index d5bc02840f0145dc3089e82c11068359932b454c..e1459d80a6275e27e5237fe23483100bc0ddad18 100644 --- a/src/parsers/seek.c +++ b/src/parsers/seek.c @@ -25,6 +25,10 @@ static HParseResult *parse_seek(void *env, HParseState *state) pos = 0; break; case SEEK_END: + if (!stream->last_chunk) { /* the end is not yet known! 
*/ + stream->overrun = true; /* we need more input */ + return NULL; + } pos = h_input_stream_length(stream); break; case SEEK_CUR: diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 970a32c8b57209a66f3588bddb4ea30de9f87454..e54c2170258532d7796a8bfa13923ea8075b77b0 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -8,6 +8,8 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { do { bak = state->input_stream; c = h_read_bits(&state->input_stream, 8, false); + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag if (state->input_stream.overrun) break; } while (isspace((int)c)); diff --git a/src/parsers/xor.c b/src/parsers/xor.c index 3a3f21d27a928bf6d2d180eeb39763c918275fd0..08d791cce9ab7ae327d9c9d782b4ff64aa164d48 100644 --- a/src/parsers/xor.c +++ b/src/parsers/xor.c @@ -11,10 +11,14 @@ static HParseResult* parse_xor(void *env, HParseState *state) { // cache the initial state of the input stream HInputStream start_state = state->input_stream; HParseResult *r1 = h_do_parse(parsers->p1, state); + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag HInputStream after_p1_state = state->input_stream; // reset input stream, parse again state->input_stream = start_state; HParseResult *r2 = h_do_parse(parsers->p2, state); + if (want_suspend(state)) + return NULL; // bail out early, leaving overrun flag if (NULL == r1) { if (NULL != r2) { return r2; diff --git a/src/t_parser.c b/src/t_parser.c index 356c38f1674d6d3f90e3b0da672646455437f7a2..dbeaabde7c4573018d0564383ec1a7f5d3069017 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -507,7 +507,72 @@ static void test_rightrec(gconstpointer backend) { g_check_parse_match(rr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(u0x61 (u0x61 (u0x61)))"); } -static void test_iterative(gconstpointer backend) { +static void test_iterative_single(gconstpointer backend) { + HParserBackend be = 
(HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + p = h_token((uint8_t*)"foobar", 6); + g_check_parse_chunk_match(p, be, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foopar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_sequence(h_ch('f'), h_token((uint8_t*)"ooba", 4), h_ch('r'), NULL); + g_check_parse_chunk_match(p, be, "foobar",6, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "(u0x66 <6f.6f.62.61> u0x72)"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foopar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_choice(h_token((uint8_t*)"foobar", 6), + h_token((uint8_t*)"phupar", 6), NULL); + g_check_parse_chunk_match(p, be, "foobar",6, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "foobarbaz",9, "<66.6f.6f.62.61.72>"); + g_check_parse_chunk_match(p, be, "phupar",6, "<70.68.75.70.61.72>"); + g_check_parse_chunk_failed(p, be, "foubar",6); + g_check_parse_chunk_failed(p, be, "foobaz",6); + + p = h_sequence(h_ch('f'), h_choice(h_token((uint8_t*)"oo", 2), + h_token((uint8_t*)"uu", 2), NULL), NULL); + g_check_parse_chunk_match(p, be, "foo",3, "(u0x66 <6f.6f>)"); + g_check_parse_chunk_match(p, be, "fuu",3, "(u0x66 <75.75>)"); + g_check_parse_chunk_failed(p, be, "goo",3); + g_check_parse_chunk_failed(p, be, "fou",3); + g_check_parse_chunk_failed(p, be, "fuo",3); +} + +// this test applies to backends that support the iterative API, but not actual +// chunked operation. in such cases, passing multiple chunks should fail the +// parse rather than treating the end of the first chunk as the end of input. 
+#if 0 +static void test_iterative_dummy(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p; + + HParser *x = h_ch('x'); + HParser *y = h_ch('y'); + HParser *e = h_epsilon_p(); + + p = h_many(x); + g_check_parse_chunks_failed(p, be, "xxx",3, "xxx",3); + g_check_parse_chunks_failed(p, be, "xxx",3, "",0); + + p = h_optional(x); + g_check_parse_chunks_failed(p, be, "",0, "xxx",3); + + p = h_choice(x, e, NULL); + g_check_parse_chunks_failed(p, be, "",0, "xxx",3); + + // these are ok because the parse succeeds without overrun. + p = h_choice(e, x, NULL); + g_check_parse_chunks_match(p, be, "",0, "xxx",3, "NULL"); + p = h_choice(y, x, NULL); + g_check_parse_chunks_match(p, be, "y",1, "xxx",3, "u0x79"); +} +#endif + +static void test_iterative_multi(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); HParser *p; @@ -567,6 +632,39 @@ static void test_iterative_lookahead(gconstpointer backend) { g_check_parse_chunks_failed_(p, "fo",2, "b",1); } +static void test_iterative_seek(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + const HParser *p; + + // seeking should work across chunk boundaries... 
+ + p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_ch('f'), NULL); + g_check_parse_chunks_match(p, be, "a",1, "bcdef",5, "(u0x61 u0x28 u0x66)"); + g_check_parse_chunks_failed(p, be, "a",1, "bcdex",5); + g_check_parse_chunks_failed(p, be, "a",1, "bc",2); + + p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_end_p(), NULL); + g_check_parse_chunks_match(p, be, "ab",2, "cde",3, "(u0x61 u0x28)"); + g_check_parse_chunks_failed(p, be, "ab",2, "cdex",4); + g_check_parse_chunks_failed(p, be, "ab",2, "c",1); + + p = h_sequence(h_ch('a'), h_seek(0, SEEK_END), h_end_p(), NULL); + g_check_parse_chunks_match(p, be, "abc",3, "de",2, "(u0x61 u0x28)"); + g_check_parse_chunks_match(p, be, "abc",3, "",0, "(u0x61 u0x18)"); + + p = h_sequence(h_ch('a'), h_seek(-16, SEEK_END), h_ch('x'), NULL); + g_check_parse_chunks_match(p, be, "abcd",4, "xy",2, "(u0x61 u0x20 u0x78)"); + g_check_parse_chunks_match(p, be, "abxy",4, "",0, "(u0x61 u0x10 u0x78)"); + g_check_parse_chunks_failed(p, be, "a",1, "bc",2); + g_check_parse_chunks_failed(p, be, "",0, "x",1); + + p = h_sequence(h_ch('a'), h_seek(32, SEEK_CUR), h_ch('f'), NULL); + g_check_parse_chunks_match(p, be, "abcde",5, "f",1, "(u0x61 u0x28 u0x66)"); + g_check_parse_chunks_failed(p, be, "xbcde",5, "f",1); + g_check_parse_chunks_failed(p, be, "abcde",5, "x",1); + g_check_parse_chunks_failed(p, be, "abc",3, "",0); +} + static void test_iterative_result_length(gconstpointer backend) { HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); HParser *p = h_token((uint8_t*)"foobar", 6); @@ -933,6 +1031,11 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length); //g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position); + g_test_add_data_func("/core/parser/packrat/iterative/single", GINT_TO_POINTER(PB_PACKRAT), 
test_iterative_single); + g_test_add_data_func("/core/parser/packrat/iterative/multi", GINT_TO_POINTER(PB_PACKRAT), test_iterative_multi); + g_test_add_data_func("/core/parser/packrat/iterative/lookahead", GINT_TO_POINTER(PB_PACKRAT), test_iterative_lookahead); + g_test_add_data_func("/core/parser/packrat/iterative/seek", GINT_TO_POINTER(PB_PACKRAT), test_iterative_seek); + g_test_add_data_func("/core/parser/packrat/iterative/result_length", GINT_TO_POINTER(PB_PACKRAT), test_iterative_result_length); g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip); g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek); g_test_add_data_func("/core/parser/packrat/tell", GINT_TO_POINTER(PB_PACKRAT), test_tell); @@ -978,7 +1081,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec); g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length); //g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position); - g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative); + g_test_add_data_func("/core/parser/llk/iterative/single", GINT_TO_POINTER(PB_LLk), test_iterative_single); + g_test_add_data_func("/core/parser/llk/iterative/multi", GINT_TO_POINTER(PB_LLk), test_iterative_multi); g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead); g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length); g_test_add_data_func("/core/parser/llk/drop_from", GINT_TO_POINTER(PB_LLk), test_drop_from); @@ -1064,7 +1168,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); g_test_add_data_func("/core/parser/lalr/result_length", 
GINT_TO_POINTER(PB_LALR), test_result_length); g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position); - g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative); + g_test_add_data_func("/core/parser/lalr/iterative/single", GINT_TO_POINTER(PB_LALR), test_iterative_single); + g_test_add_data_func("/core/parser/lalr/iterative/multi", GINT_TO_POINTER(PB_LALR), test_iterative_multi); g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead); g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length); g_test_add_data_func("/core/parser/lalr/drop_from", GINT_TO_POINTER(PB_LALR), test_drop_from); diff --git a/src/test_suite.h b/src/test_suite.h index 775c8818a1f79cb2fb50275a55c0e8e1f8a235fe..93f3e19de01db05e8eb8aa3bc51371cee7a5a635 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -327,6 +327,97 @@ } \ } while(0) +#define g_check_parse_chunk_failed__m(mm__, parser, backend, chunk1, c1_len) do { \ + int skip = h_compile__m(mm__, (HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_failed___m(mm__, parser, chunk1, c1_len); \ + } while(0) + +#define g_check_parse_chunk_failed___m(mm__, parser, chunk1, c1_len) do { \ + HSuspendedParser *s = h_parse_start__m(mm__, (HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (NULL != res) { \ + h_parse_result_free(res); \ + g_test_message("Check failed: shouldn't have succeeded, but did"); \ + g_test_fail(); \ + } \ + } while(0) + +#define g_check_parse_chunk_failed(p, be, c1, c1_len) \ + g_check_parse_chunk_failed__m(&system_allocator, p, be, 
c1, c1_len) + +#define g_check_parse_chunk_failed_(p, c1, c1_len) \ + g_check_parse_chunk_failed___m(&system_allocator, p, c1, c1_len) + +#define g_check_parse_chunk_ok(parser, backend, chunk1, c1_len) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend)backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_ok_(parser, chunk1, c1_len); \ + } while(0) + +#define g_check_parse_chunk_ok_(parser, chunk1, c1_len) do { \ + HSuspendedParser *s = h_parse_start((HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + print_arena_stats(res->arena); \ + h_parse_result_free(res); \ + } \ + } while(0) + +#define g_check_parse_chunk_match(parser, backend, chunk1, c1_len, result) do { \ + int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \ + if(skip) { \ + g_test_message("Compile failed"); \ + g_test_fail(); \ + break; \ + } \ + g_check_parse_chunk_match_(parser, chunk1, c1_len, result); \ + } while(0) + +#define g_check_parse_chunk_match_(parser, chunk1, c1_len, result) do { \ + HSuspendedParser *s = h_parse_start((HParser *)(parser)); \ + if(!s) { \ + g_test_message("Chunk-wise parsing not available"); \ + g_test_fail(); \ + break; \ + } \ + h_parse_chunk(s, (const uint8_t*)chunk1, c1_len); \ + HParseResult *res = h_parse_finish(s); \ + if (!res) { \ + g_test_message("Parse failed on line %d", __LINE__); \ + g_test_fail(); \ + } else { \ + char* cres = h_write_result_unamb(res->ast); \ + g_check_string(cres, ==, result); \ + (&system_allocator)->free(&system_allocator, cres); \ + print_arena_stats(res->arena); \ + h_parse_result_free(res); \ + } \ + } while(0) + #define 
g_check_hashtable_present(table, key) do { \ if(!h_hashtable_present(table, key)) { \ g_test_message("Check failed: key should have been in table, but wasn't"); \