diff --git a/SConstruct b/SConstruct index b066a84a479dd274cd8123dc70032af06658ebdf..9697213f6d06cc069f45d121fb2acb291b8aeae8 100644 --- a/SConstruct +++ b/SConstruct @@ -10,9 +10,13 @@ vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packa vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept)) vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby'])) +tools = ['default', 'scanreplace'] +if 'dotnet' in ARGUMENTS.get('bindings', []): + tools.append('csharp/mono') + env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars, - tools=['default', 'scanreplace', 'csharp/mono'], + tools=tools, toolpath=['tools']) if not 'bindings' in env: @@ -47,6 +51,8 @@ env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attr if env['PLATFORM'] == 'darwin': env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}') +elif os.uname()[0] == "OpenBSD": + pass else: env.MergeFlags("-lrt") diff --git a/contrib/freebsd/.gitignore b/contrib/freebsd/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..b8f99f5be53f536f79ef622abaa77b9942a9e142 --- /dev/null +++ b/contrib/freebsd/.gitignore @@ -0,0 +1 @@ +work diff --git a/contrib/freebsd/Makefile b/contrib/freebsd/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..efcaac073eea24b10b05502b48cb6174da03e962 --- /dev/null +++ b/contrib/freebsd/Makefile @@ -0,0 +1,35 @@ +# Created by thequux for Upstanding Hackers +# Copyright (c) 2014 Upstanding Hackers +# +# The contents of this file are available under the 2-clause BSD +# license. + +PORTNAME = hammer +PORTVERSION = 0.9 +CATEGORIES = devel +EXTRACT_SUFX = .tar.bz2 + +USES = scons + +# MASTER_SITE +MAINTAINER = thequux@upstandinghackers.com +COMMENT = Parser combinators. In C. 
+ +LICENSE = GPLv2 +GIT_REV = master +GIT_URL = git://github.com/UpstandingHackers/hammer.git + +SCONS_ARGS = --variant=opt DESTDIR=${STAGEDIR} prefix=${LOCALBASE} +SCONS_TARGET = all +SCONS_INSTALL_TARGET = install + +do-fetch: + ${MKDIR} ${WRKDIR} + git clone -b ${GIT_REV} ${GIT_URL} ${WRKDIR}/${DISTNAME} + cd ${WRKDIR}; tar cvfy ${DISTDIR}/${DISTNAME}.tar.bz2 ${DISTNAME} + rm -rf ${WRKDIR}/${DISTNAME} + +FETCH_DEPENDS += git:${PORTSDIR}/devel/git + +# TODO: strip libhammer.so +.include <bsd.port.mk> diff --git a/contrib/freebsd/pkg-descr b/contrib/freebsd/pkg-descr new file mode 100644 index 0000000000000000000000000000000000000000..e5bedb8de42f9bff944475649e7be780ecaba401 --- /dev/null +++ b/contrib/freebsd/pkg-descr @@ -0,0 +1,4 @@ +Hammer is a fast parser combinator library written in C with bindings +for many languages. + +WWW: http://github.com/UpstandingHackers/hammer \ No newline at end of file diff --git a/contrib/freebsd/pkg-plist b/contrib/freebsd/pkg-plist new file mode 100644 index 0000000000000000000000000000000000000000..c751ea1e61032e6dcbe9e61603f27425eb2823cc --- /dev/null +++ b/contrib/freebsd/pkg-plist @@ -0,0 +1,14 @@ +include/hammer/internal.h +include/hammer/allocator.h +include/hammer/parsers/parser_internal.h +include/hammer/backends/regex.h +include/hammer/backends/contextfree.h +include/hammer/glue.h +include/hammer/hammer.h +lib/libhammer.so +lib/libhammer.a +lib/pkgconfig/libhammer.pc +@dirrm include/hammer/parsers +@dirrm include/hammer/backends +@dirrm include/hammer +@dirrmtry lib/pkgconfig \ No newline at end of file diff --git a/src/backends/llk.c b/src/backends/llk.c index 2bc39daf92b371b3b22b783623442eee36053bc0..89151e6f7fb3cc605ca6cbb7e498cdb1490776ea 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -81,13 +81,12 @@ static void *combine_entries(HHashSet *workset, void *dst, const void *src) // add the mappings of src to dst, marking conflicts and adding the conflicting // values to workset. 
-// note: reuses parts of src to build dst! static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src) { if(src->epsilon_branch) { if(dst->epsilon_branch) dst->epsilon_branch = - combine_entries(workset, dst->epsilon_branch, src->epsilon_branch); + combine_entries(workset, dst->epsilon_branch, src->epsilon_branch); else dst->epsilon_branch = src->epsilon_branch; } else { @@ -101,7 +100,7 @@ static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src) if(src->end_branch) { if(dst->end_branch) dst->end_branch = - combine_entries(workset, dst->end_branch, src->end_branch); + combine_entries(workset, dst->end_branch, src->end_branch); else dst->end_branch = src->end_branch; } @@ -118,10 +117,13 @@ static void stringmap_merge(HHashSet *workset, HStringMap *dst, HStringMap *src) if(src_) { HStringMap *dst_ = h_hashtable_get(dst->char_branches, (void *)c); - if(dst_) + if(dst_) { stringmap_merge(workset, dst_, src_); - else + } else { + if(src_->arena != dst->arena) + src_ = h_stringmap_copy(dst->arena, src_); h_hashtable_put(dst->char_branches, (void *)c, src_); + } } } } diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 87f166de95e8fb2b58bd8455622ca8f7cf007115..c1e422ed6e9fa42fe4130c11ad8a1f7e7c22c2a2 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -3,10 +3,22 @@ #include "../internal.h" #include "../parsers/parser_internal.h" -// short-hand for constructing HCachedResult's -static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { - HCachedResult *ret = a_new(HCachedResult, 1); - ret->result = result; +// short-hand for creating cache values (regular case) +static +HParserCacheValue * cached_result(HParseState *state, HParseResult *result) { + HParserCacheValue *ret = a_new(HParserCacheValue, 1); + ret->value_type = PC_RIGHT; + ret->right = result; + ret->input_stream = state->input_stream; + return ret; +} + +// short-hand for caching parse results (left 
recursion case) +static +HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) { + HParserCacheValue *ret = a_new(HParserCacheValue, 1); + ret->value_type = PC_LEFT; + ret->left = lr; ret->input_stream = state->input_stream; return ret; } @@ -44,27 +56,28 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { HParserCacheValue *cached = h_hashtable_get(state->cache, k); - HRecursionHead *head = h_hashtable_get(state->recursion_heads, k); + HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos); if (!head) { // No heads found return cached; } else { // Some heads found if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { // Nothing in the cache, and the key parser is not involved - HParseResult *tmp = a_new(HParseResult, 1); - tmp->ast = NULL; tmp->arena = state->arena; - HParserCacheValue *ret = a_new(HParserCacheValue, 1); - ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); - return ret; + cached = cached_result(state, NULL); + cached->input_stream = k->input_pos; } if (h_slist_find(head->eval_set, k->parser)) { // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. 
head->eval_set = h_slist_remove_all(head->eval_set, k->parser); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - // we know that cached has an entry here, modify it - if (!cached) - cached = a_new(HParserCacheValue, 1); - cached->value_type = PC_RIGHT; - cached->right = cached_result(state, tmp_res); + // update the cache + if (!cached) { + cached = cached_result(state, tmp_res); + h_hashtable_put(state->cache, k, cached); + } else { + cached->value_type = PC_RIGHT; + cached->right = tmp_res; + cached->input_stream = state->input_stream; + } } return cached; } @@ -83,51 +96,62 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { some->eval_set = NULL; rec_detect->head = some; } - //assert(state->lr_stack->head != NULL); - HSlistNode *head = state->lr_stack->head; - HLeftRec *lr; - while (head && (lr = head->elem)->rule != p) { + + HSlistNode *it; + for(it=state->lr_stack->head; it; it=it->next) { + HLeftRec *lr = it->elem; + + if(lr->rule == p) + break; + lr->head = rec_detect->head; h_slist_push(lr->head->involved_set, (void*)lr->rule); - head = head->next; } } +// helper: true iff pos1 is less than pos2 +static inline bool pos_lt(HInputStream pos1, HInputStream pos2) { + return ((pos1.index < pos2.index) || + (pos1.index == pos2.index && pos1.bit_offset < pos2.bit_offset)); +} + /* If recall() returns NULL, we need to store a dummy failure in the cache and compute the * future parse. 
*/ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { // Store the head into the recursion_heads - h_hashtable_put(state->recursion_heads, k, head); + h_hashtable_put(state->recursion_heads, &k->input_pos, head); HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); if (!old_cached || PC_LEFT == old_cached->value_type) errx(1, "impossible match"); - HParseResult *old_res = old_cached->right->result; + HParseResult *old_res = old_cached->right; + + // rewind the input + state->input_stream = k->input_pos; // reset the eval_set of the head of the recursion at each beginning of growth head->eval_set = h_slist_copy(head->involved_set); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { - if ((old_res->ast->index < tmp_res->ast->index) || - (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); - h_hashtable_put(state->cache, k, v); + if (pos_lt(old_cached->input_stream, state->input_stream)) { + h_hashtable_put(state->cache, k, cached_result(state, tmp_res)); return grow(k, state, head); } else { // we're done with growing, we can remove data from the recursion head - h_hashtable_del(state->recursion_heads, k); + h_hashtable_del(state->recursion_heads, &k->input_pos); HParserCacheValue *cached = h_hashtable_get(state->cache, k); if (cached && PC_RIGHT == cached->value_type) { - return cached->right->result; + state->input_stream = cached->input_stream; + return cached->right; } else { errx(1, "impossible match"); } } } else { - h_hashtable_del(state->recursion_heads, k); + h_hashtable_del(state->recursion_heads, &k->input_pos); + state->input_stream = old_cached->input_stream; return old_res; } } @@ -140,9 +164,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab } else { // update cache - 
HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); - h_hashtable_put(state->cache, k, v); + h_hashtable_put(state->cache, k, cached_result(state, growable->seed)); if (!growable->seed) return NULL; else @@ -165,18 +187,18 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { base->seed = NULL; base->rule = parser; base->head = NULL; h_slist_push(state->lr_stack, base); // cache it - HParserCacheValue *dummy = a_new(HParserCacheValue, 1); - dummy->value_type = PC_LEFT; dummy->left = base; - h_hashtable_put(state->cache, key, dummy); + h_hashtable_put(state->cache, key, cached_lr(state, base)); // parse the input HParseResult *tmp_res = perform_lowlevel_parse(state, parser); // the base variable has passed equality tests with the cache h_slist_pop(state->lr_stack); + // update the cached value to our new position + HParserCacheValue *cached = h_hashtable_get(state->cache, key); + assert(cached != NULL); + cached->input_stream = state->input_stream; // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one if (NULL == base->head) { - HParserCacheValue *right = a_new(HParserCacheValue, 1); - right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); - h_hashtable_put(state->cache, key, right); + h_hashtable_put(state->cache, key, cached_result(state, tmp_res)); return tmp_res; } else { base->seed = tmp_res; @@ -185,12 +207,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { } } else { // it exists! 
+ state->input_stream = m->input_stream; if (PC_LEFT == m->value_type) { setupLR(parser, state, m->left); - return m->left->seed; // BUG: this might not be correct + return m->left->seed; } else { - state->input_stream = m->right->input_stream; - return m->right->result; + return m->right; } } } @@ -212,6 +234,14 @@ static bool cache_key_equal(const void* key1, const void* key2) { return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0; } +static uint32_t pos_hash(const void* key) { + return h_djbhash(key, sizeof(HInputStream)); +} + +static bool pos_equal(const void* key1, const void* key2) { + return memcmp(key1, key2, sizeof(HInputStream)) == 0; +} + HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) { HArena * arena = h_new_arena(mm__, 0); HParseState *parse_state = a_new_(arena, HParseState, 1); @@ -219,8 +249,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr cache_key_hash); // hash_func parse_state->input_stream = *input_stream; parse_state->lr_stack = h_slist_new(arena); - parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal, - cache_key_hash); + parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash); parse_state->arena = arena; HParseResult *res = h_do_parse(parser, parse_state); h_slist_free(parse_state->lr_stack); diff --git a/src/benchmark.c b/src/benchmark.c index a3f292e65a83576d980daee38bb7af5d112671b8..632d7db3ba1321b1fb0fa6532f9fb76719725a5b 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -10,7 +10,13 @@ #include <mach/mach.h> #endif +#ifdef __NetBSD__ +#include <sys/resource.h> +#endif + void h_benchmark_clock_gettime(struct timespec *ts) { + if (ts == NULL) + return; #ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time /* * This returns real time, not CPU time. 
See http://stackoverflow.com/a/6725161 @@ -23,6 +29,18 @@ void h_benchmark_clock_gettime(struct timespec *ts) { mach_port_deallocate(mach_task_self(), cclock); ts->tv_sec = mts.tv_sec; ts->tv_nsec = mts.tv_nsec; +#elif defined(__NetBSD__) + // NetBSD doesn't have CLOCK_THREAD_CPUTIME_ID. We'll use getrusage instead + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + ts->tv_nsec = (rusage.ru_utime.tv_usec + rusage.ru_stime.tv_usec) * 1000; + // not going to overflow; can be at most 2e9-2 + ts->tv_sec = rusage.ru_utime.tv_sec + rusage.ru_utime.tv_sec; + if (ts->tv_nsec >= 1000000000) { + ts->tv_nsec -= 1000000000; // subtract a second + ts->tv_sec += 1; // add it back. + } + assert (ts->tv_nsec <= 1000000000); #else clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts); #endif diff --git a/src/cfgrammar.c b/src/cfgrammar.c index 196d9d3c8b6ee9cb77b24a98ff365b8b4634ac45..ab7388302b24c9bb1c8767fc5d66e990fa39c165 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -281,6 +281,13 @@ void h_stringmap_update(HStringMap *m, const HStringMap *n) h_hashtable_merge(combine_stringmap, m->char_branches, n->char_branches); } +HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m) +{ + HStringMap *res = h_stringmap_new(a); + h_stringmap_update(res, m); + return res; +} + /* Replace all occurances of old in m with new. * If old is NULL, replace all values in m with new. * If new is NULL, remove the respective values. 
@@ -641,7 +648,7 @@ void h_pprint_char(FILE *f, char c) case '\n': fputs("\\n", f); break; case '\r': fputs("\\r", f); break; default: - if(isprint(c)) { + if(isprint((unsigned char)c)) { fputc(c, f); } else { fprintf(f, "\\x%.2X", c); diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 193f8ca327d2f9c0b74518b9942b5fe3f37c407b..9cefc62e83f07048dc2a24f0cda1bde28ca72066 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -40,6 +40,7 @@ typedef struct HStringMap_ { } HStringMap; HStringMap *h_stringmap_new(HArena *a); +HStringMap *h_stringmap_copy(HArena *a, const HStringMap *m); void h_stringmap_put_end(HStringMap *m, void *v); void h_stringmap_put_epsilon(HStringMap *m, void *v); void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends); diff --git a/src/internal.h b/src/internal.h index 056a5afc6a2edcf0b8927e40d0ae12df60a93e81..85cd4dbc3407c74f9b8e055b56adc5fcad0a1b61 100644 --- a/src/internal.h +++ b/src/internal.h @@ -255,21 +255,17 @@ typedef struct HLeftRec_ { HRecursionHead *head; } HLeftRec; -/* Result and remaining input, for rerunning from a cached position. */ -typedef struct HCachedResult_ { - HParseResult *result; - HInputStream input_stream; -} HCachedResult; - /* Tagged union for values in the cache: either HLeftRec's (Left) or * HParseResult's (Right). + * Includes the position (input_stream) to advance to after using this value. 
*/ typedef struct HParserCacheValue_t { HParserCacheValueType value_type; union { HLeftRec *left; - HCachedResult *right; + HParseResult *right; }; + HInputStream input_stream; } HParserCacheValue; // This file provides the logical inverse of bitreader.c diff --git a/src/parsers/action.c b/src/parsers/action.c index e254a894320099d930e6fe08ddaa30e8d39ff18c..04eb7a4c85c71f8ea3bc60b3371f052cc43d7603 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -13,8 +13,8 @@ static HParseResult* parse_action(void *env, HParseState *state) { HParseResult *tmp = h_do_parse(a->p, state); //HParsedToken *tok = a->action(h_do_parse(a->p, state)); if(tmp) { - const HParsedToken *tok = a->action(tmp, a->user_data); - return make_result(state->arena, (HParsedToken*)tok); + HParsedToken *tok = (HParsedToken*)a->action(tmp, a->user_data); + return make_result(state->arena, tok); } else return NULL; } else // either the parser's missing or the action's missing diff --git a/src/parsers/and.c b/src/parsers/and.c index dfd91871574be6b3d5adbbb33508a0997e4e3f8f..c5c9836db57cc8864f785870a613e2ceb406b28c 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -25,5 +25,6 @@ HParser* h_and(const HParser* p) { } HParser* h_and__m(HAllocator* mm__, const HParser* p) { // zero-width postive lookahead - return h_new_parser(mm__, &and_vt, (void *)p); + void* env = (void*)p; + return h_new_parser(mm__, &and_vt, env); } diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index 9aa993aecd4689fe38ac8541f5ea79ce6c17d043..af606b0ea7567cf4e5068260386d907f10e0c8a7 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -55,5 +55,6 @@ HParser* h_ignore(const HParser* p) { return h_ignore__m(&system_allocator, p); } HParser* h_ignore__m(HAllocator* mm__, const HParser* p) { - return h_new_parser(mm__, &ignore_vt, (void *)p); + void* env = (void*)p; + return h_new_parser(mm__, &ignore_vt, env); } diff --git a/src/parsers/not.c b/src/parsers/not.c index 
8bf45dda21062699fbd744063c020ab382fa6a97..6c34bad48dc09ca2a290ce351b89e921422da265 100644 --- a/src/parsers/not.c +++ b/src/parsers/not.c @@ -21,5 +21,6 @@ HParser* h_not(const HParser* p) { return h_not__m(&system_allocator, p); } HParser* h_not__m(HAllocator* mm__, const HParser* p) { - return h_new_parser(mm__, &not_vt, (void *)p); + void* env = (void*)p; + return h_new_parser(mm__, &not_vt, env); } diff --git a/src/parsers/optional.c b/src/parsers/optional.c index ff9fc15bbf465bd36a0edd4fd1906c1f6077c1b1..ccee53fa864469600db64bb76562ce469559d09e 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -92,6 +92,7 @@ HParser* h_optional(const HParser* p) { HParser* h_optional__m(HAllocator* mm__, const HParser* p) { // TODO: re-add this //assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round."); - return h_new_parser(mm__, &optional_vt, (void *)p); + void* env = (void*)p; + return h_new_parser(mm__, &optional_vt, env); } diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 737affde3bad042e057b8a6c6c1f983116b18786..04284e86e61d242c58a1c42689607ecfd3794dfe 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -10,7 +10,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { c = h_read_bits(&state->input_stream, 8, false); if (state->input_stream.overrun) break; - } while (isspace(c)); + } while (isspace((int)c)); state->input_stream = bak; return h_do_parse((HParser*)env, state); } @@ -81,5 +81,6 @@ HParser* h_whitespace(const HParser* p) { return h_whitespace__m(&system_allocator, p); } HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) { - return h_new_parser(mm__, &whitespace_vt, (void *)p); + void* env = (void*)p; + return h_new_parser(mm__, &whitespace_vt, env); } diff --git a/src/t_parser.c b/src/t_parser.c index e2eca978e9e96b341b04d55ba8a850d95dacae54..4260a7c99447d4dbf1cbe1e58e782f93f3c5e397 100644 --- a/src/t_parser.c +++ 
b/src/t_parser.c @@ -414,11 +414,24 @@ static void test_leftrec(gconstpointer backend) { HParser *lr_ = h_indirect(); h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL)); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)"); g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)"); } +static void test_leftrec_ne(gconstpointer backend) { + HParser *a_ = h_ch('a'); + + HParser *lr_ = h_indirect(); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)"); + g_check_parse_failed(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0); +} + static void test_rightrec(gconstpointer backend) { HParser *a_ = h_ch('a'); @@ -485,7 +498,9 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and); g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); + // XXX(pesco) it seems to me Warth's algorithm just doesn't work for this case //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); + g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), 
test_token); @@ -599,6 +614,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/attr_bool", GINT_TO_POINTER(PB_LALR), test_attr_bool); g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore); g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec); + g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne); g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token); @@ -637,6 +653,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/attr_bool", GINT_TO_POINTER(PB_GLR), test_attr_bool); g_test_add_data_func("/core/parser/glr/ignore", GINT_TO_POINTER(PB_GLR), test_ignore); g_test_add_data_func("/core/parser/glr/leftrec", GINT_TO_POINTER(PB_GLR), test_leftrec); + g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne); g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec); g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous); }