From 709884faf52fb990d87fdc81f6a4383721aea688 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" <clonearmy@gmail.com> Date: Thu, 3 May 2012 02:31:22 +0100 Subject: [PATCH] `difference` and `xor` combinators done. --- NOTES | 5 +++- src/hammer.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++--- src/hammer.h | 13 +++++++++ 3 files changed, 89 insertions(+), 5 deletions(-) diff --git a/NOTES b/NOTES index 4d89c709..16d6bd7a 100644 --- a/NOTES +++ b/NOTES @@ -3,4 +3,7 @@ NOTES Regarding parse_result_t: If a parse fails, the parse_result_t will be NULL. -If a parse is successful but there's nothing there (i.e., if end_p succeeds), then there's a parse_result_t but its ast is NULL. \ No newline at end of file +If a parse is successful but there's nothing there (i.e., if end_p succeeds), then there's a parse_result_t but its ast is NULL. + +Regarding butnot and difference: +There's a "do what I say, not what I do" variation in how we implemented these (versus how jsparse did it). His `butnot` succeeds if p1 and p2 both match and p1's result is longer than p2's, though the comments say it should succeed if p2's result is longer than p1's. Also, his `difference` succeeds if p1 and p2 both match, full stop, returning the result of p2 if p2's result is shorter than p1's or the result of p1 otherwise, though the comments say it should succeed if p2's result is shorter than p1's. Whatever; we're doing what the comments say. 
\ No newline at end of file diff --git a/src/hammer.c b/src/hammer.c index 867e36b3..cb6d3acc 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -292,7 +292,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) { // cache the initial state of the input stream input_stream_t start_state = state->input_stream; parse_result_t *r1 = do_parse(parsers->p1, state); - // if r1 is null, bail out early + // if p1 failed, bail out early if (NULL == r1) { return NULL; } @@ -302,7 +302,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) { parse_result_t *r2 = do_parse(parsers->p2, state); // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases state->input_stream = after_p1_state; - // if r2 is null, restore post-p1 state and bail out early + // if p2 failed, restore post-p1 state and bail out early if (NULL == r2) { return r1; } @@ -324,8 +324,76 @@ const parser_t* butnot(const parser_t* p1, const parser_t* p2) { return ret; } -const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; } -const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; } +static parse_result_t* parse_difference(void *env, parse_state_t *state) { + two_parsers_t *parsers = (two_parsers_t*)env; + // cache the initial state of the input stream + input_stream_t start_state = state->input_stream; + parse_result_t *r1 = do_parse(parsers->p1, state); + // if p1 failed, bail out early + if (NULL == r1) { + return NULL; + } + // cache the state after parse #1, since we might have to back up to it + input_stream_t after_p1_state = state->input_stream; + state->input_stream = start_state; + parse_result_t *r2 = do_parse(parsers->p2, state); + // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases + state->input_stream = after_p1_state; + // if p2 failed, restore post-p1 state and bail out early + if (NULL == r2) { + return r1; + } + size_t r1len = 
token_length(r1); + size_t r2len = token_length(r2); + // if both match but p1's text is shorter than p2's, fail + if (r1len < r2len) { + return NULL; + } else { + return r1; + } +} + +const parser_t* difference(const parser_t* p1, const parser_t* p2) { + two_parsers_t *env = g_new(two_parsers_t, 1); + env->p1 = p1; env->p2 = p2; + parser_t *ret = g_new(parser_t, 1); + ret->fn = parse_difference; ret->env = (void*)env; + return ret; +} + +static parse_result_t* parse_xor(void *env, parse_state_t *state) { + two_parsers_t *parsers = (two_parsers_t*)env; + // cache the initial state of the input stream + input_stream_t start_state = state->input_stream; + parse_result_t *r1 = do_parse(parsers->p1, state); + input_stream_t after_p1_state = state->input_stream; + // reset input stream, parse again + state->input_stream = start_state; + parse_result_t *r2 = do_parse(parsers->p2, state); + if (NULL == r1) { + if (NULL != r2) { + return r2; + } else { + return NULL; + } + } else { + if (NULL == r2) { + state->input_stream = after_p1_state; + return r1; + } else { + return NULL; + } + } +} + +const parser_t* xor(const parser_t* p1, const parser_t* p2) { + two_parsers_t *env = g_new(two_parsers_t, 1); + env->p1 = p1; env->p2 = p2; + parser_t *ret = g_new(parser_t, 1); + ret->fn = parse_xor; ret->env = (void*)env; + return ret; +} + const parser_t* repeat0(const parser_t* p) { return NULL; } const parser_t* repeat1(const parser_t* p) { return NULL; } const parser_t* repeat_n(const parser_t* p, const size_t n) { return NULL; } diff --git a/src/hammer.h b/src/hammer.h index b91b9dc8..b3f5f4eb 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -116,9 +116,22 @@ const parser_t* sequence(const parser_t* p_array[]); /* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. 
*/ const parser_t* choice(const parser_t* p_array[]); +/* Given two parsers, p1 and p2, this parser succeeds in the following cases: + * - if p1 succeeds and p2 fails + * - if both succeed but p1's result is shorter than p2's + */ const parser_t* butnot(const parser_t* p1, const parser_t* p2); + +/* Given two parsers, p1 and p2, this parser succeeds in the following cases: + * - if p1 succeeds and p2 fails + * - if both succeed but p2's result is not longer than p1's + */ const parser_t* difference(const parser_t* p1, const parser_t* p2); + +/* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeeds, but not if they both do. + */ const parser_t* xor(const parser_t* p1, const parser_t* p2); + const parser_t* repeat0(const parser_t* p); const parser_t* repeat1(const parser_t* p); const parser_t* repeat_n(const parser_t* p, const size_t n); -- GitLab