diff --git a/NOTES b/NOTES
index 4d89c709044f65b25c779ec41cbfc494f365d23c..16d6bd7aecd3ed74fcd0a548426117578a2a2102 100644
--- a/NOTES
+++ b/NOTES
@@ -3,4 +3,7 @@ NOTES
 
 Regarding parse_result_t:
 If a parse fails, the parse_result_t will be NULL.
-If a parse is successful but there's nothing there (i.e., if end_p succeeds), then there's a parse_result_t but its ast is NULL.
\ No newline at end of file
+If a parse is successful but there's nothing there (i.e., if end_p succeeds), then there's a parse_result_t but its ast is NULL.
+
+Regarding butnot and difference:
+There's a "do what I say, not what I do" variation in how we implemented these (versus how jsparse did it). jsparse's `butnot` succeeds if p1 and p2 both match and p1's result is longer than p2's, though its comments say it should succeed if p2's result is longer than p1's. Also, jsparse's `difference` succeeds if p1 and p2 both match, full stop, returning the result of p2 if p2's result is shorter than p1's or the result of p1 otherwise, though its comments say it should succeed if p2's result is shorter than p1's. Whatever; we're doing what the comments say.
\ No newline at end of file
diff --git a/src/hammer.c b/src/hammer.c
index 867e36b3bf338d12c7c121c14d00e7302cd8bbcb..cb6d3accd8d00356d7718c4d64caef4f8adc2970 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -292,7 +292,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
   // cache the initial state of the input stream
   input_stream_t start_state = state->input_stream;
   parse_result_t *r1 = do_parse(parsers->p1, state);
-  // if r1 is null, bail out early
+  // if p1 failed, bail out early
   if (NULL == r1) {
     return NULL;
   }
@@ -302,7 +302,7 @@ static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
   parse_result_t *r2 = do_parse(parsers->p2, state);
   // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
   state->input_stream = after_p1_state;
-  // if r2 is null, restore post-p1 state and bail out early
+  // if p2 failed, restore post-p1 state and bail out early
   if (NULL == r2) {
     return r1;
   }
@@ -324,8 +324,81 @@ const parser_t* butnot(const parser_t* p1, const parser_t* p2) {
   return ret;
 }
 
-const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
-const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; }
+// Succeeds with p1's result if p1 matches and p2 either fails or matches strictly less text.
+static parse_result_t* parse_difference(void *env, parse_state_t *state) {
+  two_parsers_t *parsers = (two_parsers_t*)env;
+  // cache the initial state of the input stream
+  input_stream_t start_state = state->input_stream;
+  parse_result_t *r1 = do_parse(parsers->p1, state);
+  // if p1 failed, bail out early
+  if (NULL == r1) {
+    return NULL;
+  }
+  // cache the state after parse #1, since we might have to back up to it
+  input_stream_t after_p1_state = state->input_stream;
+  state->input_stream = start_state;
+  parse_result_t *r2 = do_parse(parsers->p2, state);
+  // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
+  state->input_stream = after_p1_state;
+  // if p2 failed, restore post-p1 state and bail out early
+  if (NULL == r2) {
+    return r1;
+  }
+  size_t r1len = token_length(r1);
+  size_t r2len = token_length(r2);
+  // if both match, p2's text must be strictly shorter than p1's; a tie fails too
+  if (r1len <= r2len) {
+    return NULL;
+  } else {
+    return r1;
+  }
+}
+
+const parser_t* difference(const parser_t* p1, const parser_t* p2) {
+  two_parsers_t *env = g_new(two_parsers_t, 1);
+  env->p1 = p1; env->p2 = p2;
+  parser_t *ret = g_new(parser_t, 1);
+  ret->fn = parse_difference; ret->env = (void*)env;
+  return ret;
+}
+
+// Succeeds if exactly one of p1 and p2 matches, returning that parser's result.
+static parse_result_t* parse_xor(void *env, parse_state_t *state) {
+  two_parsers_t *parsers = (two_parsers_t*)env;
+  // cache the initial state of the input stream
+  input_stream_t start_state = state->input_stream;
+  parse_result_t *r1 = do_parse(parsers->p1, state);
+  input_stream_t after_p1_state = state->input_stream;
+  // reset input stream, parse again
+  state->input_stream = start_state;
+  parse_result_t *r2 = do_parse(parsers->p2, state);
+  if (NULL == r1) {
+    // p1 failed, so the outcome is p2's alone
+    if (NULL != r2) {
+      return r2;
+    } else {
+      return NULL;
+    }
+  } else {
+    if (NULL == r2) {
+      // only p1 matched: put the stream back where p1 left it
+      state->input_stream = after_p1_state;
+      return r1;
+    } else {
+      // both matched, which xor rejects
+      return NULL;
+    }
+  }
+}
+
+const parser_t* xor(const parser_t* p1, const parser_t* p2) {
+  two_parsers_t *env = g_new(two_parsers_t, 1);
+  env->p1 = p1; env->p2 = p2;
+  parser_t *ret = g_new(parser_t, 1);
+  ret->fn = parse_xor; ret->env = (void*)env;
+  return ret;
+}
+
 const parser_t* repeat0(const parser_t* p) { return NULL; }
 const parser_t* repeat1(const parser_t* p) { return NULL; }
 const parser_t* repeat_n(const parser_t* p, const size_t n) { return NULL; }
diff --git a/src/hammer.h b/src/hammer.h
index b91b9dc89c2a5953d818fe70a4fd27c3569ac2a0..b3f5f4ebf0d44ae89927d555541ec86544eb9b1f 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -116,9 +116,22 @@ const parser_t* sequence(const parser_t* p_array[]);
 /* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails.
  */
 const parser_t* choice(const parser_t* p_array[]);
+/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
+ * - if p1 succeeds and p2 fails
+ * - if both succeed but p1's result is shorter than p2's
+ */
 const parser_t* butnot(const parser_t* p1, const parser_t* p2);
+
+/* Given two parsers, p1 and p2, this parser succeeds in the following cases:
+ * - if p1 succeeds and p2 fails
+ * - if both succeed but p2's result is shorter than p1's
+ */
 const parser_t* difference(const parser_t* p1, const parser_t* p2);
+
+/* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeeds, but not if they both do.
+ */
 const parser_t* xor(const parser_t* p1, const parser_t* p2);
+
 const parser_t* repeat0(const parser_t* p);
 const parser_t* repeat1(const parser_t* p);
 const parser_t* repeat_n(const parser_t* p, const size_t n);