diff --git a/src/hammer.c b/src/hammer.c index 7b6379a17d81c8269e62de9a0077c529be17fe28..ddafaabe539de95cb8ce4b22406b8ff3ce4aa419 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -459,11 +459,100 @@ const parser_t* xor(const parser_t* p1, const parser_t* p2) { return ret; } -const parser_t* repeat0(const parser_t* p) { return &unimplemented; } -const parser_t* repeat1(const parser_t* p) { return &unimplemented; } +typedef struct { + const parser_t *p, *sep; + size_t count; + bool min_p; +} repeat_t; +static parse_result_t *parse_many(void* env, parse_state_t *state) { + repeat_t *env_ = (repeat_t*) env; + GSequence *seq = g_sequence_new(NULL); + size_t count = 0; + input_stream_t bak; + while (env_->min_p || env_->count > count) { + bak = state->input_stream; + if (count > 0) { + parse_result_t *sep = do_parse(env_->sep, state); + if (!sep) + goto err0; + } + parse_result_t *elem = do_parse(env_->p, state); + if (!elem) + goto err0; + if (elem->ast) + g_sequence_append(seq, (gpointer)elem->ast); + count++; + } + if (count < env_->count) + goto err; + succ: + ; // necessary for the label to be here... + parsed_token_t *res = a_new(parsed_token_t, 1); + res->token_type = TT_SEQUENCE; + res->seq = seq; + return make_result(state, res); + err0: + if (count >= env_->count) { + state->input_stream = bak; + goto succ; + } + err: + g_sequence_free(seq); + state->input_stream = bak; + return NULL; +} +const parser_t* many(const parser_t* p) { + parser_t *res = g_new(parser_t, 1); + repeat_t *env = g_new(repeat_t, 1); + env->p = p; + env->sep = epsilon_p(); + env->count = 0; + env->min_p = true; + res->fn = parse_many; + res->env = env; + return res; +} + +const parser_t* many1(const parser_t* p) { + parser_t *res = g_new(parser_t, 1); + repeat_t *env = g_new(repeat_t, 1); + env->p = p; + env->sep = epsilon_p(); + env->count = 1; + env->min_p = true; + res->fn = parse_many; + res->env = env; + return res; +} -const parser_t* repeat_n(const parser_t* p, const size_t n) { return &unimplemented; } +const parser_t* repeat_n(const parser_t* p, const size_t n) { + parser_t *res = g_new(parser_t, 1); + repeat_t *env = g_new(repeat_t, 1); + env->p = p; + env->sep = epsilon_p(); + env->count = n; + env->min_p = false; + res->fn = parse_many; + res->env = env; + return res; +} + +static parse_result_t* parse_ignore(void* env, parse_state_t* state) { + parse_result_t *res0 = do_parse((parser_t*)env, state); + if (!res0) + return NULL; + parse_result_t *res = a_new(parse_result_t, 1); + res->ast = NULL; + res->arena = state->arena; + return res; +} +const parser_t* ignore(const parser_t* p) { + parser_t* ret = g_new(parser_t, 1); + ret->fn = parse_ignore; + ret->env = (void*)p; + return ret; +} static parse_result_t* parse_optional(void* env, parse_state_t* state) { input_stream_t bak = state->input_stream; @@ -477,32 +566,70 @@ static parse_result_t* parse_optional(void* env, parse_state_t* state) { } const parser_t* optional(const parser_t* p) { + assert_message(p->fn != parse_ignore, "Thou shalt ignore an option, rather than the other way 'round."); parser_t *ret = g_new(parser_t, 1); ret->fn = parse_optional; ret->env = (void*)p; return ret; } -static parse_result_t* parse_ignore(void* env, parse_state_t* state) { - parse_result_t *res0 = do_parse((parser_t*)env, state); - if (!res0) - return NULL; - parse_result_t *res = a_new(parse_result_t, 1); +const parser_t* sepBy(const parser_t* p, const parser_t* sep) { + parser_t *res = g_new(parser_t, 1); + repeat_t *env = g_new(repeat_t, 1); + env->p = p; + env->sep = sep; + env->count = 0; + env->min_p = true; + res->fn = parse_many; + res->env = env; + return res; +} + +const parser_t* sepBy1(const parser_t* p, const parser_t* sep) { + parser_t *res = g_new(parser_t, 1); + repeat_t *env = g_new(repeat_t, 1); + env->p = p; + env->sep = sep; + env->count = 1; + env->min_p = true; + res->fn = parse_many; + res->env = env; + return res; +} + +static parse_result_t* parse_epsilon(void* env, parse_state_t* state) { + (void)env; + parse_result_t* res = a_new(parse_result_t, 1); res->ast = NULL; res->arena = state->arena; return res; } -const parser_t* ignore(const parser_t* p) { - parser_t* ret = g_new(parser_t, 1); - ret->fn = parse_ignore; - ret->env = (void*)p; - return ret; + +const parser_t* epsilon_p() { + parser_t *res = g_new(parser_t, 1); + res->fn = parse_epsilon; + res->env = NULL; + return res; } -const parser_t* list(const parser_t* p, const parser_t* sep) { return &unimplemented; } -const parser_t* epsilon_p() { return &unimplemented; } const parser_t* attr_bool(const parser_t* p, attr_bool_t a) { return &unimplemented; } const parser_t* and(const parser_t* p) { return &unimplemented; } -const parser_t* not(const parser_t* p) { return &unimplemented; } + +static parse_result_t* parse_not(void* env, parse_state_t* state) { + input_stream_t bak = state->input_stream; + if (do_parse((parser_t*)env, state)) + return NULL; + else { + state->input_stream = bak; + return make_result(state, NULL); + } +} + +const parser_t* not(const parser_t* p) { + parser_t *res = g_new(parser_t, 1); + res->fn = parse_not; + res->env = (void*)p; + return res; +} static guint cache_key_hash(gconstpointer key) { return djbhash(key, sizeof(parser_cache_key_t)); @@ -541,7 +668,7 @@ parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t lengt static void test_token(void) { const parser_t *token_ = token((const uint8_t*)"95\xa2", 3); - g_check_parse_ok(token_, "95\xa2", 3, "<39.35.A2>"); + g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>"); g_check_parse_failed(token_, "95", 2); } @@ -722,22 +849,22 @@ static void test_xor(void) { g_check_parse_failed(xor_, "a", 1); } -static void test_repeat0(void) { - const parser_t *repeat0_ = repeat0(choice(ch('a'), ch('b'), NULL)); +static void test_many(void) { + const parser_t *many_ = many(choice(ch('a'), ch('b'), NULL)); - g_check_parse_ok(repeat0_, "adef", 4, "(s0x61)"); - g_check_parse_ok(repeat0_, "bdef", 4, "(s0x62)"); - g_check_parse_ok(repeat0_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)"); - g_check_parse_ok(repeat0_, "daabbabadef", 11, "()"); + g_check_parse_ok(many_, "adef", 4, "(s0x61)"); + g_check_parse_ok(many_, "bdef", 4, "(s0x62)"); + g_check_parse_ok(many_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)"); + g_check_parse_ok(many_, "daabbabadef", 11, "()"); } -static void test_repeat1(void) { - const parser_t *repeat1_ = repeat1(choice(ch('a'), ch('b'), NULL)); +static void test_many1(void) { + const parser_t *many1_ = many1(choice(ch('a'), ch('b'), NULL)); - g_check_parse_ok(repeat1_, "adef", 4, "(s0x61)"); - g_check_parse_ok(repeat1_, "bdef", 4, "(s0x62)"); - g_check_parse_ok(repeat1_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)"); - g_check_parse_failed(repeat1_, "daabbabadef", 11); + g_check_parse_ok(many1_, "adef", 4, "(s0x61)"); + g_check_parse_ok(many1_, "bdef", 4, "(s0x62)"); + g_check_parse_ok(many1_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)"); + g_check_parse_failed(many1_, "daabbabadef", 11); } static void test_repeat_n(void) { @@ -766,13 +893,13 @@ static void test_ignore(void) { g_check_parse_failed(ignore_, "ac", 2); } -static void test_list(void) { - const parser_t *list_ = list(choice(ch('1'), ch('2'), ch('3'), NULL), ch(',')); +static void test_sepBy1(void) { + const parser_t *sepBy1_ = sepBy1(choice(ch('1'), ch('2'), ch('3'), NULL), ch(',')); - g_check_parse_ok(list_, "1,2,3", 5, "(s0x31 s0x32 s0x33)"); - g_check_parse_ok(list_, "1,3,2", 5, "(s0x31 s0x33 s0x32)"); - g_check_parse_ok(list_, "1,3", 3, "(s0x31 s0x33)"); - g_check_parse_ok(list_, "3", 1, "(s0x33)"); + g_check_parse_ok(sepBy1_, "1,2,3", 5, "(s0x31 s0x32 s0x33)"); + g_check_parse_ok(sepBy1_, "1,3,2", 5, "(s0x31 s0x33 s0x32)"); + g_check_parse_ok(sepBy1_, "1,3", 3, "(s0x31 s0x33)"); + g_check_parse_ok(sepBy1_, "3", 1, "(s0x33)"); } static void test_epsilon_p(void) { @@ -806,10 +933,10 @@ static void test_not(void) { token((const uint8_t*)"++", 2), NULL), ch('b'), NULL); - g_check_parse_ok(not_1, "a+b", 3, "(s0x61 s0x2B s0x62)"); + g_check_parse_ok(not_1, "a+b", 3, "(s0x61 s0x2b s0x62)"); g_check_parse_failed(not_1, "a++b", 4); - g_check_parse_ok(not_2, "a+b", 3, "(s0x61 s0x2B s0x62)"); - g_check_parse_ok(not_2, "a++b", 4, "(s0x61 <2B.2B> s0x62)"); + g_check_parse_ok(not_2, "a+b", 3, "(s0x61 (s0x2b) s0x62)"); + g_check_parse_ok(not_2, "a++b", 4, "(s0x61 <2b.2b> s0x62)"); } void register_parser_tests(void) { @@ -838,11 +965,11 @@ void register_parser_tests(void) { g_test_add_func("/core/parser/butnot", test_butnot); g_test_add_func("/core/parser/difference", test_difference); g_test_add_func("/core/parser/xor", test_xor); - g_test_add_func("/core/parser/repeat0", test_repeat0); - g_test_add_func("/core/parser/repeat1", test_repeat1); + g_test_add_func("/core/parser/many", test_many); + g_test_add_func("/core/parser/many1", test_many1); g_test_add_func("/core/parser/repeat_n", test_repeat_n); g_test_add_func("/core/parser/optional", test_optional); - g_test_add_func("/core/parser/list", test_list); + g_test_add_func("/core/parser/sepBy1", test_sepBy1); g_test_add_func("/core/parser/epsilon_p", test_epsilon_p); g_test_add_func("/core/parser/attr_bool", test_attr_bool); g_test_add_func("/core/parser/and", test_and); diff --git a/src/hammer.h b/src/hammer.h index a3536414cd8737b40a19db132ed6d7bc7f10dab2..054e3f82b001c0ba9fd1c3ccb50bff99bcd0ff27 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -177,10 +177,10 @@ const parser_t* difference(const parser_t* p1, const parser_t* p2); const parser_t* xor(const parser_t* p1, const parser_t* p2); /* Given a parser, p, this parser succeeds for zero or more repetitions of p. */ -const parser_t* repeat0(const parser_t* p); +const parser_t* many(const parser_t* p); /* Given a parser, p, this parser succeeds for one or more repetitions of p. */ -const parser_t* repeat1(const parser_t* p); +const parser_t* many1(const parser_t* p); /* Given a parser, p, this parser succeeds for exactly N repetitions of p. */ const parser_t* repeat_n(const parser_t* p, const size_t n); @@ -191,10 +191,15 @@ const parser_t* optional(const parser_t* p); /* Given a parser, p, this parser succeeds if p succeeds, but doesn't include p's result in the result. */ const parser_t* ignore(const parser_t* p); -/* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. - * For example, if p is repeat1(range('0','9')) and sep is ch(','), list(p, sep) will match a comma-separated list of integers. +/* Given a parser, p, and a parser for a separator, sep, this parser matches a (possibly empty) list of things that p can parse, separated by sep. + * For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy(p, sep) will match a comma-separated list of integers. */ -const parser_t* list(const parser_t* p, const parser_t* sep); +const parser_t* sepBy(const parser_t* p, const parser_t* sep); + +/* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element. + * For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy1(p, sep) will match a comma-separated list of integers. + */ +const parser_t* sepBy1(const parser_t* p, const parser_t* sep); /* This parser always returns a zero length match, i.e., empty string. */ const parser_t* epsilon_p(); diff --git a/src/internal.h b/src/internal.h index 8a756c432c3b93fbdef2b7f93dd28f2e23feb4b0..1a65a071fde698737545f5d4905667a649a12cb2 100644 --- a/src/internal.h +++ b/src/internal.h @@ -18,8 +18,17 @@ #ifndef HAMMER_INTERNAL__H #define HAMMER_INTERNAL__H #include <glib.h> +#include <err.h> #include "hammer.h" +#ifdef NDEBUG +#define assert_message(check, message) do { } while(0) +#else +#define assert_message(check, message) do { \ + if (!(check)) \ + errx(1, "Assertation failed (programmer error): %s", message); \ + } while(0) +#endif #define false 0 #define true 1 @@ -71,4 +80,5 @@ void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached); guint djbhash(const uint8_t *buf, size_t len); char* write_result_unamb(const parsed_token_t* tok); void pprint(const parsed_token_t* tok, int indent, int delta); + #endif // #ifndef HAMMER_INTERNAL__H diff --git a/src/pprint.c b/src/pprint.c index 6832b30af1f790b68ed5b6be3fa40ef2b9a5a726..106e7844f2bc3ca1c010703c4d0d3888403af672 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -102,7 +102,7 @@ static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) { append_buf(buf, "<>", 2); else { for (size_t i = 0; i < tok->bytes.len; i++) { - const char *HEX = "0123456789ABCDEF"; + const char *HEX = "0123456789abcdef"; append_buf_c(buf, (i == 0) ? '<': '.'); char c = tok->bytes.token[i]; append_buf_c(buf, HEX[(c >> 4) & 0xf]); diff --git a/src/test_suite.h b/src/test_suite.h index 8f0b07a7088a07e6a3e08781631d3dc1f7fa79b7..f313e5d3b5c3e4078f6d64c14996364fa34d9ec9 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -19,7 +19,7 @@ #define HAMMER_TEST_SUITE__H // Equivalent to g_assert_*, but not using g_assert... -#define g_check_inttype(fmt, typ, n1, op, n2) { \ +#define g_check_inttype(fmt, typ, n1, op, n2) do { \ typ _n1 = (n1); \ typ _n2 = (n2); \ if (!(_n1 op _n2)) { \ @@ -28,9 +28,9 @@ _n1, #op, _n2); \ g_test_fail(); \ } \ - } + } while(0) -#define g_check_bytes(len, n1, op, n2) { \ +#define g_check_bytes(len, n1, op, n2) do { \ const uint8_t *_n1 = (n1); \ const uint8_t *_n2 = (n2); \ if (!(memcmp(_n1, _n2, len) op 0)) { \ @@ -38,37 +38,37 @@ #n1 " " #op " " #n2); \ g_test_fail(); \ } \ - } + } while(0) -#define g_check_string(n1, op, n2) { \ +#define g_check_string(n1, op, n2) do { \ const char *_n1 = (n1); \ const char *_n2 = (n2); \ - if (!(strcmp(_n1, _n2) op 0)) { \ + if (!(strcmp(_n1, _n2) op 0)) { \ g_test_message("Check failed: (%s) (%s %s %s)", \ #n1 " " #op " " #n2, \ _n1, #op, _n2); \ g_test_fail(); \ } \ - } + } while(0) // TODO: replace uses of this with g_check_parse_failed -#define g_check_failed(res) { \ - const parse_result_t *result = (res); \ - if (NULL != result) { \ +#define g_check_failed(res) do { \ + const parse_result_t *result = (res); \ + if (NULL != result) { \ g_test_message("Check failed: shouldn't have succeeded, but did"); \ - g_test_fail(); \ - } \ - } + g_test_fail(); \ + } \ + } while(0) -#define g_check_parse_failed(parser, input, inp_len) { \ +#define g_check_parse_failed(parser, input, inp_len) do { \ const parse_result_t *result = parse(parser, (const uint8_t*)input, inp_len); \ if (NULL != result) { \ g_test_message("Check failed: shouldn't have succeeded, but did"); \ g_test_fail(); \ } \ - } + } while(0) -#define g_check_parse_ok(parser, input, inp_len, result) { \ +#define g_check_parse_ok(parser, input, inp_len, result) do { \ parse_result_t *res = parse(parser, (const uint8_t*)input, inp_len); \ if (!res) { \ g_test_message("Parse failed on line %d", __LINE__); \ @@ -83,7 +83,7 @@ stats.used, stats.wasted, \ stats.wasted * 100. / (stats.used+stats.wasted)); \ } \ - } + } while(0) #define g_check_cmpint(n1, op, n2) g_check_inttype("%d", int, n1, op, n2)