Review notes (this text sits ahead of the first "diff --git" header and is
not part of any hunk).

What the hunks below do:
  .gitignore              ignore *.swp and *.swo files.
  src/Makefile            add "ignoreseq" and "indirect" to PARSERS.
  src/hammer.c            add test_left, test_right, test_middle and test_in,
                          and register them in register_parser_tests().
  src/hammer.h            declare h_left, h_right, h_middle and h_in; make the
                          "inner" parameter of h_bind_indirect const.
  src/parsers/charset.c   turn the old h_not_in body into a shared helper,
                          h_in_or_not(options, count, val), and implement
                          h_in / h_not_in as wrappers passing val = 1 / 0.
  src/parsers/ignoreseq.c new file: runs a list of parsers in order, returns
                          NULL as soon as one returns NULL, otherwise returns
                          the result at index "which".
  src/parsers/indirect.c  match the new h_bind_indirect prototype.

Changes made during review:
  1. Line structure restored: one diff line per line, so every file header
     and "@@" hunk header starts a line again.
  2. h_in_or_not is now "static": it is not declared in hammer.h, and the
     equivalent helper in ignoreseq.c (h_leftright) is already static.
  3. The comment on HIgnoreSeq.count referred to a field named 'ps'; the
     field is called 'parsers'.

NOTE(review): whitespace inside lines (2-space C indentation, tabs in the
Makefile, and where blank context lines fall in the charset.c hunk) was
reconstructed from the hunk line counts -- confirm against the target tree
before applying.
NOTE(review): changes 2 and 3 alter added lines, so the post-image blob ids
on the "index" lines for charset.c and ignoreseq.c presumably no longer
match -- regenerate the patch if those ids matter to your workflow.

diff --git a/.gitignore b/.gitignore
index af536ad902dcf2ec3908e8d68e9aba0e48d910d5..3ab234f023eeef7ac5bb003d96d0ca1d21596b4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ src/test_suite
 lib/hush
 examples/dns
 TAGS
+*.swp
+*.swo
diff --git a/src/Makefile b/src/Makefile
index 24408ccae11ab69b84ee5f1c24275138e6660312..faae8a7d4bedad4b1900b930abe2cafd05f9ab92 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,6 +4,7 @@ PARSERS := \
 	bits \
 	token \
 	whitespace \
+	ignoreseq \
 	ch \
 	action \
 	charset \
@@ -21,7 +22,8 @@ PARSERS := \
 	epsilon \
 	and \
 	not \
-	attr_bool
+	attr_bool \
+	indirect
 
 OUTPUTS := bitreader.o \
 	hammer.o \
diff --git a/src/hammer.c b/src/hammer.c
index 82ca235243b90c5c16dda2953e75056d7d382577..d1a3334c77ed43b9945a94c1e7249341bffb77e9 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -385,6 +385,37 @@ static void test_whitespace(void) {
   g_check_parse_failed(whitespace_, "_a", 2);
 }
 
+static void test_left(void) {
+  const HParser *left_ = h_left(h_ch('a'), h_ch(' '));
+
+  g_check_parse_ok(left_, "a ", 2, "u0x61");
+  g_check_parse_failed(left_, "a", 1);
+  g_check_parse_failed(left_, " ", 1);
+  g_check_parse_failed(left_, "ab", 2);
+}
+
+static void test_right(void) {
+  const HParser *right_ = h_right(h_ch(' '), h_ch('a'));
+
+  g_check_parse_ok(right_, " a", 2, "u0x61");
+  g_check_parse_failed(right_, "a", 1);
+  g_check_parse_failed(right_, " ", 1);
+  g_check_parse_failed(right_, "ba", 2);
+}
+
+static void test_middle(void) {
+  const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' '));
+
+  g_check_parse_ok(middle_, " a ", 3, "u0x61");
+  g_check_parse_failed(middle_, "a", 1);
+  g_check_parse_failed(middle_, " ", 1);
+  g_check_parse_failed(middle_, " a", 2);
+  g_check_parse_failed(middle_, "a ", 2);
+  g_check_parse_failed(middle_, " b ", 3);
+  g_check_parse_failed(middle_, "ba ", 3);
+  g_check_parse_failed(middle_, " ab", 3);
+}
+
 #include <ctype.h>
 
 const HParsedToken* upcase(const HParseResult *p) {
@@ -434,6 +465,14 @@ static void test_action(void) {
   g_check_parse_failed(action_, "XX", 2);
 }
 
+static void test_in(void) {
+  uint8_t options[3] = { 'a', 'b', 'c' };
+  const HParser *in_ = h_in(options, 3);
+  g_check_parse_ok(in_, "b", 1, "u0x62");
+  g_check_parse_failed(in_, "d", 1);
+
+}
+
 static void test_not_in(void) {
   uint8_t options[3] = { 'a', 'b', 'c' };
   const HParser *not_in_ = h_not_in(options, 3);
@@ -606,7 +645,11 @@ void register_parser_tests(void) {
   g_test_add_func("/core/parser/float32", test_float32);
 #endif
   g_test_add_func("/core/parser/whitespace", test_whitespace);
+  g_test_add_func("/core/parser/left", test_left);
+  g_test_add_func("/core/parser/right", test_right);
+  g_test_add_func("/core/parser/middle", test_middle);
   g_test_add_func("/core/parser/action", test_action);
+  g_test_add_func("/core/parser/in", test_in);
   g_test_add_func("/core/parser/not_in", test_not_in);
   g_test_add_func("/core/parser/end_p", test_end_p);
   g_test_add_func("/core/parser/nothing_p", test_nothing_p);
diff --git a/src/hammer.h b/src/hammer.h
index fc3ea5bf122cc1a8cb756981c43b23586fc1d416..a219dfe2024d4b112a1cac9b20cc2a2a611a3765 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -222,6 +222,30 @@ const HParser* h_uint8();
  */
 const HParser* h_whitespace(const HParser* p);
 
+/**
+ * Given two parsers, p and q, returns a parser that parses them in
+ * sequence but only returns p's result.
+ *
+ * Result token type: p's result type
+ */
+const HParser* h_left(const HParser* p, const HParser* q);
+
+/**
+ * Given two parsers, p and q, returns a parser that parses them in
+ * sequence but only returns q's result.
+ *
+ * Result token type: q's result type
+ */
+const HParser* h_right(const HParser* p, const HParser* q);
+
+/**
+ * Given three parsers, p, x, and q, returns a parser that parses them in
+ * sequence but only returns x's result.
+ *
+ * Result token type: x's result type
+ */
+const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
+
 /**
  * Given another parser, p, and a function f, returns a parser that
  * applies p, then applies f to everything in the AST of p's result.
@@ -230,6 +254,13 @@ const HParser* h_whitespace(const HParser* p);
  */
 const HParser* h_action(const HParser* p, const HAction a);
 
+/**
+ * Parse a single character in the given charset.
+ *
+ * Result token type: TT_UINT
+ */
+const HParser* h_in(const uint8_t *charset, size_t length);
+
 /**
  * Parse a single character *NOT* in the given charset.
  *
@@ -441,7 +472,7 @@ HParser *h_indirect();
  * Set the inner parser of an indirect. See comments on indirect for
  * details.
  */
-void h_bind_indirect(HParser* indirect, HParser* inner);
+void h_bind_indirect(HParser* indirect, const HParser* inner);
 
 /**
  * Free the memory allocated to an HParseResult when it is no longer needed.
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index 6420af501a2b0058731f1d5bd6c6678620378d46..b9642fccd0aef2394f64de70fed56e911a918ad9 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -28,16 +28,24 @@ const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
 }
 
 
-const HParser* h_not_in(const uint8_t *options, size_t count) {
+static const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) {
   HParser *ret = g_new(HParser, 1);
   HCharset cs = new_charset();
   for (size_t i = 0; i < 256; i++)
-    charset_set(cs, i, 1);
+    charset_set(cs, i, 1-val);
   for (size_t i = 0; i < count; i++)
-    charset_set(cs, options[i], 0);
+    charset_set(cs, options[i], val);
 
   ret->vtable = &charset_vt;
   ret->env = (void*)cs;
   return (const HParser*)ret;
 }
 
+const HParser* h_in(const uint8_t *options, size_t count) {
+  return h_in_or_not(options, count, 1);
+}
+
+const HParser* h_not_in(const uint8_t *options, size_t count) {
+  return h_in_or_not(options, count, 0);
+}
+
diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c
new file mode 100644
index 0000000000000000000000000000000000000000..8aac2c82c5f09658c9962860f7f625bff70523f2
--- /dev/null
+++ b/src/parsers/ignoreseq.c
@@ -0,0 +1,73 @@
+#include "parser_internal.h"
+
+
+//
+// general case: parse sequence, pick one result
+//
+
+typedef struct {
+  const HParser **parsers;
+  size_t count; // how many parsers in 'parsers'
+  size_t which; // whose result to return
+} HIgnoreSeq;
+
+static HParseResult* parse_ignoreseq(void* env, HParseState *state) {
+  const HIgnoreSeq *seq = (HIgnoreSeq*)env;
+  HParseResult *res = NULL;
+
+  for (size_t i=0; i < seq->count; ++i) {
+    HParseResult *tmp = h_do_parse(seq->parsers[i], state);
+    if (!tmp)
+      return NULL;
+    else if (i == seq->which)
+      res = tmp;
+  }
+
+  return res;
+}
+
+static const HParserVtable ignoreseq_vt = {
+  .parse = parse_ignoreseq,
+};
+
+
+//
+// API frontends
+//
+
+static const HParser* h_leftright(const HParser* p, const HParser* q, size_t which) {
+  HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
+  seq->parsers = g_new(const HParser*, 2);
+  seq->parsers[0] = p;
+  seq->parsers[1] = q;
+  seq->count = 2;
+  seq->which = which;
+
+  HParser *ret = g_new(HParser, 1);
+  ret->vtable = &ignoreseq_vt;
+  ret->env = (void*)seq;
+  return ret;
+}
+
+const HParser* h_left(const HParser* p, const HParser* q) {
+  return h_leftright(p, q, 0);
+}
+
+const HParser* h_right(const HParser* p, const HParser* q) {
+  return h_leftright(p, q, 1);
+}
+
+const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) {
+  HIgnoreSeq *seq = g_new(HIgnoreSeq, 1);
+  seq->parsers = g_new(const HParser*, 3);
+  seq->parsers[0] = p;
+  seq->parsers[1] = x;
+  seq->parsers[2] = q;
+  seq->count = 3;
+  seq->which = 1;
+
+  HParser *ret = g_new(HParser, 1);
+  ret->vtable = &ignoreseq_vt;
+  ret->env = (void*)seq;
+  return ret;
+}
diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c
index 43657c3bbdfe2c04dced5b3eae1bda2fef3ab26e..758116de640ef9a179b934051d7b6421ce4e33cf 100644
--- a/src/parsers/indirect.c
+++ b/src/parsers/indirect.c
@@ -7,9 +7,9 @@ static const HParserVtable indirect_vt = {
   .parse = parse_indirect,
 };
 
-void h_bind_indirect(HParser* indirect, HParser* inner) {
+void h_bind_indirect(HParser* indirect, const HParser* inner) {
   assert_message(indirect->vtable == &indirect_vt, "You can only bind an indirect parser");
-  indirect->env = inner;
+  indirect->env = (void*)inner;
 }
 
 HParser* h_indirect() {