diff --git a/src/Makefile b/src/Makefile index d362118c293cd5d66cfc2a0e1e1e69559de1dc15..9c69124241ca1148c6152c6d49344c5029cfc0b5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,11 +1,36 @@ +PARSERS := \ + unimplemented \ + bits \ + token \ + whitespace \ + ch \ + action \ + charset \ + int_range \ + sequence \ + choice \ + nothing \ + end \ + butnot \ + difference \ + many \ + xor \ + optional \ + ignore \ + epsilon \ + and \ + not \ + attr_bool + OUTPUTS := bitreader.o \ hammer.o \ libhammer.a \ pprint.o \ allocator.o \ datastructures.o \ - test_suite + test_suite \ + $(PARSERS:%=parsers/%.o) TOPLEVEL := ../ @@ -17,7 +42,8 @@ all: libhammer.a test_suite test_suite: test_suite.o libhammer.a $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o +libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o \ + $(PARSERS:%=parsers/%.o) bitreader.o: test_suite.h hammer.o: hammer.h diff --git a/src/hammer.c b/src/hammer.c index 6d64cd0a1d46c9c0d8342f73e03f7add3cbed649..356a9bcb2602b598e4a903f60bcf6fc9138c3eb5 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -15,19 +15,16 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "hammer.h" -#include "internal.h" -#include "allocator.h" #include <assert.h> #include <ctype.h> #include <error.h> #include <limits.h> #include <stdarg.h> #include <string.h> - -#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) -#define a_new(typ, count) a_new_(state->arena, typ, count) -// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out. 
+#include "hammer.h" +#include "internal.h" +#include "allocator.h" +#include "parsers/parser_internal.h" static guint djbhash(const uint8_t *buf, size_t len) { guint hash = 5381; @@ -54,7 +51,7 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { if (g_slist_find(head->eval_set, k->parser)) { // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. head->eval_set = g_slist_remove_all(head->eval_set, k->parser); - HParseResult *tmp_res = k->parser->fn(k->parser->env, state); + HParseResult *tmp_res = k->parser->vtable->parse(k->parser->env, state); if (tmp_res) tmp_res->arena = state->arena; // we know that cached has an entry here, modify it @@ -102,7 +99,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) head->eval_set = head->involved_set; HParseResult *tmp_res; if (k->parser) { - tmp_res = k->parser->fn(k->parser->env, state); + tmp_res = k->parser->vtable->parse(k->parser->env, state); if (tmp_res) tmp_res->arena = state->arena; } else @@ -170,7 +167,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { HParseResult *tmp_res; if (parser) { HInputStream bak = state->input_stream; - tmp_res = parser->fn(parser->env, state); + tmp_res = parser->vtable->parse(parser->env, state); if (tmp_res) { tmp_res->arena = state->arena; if (!state->input_stream.overrun) { @@ -217,760 +214,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { } /* Helper function, since these lines appear in every parser */ -static HParseResult* make_result(HParseState *state, HParsedToken *tok) { - HParseResult *ret = a_new(HParseResult, 1); - ret->ast = tok; - ret->arena = state->arena; - return ret; -} - -typedef struct { - uint8_t *str; - uint8_t len; -} HToken; - - static HParseResult* parse_unimplemented(void* env, HParseState *state) { - (void) env; - (void) state; - static HParsedToken token = { - .token_type = TT_ERR - }; - 
static HParseResult result = { - .ast = &token - }; - return &result; -} - -static HParser unimplemented __attribute__((unused)) = { - .fn = parse_unimplemented, - .env = NULL -}; - -struct bits_env { - uint8_t length; - uint8_t signedp; -}; - -static HParseResult* parse_bits(void* env, HParseState *state) { - struct bits_env *env_ = env; - HParsedToken *result = a_new(HParsedToken, 1); - result->token_type = (env_->signedp ? TT_SINT : TT_UINT); - if (env_->signedp) - result->sint = h_read_bits(&state->input_stream, env_->length, true); - else - result->uint = h_read_bits(&state->input_stream, env_->length, false); - return make_result(state, result); -} - -const HParser* h_bits(size_t len, bool sign) { - struct bits_env *env = g_new(struct bits_env, 1); - env->length = len; - env->signedp = sign; - HParser *res = g_new(HParser, 1); - res->fn = parse_bits; - res->env = env; - return res; -} - -#define SIZED_BITS(name_pre, len, signedp) \ - const HParser* h_##name_pre##len () { \ - return h_bits(len, signedp); \ - } -SIZED_BITS(int, 8, true) -SIZED_BITS(int, 16, true) -SIZED_BITS(int, 32, true) -SIZED_BITS(int, 64, true) -SIZED_BITS(uint, 8, false) -SIZED_BITS(uint, 16, false) -SIZED_BITS(uint, 32, false) -SIZED_BITS(uint, 64, false) - -static HParseResult* parse_token(void *env, HParseState *state) { - HToken *t = (HToken*)env; - for (int i=0; i<t->len; ++i) { - uint8_t chr = (uint8_t)h_read_bits(&state->input_stream, 8, false); - if (t->str[i] != chr) { - return NULL; - } - } - HParsedToken *tok = a_new(HParsedToken, 1); - tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len; - return make_result(state, tok); -} - -const HParser* h_token(const uint8_t *str, const size_t len) { - HToken *t = g_new(HToken, 1); - t->str = (uint8_t*)str, t->len = len; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_token; ret->env = t; - return (const HParser*)ret; -} - -static HParseResult* parse_ch(void* env, HParseState *state) { - uint8_t c = 
(uint8_t)GPOINTER_TO_UINT(env); - uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false); - if (c == r) { - HParsedToken *tok = a_new(HParsedToken, 1); - tok->token_type = TT_UINT; tok->uint = r; - return make_result(state, tok); - } else { - return NULL; - } -} - -const HParser* h_ch(const uint8_t c) { - HParser *ret = g_new(HParser, 1); - ret->fn = parse_ch; ret->env = GUINT_TO_POINTER(c); - return (const HParser*)ret; -} - -static HParseResult* parse_whitespace(void* env, HParseState *state) { - char c; - HInputStream bak; - do { - bak = state->input_stream; - c = h_read_bits(&state->input_stream, 8, false); - if (state->input_stream.overrun) - return NULL; - } while (isspace(c)); - state->input_stream = bak; - return h_do_parse((HParser*)env, state); -} - -const HParser* h_whitespace(const HParser* p) { - HParser *ret = g_new(HParser, 1); - ret->fn = parse_whitespace; - ret->env = (void*)p; - return ret; -} - -typedef struct { - const HParser *p; - HAction action; -} HParseAction; - -static HParseResult* parse_action(void *env, HParseState *state) { - HParseAction *a = (HParseAction*)env; - if (a->p && a->action) { - HParseResult *tmp = h_do_parse(a->p, state); - //HParsedToken *tok = a->action(h_do_parse(a->p, state)); - const HParsedToken *tok = a->action(tmp); - return make_result(state, (HParsedToken*)tok); - } else // either the parser's missing or the action's missing - return NULL; -} - -const HParser* h_action(const HParser* p, const HAction a) { - HParser *res = g_new(HParser, 1); - res->fn = parse_action; - HParseAction *env = g_new(HParseAction, 1); - env->p = p; - env->action = a; - res->env = (void*)env; - return res; -} - -static HParseResult* parse_charset(void *env, HParseState *state) { - uint8_t in = h_read_bits(&state->input_stream, 8, false); - HCharset cs = (HCharset)env; - - if (charset_isset(cs, in)) { - HParsedToken *tok = a_new(HParsedToken, 1); - tok->token_type = TT_UINT; tok->uint = in; - return make_result(state, tok); - } 
else - return NULL; -} - -const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { - HParser *ret = g_new(HParser, 1); - HCharset cs = new_charset(); - for (int i = 0; i < 256; i++) - charset_set(cs, i, (lower <= i) && (i <= upper)); - ret->fn = parse_charset; ret->env = (void*)cs; - return (const HParser*)ret; -} - -typedef struct { - const HParser *p; - int64_t lower; - int64_t upper; -} HRange; - -static HParseResult* parse_int_range(void *env, HParseState *state) { - HRange *r_env = (HRange*)env; - HParseResult *ret = h_do_parse(r_env->p, state); - if (!ret || !ret->ast) - return NULL; - switch(ret->ast->token_type) { - case TT_SINT: - if (r_env->lower <= ret->ast->sint && r_env->upper >= ret->ast->sint) - return ret; - else - return NULL; - case TT_UINT: - if ((uint64_t)r_env->lower <= ret->ast->uint && (uint64_t)r_env->upper >= ret->ast->uint) - return ret; - else - return NULL; - default: - return NULL; - } -} - -const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { - struct bits_env *b_env = p->env; - // p must be an integer parser, which means it's using parse_bits - assert_message(p->fn == parse_bits, "int_range requires an integer parser"); - // if it's a uint parser, it can't be uint64 - assert_message(!(b_env->signedp) ? 
(b_env->length < 64) : true, "int_range can't use a uint64 parser"); - // and regardless, the bounds need to fit in the parser in question - switch(b_env->length) { - case 32: - if (b_env->signedp) - assert_message(lower >= INT_MIN && upper <= INT_MAX, "bounds for 32-bit signed integer exceeded"); - else - assert_message(lower >= 0 && upper <= UINT_MAX, "bounds for 32-bit unsigned integer exceeded"); - break; - case 16: - if (b_env->signedp) - assert_message(lower >= SHRT_MIN && upper <= SHRT_MAX, "bounds for 16-bit signed integer exceeded"); - else - assert_message(lower >= 0 && upper <= USHRT_MAX, "bounds for 16-bit unsigned integer exceeded"); - break; - case 8: - if (b_env->signedp) - assert_message(lower >= SCHAR_MIN && upper <= SCHAR_MAX, "bounds for 8-bit signed integer exceeded"); - else - assert_message(lower >= 0 && upper <= UCHAR_MAX, "bounds for 8-bit unsigned integer exceeded"); - break; - default: - // how'd that happen? if we got here, this parser is broken. - return NULL; - } - - HRange *r_env = g_new(HRange, 1); - r_env->p = p; - r_env->lower = lower; - r_env->upper = upper; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_int_range; - ret->env = (void*)r_env; - return ret; -} - -const HParser* h_not_in(const uint8_t *options, int count) { - HParser *ret = g_new(HParser, 1); - HCharset cs = new_charset(); - for (int i = 0; i < 256; i++) - charset_set(cs, i, 1); - for (int i = 0; i < count; i++) - charset_set(cs, options[i], 0); - - ret->fn = parse_charset; ret->env = (void*)cs; - return (const HParser*)ret; -} - -static HParseResult* parse_end(void *env, HParseState *state) { - if (state->input_stream.index == state->input_stream.length) { - HParseResult *ret = a_new(HParseResult, 1); - ret->ast = NULL; - return ret; - } else { - return NULL; - } -} - -const HParser* h_end_p() { - HParser *ret = g_new(HParser, 1); - ret->fn = parse_end; ret->env = NULL; - return (const HParser*)ret; -} - -static HParseResult* parse_nothing() { - // not a 
mistake, this parser always fails - return NULL; -} - -const HParser* h_nothing_p() { - HParser *ret = g_new(HParser, 1); - ret->fn = parse_nothing; ret->env = NULL; - return (const HParser*)ret; -} - -typedef struct { - size_t len; - const HParser **p_array; -} HSequence; - -static HParseResult* parse_sequence(void *env, HParseState *state) { - HSequence *s = (HSequence*)env; - HCountedArray *seq = h_carray_new_sized(state->arena, (s->len > 0) ? s->len : 4); - for (size_t i=0; i<s->len; ++i) { - HParseResult *tmp = h_do_parse(s->p_array[i], state); - // if the interim parse fails, the whole thing fails - if (NULL == tmp) { - return NULL; - } else { - if (tmp->ast) - h_carray_append(seq, (void*)tmp->ast); - } - } - HParsedToken *tok = a_new(HParsedToken, 1); - tok->token_type = TT_SEQUENCE; tok->seq = seq; - return make_result(state, tok); -} - -const HParser* h_sequence(const HParser *p, ...) { - va_list ap; - size_t len = 0; - const HParser *arg; - va_start(ap, p); - do { - len++; - arg = va_arg(ap, const HParser *); - } while (arg); - va_end(ap); - HSequence *s = g_new(HSequence, 1); - s->p_array = g_new(const HParser *, len); - - va_start(ap, p); - s->p_array[0] = p; - for (size_t i = 1; i < len; i++) { - s->p_array[i] = va_arg(ap, const HParser *); - } while (arg); - va_end(ap); - - s->len = len; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_sequence; ret->env = (void*)s; - return ret; -} - -static HParseResult* parse_choice(void *env, HParseState *state) { - HSequence *s = (HSequence*)env; - HInputStream backup = state->input_stream; - for (size_t i=0; i<s->len; ++i) { - if (i != 0) - state->input_stream = backup; - HParseResult *tmp = h_do_parse(s->p_array[i], state); - if (NULL != tmp) - return tmp; - } - // nothing succeeded, so fail - return NULL; -} - -const HParser* h_choice(const HParser* p, ...) 
{ - va_list ap; - size_t len = 0; - HSequence *s = g_new(HSequence, 1); - - const HParser *arg; - va_start(ap, p); - do { - len++; - arg = va_arg(ap, const HParser *); - } while (arg); - va_end(ap); - s->p_array = g_new(const HParser *, len); - - va_start(ap, p); - s->p_array[0] = p; - for (size_t i = 1; i < len; i++) { - s->p_array[i] = va_arg(ap, const HParser *); - } while (arg); - va_end(ap); - - s->len = len; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_choice; ret->env = (void*)s; - return ret; -} typedef struct { const HParser *p1; const HParser *p2; } HTwoParsers; -// return token size in bits... -size_t token_length(HParseResult *pr) { - if (pr) { - return pr->bit_length; - } else { - return 0; - } -} - -static HParseResult* parse_butnot(void *env, HParseState *state) { - HTwoParsers *parsers = (HTwoParsers*)env; - // cache the initial state of the input stream - HInputStream start_state = state->input_stream; - HParseResult *r1 = h_do_parse(parsers->p1, state); - // if p1 failed, bail out early - if (NULL == r1) { - return NULL; - } - // cache the state after parse #1, since we might have to back up to it - HInputStream after_p1_state = state->input_stream; - state->input_stream = start_state; - HParseResult *r2 = h_do_parse(parsers->p2, state); - // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases - state->input_stream = after_p1_state; - // if p2 failed, restore post-p1 state and bail out early - if (NULL == r2) { - return r1; - } - size_t r1len = token_length(r1); - size_t r2len = token_length(r2); - // if both match but p1's text is shorter than than p2's (or the same length), fail - if (r1len <= r2len) { - return NULL; - } else { - return r1; - } -} - -const HParser* h_butnot(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); - env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_butnot; ret->env = (void*)env; - return ret; -} - -static 
HParseResult* parse_difference(void *env, HParseState *state) { - HTwoParsers *parsers = (HTwoParsers*)env; - // cache the initial state of the input stream - HInputStream start_state = state->input_stream; - HParseResult *r1 = h_do_parse(parsers->p1, state); - // if p1 failed, bail out early - if (NULL == r1) { - return NULL; - } - // cache the state after parse #1, since we might have to back up to it - HInputStream after_p1_state = state->input_stream; - state->input_stream = start_state; - HParseResult *r2 = h_do_parse(parsers->p2, state); - // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases - state->input_stream = after_p1_state; - // if p2 failed, restore post-p1 state and bail out early - if (NULL == r2) { - return r1; - } - size_t r1len = token_length(r1); - size_t r2len = token_length(r2); - // if both match but p1's text is shorter than p2's, fail - if (r1len < r2len) { - return NULL; - } else { - return r1; - } -} - -const HParser* h_difference(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); - env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_difference; ret->env = (void*)env; - return ret; -} - -static HParseResult* parse_xor(void *env, HParseState *state) { - HTwoParsers *parsers = (HTwoParsers*)env; - // cache the initial state of the input stream - HInputStream start_state = state->input_stream; - HParseResult *r1 = h_do_parse(parsers->p1, state); - HInputStream after_p1_state = state->input_stream; - // reset input stream, parse again - state->input_stream = start_state; - HParseResult *r2 = h_do_parse(parsers->p2, state); - if (NULL == r1) { - if (NULL != r2) { - return r2; - } else { - return NULL; - } - } else { - if (NULL == r2) { - state->input_stream = after_p1_state; - return r1; - } else { - return NULL; - } - } -} - -const HParser* h_xor(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); - env->p1 = 
p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); - ret->fn = parse_xor; ret->env = (void*)env; - return ret; -} - -typedef struct { - const HParser *p, *sep; - size_t count; - bool min_p; -} HRepeat; - -static HParseResult *parse_many(void* env, HParseState *state) { - HRepeat *env_ = (HRepeat*) env; - HCountedArray *seq = h_carray_new_sized(state->arena, (env_->count > 0 ? env_->count : 4)); - size_t count = 0; - HInputStream bak; - while (env_->min_p || env_->count > count) { - bak = state->input_stream; - if (count > 0) { - HParseResult *sep = h_do_parse(env_->sep, state); - if (!sep) - goto err0; - } - HParseResult *elem = h_do_parse(env_->p, state); - if (!elem) - goto err0; - if (elem->ast) - h_carray_append(seq, (void*)elem->ast); - count++; - } - if (count < env_->count) - goto err; - succ: - ; // necessary for the label to be here... - HParsedToken *res = a_new(HParsedToken, 1); - res->token_type = TT_SEQUENCE; - res->seq = seq; - return make_result(state, res); - err0: - if (count >= env_->count) { - state->input_stream = bak; - goto succ; - } - err: - state->input_stream = bak; - return NULL; -} - -const HParser* h_many(const HParser* p) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); - env->p = p; - env->sep = h_epsilon_p(); - env->count = 0; - env->min_p = true; - res->fn = parse_many; - res->env = env; - return res; -} - -const HParser* h_many1(const HParser* p) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); - env->p = p; - env->sep = h_epsilon_p(); - env->count = 1; - env->min_p = true; - res->fn = parse_many; - res->env = env; - return res; -} - -const HParser* h_repeat_n(const HParser* p, const size_t n) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); - env->p = p; - env->sep = h_epsilon_p(); - env->count = n; - env->min_p = false; - res->fn = parse_many; - res->env = env; - return res; -} - -static HParseResult* parse_ignore(void* env, HParseState* state) { - 
HParseResult *res0 = h_do_parse((HParser*)env, state); - if (!res0) - return NULL; - HParseResult *res = a_new(HParseResult, 1); - res->ast = NULL; - res->arena = state->arena; - return res; -} -const HParser* h_ignore(const HParser* p) { - HParser* ret = g_new(HParser, 1); - ret->fn = parse_ignore; - ret->env = (void*)p; - return ret; -} - -static HParseResult* parse_optional(void* env, HParseState* state) { - HInputStream bak = state->input_stream; - HParseResult *res0 = h_do_parse((HParser*)env, state); - if (res0) - return res0; - state->input_stream = bak; - HParsedToken *ast = a_new(HParsedToken, 1); - ast->token_type = TT_NONE; - return make_result(state, ast); -} - -const HParser* h_optional(const HParser* p) { - assert_message(p->fn != parse_ignore, "Thou shalt ignore an option, rather than the other way 'round."); - HParser *ret = g_new(HParser, 1); - ret->fn = parse_optional; - ret->env = (void*)p; - return ret; -} - -const HParser* h_sepBy(const HParser* p, const HParser* sep) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); - env->p = p; - env->sep = sep; - env->count = 0; - env->min_p = true; - res->fn = parse_many; - res->env = env; - return res; -} - -const HParser* h_sepBy1(const HParser* p, const HParser* sep) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); - env->p = p; - env->sep = sep; - env->count = 1; - env->min_p = true; - res->fn = parse_many; - res->env = env; - return res; -} - -static HParseResult* parse_epsilon(void* env, HParseState* state) { - (void)env; - HParseResult* res = a_new(HParseResult, 1); - res->ast = NULL; - res->arena = state->arena; - return res; -} - -const HParser* h_epsilon_p() { - HParser *res = g_new(HParser, 1); - res->fn = parse_epsilon; - res->env = NULL; - return res; -} - -static HParseResult* parse_indirect(void* env, HParseState* state) { - return h_do_parse(env, state); -} -void h_bind_indirect(HParser* indirect, HParser* inner) { - indirect->env = inner; -} 
- -HParser* h_indirect() { - HParser *res = g_new(HParser, 1); - res->fn = parse_indirect; - res->env = NULL; - return res; -} - -typedef struct { - const HParser *p; - HPredicate pred; -} HAttrBool; - -static HParseResult* parse_attr_bool(void *env, HParseState *state) { - HAttrBool *a = (HAttrBool*)env; - HParseResult *res = h_do_parse(a->p, state); - if (res && res->ast) { - if (a->pred(res)) - return res; - else - return NULL; - } else - return NULL; -} - -const HParser* h_attr_bool(const HParser* p, HPredicate pred) { - HParser *res = g_new(HParser, 1); - res->fn = parse_attr_bool; - HAttrBool *env = g_new(HAttrBool, 1); - env->p = p; - env->pred = pred; - res->env = (void*)env; - return res; -} - -typedef struct { - const HParser *length; - const HParser *value; -} HLenVal; - -static HParseResult* parse_length_value(void *env, HParseState *state) { - HLenVal *lv = (HLenVal*)env; - HParseResult *len = h_do_parse(lv->length, state); - if (!len) - return NULL; - if (len->ast->token_type != TT_UINT) - errx(1, "Length parser must return an unsigned integer"); - HParser epsilon_local = { - .fn = parse_epsilon, - .env = NULL - }; - HRepeat repeat = { - .p = lv->value, - .sep = &epsilon_local, - .count = len->ast->uint, - .min_p = false - }; - return parse_many(&repeat, state); -} - -const HParser* h_length_value(const HParser* length, const HParser* value) { - HParser *res = g_new(HParser, 1); - res->fn = parse_length_value; - HLenVal *env = g_new(HLenVal, 1); - env->length = length; - env->value = value; - res->env = (void*)env; - return res; -} - -static HParseResult *parse_and(void* env, HParseState* state) { - HInputStream bak = state->input_stream; - HParseResult *res = h_do_parse((HParser*)env, state); - state->input_stream = bak; - if (res) - return make_result(state, NULL); - return NULL; -} - -const HParser* h_and(const HParser* p) { - // zero-width postive lookahead - HParser *res = g_new(HParser, 1); - res->env = (void*)p; - res->fn = parse_and; - return 
res; -} - -static HParseResult* parse_not(void* env, HParseState* state) { - HInputStream bak = state->input_stream; - if (h_do_parse((HParser*)env, state)) - return NULL; - else { - state->input_stream = bak; - return make_result(state, NULL); - } -} - -const HParser* h_not(const HParser* p) { - HParser *res = g_new(HParser, 1); - res->fn = parse_not; - res->env = (void*)p; - return res; -} static guint cache_key_hash(gconstpointer key) { return djbhash(key, sizeof(HParserCacheKey)); diff --git a/src/hammer.h b/src/hammer.h index af9db6e38d73f37bd338f42746d01f8de90263d6..95da0eeda926d456ed3cba061a5b7571b4e99c14 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -98,8 +98,12 @@ typedef const HParsedToken* (*HAction)(const HParseResult *p); */ typedef bool (*HPredicate)(HParseResult *p); +typedef struct HParserVtable_ { + HParseResult* (*parse)(void *env, HParseState *state); +} HParserVtable; + typedef struct HParser_ { - HParseResult* (*fn)(void *env, HParseState *state); + const HParserVtable *vtable; void *env; } HParser; diff --git a/src/internal.h b/src/internal.h index 300241fca53c723728ec40d6be4e66f9529089ae..73956328454ab58bcb61c527766184d3f358b416 100644 --- a/src/internal.h +++ b/src/internal.h @@ -148,6 +148,7 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size); HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); + #if 0 #include <malloc.h> #define arena_malloc(a, s) malloc(s) diff --git a/src/parsers/action.c b/src/parsers/action.c new file mode 100644 index 0000000000000000000000000000000000000000..c5b89f9a11154c158359e164a665f0b2ee5d186c --- /dev/null +++ b/src/parsers/action.c @@ -0,0 +1,31 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p; + HAction action; +} HParseAction; + +static HParseResult* parse_action(void *env, HParseState *state) { + HParseAction *a = (HParseAction*)env; + if (a->p && a->action) { + HParseResult *tmp = h_do_parse(a->p, state); + 
if (!tmp) return NULL; /* inner parse failed: propagate the failure instead of handing NULL to the action */ + const HParsedToken *tok = a->action(tmp); + return make_result(state, (HParsedToken*)tok); + } else // either the parser's missing or the action's missing + return NULL; +} + +static const HParserVtable action_vt = { + .parse = parse_action, +}; + +const HParser* h_action(const HParser* p, const HAction a) { + HParser *res = g_new(HParser, 1); + res->vtable = &action_vt; + HParseAction *env = g_new(HParseAction, 1); + env->p = p; + env->action = a; + res->env = (void*)env; + return res; +} diff --git a/src/parsers/and.c b/src/parsers/and.c new file mode 100644 index 0000000000000000000000000000000000000000..fb117fb153a482794d9c4c04bdb0c5caab182af8 --- /dev/null +++ b/src/parsers/and.c @@ -0,0 +1,22 @@ +#include "parser_internal.h" + +static HParseResult *parse_and(void* env, HParseState* state) { + HInputStream bak = state->input_stream; + HParseResult *res = h_do_parse((HParser*)env, state); + state->input_stream = bak; + if (res) + return make_result(state, NULL); + return NULL; +} + +static const HParserVtable and_vt = { + .parse = parse_and, +}; + +const HParser* h_and(const HParser* p) { + // zero-width positive lookahead + HParser *res = g_new(HParser, 1); + res->env = (void*)p; + res->vtable = &and_vt; + return res; +} diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c new file mode 100644 index 0000000000000000000000000000000000000000..bf9e6dcffa34ba8eb251b10bae0a60b8ae444e88 --- /dev/null +++ b/src/parsers/attr_bool.c @@ -0,0 +1,32 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p; + HPredicate pred; +} HAttrBool; + +static HParseResult* parse_attr_bool(void *env, HParseState *state) { + HAttrBool *a = (HAttrBool*)env; + HParseResult *res = h_do_parse(a->p, state); + if (res && res->ast) { + if (a->pred(res)) + return res; + else + return NULL; + } else + return NULL; +} + +static const HParserVtable attr_bool_vt = { + .parse = parse_attr_bool, +}; + +const 
HParser* h_attr_bool(const HParser* p, HPredicate pred) { + HParser *res = g_new(HParser, 1); + res->vtable = &attr_bool_vt; + HAttrBool *env = g_new(HAttrBool, 1); + env->p = p; + env->pred = pred; + res->env = (void*)env; + return res; +} diff --git a/src/parsers/bits.c b/src/parsers/bits.c new file mode 100644 index 0000000000000000000000000000000000000000..32b7a552b8bfb236d44ced877f2bd88cd4dd8401 --- /dev/null +++ b/src/parsers/bits.c @@ -0,0 +1,43 @@ +#include "parser_internal.h" + +struct bits_env { + uint8_t length; + uint8_t signedp; +}; + +static HParseResult* parse_bits(void* env, HParseState *state) { + struct bits_env *env_ = env; + HParsedToken *result = a_new(HParsedToken, 1); + result->token_type = (env_->signedp ? TT_SINT : TT_UINT); + if (env_->signedp) + result->sint = h_read_bits(&state->input_stream, env_->length, true); + else + result->uint = h_read_bits(&state->input_stream, env_->length, false); + return make_result(state, result); +} + +static const HParserVtable bits_vt = { + .parse = parse_bits, +}; +const HParser* h_bits(size_t len, bool sign) { + struct bits_env *env = g_new(struct bits_env, 1); + env->length = len; + env->signedp = sign; + HParser *res = g_new(HParser, 1); + res->vtable = &bits_vt; + res->env = env; + return res; +} + +#define SIZED_BITS(name_pre, len, signedp) \ + const HParser* h_##name_pre##len () { \ + return h_bits(len, signedp); \ + } +SIZED_BITS(int, 8, true) +SIZED_BITS(int, 16, true) +SIZED_BITS(int, 32, true) +SIZED_BITS(int, 64, true) +SIZED_BITS(uint, 8, false) +SIZED_BITS(uint, 16, false) +SIZED_BITS(uint, 32, false) +SIZED_BITS(uint, 64, false) diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c new file mode 100644 index 0000000000000000000000000000000000000000..5026d79d0df540530b6879db8947acb8bbb11e9d --- /dev/null +++ b/src/parsers/butnot.c @@ -0,0 +1,49 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p1; + const HParser *p2; +} HTwoParsers; + + +static HParseResult* 
parse_butnot(void *env, HParseState *state) { + HTwoParsers *parsers = (HTwoParsers*)env; + // cache the initial state of the input stream + HInputStream start_state = state->input_stream; + HParseResult *r1 = h_do_parse(parsers->p1, state); + // if p1 failed, bail out early + if (NULL == r1) { + return NULL; + } + // cache the state after parse #1, since we might have to back up to it + HInputStream after_p1_state = state->input_stream; + state->input_stream = start_state; + HParseResult *r2 = h_do_parse(parsers->p2, state); + // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases + state->input_stream = after_p1_state; + // if p2 failed, restore post-p1 state and bail out early + if (NULL == r2) { + return r1; + } + size_t r1len = token_length(r1); + size_t r2len = token_length(r2); + // if both match but p1's text is shorter than p2's (or the same length), fail + if (r1len <= r2len) { + return NULL; + } else { + return r1; + } +} + +static const HParserVtable butnot_vt = { + .parse = parse_butnot, +}; + +const HParser* h_butnot(const HParser* p1, const HParser* p2) { + HTwoParsers *env = g_new(HTwoParsers, 1); + env->p1 = p1; env->p2 = p2; + HParser *ret = g_new(HParser, 1); + ret->vtable = &butnot_vt; ret->env = (void*)env; + return ret; +} + diff --git a/src/parsers/ch.c b/src/parsers/ch.c new file mode 100644 index 0000000000000000000000000000000000000000..fbfa57edaabdc4b45edf2e19b0a4643fe4ac30ed --- /dev/null +++ b/src/parsers/ch.c @@ -0,0 +1,23 @@ +#include "parser_internal.h" + +static HParseResult* parse_ch(void* env, HParseState *state) { + uint8_t c = (uint8_t)GPOINTER_TO_UINT(env); + uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false); + if (c == r) { + HParsedToken *tok = a_new(HParsedToken, 1); + tok->token_type = TT_UINT; tok->uint = r; + return make_result(state, tok); + } else { + return NULL; + } +} + +static const HParserVtable ch_vt = { + .parse = parse_ch, +}; +const HParser* h_ch(const 
uint8_t c) { + HParser *ret = g_new(HParser, 1); + ret->vtable = &ch_vt; + ret->env = GUINT_TO_POINTER(c); + return (const HParser*)ret; +} diff --git a/src/parsers/charset.c b/src/parsers/charset.c new file mode 100644 index 0000000000000000000000000000000000000000..09542fc52126af07ea4884248c22cf4085952d60 --- /dev/null +++ b/src/parsers/charset.c @@ -0,0 +1,43 @@ +#include "parser_internal.h" + + +static HParseResult* parse_charset(void *env, HParseState *state) { + uint8_t in = h_read_bits(&state->input_stream, 8, false); + HCharset cs = (HCharset)env; + + if (charset_isset(cs, in)) { + HParsedToken *tok = a_new(HParsedToken, 1); + tok->token_type = TT_UINT; tok->uint = in; + return make_result(state, tok); + } else + return NULL; +} + +static const HParserVtable charset_vt = { + .parse = parse_charset, +}; + +const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { + HParser *ret = g_new(HParser, 1); + HCharset cs = new_charset(); + for (int i = 0; i < 256; i++) + charset_set(cs, i, (lower <= i) && (i <= upper)); + ret->vtable = &charset_vt; + ret->env = (void*)cs; + return (const HParser*)ret; +} + + +const HParser* h_not_in(const uint8_t *options, int count) { + HParser *ret = g_new(HParser, 1); + HCharset cs = new_charset(); + for (int i = 0; i < 256; i++) + charset_set(cs, i, 1); + for (int i = 0; i < count; i++) + charset_set(cs, options[i], 0); + + ret->vtable = &charset_vt; + ret->env = (void*)cs; + return (const HParser*)ret; +} + diff --git a/src/parsers/choice.c b/src/parsers/choice.c new file mode 100644 index 0000000000000000000000000000000000000000..082a2e10f343b1b97ddc09ef4958193b6e6adc65 --- /dev/null +++ b/src/parsers/choice.c @@ -0,0 +1,53 @@ +#include "parser_internal.h" + +typedef struct { + size_t len; + const HParser **p_array; +} HSequence; + + +static HParseResult* parse_choice(void *env, HParseState *state) { + HSequence *s = (HSequence*)env; + HInputStream backup = state->input_stream; + for (size_t i=0; i<s->len; ++i) { + 
if (i != 0) + state->input_stream = backup; + HParseResult *tmp = h_do_parse(s->p_array[i], state); + if (NULL != tmp) + return tmp; + } + // nothing succeeded, so fail + return NULL; +} + +static const HParserVtable choice_vt = { + .parse = parse_choice, +}; + +const HParser* h_choice(const HParser* p, ...) { /* variadic: the arguments after p must end with a NULL sentinel; p and every argument before the sentinel become the alternatives, in that order */ + va_list ap; + size_t len = 0; + HSequence *s = g_new(HSequence, 1); + + const HParser *arg; + va_start(ap, p); /* first pass: count p plus the arguments before the NULL sentinel */ + do { + len++; + arg = va_arg(ap, const HParser *); + } while (arg); + va_end(ap); + s->p_array = g_new(const HParser *, len); + + va_start(ap, p); /* second pass: copy the parsers into the array */ + s->p_array[0] = p; + for (size_t i = 1; i < len; i++) { + s->p_array[i] = va_arg(ap, const HParser *); + } + va_end(ap); + + s->len = len; + HParser *ret = g_new(HParser, 1); + ret->vtable = &choice_vt; ret->env = (void*)s; + return ret; +} + diff --git a/src/parsers/difference.c b/src/parsers/difference.c new file mode 100644 index 0000000000000000000000000000000000000000..7f167a003fc3edd69d9ffdadaf97466a5a5bc97a --- /dev/null +++ b/src/parsers/difference.c @@ -0,0 +1,47 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p1; + const HParser *p2; +} HTwoParsers; + +static HParseResult* parse_difference(void *env, HParseState *state) { /* runs p1 and then p2 from the same starting position; NULL when p1 fails or when both match and p1's token_length is smaller than p2's, otherwise p1's result */ + HTwoParsers *parsers = (HTwoParsers*)env; + // cache the initial state of the input stream + HInputStream start_state = state->input_stream; + HParseResult *r1 = h_do_parse(parsers->p1, state); + // if p1 failed, bail out early + if (NULL == r1) { + return NULL; + } + // cache the state after parse #1, since we might have to back up to it + HInputStream after_p1_state = state->input_stream; + state->input_stream = start_state; + HParseResult *r2 = h_do_parse(parsers->p2, state); + // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases + state->input_stream = after_p1_state; + // if p2 failed, restore post-p1 state and bail out early + if (NULL == r2) { + return r1; + } + size_t r1len = 
token_length(r1); + size_t r2len = token_length(r2); + // if both match but p1's text is shorter than p2's, fail + if (r1len < r2len) { + return NULL; + } else { + return r1; + } +} + +static const HParserVtable difference_vt = { + .parse = parse_difference, +}; + +const HParser* h_difference(const HParser* p1, const HParser* p2) { /* pairs p1 and p2 in a heap-allocated HTwoParsers used as the parser's env */ + HTwoParsers *env = g_new(HTwoParsers, 1); + env->p1 = p1; env->p2 = p2; + HParser *ret = g_new(HParser, 1); + ret->vtable = &difference_vt; ret->env = (void*)env; + return ret; +} diff --git a/src/parsers/end.c b/src/parsers/end.c new file mode 100644 index 0000000000000000000000000000000000000000..8e427bd50eb238b2ed5960693c5b72aa30a306c6 --- /dev/null +++ b/src/parsers/end.c @@ -0,0 +1,21 @@ +#include "parser_internal.h" + +static HParseResult* parse_end(void *env, HParseState *state) { /* succeeds with a NULL ast only when the stream index equals its length; env is not used */ + if (state->input_stream.index == state->input_stream.length) { + HParseResult *ret = a_new(HParseResult, 1); + ret->ast = NULL; ret->arena = state->arena; /* arena memory is not zeroed, so set every field that is read later */ + return ret; + } else { + return NULL; + } +} + +static const HParserVtable end_vt = { + .parse = parse_end, +}; + +const HParser* h_end_p() { + HParser *ret = g_new(HParser, 1); + ret->vtable = &end_vt; ret->env = NULL; + return (const HParser*)ret; +} diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c new file mode 100644 index 0000000000000000000000000000000000000000..dc6d7a6db8b294dfab3b636ee72af5d7b4e7e646 --- /dev/null +++ b/src/parsers/epsilon.c @@ -0,0 +1,22 @@ +#include "parser_internal.h" + +static HParseResult* parse_epsilon(void* env, HParseState* state) { /* always succeeds with a NULL ast and reads nothing from the stream */ + (void)env; + HParseResult* res = a_new(HParseResult, 1); + res->ast = NULL; + res->arena = state->arena; + return res; +} + +static const HParserVtable epsilon_vt = { + .parse = parse_epsilon, +}; + +static const HParser epsilon_p = { + .vtable = &epsilon_vt, + .env = NULL +}; + +const HParser* h_epsilon_p() { /* returns the single static instance instead of allocating */ + return &epsilon_p; +} diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c new file mode 100644 index 
0000000000000000000000000000000000000000..5972548360cb3bbbc341484d5caa17dd6df89dfc --- /dev/null +++ b/src/parsers/ignore.c @@ -0,0 +1,22 @@ +#include "parser_internal.h" + +static HParseResult* parse_ignore(void* env, HParseState* state) { /* env is the wrapped parser; on success its token is dropped and a fresh result with a NULL ast is returned, on failure NULL */ + HParseResult *res0 = h_do_parse((HParser*)env, state); + if (!res0) + return NULL; + HParseResult *res = a_new(HParseResult, 1); + res->ast = NULL; + res->arena = state->arena; + return res; +} + +static const HParserVtable ignore_vt = { + .parse = parse_ignore, +}; + +const HParser* h_ignore(const HParser* p) { + HParser* ret = g_new(HParser, 1); + ret->vtable = &ignore_vt; + ret->env = (void*)p; + return ret; +} diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c new file mode 100644 index 0000000000000000000000000000000000000000..43657c3bbdfe2c04dced5b3eae1bda2fef3ab26e --- /dev/null +++ b/src/parsers/indirect.c @@ -0,0 +1,20 @@ +#include "parser_internal.h" + +static HParseResult* parse_indirect(void* env, HParseState* state) { /* env is the bound inner parser; the parse is forwarded to it unchanged */ + return h_do_parse(env, state); +} +static const HParserVtable indirect_vt = { + .parse = parse_indirect, +}; + +void h_bind_indirect(HParser* indirect, HParser* inner) { /* points an indirect parser at inner; asserts that indirect was created by h_indirect */ + assert_message(indirect->vtable == &indirect_vt, "You can only bind an indirect parser"); + indirect->env = inner; +} + +HParser* h_indirect() { /* creates an unbound indirect parser: env stays NULL until h_bind_indirect is called */ + HParser *res = g_new(HParser, 1); + res->vtable = &indirect_vt; + res->env = NULL; + return res; +} diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c new file mode 100644 index 0000000000000000000000000000000000000000..9fb1c7edbe5d69e23a5a07d8ba10942e89fc69da --- /dev/null +++ b/src/parsers/int_range.c @@ -0,0 +1,51 @@ +#include "parser_internal.h" + + +typedef struct { + const HParser *p; + int64_t lower; + int64_t upper; +} HRange; + +static HParseResult* parse_int_range(void *env, HParseState *state) { /* runs r_env->p and keeps its result only for a TT_SINT or TT_UINT token whose value lies in [lower, upper]; every other outcome is NULL */ + HRange *r_env = (HRange*)env; + HParseResult *ret = h_do_parse(r_env->p, state); + if (!ret || !ret->ast) + return NULL; + switch(ret->ast->token_type) { + 
case TT_SINT: + if (r_env->lower <= ret->ast->sint && r_env->upper >= ret->ast->sint) + return ret; + else + return NULL; + case TT_UINT: + if ((uint64_t)r_env->lower <= ret->ast->uint && (uint64_t)r_env->upper >= ret->ast->uint) /* bounds are cast to uint64_t for the unsigned comparison */ + return ret; + else + return NULL; + default: + return NULL; + } +} + +static const HParserVtable int_range_vt = { + .parse = parse_int_range, +}; + +const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { /* wraps p so its result is accepted only within [lower, upper]; neither p nor the bounds are validated here yet (see the TODOs) */ + // p must be an integer parser, which means it's using parse_bits + // TODO: re-add this check + //assert_message(p->vtable == &bits_vt, "int_range requires an integer parser"); + + // and regardless, the bounds need to fit in the parser in question + // TODO: check this as well. + + HRange *r_env = g_new(HRange, 1); + r_env->p = p; + r_env->lower = lower; + r_env->upper = upper; + HParser *ret = g_new(HParser, 1); + ret->vtable = &int_range_vt; + ret->env = (void*)r_env; + return ret; +} diff --git a/src/parsers/many.c b/src/parsers/many.c new file mode 100644 index 0000000000000000000000000000000000000000..f18be26a0f9c0a0b82fa0586940bcc62fa8531de --- /dev/null +++ b/src/parsers/many.c @@ -0,0 +1,145 @@ +#include "parser_internal.h" + +// TODO: split this up. +typedef struct { + const HParser *p, *sep; + size_t count; + bool min_p; +} HRepeat; + +static HParseResult *parse_many(void* env, HParseState *state) { /* min_p true: match p (with sep between elements) until one fails, requiring at least count elements; min_p false: stop after count elements */ + HRepeat *env_ = (HRepeat*) env; + HCountedArray *seq = h_carray_new_sized(state->arena, (env_->count > 0 ? env_->count : 4)); + size_t count = 0; + HInputStream bak; + while (env_->min_p || env_->count > count) { + bak = state->input_stream; /* saved before each sep+element so a failed iteration can be undone */ + if (count > 0) { + HParseResult *sep = h_do_parse(env_->sep, state); + if (!sep) + goto err0; + } + HParseResult *elem = h_do_parse(env_->p, state); + if (!elem) + goto err0; + if (elem->ast) + h_carray_append(seq, (void*)elem->ast); /* elements with a NULL ast are counted but not stored */ + count++; + } + if (count < env_->count) + goto err; + succ: + ; // necessary for the label to be here... 
+ HParsedToken *res = a_new(HParsedToken, 1); + res->token_type = TT_SEQUENCE; + res->seq = seq; + return make_result(state, res); + err0: + if (count >= env_->count) { /* enough elements were collected: undo the failed iteration and succeed */ + state->input_stream = bak; + goto succ; + } + err: + state->input_stream = bak; + return NULL; +} + +static const HParserVtable many_vt = { + .parse = parse_many, +}; + +const HParser* h_many(const HParser* p) { /* zero or more p; the separator is the epsilon parser */ + HParser *res = g_new(HParser, 1); + HRepeat *env = g_new(HRepeat, 1); + env->p = p; + env->sep = h_epsilon_p(); + env->count = 0; + env->min_p = true; + res->vtable = &many_vt; + res->env = env; + return res; +} + +const HParser* h_many1(const HParser* p) { /* one or more p; the separator is the epsilon parser */ + HParser *res = g_new(HParser, 1); + HRepeat *env = g_new(HRepeat, 1); + env->p = p; + env->sep = h_epsilon_p(); + env->count = 1; + env->min_p = true; + res->vtable = &many_vt; + res->env = env; + return res; +} + +const HParser* h_repeat_n(const HParser* p, const size_t n) { /* exactly n repetitions of p (min_p is false, so parsing stops after n) */ + HParser *res = g_new(HParser, 1); + HRepeat *env = g_new(HRepeat, 1); + env->p = p; + env->sep = h_epsilon_p(); + env->count = n; + env->min_p = false; + res->vtable = &many_vt; + res->env = env; + return res; +} + +const HParser* h_sepBy(const HParser* p, const HParser* sep) { /* zero or more p, with sep parsed between consecutive elements */ + HParser *res = g_new(HParser, 1); + HRepeat *env = g_new(HRepeat, 1); + env->p = p; + env->sep = sep; + env->count = 0; + env->min_p = true; + res->vtable = &many_vt; + res->env = env; + return res; +} + +const HParser* h_sepBy1(const HParser* p, const HParser* sep) { /* one or more p, with sep parsed between consecutive elements */ + HParser *res = g_new(HParser, 1); + HRepeat *env = g_new(HRepeat, 1); + env->p = p; + env->sep = sep; + env->count = 1; + env->min_p = true; + res->vtable = &many_vt; + res->env = env; + return res; +} + +typedef struct { + const HParser *length; + const HParser *value; +} HLenVal; + +static HParseResult* parse_length_value(void *env, HParseState *state) { /* parses a length with lv->length, then exactly that many lv->value elements via parse_many; NULL when the length parser fails */ + HLenVal *lv = (HLenVal*)env; + HParseResult *len = h_do_parse(lv->length, state); + if (!len) + return NULL; + if (!len->ast || len->ast->token_type != TT_UINT) /* a result with a NULL ast is not an unsigned integer either */ + errx(1, 
"Length parser must return an unsigned integer"); + // TODO: allocate this using public functions + HRepeat repeat = { + .p = lv->value, + .sep = h_epsilon_p(), + .count = len->ast->uint, + .min_p = false + }; + return parse_many(&repeat, state); +} + +static const HParserVtable length_value_vt = { + .parse = parse_length_value, +}; + +const HParser* h_length_value(const HParser* length, const HParser* value) { + HParser *res = g_new(HParser, 1); + res->vtable = &length_value_vt; + HLenVal *env = g_new(HLenVal, 1); + env->length = length; + env->value = value; + res->env = (void*)env; + return res; +} diff --git a/src/parsers/not.c b/src/parsers/not.c new file mode 100644 index 0000000000000000000000000000000000000000..1c46b6dc394f186977d04eef0062b4fbaa5c1ce9 --- /dev/null +++ b/src/parsers/not.c @@ -0,0 +1,22 @@ +#include "parser_internal.h" + +static HParseResult* parse_not(void* env, HParseState* state) { /* succeeds with a NULL ast, stream restored, only when the wrapped parser fails; NULL when it matches */ + HInputStream bak = state->input_stream; + if (h_do_parse((HParser*)env, state)) + return NULL; + else { + state->input_stream = bak; + return make_result(state, NULL); + } +} + +static const HParserVtable not_vt = { + .parse = parse_not, +}; + +const HParser* h_not(const HParser* p) { + HParser *res = g_new(HParser, 1); + res->vtable = &not_vt; + res->env = (void*)p; + return res; +} diff --git a/src/parsers/nothing.c b/src/parsers/nothing.c new file mode 100644 index 0000000000000000000000000000000000000000..9f81c02841ad298a6d714206961dfcc8edde0ab5 --- /dev/null +++ b/src/parsers/nothing.c @@ -0,0 +1,17 @@ +#include "parser_internal.h" + + +static HParseResult* parse_nothing() { + // not a mistake, this parser always fails + return NULL; +} + +static const HParserVtable nothing_vt = { + .parse = parse_nothing, +}; + +const HParser* h_nothing_p() { + HParser *ret = g_new(HParser, 1); + ret->vtable = &nothing_vt; ret->env = NULL; + return (const HParser*)ret; +} diff --git a/src/parsers/optional.c b/src/parsers/optional.c new file mode 100644 index 
0000000000000000000000000000000000000000..c0845765e5206283a2b465d0f668695a5acf9fae --- /dev/null +++ b/src/parsers/optional.c @@ -0,0 +1,26 @@ +#include "parser_internal.h" + +static HParseResult* parse_optional(void* env, HParseState* state) { /* returns the wrapped parser's result when it matches; otherwise restores the stream and succeeds with a TT_NONE token */ + HInputStream bak = state->input_stream; + HParseResult *res0 = h_do_parse((HParser*)env, state); + if (res0) + return res0; + state->input_stream = bak; + HParsedToken *ast = a_new(HParsedToken, 1); + ast->token_type = TT_NONE; + return make_result(state, ast); +} + +static const HParserVtable optional_vt = { + .parse = parse_optional, +}; + +const HParser* h_optional(const HParser* p) { + // TODO: re-add this + //assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round."); + HParser *ret = g_new(HParser, 1); + ret->vtable = &optional_vt; + ret->env = (void*)p; + return ret; +} + diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..d8b36514ce76a0f3ce6f464c2bdfff5df8713cf4 --- /dev/null +++ b/src/parsers/parser_internal.h @@ -0,0 +1,27 @@ +#ifndef HAMMER_PARSE_INTERNAL__H +#define HAMMER_PARSE_INTERNAL__H +#include "../hammer.h" +#include "../internal.h" + +#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) /* allocates count objects of typ from the given arena; the memory is not zeroed here */ +#define a_new(typ, count) a_new_(state->arena, typ, count) /* shorthand that expects a variable named state in the calling scope */ +// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out. + +static inline HParseResult* make_result(HParseState *state, HParsedToken *tok) { /* wraps tok (which may be NULL) in an arena-allocated result tagged with state->arena */ + HParseResult *ret = a_new(HParseResult, 1); + ret->ast = tok; + ret->arena = state->arena; + return ret; +} + +// return token size in bits... 
+static inline size_t token_length(HParseResult *pr) { /* bit_length of the result, or 0 for a NULL result */ + if (pr) { + return pr->bit_length; + } else { + return 0; + } +} + + +#endif // HAMMER_PARSE_INTERNAL__H diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c new file mode 100644 index 0000000000000000000000000000000000000000..54196da103aa9ac288c8aad70ebda8c590d1bb6f --- /dev/null +++ b/src/parsers/sequence.c @@ -0,0 +1,54 @@ +#include "parser_internal.h" + +typedef struct { + size_t len; + const HParser **p_array; +} HSequence; + +static HParseResult* parse_sequence(void *env, HParseState *state) { /* runs every parser in p_array in order; NULL as soon as one fails, otherwise a TT_SEQUENCE token of the non-NULL asts */ + HSequence *s = (HSequence*)env; + HCountedArray *seq = h_carray_new_sized(state->arena, (s->len > 0) ? s->len : 4); + for (size_t i=0; i<s->len; ++i) { + HParseResult *tmp = h_do_parse(s->p_array[i], state); + // if the interim parse fails, the whole thing fails + if (NULL == tmp) { + return NULL; + } else { + if (tmp->ast) + h_carray_append(seq, (void*)tmp->ast); /* results with a NULL ast leave no entry in the sequence */ + } + } + HParsedToken *tok = a_new(HParsedToken, 1); + tok->token_type = TT_SEQUENCE; tok->seq = seq; + return make_result(state, tok); +} + +static const HParserVtable sequence_vt = { + .parse = parse_sequence, +}; + +const HParser* h_sequence(const HParser *p, ...) 
{ /* variadic: the arguments after p must end with a NULL sentinel; p and every argument before the sentinel are stored in order */ + va_list ap; + size_t len = 0; + const HParser *arg; + va_start(ap, p); /* first pass: count p plus the arguments before the NULL sentinel */ + do { + len++; + arg = va_arg(ap, const HParser *); + } while (arg); + va_end(ap); + HSequence *s = g_new(HSequence, 1); + s->p_array = g_new(const HParser *, len); + + va_start(ap, p); /* second pass: copy the parsers into the array */ + s->p_array[0] = p; + for (size_t i = 1; i < len; i++) { + s->p_array[i] = va_arg(ap, const HParser *); + } + va_end(ap); + + s->len = len; + HParser *ret = g_new(HParser, 1); + ret->vtable = &sequence_vt; ret->env = (void*)s; + return ret; +} diff --git a/src/parsers/template.c b/src/parsers/template.c new file mode 100644 index 0000000000000000000000000000000000000000..bfe45bc4833a1e7a623d67333e5ca41296c8a431 --- /dev/null +++ b/src/parsers/template.c @@ -0,0 +1 @@ +#include "parser_internal.h" diff --git a/src/parsers/token.c b/src/parsers/token.c new file mode 100644 index 0000000000000000000000000000000000000000..b3be207c3a1bea8b81bba908b0fc31780ef5232b --- /dev/null +++ b/src/parsers/token.c @@ -0,0 +1,34 @@ +#include "parser_internal.h" + +typedef struct { + uint8_t *str; + size_t len; /* same width as h_token's len parameter, so long tokens are not truncated */ +} HToken; + + + +static HParseResult* parse_token(void *env, HParseState *state) { /* reads t->len bytes and compares them with t->str; NULL on the first mismatch, otherwise a TT_BYTES token that points at t->str */ + HToken *t = (HToken*)env; + for (size_t i=0; i<t->len; ++i) { + uint8_t chr = (uint8_t)h_read_bits(&state->input_stream, 8, false); + if (t->str[i] != chr) { + return NULL; + } + } + HParsedToken *tok = a_new(HParsedToken, 1); + tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len; + return make_result(state, tok); +} + +const HParserVtable token_vt = { + .parse = parse_token, +}; + +const HParser* h_token(const uint8_t *str, const size_t len) { /* stores the str pointer without copying the bytes, so the buffer must stay valid while the parser is in use */ + HToken *t = g_new(HToken, 1); + t->str = (uint8_t*)str; t->len = len; + HParser *ret = g_new(HParser, 1); + ret->vtable = &token_vt; + ret->env = t; + return (const HParser*)ret; +} diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c new file mode 100644 index 
0000000000000000000000000000000000000000..99d153b715b92ea4664e8475bcc4cc95cbc044b0 --- /dev/null +++ b/src/parsers/unimplemented.c @@ -0,0 +1,26 @@ +#include "parser_internal.h" + +static HParseResult* parse_unimplemented(void* env, HParseState *state) { + (void) env; + (void) state; + static HParsedToken token = { + .token_type = TT_ERR + }; + static HParseResult result = { + .ast = &token + }; + return &result; +} + +static const HParserVtable unimplemented_vt = { + .parse = parse_unimplemented, +}; + +static HParser unimplemented = { + .vtable = &unimplemented_vt, + .env = NULL +}; + +const HParser* h_unimplemented() { + return &unimplemented; +} diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c new file mode 100644 index 0000000000000000000000000000000000000000..4d2ec179f15bc368a4d02c79b5c21c695434ffe6 --- /dev/null +++ b/src/parsers/whitespace.c @@ -0,0 +1,26 @@ +#include <ctype.h> +#include "parser_internal.h" + +static HParseResult* parse_whitespace(void* env, HParseState *state) { + char c; + HInputStream bak; + do { + bak = state->input_stream; + c = h_read_bits(&state->input_stream, 8, false); + if (state->input_stream.overrun) + return NULL; + } while (isspace(c)); + state->input_stream = bak; + return h_do_parse((HParser*)env, state); +} + +static const HParserVtable whitespace_vt = { + .parse = parse_whitespace, +}; + +const HParser* h_whitespace(const HParser* p) { + HParser *ret = g_new(HParser, 1); + ret->vtable = &whitespace_vt; + ret->env = (void*)p; + return ret; +} diff --git a/src/parsers/xor.c b/src/parsers/xor.c new file mode 100644 index 0000000000000000000000000000000000000000..9ffd51ef12fe081bbde8d5d0b4f5d9f68e2ca7a7 --- /dev/null +++ b/src/parsers/xor.c @@ -0,0 +1,44 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p1; + const HParser *p2; +} HTwoParsers; + + +static HParseResult* parse_xor(void *env, HParseState *state) { + HTwoParsers *parsers = (HTwoParsers*)env; + // cache the initial state of the 
input stream + HInputStream start_state = state->input_stream; + HParseResult *r1 = h_do_parse(parsers->p1, state); + HInputStream after_p1_state = state->input_stream; + // reset input stream, parse again + state->input_stream = start_state; + HParseResult *r2 = h_do_parse(parsers->p2, state); /* p2 is always run, whether or not p1 matched */ + if (NULL == r1) { + if (NULL != r2) { + return r2; /* only p2 matched; the stream is left where p2 finished */ + } else { + return NULL; + } + } else { + if (NULL == r2) { + state->input_stream = after_p1_state; /* only p1 matched: put the stream back where p1 finished */ + return r1; + } else { + return NULL; /* both matched, so the exclusive-or fails */ + } + } +} + +static const HParserVtable xor_vt = { + .parse = parse_xor, +}; + +const HParser* h_xor(const HParser* p1, const HParser* p2) { /* pairs p1 and p2 in a heap-allocated HTwoParsers used as the parser's env */ + HTwoParsers *env = g_new(HTwoParsers, 1); + env->p1 = p1; env->p2 = p2; + HParser *ret = g_new(HParser, 1); + ret->vtable = &xor_vt; ret->env = (void*)env; + return ret; +}