From f7e7f92408f9b43eb1088f3057ed8f0cfee11125 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" <clonearmy@gmail.com> Date: Tue, 1 May 2012 03:21:14 +0100 Subject: [PATCH] Parsers for sequence and choice are done. Got rid of join_action, we didn't need it. parse_result_t now contains a single parsed_token_t (and whatever we decide to jam into it later), so that sequence makes sense. --- src/hammer.c | 141 +++++++++++++++++++++------------------------------ src/hammer.h | 28 +++++++--- 2 files changed, 80 insertions(+), 89 deletions(-) diff --git a/src/hammer.c b/src/hammer.c index b0671867..e5d9bc32 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -75,7 +75,7 @@ parse_result_t* get_cached(parse_state_t *ps, const parser_t *p) { } } -int put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) { +void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) { gpointer t = g_hash_table_lookup(ps->cache, p); if (NULL != t) { g_hash_table_insert(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, ps->input_stream.length)), (gpointer)cached); @@ -89,9 +89,9 @@ int put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) { parse_result_t* do_parse(const parser_t* parser, parse_state_t *state); /* Helper function, since these lines appear in every parser */ -inline parse_result_t* make_result(GSequence *ast) { +inline parse_result_t* make_result(parsed_token_t *tok) { parse_result_t *ret = g_new(parse_result_t, 1); - ret->ast = ast; + ret->ast = tok; return ret; } @@ -109,10 +109,8 @@ static parse_result_t* parse_token(void *env, parse_state_t *state) { } } parsed_token_t *tok = g_new(parsed_token_t, 1); - tok->token = t->str; tok->len = t->len; - GSequence *ast = g_sequence_new(NULL); - g_sequence_append(ast, tok); - return make_result(ast); + tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len; + return make_result(tok); } const parser_t* token(const uint8_t *str, const size_t len) { @@ -128,10 +126,8 @@ static parse_result_t* parse_ch(void* env, parse_state_t *state) { uint8_t r = (uint8_t)read_bits(&state->input_stream, 8, false); if (c == r) { parsed_token_t *tok = g_new(parsed_token_t, 1); - tok->token = GUINT_TO_POINTER(c); tok->len = 1; - GSequence *ast = g_sequence_new(NULL); - g_sequence_append(ast, tok); - return make_result(ast); + tok->token_type = TT_UINT; tok->uint = r; + return make_result(tok); } else { return NULL; } @@ -153,10 +149,8 @@ static parse_result_t* parse_range(void* env, parse_state_t *state) { uint8_t r = (uint8_t)read_bits(&state->input_stream, 8, false); if (range->lower <= r && range->upper >= r) { parsed_token_t *tok = g_new(parsed_token_t, 1); - tok->token = GUINT_TO_POINTER(r); tok->len = 1; - GSequence *ast = g_sequence_new(NULL); - g_sequence_append(ast, tok); - return make_result(ast); + tok->token_type = TT_UINT; tok->uint = r; + return make_result(tok); } else { return NULL; } @@ -172,67 +166,6 @@ const parser_t* range(const uint8_t lower, const uint8_t upper) { const parser_t* whitespace(const parser_t* p) { return NULL; } //const parser_t* action(const parser_t* p, /* fptr to action on AST */) { return NULL; } -typedef struct { - parser_t *parser; - uint8_t *sep; - size_t len; -} join_t; - -void join_collect(gpointer tok, gpointer ret) { - size_t sz = GPOINTER_TO_SIZE(ret); - sz += ((parsed_token_t*)tok)->len; - ret = GSIZE_TO_POINTER(sz); -} - -static parse_result_t* parse_join(void *env, parse_state_t *state) { - join_t *j = (join_t*)env; - parse_result_t *result = do_parse(j->parser, state); - size_t num_tokens = g_sequence_get_length((GSequence*)result->ast); - if (0 < num_tokens) { - gpointer sz = GSIZE_TO_POINTER(0); - // aggregate length of tokens in AST - g_sequence_foreach((GSequence*)result->ast, join_collect, sz); - // plus aggregate length of all separators - size_t ret_len = GPOINTER_TO_SIZE(sz) + (num_tokens - 1) * j->len; - gpointer ret_str = g_malloc(ret_len); - // first the first token ... - GSequenceIter *it = g_sequence_get_begin_iter((GSequence*)result->ast); - parsed_token_t *tok = g_sequence_get(it); - memcpy(ret_str, tok->token, tok->len); - ret_str += tok->len; - // if there was only one token, don't enter the while loop - it = g_sequence_iter_next(it); - while (!g_sequence_iter_is_end(it)) { - // add a separator - memcpy(ret_str, j->sep, j->len); - ret_str += j->len; - // then the next token - tok = g_sequence_get(it); - memcpy(ret_str, tok->token, tok->len); - // finally, advance the pointer and the iterator - ret_str += tok->len; - it = g_sequence_iter_next(it); - } - // reset the return pointer and construct the return parse_result_t - ret_str -= ret_len; - parsed_token_t *ret_tok = g_new(parsed_token_t, 1); - ret_tok->token = ret_str; ret_tok->len = ret_len; - GSequence *ast = g_sequence_new(NULL); - g_sequence_append(ast, tok); - return make_result(ast); - } else { - return NULL; - } -} - -const parser_t* join_action(const parser_t* p, const uint8_t *sep, const size_t len) { - join_t *j = g_new(join_t, 1); - j->parser = (parser_t*)p; j->sep = (uint8_t*)sep; j->len = len; - parser_t *ret = g_new(parser_t, 1); - ret->fn = parse_join; ret->env = (void*)j; - return (const parser_t*)ret; -} - const parser_t* left_factor_action(const parser_t* p) { return NULL; } static parse_result_t* parse_negate(void *env, parse_state_t *state) { @@ -241,10 +174,8 @@ static parse_result_t* parse_negate(void *env, parse_state_t *state) { if (NULL == result) { uint8_t r = (uint8_t)read_bits(&state->input_stream, 8, false); parsed_token_t *tok = g_new(parsed_token_t, 1); - tok->token = GUINT_TO_POINTER(r); tok->len = 1; - GSequence *ast = g_sequence_new(NULL); - g_sequence_append(ast, tok); - return make_result(ast); + tok->token_type = TT_UINT; tok->uint = r; + return make_result(tok); } else { return NULL; } @@ -276,8 +207,54 @@ const parser_t* nothing_p() { // not a mistake, this parser always fails return NULL; } -const parser_t* sequence(const parser_t* p_array[]) { return NULL; } -const parser_t* choice(const parser_t* p_array[]) { return NULL; } + +typedef struct { + size_t len; + const parser_t **p_array; +} sequence_t; + +static parse_result_t* parse_sequence(void *env, parse_state_t *state) { + sequence_t *s = (sequence_t*)env; + GSequence *seq = g_sequence_new(NULL); + for (int i=0; i<s->len; ++i) { + parse_result_t *tmp = do_parse(s->p_array[i], state); + g_sequence_append(seq, tmp); + } + parsed_token_t *tok = g_new(parsed_token_t, 1); + tok->token_type = TT_SEQUENCE; tok->seq = seq; + return make_result(tok); +} + +const parser_t* sequence(const parser_t* p_array[]) { + size_t len = sizeof(p_array) / sizeof(parser_t*); + sequence_t *s = g_new(sequence_t, 1); + s->p_array = (const parser_t**)p_array; s->len = len; + parser_t *ret = g_new(parser_t, 1); + ret->fn = parse_sequence; ret->env = (void*)s; + return ret; +} + +static parse_result_t* parse_choice(void *env, parse_state_t *state) { + sequence_t *s = (sequence_t*)env; + GSequence *seq = g_sequence_new(NULL); + for (int i=0; i<s->len; ++i) { + parse_result_t *tmp = do_parse(s->p_array[i], state); + if (NULL != tmp) + return tmp; + } + // nothing succeeded, so fail + return NULL; +} + +const parser_t* choice(const parser_t* p_array[]) { + size_t len = sizeof(p_array) / sizeof(parser_t*); + sequence_t *s = g_new(sequence_t, 1); + s->p_array = (const parser_t**)p_array; s->len = len; + parser_t *ret = g_new(parser_t, 1); + ret->fn = parse_choice; ret->env = (void*)s; + return ret; +} + const parser_t* butnot(const parser_t* p1, const parser_t* p2) { return NULL; } const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; } const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; } diff --git a/src/hammer.h b/src/hammer.h index add5c743..f2e46add 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -54,13 +54,30 @@ typedef struct parse_state { input_stream_t input_stream; } parse_state_t; +typedef enum token_type { + TT_NONE, + TT_BYTES, + TT_SINT, + TT_UINT, + TT_SEQUENCE, + TT_MAX +} token_type_t; + typedef struct parsed_token { - const uint8_t *token; - size_t len; + token_type_t token_type; + union { + struct { + const uint8_t *token; + size_t len; + } bytes; + int64_t sint; + uint64_t uint; + GSequence *seq; + }; } parsed_token_t; typedef struct parse_result { - const GSequence *ast; + const parsed_token_t *ast; } parse_result_t; typedef struct parser { @@ -85,9 +102,6 @@ const parser_t* whitespace(const parser_t* p); /* Given another parser, p, and a function f, returns a parser that applies p, then applies f to everything in the AST of p's result. */ //const parser_t* action(const parser_t* p, /* fptr to action on AST */); -/* Given another parser, p, and a separator, sep, returns a parser that applies p, then joins everything in the AST of p's result with sep. For example, if the AST of p's result is {"dog", "cat", "hedgehog"} and sep is "|", the AST of this parser's result will be {"dog|cat|hedgehog"}. */ -const parser_t* join_action(const parser_t* p, const uint8_t *sep, const size_t len); - const parser_t* left_factor_action(const parser_t* p); /* Given a single-character parser, p, returns a single-character parser that will parse any character *other* than the character p would parse. */ @@ -99,7 +113,7 @@ const parser_t* end_p(); /* This parser always fails. */ const parser_t* nothing_p(); - +/* Given an array of parsers, p_array, apply each parser in order. The parse succeeds only if all parsers succeed. */ const parser_t* sequence(const parser_t* p_array[]); const parser_t* choice(const parser_t* p_array[]); const parser_t* butnot(const parser_t* p1, const parser_t* p2); -- GitLab