diff --git a/src/hammer.h b/src/hammer.h index ecb606545e56dd2e3bf8e17041d20bba1f4df02a..e337b49374f07db7745a28aa218149f42236a074 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -110,15 +110,12 @@ typedef const HParsedToken* (*HAction)(const HParseResult *p); */ typedef bool (*HPredicate)(HParseResult *p); -typedef struct HParserVtable_ { - HParseResult* (*parse)(void *env, HParseState *state); - bool (*isValidRegular)(void *env); - bool (*isValidCF)(void *env); -} HParserVtable; +typedef struct HParserVtable_ HParserVtable; typedef struct HParser_ { const HParserVtable *vtable; void *env; + void *data; /* e.g., parse tables */ } HParser; // {{{ Stuff for benchmarking diff --git a/src/internal.h b/src/internal.h index 67ecb22e4cbf227a8334479ec898cbab83e6d778..7e03657374f86116543e1b4d807324dad69a30b5 100644 --- a/src/internal.h +++ b/src/internal.h @@ -18,6 +18,7 @@ #ifndef HAMMER_INTERNAL__H #define HAMMER_INTERNAL__H #include <err.h> +#include <string.h> #include "hammer.h" #ifdef NDEBUG @@ -70,6 +71,25 @@ typedef struct HSlist_ { struct HArena_ *arena; } HSlist; +typedef unsigned int *HCharset; + +static inline HCharset new_charset(HAllocator* mm__) { + HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int)); + memset(cs, 0, 256); + return cs; +} + +static inline int charset_isset(HCharset cs, uint8_t pos) { + return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); +} + +static inline void charset_set(HCharset cs, uint8_t pos, int val) { + cs[pos / sizeof(*cs)] = + val + ? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) + : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); +} + typedef unsigned int HHashValue; typedef HHashValue (*HHashFunc)(const void* key); typedef bool (*HEqualFunc)(const void* key1, const void* key2); @@ -222,6 +242,34 @@ int h_hashtable_present(HHashTable* ht, void* key); void h_hashtable_del(HHashTable* ht, void* key); void h_hashtable_free(HHashTable* ht); +typedef struct HCFSequence_ HCFSequence; + +typedef struct HCFChoice_ { + enum { + HCF_END, + HCF_CHOICE, + HCF_CHARSET, + HCF_CHAR + } type; + union { + HCharset charset; + HCFSequence** seq; + uint8_t chr; + }; + HAction action; +} HCFChoice; + +struct HCFSequence_ { + HCFChoice **items; // last one is NULL +}; + +struct HParserVtable_ { + HParseResult* (*parse)(void *env, HParseState *state); + bool (*isValidRegular)(void *env); + bool (*isValidCF)(void *env); + HCFChoice* (*desugar)(HAllocator *mm__, void *env); +}; + #if 0 #include <stdlib.h> #define h_arena_malloc(a, s) malloc(s) diff --git a/src/parsers/action.c b/src/parsers/action.c index 33e33bc38844fbdb114605cae12368fa3cc96190..aec422180d5525ce7bb09ed81eef887a98c79735 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -19,6 +19,21 @@ static HParseResult* parse_action(void *env, HParseState *state) { return NULL; } +static HCFChoice* desugar_action(HAllocator *mm__, void *env) { + HParseAction *a = (HParseAction*)env; + HCFSequence *seq = h_new(HCFSequence, 1); + seq->items = h_new(HCFChoice*, 2); + seq->items[0] = a->p->vtable->desugar(mm__, a->p->env); + seq->items[1] = NULL; + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + ret->seq = h_new(HCFSequence*, 2); + ret->seq[0] = seq; + ret->seq[1] = NULL; + ret->action = a->action; + return ret; +} + static bool action_isValidRegular(void *env) { HParseAction *a = (HParseAction*)env; return a->p->vtable->isValidRegular(a->p->env); @@ -33,6 +48,7 @@ static const HParserVtable action_vt = { .parse = parse_action, .isValidRegular = action_isValidRegular, .isValidCF = action_isValidCF, + .desugar = desugar_action, }; const HParser* h_action(const HParser* p, const HAction a) { diff --git a/src/parsers/and.c b/src/parsers/and.c index ac51be2902a12c8d1a327c7868dccae2beaf6ba9..7f870ba49cd5dc9c7c7db6d0d1ad2bafcd495bcc 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -9,6 +9,11 @@ static HParseResult *parse_and(void* env, HParseState* state) { return NULL; } +static const HCFChoice* desugar_and(HAllocator *mm__, void *env) { + assert_message(0, "Not context-free, can't be desugared"); + return NULL; +} + static const HParserVtable and_vt = { .parse = parse_and, .isValidRegular = h_false, /* TODO: strictly speaking this should be regular, @@ -16,6 +21,7 @@ static const HParserVtable and_vt = { to get right, so we're leaving it for a future revision. --mlp, 18/12/12 */ .isValidCF = h_false, /* despite TODO above, this remains false. */ + .desugar = desugar_and, }; diff --git a/src/parsers/bits.c b/src/parsers/bits.c index c3a40da656cce7e9755601fbda5e5975fd9bed4c..ee3dde9a6d0835980d546e3859adfef2f7800cc5 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -16,10 +16,36 @@ static HParseResult* parse_bits(void* env, HParseState *state) { return make_result(state, result); } +static HCFChoice* desugar_bits(HAllocator *mm__, void *env) { + struct bits_env *bits = (struct bits_env*)env; + if (0 != bits->length % 8) + return NULL; // can't handle non-byte-aligned for now + HCFSequence *seq = h_new(HCFSequence, 1); + seq->items = h_new(HCFChoice*, bits->length/8); + HCharset match_all = new_charset(mm__); + HCFChoice *match_all_choice = h_new(HCFChoice, 1); + match_all_choice->type = HCF_CHARSET; + match_all_choice->charset = match_all; + match_all_choice->action = NULL; + for (int i = 0; i < 256; i++) + charset_set(match_all, i, 1); + for (size_t i=0; i<bits->length/8; ++i) { + seq->items[i] = match_all_choice; + } + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + ret->seq = h_new(HCFSequence*, 2); + ret->seq[0] = seq; + ret->seq[1] = NULL; + ret->action = NULL; + return ret; +} + static const HParserVtable bits_vt = { .parse = parse_bits, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_bits, }; const HParser* h_bits(size_t len, bool sign) { return h_bits__m(&system_allocator, len, sign); diff --git a/src/parsers/ch.c b/src/parsers/ch.c index 8b67744e16aebe1325faaea4b2d130f819026277..f0e91c199f8175be232c75768f56c93ea1dc790b 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -12,10 +12,19 @@ static HParseResult* parse_ch(void* env, HParseState *state) { } } +static HCFChoice* desugar_ch(HAllocator *mm__, void *env) { + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHAR; + ret->chr = (uint8_t)(unsigned long)(env); + ret->action = NULL; + return ret; +} + static const HParserVtable ch_vt = { .parse = parse_ch, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_ch, }; const HParser* h_ch(const uint8_t c) { diff --git a/src/parsers/charset.c b/src/parsers/charset.c index 7341e0074372bab16bd219aacf5c46a3c4346959..5a76239fe77ef9b6141ce21bf366f43139b588b2 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -1,25 +1,7 @@ #include <string.h> +#include "../internal.h" #include "parser_internal.h" -typedef unsigned int *HCharset; - -static inline HCharset new_charset(HAllocator* mm__) { - HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int)); - memset(cs, 0, 256); - return cs; -} - -static inline int charset_isset(HCharset cs, uint8_t pos) { - return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); -} - -static inline void charset_set(HCharset cs, uint8_t pos, int val) { - cs[pos / sizeof(*cs)] = - val - ? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) - : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); -} - static HParseResult* parse_charset(void *env, HParseState *state) { uint8_t in = h_read_bits(&state->input_stream, 8, false); HCharset cs = (HCharset)env; @@ -32,10 +14,19 @@ static HParseResult* parse_charset(void *env, HParseState *state) { return NULL; } +static HCFChoice* desugar_charset(HAllocator *mm__, void *env) { + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHARSET; + ret->charset = (HCharset)env; + ret->action = NULL; + return ret; +} + static const HParserVtable charset_vt = { .parse = parse_charset, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_charset, }; const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { diff --git a/src/parsers/choice.c b/src/parsers/choice.c index d48ed229db32b6d9b542f3054298514a8f717542..c4a931abdc1a76b31748ac48d4be776f114a776b 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -39,10 +39,26 @@ static bool choice_isValidCF(void *env) { return true; } +static HCFChoice* desugar_choice(HAllocator *mm__, void *env) { + HSequence *s = (HSequence*)env; + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + ret->seq = h_new(HCFSequence*, 1+s->len); + for (size_t i=0; i<s->len; ++i) { + ret->seq[i] = h_new(HCFSequence, 1); + ret->seq[i]->items = h_new(HCFChoice*, 2); + ret->seq[i]->items[0] = s->p_array[i]->vtable->desugar(mm__, s->p_array[i]->env); + ret->seq[i]->items[1] = NULL; + } + ret->seq[s->len] = NULL; + return ret; +} + static const HParserVtable choice_vt = { .parse = parse_choice, .isValidRegular = choice_isValidRegular, .isValidCF = choice_isValidCF, + .desugar = desugar_choice, }; const HParser* h_choice(const HParser* p, ...) { diff --git a/src/parsers/end.c b/src/parsers/end.c index 45ba37d99ae0df6a282df0540513064b3ecf030a..26bd0e93ccd5d48fb3c26afd4ac8dc16130c5481 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -10,10 +10,18 @@ static HParseResult* parse_end(void *env, HParseState *state) { } } +static const HCFChoice* desugar_end(HAllocator *mm__, void *env) { + static HCFChoice ret = { + .type = HCF_END + }; + return &ret; +} + static const HParserVtable end_vt = { .parse = parse_end, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_end, }; const HParser* h_end_p() { diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index 5ae91e360fcc3b273e1345ff057d764edabd0312..8b73b96e851daffeb57ac1c61465e882ceece11b 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -8,10 +8,21 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) { return res; } +static HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) { + static HCFSequence res_seq = {NULL}; + static HCFChoice res_ch = { + .type = HCF_CHOICE, + .seq = &res_seq + }; + + return &res_ch; +} + static const HParserVtable epsilon_vt = { .parse = parse_epsilon, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_epsilon, }; static const HParser epsilon_p = { diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c index 319f1ebe3eb3058a78e03bec6b39112b1a19f6d0..e248dc4aaf3c39ca0caa28b57da5c30e2abee148 100644 --- a/src/parsers/ignoreseq.c +++ b/src/parsers/ignoreseq.c @@ -26,6 +26,23 @@ static HParseResult* parse_ignoreseq(void* env, HParseState *state) { return res; } +static HCFChoice* desugar_ignoreseq(HAllocator *mm__, void *env) { + HIgnoreSeq *seq = (HIgnoreSeq*)env; + HCFSequence *hseq = h_new(HCFSequence, 1); + hseq->items = h_new(HCFChoice*, 1+seq->len); + for (size_t i=0; i<seq->len; ++i) { + hseq->items[i] = seq->parsers[i]->vtable->desugar(mm__, seq->parsers[i]->env); + } + hseq->items[seq->len] = NULL; + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + ret->seq = h_new(HCFSequence*, 2); + ret->seq[0] = hseq; + ret->seq[1] = NULL; + ret->action = NULL; + return ret; +} + static bool is_isValidRegular(void *env) { HIgnoreSeq *seq = (HIgnoreSeq*)env; for (size_t i=0; i<seq->len; ++i) { @@ -48,6 +65,7 @@ static const HParserVtable ignoreseq_vt = { .parse = parse_ignoreseq, .isValidRegular = is_isValidRegular, .isValidCF = is_isValidCF, + .desugar = desugar_ignoreseq, }; diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c index d1920589a90bbda21ad1ebf8af48135e69a45774..bcd896ad409d3cac118bc578cf4f43ee46f1a94c 100644 --- a/src/parsers/int_range.c +++ b/src/parsers/int_range.c @@ -28,10 +28,57 @@ static HParseResult* parse_int_range(void *env, HParseState *state) { } } +HCFChoice* gen_int_range(HAllocator *mm__, uint64_t low, uint64_t high, uint8_t bytes) { + if (1 == bytes) { + HCFChoice *cs = h_new(HCFChoice, 1); + cs->type = HCF_CHARSET; + cs->charset = new_charset(mm__); + for (uint64_t i=low; i<=high; ++i) { + charset_set(cs->charset, i, 1); + } + cs->action = NULL; + return cs; + } + else if (1 < bytes) { + HCFChoice *root = h_new(HCFChoice, 1); + root->type = HCF_CHOICE; + root->seq = h_new(HCFSequence*, 4); + root->seq[0] = h_new(HCFSequence, 1); + root->seq[0]->items = h_new(HCFChoice*, 2); + root->seq[0]->items[0] = gen_int_range(mm__, low, high, FIXME); + root->seq[0]->items[1] = NULL; + root->seq[1] = h_new(HCFSequence, 1); + root->seq[1]->items = h_new(HCFChoice*, 2); + root->seq[1]->items[0] = h_new(HCFChoice, 1); + /* do something with root->seq[1]->items[0] */ + root->seq[1]->items[1] = NULL; + root->seq[2] = h_new(HCFSequence, 1); + root->seq[2]->items = h_new(HCFChoice*, 2); + root->seq[2]->items[0] = gen_int_range(mm__, low, high, FIXME); + root->seq[2]->items[1] = NULL; + root->seq[3] = NULL; + root->action = NULL; + return root; + } + else { // idk why this would ever be <1, but whatever + return NULL; + } +} + +static HCFChoice* desugar_int_range(HAllocator *mm__, void *env) { + HRange *r = (HRange*)env; + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + uint8_t bytes = r->p->env->length / 8; + HCFSequence *seq = h_new(HCFSequence, 1); + +} + static const HParserVtable int_range_vt = { .parse = parse_int_range, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_int_range, }; const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index d8b36514ce76a0f3ce6f464c2bdfff5df8713cf4..d966eccf6af3baf5d051ca2395a581595dbaf664 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -23,5 +23,7 @@ static inline size_t token_length(HParseResult *pr) { } } +static inline bool h_true(void *env) { return true; } +static inline bool h_false(void *env) { return false; } #endif // HAMMER_PARSE_INTERNAL__H diff --git a/src/parsers/token.c b/src/parsers/token.c index f5df9d4c3c3a84829a026b66fcb02c849db1d530..40779b998fc55d3e4ebc41a1d8cd83fb5067a2f3 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -5,8 +5,6 @@ typedef struct { uint8_t len; } HToken; - - static HParseResult* parse_token(void *env, HParseState *state) { HToken *t = (HToken*)env; for (int i=0; i<t->len; ++i) { @@ -20,10 +18,30 @@ static HParseResult* parse_token(void *env, HParseState *state) { return make_result(state, tok); } +static HCFChoice* desugar_token(HAllocator *mm__, void *env) { + HToken *tok = (HToken*)env; + HCFSequence *seq = h_new(HCFSequence, 1); + seq->items = h_new(HCFChoice*, 1+tok->len); + for (size_t i=0; i<tok->len; ++i) { + seq->items[i] = h_new(HCFChoice, 1); + seq->items[i]->type = HCF_CHAR; + seq->items[i]->chr = tok->str[i]; + } + seq->items[tok->len] = NULL; + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + ret->seq = h_new(HCFSequence*, 2); + ret->seq[0] = seq; + ret->seq[1] = NULL; + ret->action = NULL; + return ret; +} + const HParserVtable token_vt = { .parse = parse_token, .isValidRegular = h_true, .isValidCF = h_true, + .desugar = desugar_token, }; const HParser* h_token(const uint8_t *str, const size_t len) { diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index 32838ce5ce499569491c4fd6599d97a4947a54b5..1bd288b533567381da69774e5c579e1889dd8b96 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -16,6 +16,7 @@ static const HParserVtable unimplemented_vt = { .parse = parse_unimplemented, .isValidRegular = h_false, .isValidCF = h_false, + .desugar = NULL, }; static HParser unimplemented = { diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 7b2477bef0bfd67a41adcce99c0e5b0662b87c08..92e5bd0d55e851645b48dfd97b278b4d1aab8213 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -14,6 +14,31 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { return h_do_parse((HParser*)env, state); } +static HCFChoice* desugar_whitespace(HAllocator *mm__, void *env) { + HCFChoice *ret = h_new(HCFChoice, 1); + ret->type = HCF_CHOICE; + ret->seq = h_new(HCFSequence*, 3); + HCFSequence *nonempty = h_new(HCFSequence, 1); + nonempty->items = h_new(HCFChoice*, 3); + nonempty->items[0] = h_new(HCFChoice, 1); + nonempty->items[0]->type = HCF_CHARSET; + nonempty->items[0]->charset = new_charset(mm__); + charset_set(nonempty->items[0]->charset, '\t', 1); + charset_set(nonempty->items[0]->charset, ' ', 1); + charset_set(nonempty->items[0]->charset, '\n', 1); + charset_set(nonempty->items[0]->charset, '\r', 1); + nonempty->items[1] = ret; // yay circular pointer! + nonempty->items[2] = NULL; + ret->seq[0] = nonempty; + HCFSequence *empty = h_new(HCFSequence, 1); + empty->items = h_new(HCFChoice*, 1); + empty->items[0] = NULL; + ret->seq[1] = empty; + ret->seq[2] = NULL; + ret->action = NULL; + return ret; +} + static bool ws_isValidRegular(void *env) { HParser *p = (HParser*)env; return p->vtable->isValidRegular(p->env); @@ -28,6 +53,7 @@ static const HParserVtable whitespace_vt = { .parse = parse_whitespace, .isValidRegular = ws_isValidRegular, .isValidCF = ws_isValidCF, + .desugar = desugar_whitespace, }; const HParser* h_whitespace(const HParser* p) {