From ec404ca8fea61b9547553a5f11991c5862348922 Mon Sep 17 00:00:00 2001 From: Dan Hirsch <thequux@upstandinghackers.com> Date: Sat, 25 May 2013 03:35:42 +0200 Subject: [PATCH] Refactored all of the desugaring code to no longer depend on memory being initialized to 0. Everything is about 12% faster now. --- HACKING | 6 +- Makefile | 10 ++- src/backends/contextfree.h | 147 ++++++++++++++++++++++++++++++++++ src/backends/llk.c | 2 +- src/backends/regex.c | 3 + src/cfgrammar.c | 4 +- src/desugar.c | 14 +++- src/glue.c | 2 +- src/internal.h | 12 +-- src/parsers/action.c | 22 +++-- src/parsers/and.c | 6 -- src/parsers/attr_bool.c | 22 +++-- src/parsers/bits.c | 42 +++------- src/parsers/butnot.c | 6 -- src/parsers/ch.c | 8 +- src/parsers/charset.c | 8 +- src/parsers/choice.c | 22 +++-- src/parsers/difference.c | 6 -- src/parsers/end.c | 7 +- src/parsers/epsilon.c | 3 + src/parsers/ignore.c | 23 ++---- src/parsers/ignoreseq.c | 41 ++++------ src/parsers/indirect.c | 5 +- src/parsers/int_range.c | 105 +++++++++--------------- src/parsers/many.c | 94 +++++++++------------- src/parsers/not.c | 8 +- src/parsers/nothing.c | 10 +-- src/parsers/optional.c | 41 ++++------ src/parsers/parser_internal.h | 18 ++--- src/parsers/sequence.c | 23 ++---- src/parsers/token.c | 26 ++---- src/parsers/unimplemented.c | 2 +- src/parsers/whitespace.c | 49 +++++------- src/parsers/xor.c | 6 -- src/system_allocator.c | 13 ++- src/test_suite.h | 16 ++-- 36 files changed, 413 insertions(+), 419 deletions(-) create mode 100644 src/backends/contextfree.h diff --git a/HACKING b/HACKING index 7bffb4c9..d923217e 100644 --- a/HACKING +++ b/HACKING @@ -2,14 +2,16 @@ Privileged arguments ==================== As a matter of convenience, there are several identifiers that -internal macros use. Chances are that if you use these names for other -things, you're gonna have a bad time. +internal anaphoric macros use. Chances are that if you use these names +for other things, you're gonna have a bad time. In particular, these names, and the macros that use them, are: - state: Used by a_new and company. Should be an HParseState* - mm__: Used by h_new and h_free. Should be an HAllocator* +- stk__: + Used in desugaring. Should be an HCFStack* Function suffixes ================= diff --git a/Makefile b/Makefile index dfca1177..6c8f3863 100644 --- a/Makefile +++ b/Makefile @@ -24,17 +24,19 @@ examples/all: src/all examples/compile: src/compile define SUBDIR_TEMPLATE -$(1)/%: - $$(MAKE) -C $(1) $$* +$(1)/%: force + $(MAKE) -C $(1) $$* endef +force: + $(foreach dir,$(SUBDIRS),$(eval $(call SUBDIR_TEMPLATE,$(dir)))) #.DEFAULT: # $(if $(findstring ./,$(dir $@)),$(error No rule to make target `$@'),$(MAKE) -C $(dir $@) $(notdir $@)) -TAGS: $(shell find * -name "*.c") - etags $^ +TAGS: force + etags $(shell find * -name "*.c" -o -name "*.h") config: @printf "%30s %s\n" $(foreach var,$(CONFIG_VARS),$(var) $($(var)) ) diff --git a/src/backends/contextfree.h b/src/backends/contextfree.h new file mode 100644 index 00000000..9c2ec459 --- /dev/null +++ b/src/backends/contextfree.h @@ -0,0 +1,147 @@ +// This is an internal header; it provides macros to make desugaring cleaner. +#include <assert.h> +#include "../internal.h" +#ifndef HAMMER_CONTEXTFREE__H +#define HAMMER_CONTEXTFREE__H + + +// HCFStack +struct HCFStack_ { + HCFChoice **stack; + int count; + int cap; + HCFChoice *last_completed; // Last completed choice. +}; + +#ifndef UNUSED +#define UNUSED __attribute__((unused)) +#endif + +static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) UNUSED; +static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) UNUSED; +static HCFStack* h_cfstack_new(HAllocator *mm__) UNUSED; +static HCFStack* h_cfstack_new(HAllocator *mm__) { + HCFStack *stack = h_new(HCFStack, 1); + stack->count = 0; + stack->cap = 4; + stack->stack = h_new(HCFChoice*, stack->cap); + return stack; +} + +static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) UNUSED; +static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) { + h_free(stk__->stack); + h_free(stk__); +} + +static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFChoice *item) UNUSED; +static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFChoice *item) { + HCFChoice *cur_top = stk__->stack[stk__->count-1]; + assert(cur_top->type == HCF_CHOICE); + assert(cur_top->seq[0] != NULL); // There must be at least one sequence... + stk__->last_completed = item; + for (int i = 0;; i++) { + if (cur_top->seq[i+1] == NULL) { + assert(cur_top->seq[i]->items != NULL); + for (int j = 0;; j++) { + if (cur_top->seq[i]->items[j] == NULL) { + cur_top->seq[i]->items = mm__->realloc(mm__, cur_top->seq[i]->items, sizeof(HCFChoice*) * (j+2)); + cur_top->seq[i]->items[j] = item; + cur_top->seq[i]->items[j+1] = NULL; + return; + } + } + } + } +} + +static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *ret = h_new(HCFChoice, 1); + ret->reshape = NULL; + ret->action = NULL; + ret->pred = NULL; + ret->type = ~0; // invalid type + // Add it to the current sequence... + if (stk__->count > 0) { + h_cfstack_add_to_seq(mm__, stk__, ret); + } + + return ret; +} + +static inline void h_cfstack_add_charset(HAllocator *mm__, HCFStack *stk__, HCharset charset) { + HCFChoice *ni = h_cfstack_new_choice_raw(mm__, stk__); + ni->type = HCF_CHARSET; + ni->charset = charset; + stk__->last_completed = ni; +} + + +static inline void h_cfstack_add_char(HAllocator *mm__, HCFStack *stk__, uint8_t chr) { + HCFChoice *ni = h_cfstack_new_choice_raw(mm__, stk__); + ni->type = HCF_CHAR; + ni->chr = chr; + stk__->last_completed = ni; +} + +static inline void h_cfstack_add_end(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *ni = h_cfstack_new_choice_raw(mm__, stk__); + ni->type = HCF_END; + stk__->last_completed = ni; +} + +static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *choice = h_cfstack_new_choice_raw(mm__, stk__); + choice->type = HCF_CHOICE; + choice->seq = h_new(HCFSequence*, 1); + choice->seq[0] = NULL; + + if (stk__->count + 1 > stk__->cap) { + assert(stk__->cap > 0); + stk__->cap *= 2; + stk__->stack = mm__->realloc(mm__, stk__->stack, stk__->cap * sizeof(HCFChoice*)); + } + assert(stk__->cap >= 1); + stk__->stack[stk__->count++] = choice; +} + +static inline void h_cfstack_begin_seq(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *top = stk__->stack[stk__->count-1]; + for (int i = 0;; i++) { + if (top->seq[i] == NULL) { + top->seq = mm__->realloc(mm__, top->seq, sizeof(HCFSequence*) * (i+2)); + HCFSequence *seq = top->seq[i] = h_new(HCFSequence, 1); + top->seq[i+1] = NULL; + seq->items = h_new(HCFChoice*, 1); + seq->items[0] = NULL; + return; + } + } +} + +static inline void h_cfstack_end_seq(HAllocator *mm__, HCFStack *stk__) UNUSED; +static inline void h_cfstack_end_seq(HAllocator *mm__, HCFStack *stk__) { + // do nothing. You should call this anyway. +} + +static inline void h_cfstack_end_choice(HAllocator *mm__, HCFStack *stk__) UNUSED; +static inline void h_cfstack_end_choice(HAllocator *mm__, HCFStack *stk__) { + assert(stk__->count > 0); + stk__->last_completed = stk__->stack[stk__->count-1]; + stk__->count--; +} + +#define HCFS_APPEND(choice) h_cfstack_add_to_seq(mm__, stk__, (choice)) +#define HCFS_DESUGAR(parser) h_desugar(mm__, stk__, parser) +#define HCFS_ADD_CHARSET(charset) h_cfstack_add_charset(mm__, stk__, (charset)) +#define HCFS_ADD_CHAR(chr) h_cfstack_add_char(mm__, stk__, (chr)) +#define HCFS_ADD_END() h_cfstack_add_end(mm__, stk__) +// The semicolons on BEGIN macros are intentional; pretend that they +// are control structures. +#define HCFS_BEGIN_CHOICE() h_cfstack_begin_choice(mm__, stk__); +#define HCFS_BEGIN_SEQ() h_cfstack_begin_seq(mm__, stk__); +#define HCFS_END_CHOICE() h_cfstack_end_choice(mm__, stk__) +#define HCFS_END_SEQ() h_cfstack_end_seq(mm__, stk__) +#define HCFS_THIS_CHOICE (stk__->stack[stk__->count-1]) + +#endif diff --git a/src/backends/llk.c b/src/backends/llk.c index 6e8a9836..50e11bfa 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -374,7 +374,7 @@ int test_llk(void) printf("first(A) = "); h_pprint_stringset(stdout, g, h_first(2, g, g->start), 0); printf("follow(C) = "); - h_pprint_stringset(stdout, g, h_follow(2, g, h_desugar(&system_allocator, c)), 0); + h_pprint_stringset(stdout, g, h_follow(2, g, h_desugar(&system_allocator, NULL, c)), 0); h_compile(p, PB_LLk, NULL); diff --git a/src/backends/regex.c b/src/backends/regex.c index 4389bc91..6f069bec 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -354,6 +354,9 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params if (!parser->vtable->isValidRegular(parser->env)) return 1; HRVMProg *prog = h_new(HRVMProg, 1); + prog->length = prog->action_count = 0; + prog->insns = NULL; + prog->actions = NULL; prog->allocator = mm__; if (!h_compile_regex(prog, parser)) { h_free(prog->insns); diff --git a/src/cfgrammar.c b/src/cfgrammar.c index a5a9b1ac..d3168adc 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -46,7 +46,7 @@ static void collect_geneps(HCFGrammar *grammar); HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) { // convert parser to CFG form ("desugar"). - HCFChoice *desugared = h_desugar(mm__, parser); + HCFChoice *desugared = h_desugar(mm__, NULL, parser); if(desugared == NULL) return NULL; // -> backend not suitable for this parser @@ -65,6 +65,8 @@ HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) nt->seq[0]->items[0] = desugared; nt->seq[0]->items[1] = NULL; nt->seq[1] = NULL; + nt->pred = NULL; + nt->action = NULL; nt->reshape = h_act_first; h_hashset_put(g->nts, nt); g->start = nt; diff --git a/src/desugar.c b/src/desugar.c index 6117c221..ce87ca32 100644 --- a/src/desugar.c +++ b/src/desugar.c @@ -1,10 +1,20 @@ #include "hammer.h" #include "internal.h" +#include "backends/contextfree.h" -HCFChoice *h_desugar(HAllocator *mm__, const HParser *parser) { +HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) { + HCFStack *nstk__ = stk__; if(parser->desugared == NULL) { + if (nstk__ == NULL) { + nstk__ = h_cfstack_new(mm__); + } // we're going to do something naughty and cast away the const to memoize - ((HParser *)parser)->desugared = parser->vtable->desugar(mm__, parser->env); + parser->vtable->desugar(mm__, nstk__, parser->env); + ((HParser *)parser)->desugared = nstk__->last_completed; + if (stk__ == NULL) + h_cfstack_free(mm__, nstk__); + } else if (stk__ != NULL) { + HCFS_APPEND(parser->desugared); } return parser->desugared; diff --git a/src/glue.c b/src/glue.c index 48bd222e..5e3804d0 100644 --- a/src/glue.c +++ b/src/glue.c @@ -207,7 +207,7 @@ const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) switch(p->token_type) { case TT_SEQUENCE: // Flatten and append all. - for(size_t i; i<p->seq->used; i++) { + for(size_t i = 0; i<p->seq->used; i++) { h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); } break; diff --git a/src/internal.h b/src/internal.h index 889e5c3b..d0fb53a0 100644 --- a/src/internal.h +++ b/src/internal.h @@ -49,6 +49,7 @@ static inline void h_generic_free(HAllocator *allocator, void* ptr) { } extern HAllocator system_allocator; +typedef struct HCFStack_ HCFStack; typedef struct HInputStream_ { @@ -236,7 +237,7 @@ HParser *h_new_parser(HAllocator *mm__, const HParserVtable *vt, void *env) { return p; } -HCFChoice *h_desugar(HAllocator *mm__, const HParser *parser); +HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser); HCountedArray *h_carray_new_sized(HArena * arena, size_t size); HCountedArray *h_carray_new(HArena * arena); @@ -276,8 +277,9 @@ HHashValue h_hash_ptr(const void *p); typedef struct HCFSequence_ HCFSequence; -typedef struct HCFChoice_ { - enum { + +struct HCFChoice_ { + enum HCFChoiceType { HCF_END, HCF_CHOICE, HCF_CHARSET, @@ -292,7 +294,7 @@ typedef struct HCFChoice_ { // to execute before action and pred are applied. HAction action; HPredicate pred; -} HCFChoice; +}; struct HCFSequence_ { HCFChoice **items; // last one is NULL @@ -303,7 +305,7 @@ struct HParserVtable_ { bool (*isValidRegular)(void *env); bool (*isValidCF)(void *env); bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean. - HCFChoice* (*desugar)(HAllocator *mm__, void *env); + void (*desugar)(HAllocator *mm__, HCFStack *stk__, void *env); }; bool h_false(void*); diff --git a/src/parsers/action.c b/src/parsers/action.c index b00426a7..52c9bc1e 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -20,20 +20,16 @@ static HParseResult* parse_action(void *env, HParseState *state) { return NULL; } -static HCFChoice* desugar_action(HAllocator *mm__, void *env) { +static void desugar_action(HAllocator *mm__, HCFStack *stk__, void *env) { HParseAction *a = (HParseAction*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, 2); - seq->items[0] = h_desugar(mm__, a->p); - seq->items[1] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = a->action; - ret->reshape = h_act_first; - return ret; + + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(a->p); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->action = a->action; + HCFS_THIS_CHOICE->reshape = h_act_first; + } HCFS_END_CHOICE(); } static bool action_isValidRegular(void *env) { diff --git a/src/parsers/and.c b/src/parsers/and.c index 49d43870..dfd91871 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -9,11 +9,6 @@ static HParseResult *parse_and(void* env, HParseState* state) { return NULL; } -static HCFChoice* desugar_and(HAllocator *mm__, void *env) { - assert_message(0, "Not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable and_vt = { .parse = parse_and, .isValidRegular = h_false, /* TODO: strictly speaking this should be regular, @@ -21,7 +16,6 @@ static const HParserVtable and_vt = { to get right, so we're leaving it for a future revision. --mlp, 18/12/12 */ .isValidCF = h_false, /* despite TODO above, this remains false. */ - .desugar = desugar_and, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c index a05caa1f..fc980b24 100644 --- a/src/parsers/attr_bool.c +++ b/src/parsers/attr_bool.c @@ -33,20 +33,16 @@ static bool ab_isValidCF(void *env) { return ab->p->vtable->isValidCF(ab->p->env); } -static HCFChoice* desugar_ab(HAllocator *mm__, void *env) { +static void desugar_ab(HAllocator *mm__, HCFStack *stk__, void *env) { + HAttrBool *a = (HAttrBool*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, 2); - seq->items[0] = h_desugar(mm__, a->p); - seq->items[1] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->pred = a->pred; - ret->reshape = h_act_first; - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(a->p); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->pred = a->pred; + HCFS_THIS_CHOICE->reshape = h_act_first; + } HCFS_END_CHOICE(); } static bool h_svm_action_attr_bool(HArena *arena, HSVMContext *ctx, void* arg) { diff --git a/src/parsers/bits.c b/src/parsers/bits.c index e153e3a8..93b4aef6 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -52,41 +52,25 @@ static HParsedToken *reshape_bits_signed(const HParseResult *p) { return reshape_bits(p, true); } -static HCFChoice* desugar_bits(HAllocator *mm__, void *env) { +static void desugar_bits(HAllocator *mm__, HCFStack *stk__, void *env) { struct bits_env *bits = (struct bits_env*)env; - if (0 != bits->length % 8) - return NULL; // can't handle non-byte-aligned for now + assert (0 == bits->length % 8); HCharset match_all = new_charset(mm__); for (int i = 0; i < 256; i++) charset_set(match_all, i, 1); - HCFChoice *match_all_choice = h_new(HCFChoice, 1); - match_all_choice->type = HCF_CHARSET; - match_all_choice->charset = match_all; - match_all_choice->action = NULL; - - size_t n = bits->length/8; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, n+1); - for (size_t i=0; i<n; ++i) { - seq->items[i] = match_all_choice; - } - seq->items[n] = NULL; - - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = NULL; - - if(bits->signedp) - ret->reshape = reshape_bits_signed; - else - ret->reshape = reshape_bits_unsigned; - - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + size_t n = bits->length/8; + for (size_t i=0; i<n; ++i) { + HCFS_ADD_CHARSET(match_all); + } + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = bits->signedp + ? reshape_bits_signed + : reshape_bits_unsigned; + } HCFS_END_CHOICE(); } static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c index 1400e365..f114a1fa 100644 --- a/src/parsers/butnot.c +++ b/src/parsers/butnot.c @@ -35,16 +35,10 @@ static HParseResult* parse_butnot(void *env, HParseState *state) { } } -static HCFChoice* desugar_butnot(HAllocator *mm__, void *env) { - assert_message(0, "'h_butnot' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable butnot_vt = { .parse = parse_butnot, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? - .desugar = desugar_butnot, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/ch.c b/src/parsers/ch.c index 0de61e49..9ee3f293 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -13,12 +13,8 @@ static HParseResult* parse_ch(void* env, HParseState *state) { } } -static HCFChoice* desugar_ch(HAllocator *mm__, void *env) { - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHAR; - ret->chr = (uint8_t)(unsigned long)(env); - ret->action = NULL; - return ret; +static void desugar_ch(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_ADD_CHAR( (uint8_t)(unsigned long)(env) ); } static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/charset.c b/src/parsers/charset.c index db4c2e77..e1a910f8 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -15,12 +15,8 @@ static HParseResult* parse_charset(void *env, HParseState *state) { return NULL; } -static HCFChoice* desugar_charset(HAllocator *mm__, void *env) { - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHARSET; - ret->charset = (HCharset)env; - ret->action = NULL; - return ret; +static void desugar_charset(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_ADD_CHARSET( (HCharset)env ); } static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 67b37420..6db1378f 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -39,20 +39,16 @@ static bool choice_isValidCF(void *env) { return true; } -static HCFChoice* desugar_choice(HAllocator *mm__, void *env) { +static void desugar_choice(HAllocator *mm__, HCFStack *stk__, void *env) { HSequence *s = (HSequence*)env; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 1+s->len); - for (size_t i=0; i<s->len; ++i) { - ret->seq[i] = h_new(HCFSequence, 1); - ret->seq[i]->items = h_new(HCFChoice*, 2); - ret->seq[i]->items[0] = h_desugar(mm__, s->p_array[i]); - ret->seq[i]->items[1] = NULL; - } - ret->seq[s->len] = NULL; - ret->reshape = h_act_first; - return ret; + HCFS_BEGIN_CHOICE() { + for (size_t i = 0; i < s->len; i++) { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(s->p_array[i]); + } HCFS_END_SEQ(); + } + HCFS_THIS_CHOICE->reshape = h_act_first; + } HCFS_END_CHOICE(); } static bool choice_ctrvm(HRVMProg *prog, void* env) { diff --git a/src/parsers/difference.c b/src/parsers/difference.c index 4da05214..76a2cc44 100644 --- a/src/parsers/difference.c +++ b/src/parsers/difference.c @@ -34,16 +34,10 @@ static HParseResult* parse_difference(void *env, HParseState *state) { } } -static HCFChoice* desugar_difference(HAllocator *mm__, void *env) { - assert_message(0, "'h_difference' is not context-free, can't be desugared"); - return NULL; -} - static HParserVtable difference_vt = { .parse = parse_difference, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? - .desugar = desugar_difference, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/end.c b/src/parsers/end.c index fa8ab8b3..30b3ba12 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -10,11 +10,8 @@ static HParseResult* parse_end(void *env, HParseState *state) { } } -static HCFChoice* desugar_end(HAllocator *mm__, void *env) { - static HCFChoice ret = { - .type = HCF_END - }; - return &ret; +static void desugar_end(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_ADD_END(); } static bool end_ctrvm(HRVMProg *prog, void *env) { diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index 92e39477..e8ef525f 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -25,6 +25,9 @@ HParser* h_epsilon_p() { } HParser* h_epsilon_p__m(HAllocator* mm__) { HParser *epsilon_p = h_new(HParser, 1); + epsilon_p->desugared = NULL; + epsilon_p->backend_data = NULL; + epsilon_p->backend = 0; epsilon_p->vtable = &epsilon_vt; return epsilon_p; } diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index 178d9707..9aa993ae 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -21,22 +21,13 @@ static bool ignore_isValidCF(void *env) { return (p->vtable->isValidCF(p->env)); } -static HCFChoice* desugar_ignore(HAllocator *mm__, void *env) { - HParser *p = (HParser*)env; - - HCFChoice *ret = h_new(HCFChoice, 1); - HCFChoice *a = h_desugar(mm__, p); - - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = h_new(HCFSequence, 1); - ret->seq[0]->items = h_new(HCFChoice*, 2); - ret->seq[0]->items[0] = a; - ret->seq[0]->items[1] = NULL; - ret->seq[1] = NULL; - ret->reshape = h_act_ignore; - - return ret; +static void desugar_ignore(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR( (HParser*)env ); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = h_act_ignore; + } HCFS_END_CHOICE(); } static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) { diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c index 8fcc143b..e562136f 100644 --- a/src/parsers/ignoreseq.c +++ b/src/parsers/ignoreseq.c @@ -31,31 +31,24 @@ static HParseResult* parse_ignoreseq(void* env, HParseState *state) { return res; } -static HCFChoice* desugar_ignoreseq(HAllocator *mm__, void *env) { +static void desugar_ignoreseq(HAllocator *mm__, HCFStack *stk__, void *env) { HIgnoreSeq *seq = (HIgnoreSeq*)env; - HCFSequence *hseq = h_new(HCFSequence, 1); - hseq->items = h_new(HCFChoice*, 1+seq->len); - for (size_t i=0; i<seq->len; ++i) { - hseq->items[i] = h_desugar(mm__, seq->parsers[i]); - } - hseq->items[seq->len] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = hseq; - ret->seq[1] = NULL; - ret->action = NULL; - - if(seq->which == 0) - ret->reshape = h_act_first; - else if(seq->which == 1) - ret->reshape = h_act_second; // for h_middle - else if(seq->which == seq->len-1) - ret->reshape = h_act_last; - else - ret->reshape = NULL; // XXX - - return ret; + + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i=0; i<seq->len; ++i) + HCFS_DESUGAR(seq->parsers[i]); + } HCFS_END_SEQ(); + + if(seq->which == 0) + HCFS_THIS_CHOICE->reshape = h_act_first; + else if(seq->which == 1) + HCFS_THIS_CHOICE->reshape = h_act_second; // for h_middle + else if(seq->which == seq->len-1) + HCFS_THIS_CHOICE->reshape = h_act_last; + else + assert(!"Ignoreseq must select item 0, 1, or n-1"); + } HCFS_END_CHOICE(); } static bool is_isValidRegular(void *env) { diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index 5835d59d..746f1a9e 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -9,9 +9,8 @@ static bool indirect_isValidCF(void *env) { return p->vtable->isValidCF(p->env); } -static HCFChoice* desugar_indirect(HAllocator *mm__, void *env) { - HParser *p = (HParser*)env; - return h_desugar(mm__, p); +static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_DESUGAR( (HParser*)env ); } static const HParserVtable indirect_vt = { diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c index d67a786f..29379930 100644 --- a/src/parsers/int_range.c +++ b/src/parsers/int_range.c @@ -28,85 +28,54 @@ static HParseResult* parse_int_range(void *env, HParseState *state) { } } -HCFChoice* gen_int_range(HAllocator *mm__, uint64_t low, uint64_t high, uint8_t bytes) { +void gen_int_range(HAllocator *mm__, HCFStack *stk__, uint64_t low, uint64_t high, uint8_t bytes) { /* Possible FIXME: TallerThanMe */ if (1 == bytes) { - HCFChoice *cs = h_new(HCFChoice, 1); - cs->type = HCF_CHARSET; - cs->charset = new_charset(mm__); + HCharset cs = new_charset(mm__); for (uint64_t i=low; i<=high; ++i) { - charset_set(cs->charset, i, 1); + charset_set(cs, i, 1); } - cs->action = NULL; - return cs; + HCFS_ADD_CHARSET(cs); } else if (1 < bytes) { uint8_t low_head, hi_head; low_head = ((low >> (8*(bytes - 1))) & 0xFF); hi_head = ((high >> (8*(bytes - 1))) & 0xFF); if (low_head != hi_head) { - HCFChoice *root = h_new(HCFChoice, 1); - root->type = HCF_CHOICE; - root->seq = h_new(HCFSequence*, 4); - root->seq[0] = h_new(HCFSequence, 1); - root->seq[0]->items = h_new(HCFChoice*, 3); - root->seq[0]->items[0] = h_new(HCFChoice, 1); - root->seq[0]->items[0]->type = HCF_CHAR; - root->seq[0]->items[0]->chr = low_head; - root->seq[0]->items[0]->action = NULL; - root->seq[0]->items[1] = gen_int_range(mm__, low & ((1 << (8 * (bytes - 1))) - 1), ((1 << (8*(bytes-1)))-1), bytes-1); - root->seq[0]->items[2] = NULL; - root->seq[1] = h_new(HCFSequence, 1); - root->seq[1]->items = h_new(HCFChoice*, bytes+1); - root->seq[1]->items[0] = h_new(HCFChoice, 2); - root->seq[1]->items[0]->type = HCF_CHARSET; - root->seq[1]->items[0]->charset = new_charset(mm__); - root->seq[1]->items[0]->action = NULL; - root->seq[1]->items[1] = root->seq[1]->items[0] + 1; - root->seq[1]->items[1]->type = HCF_CHARSET; - root->seq[1]->items[1]->charset = new_charset(mm__); - for (int i = 0; i < 256; i++) { - charset_set(root->seq[1]->items[0]->charset, i, (i > low_head && i < hi_head)); - charset_set(root->seq[1]->items[1]->charset, i, 1); - } - root->seq[1]->items[1]->action = NULL; - for (int i = 2; i < bytes; i++) - root->seq[1]->items[i] = root->seq[1]->items[1]; - root->seq[1]->items[bytes] = NULL; - root->seq[2] = h_new(HCFSequence, 1); - root->seq[2]->items = h_new(HCFChoice*, 3); - root->seq[2]->items[0] = h_new(HCFChoice, 1); - root->seq[2]->items[0]->type = HCF_CHAR; - root->seq[2]->items[0]->type = hi_head; - root->seq[2]->items[0]->action = NULL; - root->seq[2]->items[1] = gen_int_range(mm__, 0, high & ((1 << (8 * (bytes - 1))) - 1), bytes-1); - root->seq[2]->items[2] = NULL; - root->seq[3] = NULL; - root->action = NULL; - return root; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHAR(low_head); + gen_int_range(mm__, stk__, low & ((1 << (8 * (bytes - 1))) - 1), ((1 << (8*(bytes-1)))-1), bytes-1); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + HCharset hd = new_charset(mm__); + HCharset rest = new_charset(mm__); + for (int i = 0; i < 256; i++) { + charset_set(hd, i, (i > low_head && i < hi_head)); + charset_set(rest, i, 1); + } + HCFS_ADD_CHARSET(hd); + for (int i = 2; i < bytes; i++) + HCFS_ADD_CHARSET(rest); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHAR(hi_head); + gen_int_range(mm__, stk__, 0, high & ((1 << (8 * (bytes - 1))) - 1), bytes-1); + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); } else { - HCFChoice *root = h_new(HCFChoice, 1); - root->type = HCF_CHOICE; - root->seq = h_new(HCFSequence*, 2); - root->seq[0] = h_new(HCFSequence, 1); - root->seq[0]->items = h_new(HCFChoice*, 3); - root->seq[0]->items[0] = h_new(HCFChoice, 1); - root->seq[0]->items[0]->type = HCF_CHAR; - root->seq[0]->items[0]->chr = low_head; - root->seq[0]->items[0]->action = NULL; - root->seq[0]->items[1] = gen_int_range(mm__, - low & ((1 << (8 * (bytes - 1))) - 1), - high & ((1 << (8 * (bytes - 1))) - 1), - bytes - 1); - root->seq[0]->items[2] = NULL; - root->seq[1] = NULL; - root->action = NULL; - return root; + // TODO: find a way to merge this with the higher-up SEQ + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHAR(low_head); + gen_int_range(mm__, stk__, + low & ((1 << (8 * (bytes - 1))) - 1), + high & ((1 << (8 * (bytes - 1))) - 1), + bytes - 1); + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); } } - else { // idk why this would ever be <1, but whatever - return NULL; - } } struct bits_env { @@ -114,11 +83,11 @@ struct bits_env { uint8_t signedp; }; -static HCFChoice* desugar_int_range(HAllocator *mm__, void *env) { +static void desugar_int_range(HAllocator *mm__, HCFStack *stk__, void *env) { HRange *r = (HRange*)env; struct bits_env* be = (struct bits_env*)r->p->env; uint8_t bytes = be->length / 8; - return gen_int_range(mm__, r->lower, r->upper, bytes); + gen_int_range(mm__, stk__, r->lower, r->upper, bytes); } bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/many.c b/src/parsers/many.c index 8185203c..a095940c 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -59,11 +59,25 @@ static bool many_isValidCF(void *env) { repeat->sep->vtable->isValidCF(repeat->sep->env))); } -static HCFChoice* desugar_many(HAllocator *mm__, void *env) { +static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { + // TODO: refactor this. HRepeat *repeat = (HRepeat*)env; + if (!repeat->min_p) { + assert(!"Unreachable"); + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i = 0; i < repeat->count; i++) { + if (i != 0 && repeat->sep != NULL) + HCFS_DESUGAR(repeat->sep); // Should be ignored. + HCFS_DESUGAR(repeat->p); + } + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); + return; + } if(repeat->count > 1) { assert_message(0, "'h_repeat_n' is not context-free, can't be desugared"); - return NULL; + return; } /* many(A) => @@ -73,53 +87,29 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) { -> \epsilon */ - HParser *epsilon = h_epsilon_p__m(mm__); - - HCFChoice *sep = h_desugar(mm__, (repeat->sep != NULL) ? repeat->sep : epsilon); - HCFChoice *a = h_desugar(mm__, repeat->p); - HCFChoice *ma = h_new(HCFChoice, 1); - HCFChoice *mar = h_new(HCFChoice, 1); - HCFChoice *eps = desugar_epsilon(mm__, NULL); - - /* create first subrule */ - ma->type = HCF_CHOICE; - ma->seq = h_new(HCFSequence*, 3); /* enough for 2 productions */ - ma->seq[0] = h_new(HCFSequence, 1); - ma->seq[0]->items = h_new(HCFChoice*, 3); - ma->seq[0]->items[0] = a; - ma->seq[0]->items[1] = mar; - ma->seq[0]->items[2] = NULL; - ma->seq[1] = NULL; - - /* if not many1/sepBy1, attach epsilon */ - if (repeat->count == 0) { - ma->seq[1] = h_new(HCFSequence, 1); - ma->seq[1]->items = h_new(HCFChoice*, 2); - ma->seq[1]->items[0] = eps; - ma->seq[1]->items[1] = NULL; - ma->seq[2] = NULL; - } - - /* create second subrule */ - mar->type = HCF_CHOICE; - mar->seq = h_new(HCFSequence*, 3); - mar->seq[0] = h_new(HCFSequence, 1); - mar->seq[0]->items = h_new(HCFChoice*, 4); - mar->seq[0]->items[0] = sep; - mar->seq[0]->items[1] = a; - mar->seq[0]->items[2] = mar; // woo recursion! - mar->seq[0]->items[3] = NULL; - mar->seq[1] = h_new(HCFSequence, 1); - mar->seq[1]->items = h_new(HCFChoice*, 2); - mar->seq[1]->items[0] = eps; - mar->seq[1]->items[1] = NULL; - mar->seq[2] = NULL; - - /* attach reshapers */ - sep->reshape = h_act_ignore; - ma->reshape = h_act_flatten; - - return ma; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(repeat->p); + HCFS_BEGIN_CHOICE() { // Mar + HCFS_BEGIN_SEQ() { + if (repeat->sep != NULL) { + HCFS_DESUGAR(h_ignore__m(mm__, repeat->sep)); + } + //stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry. + HCFS_DESUGAR(repeat->p); + HCFS_APPEND(HCFS_THIS_CHOICE); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); // Mar + } + if (repeat->count == 0) { + HCFS_BEGIN_SEQ() { + //HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p())); + } HCFS_END_SEQ(); + } + HCFS_THIS_CHOICE->reshape = h_act_flatten; + } HCFS_END_CHOICE(); } static bool many_ctrvm(HRVMProg *prog, void *env) { @@ -266,16 +256,10 @@ static HParseResult* parse_length_value(void *env, HParseState *state) { return parse_many(&repeat, state); } -static HCFChoice* desugar_length_value(HAllocator *mm__, void *env) { - assert_message(0, "'h_length_value' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable length_value_vt = { .parse = parse_length_value, .isValidRegular = h_false, .isValidCF = h_false, - .desugar = desugar_length_value, }; HParser* h_length_value(const HParser* length, const HParser* value) { diff --git a/src/parsers/not.c b/src/parsers/not.c index 61b63221..8bf45dda 100644 --- a/src/parsers/not.c +++ b/src/parsers/not.c @@ -10,16 +10,10 @@ static HParseResult* parse_not(void* env, HParseState* state) { } } -static HCFChoice* desugar_not(HAllocator *mm__, void *env) { - assert_message(0, "'h_not' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable not_vt = { .parse = parse_not, .isValidRegular = h_false, /* see and.c for why */ - .isValidCF = h_false, /* also see and.c for why */ - .desugar = desugar_not, + .isValidCF = h_false, .compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this. }; diff --git a/src/parsers/nothing.c b/src/parsers/nothing.c index 645a2137..120c1e01 100644 --- a/src/parsers/nothing.c +++ b/src/parsers/nothing.c @@ -5,13 +5,9 @@ static HParseResult* parse_nothing() { return NULL; } -static HCFChoice *desugar_nothing(HAllocator *mm__, void *env) { - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 1); - ret->seq[0] = NULL; - ret->action = NULL; - return ret; +static void desugar_nothing(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_BEGIN_CHOICE() { + } HCFS_END_CHOICE(); } static bool nothing_ctrvm(HRVMProg *prog, void* env) { diff --git a/src/parsers/optional.c b/src/parsers/optional.c index 6cb5331a..c4282a91 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -25,18 +25,19 @@ static bool opt_isValidCF(void *env) { static HParsedToken* reshape_optional(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); - assert(p->ast->seq->used > 0); - HParsedToken *res = p->ast->seq->elements[0]; - if(res) - return res; + if (p->ast->seq->used > 0) { + HParsedToken *res = p->ast->seq->elements[0]; + if(res) + return res; + } HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); ret->token_type = TT_NONE; return ret; } -static HCFChoice* desugar_optional(HAllocator *mm__, void *env) { +static void desugar_optional(HAllocator *mm__, HCFStack *stk__, void *env) { HParser *p = (HParser*) env; /* optional(A) => @@ -44,28 +45,14 @@ static HCFChoice* desugar_optional(HAllocator *mm__, void *env) { -> \epsilon */ - HCFChoice *ret = h_new(HCFChoice, 1); - HCFChoice *a = h_desugar(mm__, p); - HCFChoice *eps = desugar_epsilon(mm__, NULL); - - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 3); /* enough for 2 productions */ - - ret->seq[0] = h_new(HCFSequence, 1); - ret->seq[0]->items = h_new(HCFChoice*, 2); - ret->seq[0]->items[0] = a; - ret->seq[0]->items[1] = NULL; - - ret->seq[1] = h_new(HCFSequence, 1); - ret->seq[1]->items = h_new(HCFChoice*, 2); - ret->seq[1]->items[0] = eps; - ret->seq[1]->items[1] = NULL; - - ret->seq[2] = NULL; - - ret->reshape = reshape_optional; - - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(p); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = reshape_optional; + } HCFS_END_CHOICE(); } static bool h_svm_action_optional(HArena *arena, HSVMContext *ctx, void *env) { diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index af5f9595..aeb202bf 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -3,6 +3,7 @@ #include "../hammer.h" #include "../internal.h" #include "../backends/regex.h" +#include "../backends/contextfree.h" #define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) #define a_new(typ, count) a_new_(state->arena, typ, count) @@ -25,17 +26,12 @@ static inline size_t token_length(HParseResult *pr) { } /* Epsilon rules happen during desugaring. This handles them. */ -static inline HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) { - static HCFChoice *res_seq_l[] = {NULL}; - static HCFSequence res_seq = {res_seq_l}; - static HCFSequence *res_ch_l[] = {&res_seq, NULL}; - static HCFChoice res_ch = { - .type = HCF_CHOICE, - .seq = res_ch_l, - .action = NULL, - .reshape = h_act_ignore - }; - return &res_ch; +static inline void desugar_epsilon(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = h_act_ignore; + } HCFS_END_CHOICE(); } #endif // HAMMER_PARSE_INTERNAL__H diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index aa600231..eff46109 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -64,22 +64,15 @@ static HParsedToken *reshape_sequence(const HParseResult *p) { return res; } -static HCFChoice* desugar_sequence(HAllocator *mm__, void *env) { +static void desugar_sequence(HAllocator *mm__, HCFStack *stk__, void *env) { HSequence *s = (HSequence*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, s->len+1); - for (size_t i=0; i<s->len; ++i) { - seq->items[i] = h_desugar(mm__, s->p_array[i]); - } - seq->items[s->len] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = NULL; - ret->reshape = reshape_sequence; - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i = 0; i < s->len; i++) + HCFS_DESUGAR(s->p_array[i]); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = reshape_sequence; + } HCFS_END_CHOICE(); } static bool sequence_ctrvm(HRVMProg *prog, void *env) { diff --git a/src/parsers/token.c b/src/parsers/token.c index 2346a45e..97886b7a 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -44,25 +44,15 @@ static HParsedToken *reshape_token(const HParseResult *p) { return tok; } -static HCFChoice* desugar_token(HAllocator *mm__, void *env) { +static void desugar_token(HAllocator *mm__, HCFStack *stk__, void *env) { HToken *tok = (HToken*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, 1+tok->len); - for (size_t i=0; i<tok->len; ++i) { - seq->items[i] = h_new(HCFChoice, 1); - seq->items[i]->type = HCF_CHAR; - seq->items[i]->chr = tok->str[i]; - } - seq->items[tok->len] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = NULL; - ret->pred = NULL; - ret->reshape = reshape_token; - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i = 0; i < tok->len; i++) + HCFS_ADD_CHAR(tok->str[i]); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = reshape_token; + } HCFS_END_CHOICE(); } static bool token_ctrvm(HRVMProg *prog, void *env) { diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index 3b8a8236..18255ac2 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -12,7 +12,7 @@ static HParseResult* parse_unimplemented(void* env, HParseState *state) { return &result; } -static HCFChoice* desugar_unimplemented(HAllocator *mm__, void *env) { +static HCFChoice* desugar_unimplemented(HAllocator *mm__, HCFStack *stk__, void *env) { assert_message(0, "'h_unimplemented' is not context-free, can't be desugared"); return NULL; } diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 454e04ed..737affde 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -17,39 +17,26 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { static const char SPACE_CHRS[6] = {' ', '\f', '\n', '\r', '\t', '\v'}; -static HCFChoice* desugar_whitespace(HAllocator *mm__, void *env) { - HCFChoice *ws = h_new(HCFChoice, 1); - ws->type = HCF_CHOICE; - ws->seq = h_new(HCFSequence*, 3); - HCFSequence *nonempty = h_new(HCFSequence, 1); - nonempty->items = h_new(HCFChoice*, 3); - nonempty->items[0] = h_new(HCFChoice, 1); - nonempty->items[0]->type = HCF_CHARSET; - nonempty->items[0]->charset = new_charset(mm__); - for(size_t i=0; i<sizeof(SPACE_CHRS); i++) - charset_set(nonempty->items[0]->charset, SPACE_CHRS[i], 1); - nonempty->items[1] = ws; // yay circular pointer! - nonempty->items[2] = NULL; - ws->seq[0] = nonempty; - HCFSequence *empty = h_new(HCFSequence, 1); - empty->items = h_new(HCFChoice*, 1); - empty->items[0] = NULL; - ws->seq[1] = empty; - ws->seq[2] = NULL; - - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = h_new(HCFSequence, 1); - ret->seq[0]->items = h_new(HCFChoice*, 3); - ret->seq[0]->items[0] = ws; - ret->seq[0]->items[1] = h_desugar(mm__, (HParser *)env); - ret->seq[0]->items[2] = NULL; - ret->seq[1] = NULL; +static void desugar_whitespace(HAllocator *mm__, HCFStack *stk__, void *env) { - ret->reshape = h_act_last; + HCharset ws_cs = new_charset(mm__); + for(size_t i=0; i<sizeof(SPACE_CHRS); i++) + charset_set(ws_cs, SPACE_CHRS[i], 1); - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHARSET(ws_cs); + HCFS_APPEND(HCFS_THIS_CHOICE); // yay circular pointer! + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); + HCFS_DESUGAR( (HParser*)env ); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = h_act_last; + } HCFS_END_CHOICE(); } static bool ws_isValidRegular(void *env) { diff --git a/src/parsers/xor.c b/src/parsers/xor.c index ccd37fdc..e031d5d5 100644 --- a/src/parsers/xor.c +++ b/src/parsers/xor.c @@ -31,16 +31,10 @@ static HParseResult* parse_xor(void *env, HParseState *state) { } } -static HCFChoice* desugar_xor(HAllocator *mm__, void *env) { - assert_message(0, "'h_xor' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable xor_vt = { .parse = parse_xor, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? - .desugar = desugar_xor, .compile_to_rvm = h_not_regular, }; diff --git a/src/system_allocator.c b/src/system_allocator.c index 5f3e4844..b34810fa 100644 --- a/src/system_allocator.c +++ b/src/system_allocator.c @@ -2,9 +2,14 @@ #include <stdlib.h> #include "internal.h" +//#define DEBUG__MEMFILL 0xFF + static void* system_alloc(HAllocator *allocator, size_t size) { - void* ptr = calloc(size + sizeof(size_t), 1); + void* ptr = malloc(size + sizeof(size_t)); +#ifdef DEBUG__MEMFILL + memset(ptr, DEBUG__MEMFILL, size + sizeof(size_t)); +#endif *(size_t*)ptr = size; return ptr + sizeof(size_t); } @@ -13,10 +18,12 @@ static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) { if (ptr == NULL) return system_alloc(allocator, size); ptr = realloc(ptr - sizeof(size_t), size + sizeof(size_t)); - size_t old_size = *(size_t*)ptr; *(size_t*)ptr = size; +#ifdef DEBUG__MEMFILL + size_t old_size = *(size_t*)ptr; if (size > old_size) - memset(ptr+sizeof(size_t)+old_size, 0, size - old_size); + memset(ptr+sizeof(size_t)+old_size, DEBUG__MEMFILL, size - old_size); +#endif return ptr + sizeof(size_t); } diff --git a/src/test_suite.h b/src/test_suite.h index f6187126..168ab641 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -162,28 +162,28 @@ #define g_check_terminal(grammar, parser) \ - g_check_hashtable_absent(grammar->nts, h_desugar(&system_allocator, parser)) + g_check_hashtable_absent(grammar->nts, h_desugar(&system_allocator, NULL, parser)) #define g_check_nonterminal(grammar, parser) \ - g_check_hashtable_present(grammar->nts, h_desugar(&system_allocator, parser)) + g_check_hashtable_present(grammar->nts, h_desugar(&system_allocator, NULL, parser)) #define g_check_derives_epsilon(grammar, parser) \ - g_check_hashtable_present(grammar->geneps, h_desugar(&system_allocator, parser)) + g_check_hashtable_present(grammar->geneps, h_desugar(&system_allocator, NULL, parser)) #define g_check_derives_epsilon_not(grammar, parser) \ - g_check_hashtable_absent(grammar->geneps, h_desugar(&system_allocator, parser)) + g_check_hashtable_absent(grammar->geneps, h_desugar(&system_allocator, NULL, parser)) #define g_check_firstset_present(k, grammar, parser, str) \ - g_check_stringmap_present(h_first(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_present(h_first(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) #define g_check_firstset_absent(k, grammar, parser, str) \ - g_check_stringmap_absent(h_first(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_absent(h_first(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) #define g_check_followset_present(k, grammar, parser, str) \ - g_check_stringmap_present(h_follow(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_present(h_follow(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) #define g_check_followset_absent(k, grammar, parser, str) \ - g_check_stringmap_absent(h_follow(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_absent(h_follow(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) -- GitLab